This page is part of an archival collection and is no longer actively maintained.

It may contain outdated information and may not meet current or future WCAG accessibility standards. We provide this content, its subpages, and associated links for historical reference only. If you need assistance, please contact support@cs.washington.edu.

VFML: ExampleGroupStats.h Source File
Main Page | Modules | Data Structures | File List | Globals | Related Pages

ExampleGroupStats.h

Go to the documentation of this file.
00001 #ifndef EXAMPLEGROUPSTATSH
00002 #define EXAMPLEGROUPSTATSH
00003 
00004 #include "vfml.h"
00005 
00021 typedef struct _Bin_ {  /* One bin record; NOTE(review): presumably the element type of ContinuousTracker.bins -- TODO confirm */
00022    float lowerBound, upperBound;  /* presumably the value range covered by this bin; open/closed ends not visible here -- TODO confirm */
00023    float *classTotals;  /* float array; presumably one total per class -- TODO confirm against BinNew */
00024    float exampleCount;  /* stored as float here, whereas ContinuousTracker.exampleCount is long */
00025    int boundryClass;  /* sic: the "boundry" spelling is part of the public field name */
00026    long boundryCount;  /* sic: same spelling; semantics not visible in this header */
00027 } Bin, *BinPtr;  /* BinPtr is the pointer alias taken/returned by the Bin* functions */
00028 
00029 BinPtr BinNew(ExampleSpecPtr spec);  /* returns a BinPtr for the given spec; presumably paired with BinFree -- TODO confirm */
00030 void BinFree(BinPtr bin);  /* takes the bin to release; implementation not shown in this header */
00031 void BinWrite(BinPtr bin, FILE *out);  /* takes a bin and an output stream; output format not visible here */
00032 
00033 typedef struct _ContinuousTracker_ {  /* State operated on by the ContinuousTracker* functions declared after this struct */
00034    ExampleSpecPtr spec;  /* same handle type that ContinuousTrackerNew receives */
00035    float *classTotals;  /* float array; presumably one total per class -- TODO confirm */
00036    long exampleCount;  /* long here (Bin.exampleCount is float) */
00037    VoidListPtr bins;  /* list handle; presumably holds BinPtr entries -- TODO confirm */
00038    float min, max;  /* presumably smallest/largest value observed -- TODO confirm */
00039    int initMinMax;  /* int used as a flag; presumably tracks whether min/max are set -- TODO confirm */
00040    int timesPrunedCount;  /* presumably counts prune passes (cf. ContinuousTrackerPruneSplitsEntropy) -- TODO confirm */
00041    int addingNewBins;  /* int used as a flag; presumably whether new bins may still be created -- TODO confirm */
00042 } ContinuousTracker, *ContinuousTrackerPtr;  /* ContinuousTrackerPtr is the handle type used by the API */
00043 
00044 ContinuousTrackerPtr ContinuousTrackerNew(ExampleSpecPtr spec);  /* returns a tracker handle for spec; presumably paired with ContinuousTrackerFree -- TODO confirm */
00045 void ContinuousTrackerFree(ContinuousTrackerPtr ct);  /* takes the tracker to release; implementation not shown here */
00046 void ContinuousTrackerAddExample(ContinuousTrackerPtr ct, 
00047                                     float value, int theclass);  /* takes one attribute value and its class number */
00048 float ContinuousTrackerGetPercentBelowThreshold(ContinuousTrackerPtr ct,
00049                                                       float thresh);  /* NOTE(review): fraction vs. 0-100 scale not visible here -- confirm */
00050 
00051 int ContinuousTrackerNumSplitThresholds(ContinuousTrackerPtr ct);  /* returns an int count */
00052 int ContinuousTrackerGetMostCommonClassInPartition(ContinuousTrackerPtr ct,
00053                         float threshold, int above);  /* 'above' is an int; presumably selects the side of threshold -- TODO confirm */
00054 
00055 void ContinuousTrackerEntropyAttributeSplit(ContinuousTrackerPtr ct,
00056                         float *firstIndex, float *firstThresh,
00057                         float *secondIndex, float *secondThresh);  /* void; the four float* are presumably out-parameters for the two best splits -- TODO confirm */
00058 void ContinuousTrackerGiniAttributeSplit(ContinuousTrackerPtr ct,
00059                         float *firstIndex, float *firstThresh,
00060                         float *secondIndex, float *secondThresh);  /* same parameter list as the entropy variant above */
00061 
00062 void ContinuousTrackerDisableWorseThanEntropy(ContinuousTrackerPtr ct, 
00063                                               float entropyThresh);  /* takes an entropy threshold; what is disabled is not visible here */
00064 void ContinuousTrackerDisableWorseThanGini(ContinuousTrackerPtr ct, 
00065                                               float giniThresh);  /* gini counterpart of the declaration above */
00066 int ContinuousTrackerPruneSplitsEntropy(ContinuousTrackerPtr ct,
00067                                int maxSplits, int pruneDownTo);  /* returns int; meaning of the return value not visible here */
00068 
00070 typedef struct _ExampleGroupStats_ {  /* State operated on by the ExampleGroupStats* functions declared after this struct */
00071    ExampleSpecPtr spec;  /* same handle type passed as 'es' to ExampleGroupStatsNew */
00072    AttributeTrackerPtr attributeTracker;  /* same handle type returned by ExampleGroupStatsGetAttributeTracker */
00073 
00074    long examplesSeen;  /* long counter; same type ExampleGroupStatsNumExamplesSeen returns */
00075 
00076    long *classTotals;  /* long array here (ContinuousTracker and Bin use float arrays) */
00077 
00078    /* VoidListPtr for each attribute:
00079         for discrete attributes another VoidListPtr for each value, and an
00080                             array of longs with the count for each class 
00081         for continuous attributes a continuous tracker structure
00082    */
00083    VoidAListPtr stats;  /* NOTE(review): declared VoidAListPtr although the comment above says VoidListPtr -- confirm which is intended */
00084 
00085 } ExampleGroupStats, *ExampleGroupStatsPtr;  /* ExampleGroupStatsPtr is the handle type used by the API */
00086 
00087 
00100 ExampleGroupStatsPtr ExampleGroupStatsNew(ExampleSpecPtr es, 
00101                                        AttributeTrackerPtr at);  /* returns a stats handle for spec 'es' and tracker 'at'; presumably paired with ExampleGroupStatsFree -- TODO confirm */
00102 
00104 void ExampleGroupStatsFree(ExampleGroupStatsPtr egs);  /* takes the handle to release; ownership of spec/tracker not visible here */
00105 
00115 void ExampleGroupStatsDeactivate(ExampleGroupStatsPtr egs);  /* NOTE(review): effect not visible in this listing (original doc comment is not shown) */
00116 
00118 void ExampleGroupStatsReactivate(ExampleGroupStatsPtr egs);  /* counterpart by name to Deactivate; effect not visible here */
00119 
00121 void ExampleGroupStatsAddExample(ExampleGroupStatsPtr egs, ExamplePtr e);  /* takes one example; same parameter list as ExampleGroupStatsRemoveExample */
00122 
00124 void ExampleGroupStatsWrite(ExampleGroupStatsPtr egs, FILE *out);  /* takes an output stream; output format not visible here */
00125 
00132 long ExampleGroupStatsNumExamplesSeen(ExampleGroupStatsPtr egs);  /* returns long, matching the type of the examplesSeen field */
00133 
00134 
00136 AttributeTrackerPtr ExampleGroupStatsGetAttributeTracker(
00137                                       ExampleGroupStatsPtr egs);  /* returns an AttributeTrackerPtr, matching the attributeTracker field type */
00138 
00145 int ExampleGroupStatsIsAttributeActive(ExampleGroupStatsPtr egs, int num);  /* returns int; presumably nonzero means active, 'num' is the attribute number -- TODO confirm */
00146 
00154 void ExampleGroupStatsIgnoreAttribute(ExampleGroupStatsPtr egs, int num);  /* takes an attribute number; whether this is reversible is not visible here */
00155 
00156 
00165 int ExampleGroupStatsGetMostCommonClassLaplace(ExampleGroupStatsPtr egs, 
00166                                                int addClass, int addCount);  /* returns an int class number; presumably addCount is added to addClass before comparing -- TODO confirm */
00168 int ExampleGroupStatsGetMostCommonClass(ExampleGroupStatsPtr egs);  /* returns an int class number; tie-breaking not visible here */
00169 
00171 long ExampleGroupStatsGetMostCommonClassCount(ExampleGroupStatsPtr egs);  /* returns long, matching the element type of classTotals */
00172 
00173 
00178 int ExampleGroupStatsGetMostCommonClassForAttVal(ExampleGroupStatsPtr egs,
00179                                                 int att, int val);  /* takes attribute and value numbers; presumably for discrete attributes -- TODO confirm */
00180 
00182 int ExampleGroupStatsIsPure(ExampleGroupStatsPtr egs);  /* returns int; presumably nonzero when only one class has been seen -- TODO confirm */
00183 
00188 float ExampleGroupStatsGetValuePercent(ExampleGroupStatsPtr egs, 
00189                                        int attNum, int valNum);  /* NOTE(review): fraction vs. 0-100 scale not visible here -- confirm */
00190 float ExampleGroupStatsGetValueGivenClassPercent(ExampleGroupStatsPtr egs, 
00191                                         int attNum, int valNum, int classNum);  /* as above, with an additional class number */
00192 
00202 double ExampleGroupStatsGetValueGivenClassMEstimate(ExampleGroupStatsPtr egs, 
00203                                         int attNum, int valNum, int classNum);  /* returns double (the *Percent variants return float); estimate parameters not visible here */
00204 
00206 float ExampleGroupStatsGetClassPercent(ExampleGroupStatsPtr egs, int classNum);  /* returns float for one class number */
00207 
00212 float ExampleGroupStatsGetPercentBelowThreshold(ExampleGroupStatsPtr egs,
00213                                      int attNum, float thresh);  /* same shape as ContinuousTrackerGetPercentBelowThreshold plus attNum; presumably for continuous attributes -- TODO confirm */
00214 
00215 /* for naive bayes */
00216 
00226 double ExampleGroupStatsGetValueGivenClassMEstimateLogP(ExampleGroupStatsPtr 
00227                              egs, int attNum, int valNum, int classNum);  /* returns double; presumably the log of the m-estimate above, log base not visible -- TODO confirm */
00228 
00230 double ExampleGroupStatsGetClassLogP(ExampleGroupStatsPtr egs, int classNum);  /* returns double; log base not visible here */
00231 
00232 
00234 float ExampleGroupStatsEntropyTotal(ExampleGroupStatsPtr egs);  /* returns float; log base/units not visible here */
00235 
00240 float ExampleGroupStatsEntropyDiscreteAttributeSplit(ExampleGroupStatsPtr egs,
00241                                                            int attNum);  /* returns float for one attribute number */
00242 
00251 float ExampleGroupStatsEntropyPlusDiscreteAttributeSplit(ExampleGroupStatsPtr 
00252                                  egs, int attNum, float delta);  /* NOTE(review): role of delta not visible in this listing (original doc comment is not shown) */
00253 
00254 
00263 float ExampleGroupStatsEntropyMinusDiscreteAttributeSplit(ExampleGroupStatsPtr 
00264                                  egs, int attNum, float delta);  /* same parameter list as the Plus variant above */
00265 
00266 
00279 void ExampleGroupStatsEntropyContinuousAttributeSplit(ExampleGroupStatsPtr egs, 
00280                  int attNum, float *firstIndex, float *firstThresh,
00281                              float *secondIndex, float *secondThresh);  /* same four float* as ContinuousTrackerEntropyAttributeSplit, plus attNum; presumably out-parameters -- TODO confirm */
00282 
00284 float ExampleGroupStatsGiniTotal(ExampleGroupStatsPtr egs);  /* returns float; gini counterpart by name to ExampleGroupStatsEntropyTotal */
00285 
00291 float ExampleGroupStatsGiniDiscreteAttributeSplit(ExampleGroupStatsPtr egs, 
00292                                                       int attNum);  /* returns float for one attribute number */
00293 
00304 void ExampleGroupStatsGiniContinuousAttributeSplit(ExampleGroupStatsPtr egs, 
00305                  int attNum, float *firstIndex, float *firstThresh,
00306                              float *secondIndex, float *secondThresh);  /* same four float* as ContinuousTrackerGiniAttributeSplit, plus attNum; presumably out-parameters -- TODO confirm */
00307 
00319 void ExampleGroupStatsIgnoreSplitsWorseThanEntropy(ExampleGroupStatsPtr egs,
00320                                    int attNum, float entropyThresh);  /* cf. ContinuousTrackerDisableWorseThanEntropy; presumably applies to attribute attNum -- TODO confirm */
00321 
00333 void ExampleGroupStatsIgnoreSplitsWorseThanGini(ExampleGroupStatsPtr egs,
00334                                                 int attNum, float giniThresh);  /* gini counterpart of the declaration above */
00335 
00336 
00351 int ExampleGroupStatsLimitSplitsEntropy(ExampleGroupStatsPtr egs, int attNum,
00352                                int maxSplits, int pruneDownTo);  /* same trailing parameters as ContinuousTrackerPruneSplitsEntropy; return meaning not visible here */
00353 
00358 void ExampleGroupStatsStopAddingSplits(ExampleGroupStatsPtr egs, int attNum);  /* presumably related to ContinuousTracker.addingNewBins -- TODO confirm */
00359 
00364 int ExampleGroupStatsNumSplitThresholds(ExampleGroupStatsPtr egs, int attNum);  /* cf. ContinuousTrackerNumSplitThresholds, with an attribute number */
00365 
00370 int ExampleGroupStatsGetMostCommonClassAboveThreshold(ExampleGroupStatsPtr egs,
00371                       int attNum, float threshold);  /* returns an int class number; cf. ContinuousTrackerGetMostCommonClassInPartition */
00372 int ExampleGroupStatsGetMostCommonClassBelowThreshold(ExampleGroupStatsPtr egs,
00377                       int attNum, float threshold);  /* same parameter list as the Above variant; handling of values equal to threshold not visible here */
00378 
00379 /* undocumented, used by CVFDT  */
00380 void ExampleGroupStatsRemoveExample(ExampleGroupStatsPtr egs, ExamplePtr e);  /* same parameter list as ExampleGroupStatsAddExample; presumably reverses its effect -- TODO confirm */
00381 
00382 #endif /* EXAMPLEGROUPSTATSH */

Generated for VFML by doxygen. Hosted by SourceForge.net.