Main Page | Modules | Data Structures | File List | Globals | Related Pages

ExampleGroupStats.h

Go to the documentation of this file.
00001 #ifndef EXAMPLEGROUPSTATSH
00002 #define EXAMPLEGROUPSTATSH
00003 
00004 #include "vfml.h"
00005 
00021 typedef struct _Bin_ { /* One bin record; Bin names the struct and BinPtr is its pointer alias. NOTE(review): presumably the element type stored in ContinuousTracker.bins -- confirm in the .c file */
00022    float lowerBound, upperBound; /* bounds of the bin; whether each end is inclusive is not visible in this header -- TODO confirm */
00023    float *classTotals; /* pointer to floats; presumably one running total per class -- TODO confirm length and ownership */
00024    float exampleCount; /* kept as a float here, whereas ContinuousTracker.exampleCount is a long */
00025    int boundryClass; /* sic: the "boundry" spelling is part of the field name; presumably the class observed at a bin boundary -- TODO confirm */
00026    long boundryCount; /* presumably a count that goes with boundryClass -- TODO confirm */
00027 } Bin, *BinPtr;
00028 
00029 BinPtr BinNew(ExampleSpecPtr spec); /* Takes an ExampleSpecPtr and returns a BinPtr; presumably allocates a bin that is later released with BinFree -- TODO confirm */
00030 void BinFree(BinPtr bin); /* Takes a BinPtr, returns nothing; presumably releases a bin obtained from BinNew -- TODO confirm */
00031 void BinWrite(BinPtr bin, FILE *out); /* Takes a bin and a stdio stream; presumably prints the bin to out -- output format is not visible in this header */
00032 
00033 typedef struct _ContinuousTracker_ { /* Tracker record; ContinuousTrackerPtr is its pointer alias. The comment on ExampleGroupStats.stats says one such structure is kept for each continuous attribute. */
00034    ExampleSpecPtr spec; /* presumably the spec handed to ContinuousTrackerNew -- TODO confirm */
00035    float *classTotals; /* pointer to floats; presumably one total per class -- TODO confirm length and ownership */
00036    long exampleCount; /* integral count here, whereas Bin.exampleCount is a float */
00037    VoidListPtr bins; /* generic list; presumably holds BinPtr elements -- TODO confirm */
00038    float min, max; /* presumably the smallest and largest attribute value seen so far -- TODO confirm */
00039    int initMinMax; /* int used as a flag, presumably recording whether min/max have been initialized -- TODO confirm polarity */
00040    int timesPrunedCount; /* presumably how many times the bins have been pruned (see ContinuousTrackerPruneSplitsEntropy) -- TODO confirm */
00041    int addingNewBins; /* int used as a flag, presumably cleared to stop creating new bins -- TODO confirm */
00042 } ContinuousTracker, *ContinuousTrackerPtr;
00043 
00044 ContinuousTrackerPtr ContinuousTrackerNew(ExampleSpecPtr spec); /* Takes an ExampleSpecPtr and returns a tracker pointer; presumably paired with ContinuousTrackerFree -- TODO confirm */
00045 void ContinuousTrackerFree(ContinuousTrackerPtr ct); /* presumably releases a tracker obtained from ContinuousTrackerNew -- TODO confirm */
00046 void ContinuousTrackerAddExample(ContinuousTrackerPtr ct, 
00047                                     float value, int theclass); /* Feeds one (value, class) observation to the tracker; "theclass" presumably avoids the C++ keyword "class" */
00048 float ContinuousTrackerGetPercentBelowThreshold(ContinuousTrackerPtr ct,
00049                                                       float thresh); /* Returns a float; NOTE(review): whether it is a 0-1 fraction or a 0-100 percentage is not visible here -- confirm */
00050 
00051 int ContinuousTrackerNumSplitThresholds(ContinuousTrackerPtr ct); /* Returns an int; presumably the number of candidate split thresholds currently tracked -- TODO confirm */
00052 int ContinuousTrackerGetMostCommonClassInPartition(ContinuousTrackerPtr ct,
00053                         float threshold, int above); /* Returns an int class; "above" is an int, presumably a flag selecting the side of threshold to inspect -- TODO confirm */
00054 
00055 void ContinuousTrackerEntropyAttributeSplit(ContinuousTrackerPtr ct,
00056                         float *firstIndex, float *firstThresh,
00057                         float *secondIndex, float *secondThresh); /* Returns void and takes four float pointers; presumably outputs for the best and second-best split (index value and threshold) -- TODO confirm */
00058 void ContinuousTrackerGiniAttributeSplit(ContinuousTrackerPtr ct,
00059                         float *firstIndex, float *firstThresh,
00060                         float *secondIndex, float *secondThresh); /* Same parameter list as the entropy variant above; presumably the Gini counterpart -- TODO confirm */
00061 
00062 void ContinuousTrackerDisableWorseThanEntropy(ContinuousTrackerPtr ct, 
00063                                               float entropyThresh); /* presumably disables candidate splits scoring worse than entropyThresh -- TODO confirm comparison direction */
00064 void ContinuousTrackerDisableWorseThanGini(ContinuousTrackerPtr ct, 
00065                                               float giniThresh); /* Same shape as the entropy variant above, taking a Gini threshold */
00066 int ContinuousTrackerPruneSplitsEntropy(ContinuousTrackerPtr ct,
00067                                int maxSplits, int pruneDownTo); /* Returns an int whose meaning is not visible in this header; presumably prunes to pruneDownTo once maxSplits is exceeded -- TODO confirm */
00068 
00070 typedef struct _ExampleGroupStats_ { /* Statistics record for a group of examples; ExampleGroupStatsPtr is its pointer alias. */
00071    ExampleSpecPtr spec; /* presumably the spec passed to ExampleGroupStatsNew -- TODO confirm */
00072    AttributeTrackerPtr attributeTracker; /* presumably what ExampleGroupStatsGetAttributeTracker returns -- TODO confirm */
00073 
00074    long examplesSeen; /* presumably what ExampleGroupStatsNumExamplesSeen returns -- TODO confirm */
00075 
00076    long *classTotals; /* pointer to longs (the Bin and ContinuousTracker classTotals are float pointers); presumably one count per class -- TODO confirm */
00077 
00078    /* One entry for each attribute (this comment originally said VoidListPtr;
00079         the field below is declared as VoidAListPtr):
00080         for discrete attributes another VoidListPtr for each value, and an
00081                             array of longs with the count for each class;
00082         for continuous attributes a continuous tracker structure.
00083    */
00083    VoidAListPtr stats;
00084 
00085 } ExampleGroupStats, *ExampleGroupStatsPtr;
00086 
00087 
00100 ExampleGroupStatsPtr ExampleGroupStatsNew(ExampleSpecPtr es, 
00101                                        AttributeTrackerPtr at); /* Takes a spec and an attribute tracker and returns a stats pointer; NOTE(review): ownership of "at" after the call is not visible here -- confirm */
00102 
00104 void ExampleGroupStatsFree(ExampleGroupStatsPtr egs); /* presumably releases a structure obtained from ExampleGroupStatsNew -- TODO confirm */
00105 
00115 void ExampleGroupStatsDeactivate(ExampleGroupStatsPtr egs); /* Counterpart of ExampleGroupStatsReactivate by name; what is released or kept is not visible in this header */
00116 
00118 void ExampleGroupStatsReactivate(ExampleGroupStatsPtr egs); /* Counterpart of ExampleGroupStatsDeactivate by name -- TODO confirm what state survives a deactivate/reactivate cycle */
00119 
00121 void ExampleGroupStatsAddExample(ExampleGroupStatsPtr egs, ExamplePtr e); /* Feeds one example to the statistics; ExampleGroupStatsRemoveExample has the same parameter list */
00122 
00124 void ExampleGroupStatsWrite(ExampleGroupStatsPtr egs, FILE *out); /* Takes a stdio stream; presumably prints the statistics to out -- format not visible here */
00125 
00132 long ExampleGroupStatsNumExamplesSeen(ExampleGroupStatsPtr egs); /* Returns a long, the same type as the examplesSeen field */
00133 
00134 
00136 AttributeTrackerPtr ExampleGroupStatsGetAttributeTracker(
00137                                       ExampleGroupStatsPtr egs); /* Returns an AttributeTrackerPtr, the same type as the attributeTracker field */
00138 
00145 int ExampleGroupStatsIsAttributeActive(ExampleGroupStatsPtr egs, int num); /* Returns an int, presumably used as a boolean; num is presumably the attribute index -- TODO confirm */
00146 
00154 void ExampleGroupStatsIgnoreAttribute(ExampleGroupStatsPtr egs, int num); /* presumably marks attribute num as inactive -- TODO confirm whether this can be undone */
00155 
00156 
00165 int ExampleGroupStatsGetMostCommonClassLaplace(ExampleGroupStatsPtr egs, 
00166                                                int addClass, int addCount); /* Returns an int class; presumably addCount extra examples are credited to addClass before picking the most common class -- TODO confirm */
00168 int ExampleGroupStatsGetMostCommonClass(ExampleGroupStatsPtr egs); /* Returns an int class; tie-breaking and the empty case are not visible in this header */
00169 
00171 long ExampleGroupStatsGetMostCommonClassCount(ExampleGroupStatsPtr egs); /* Returns a long, the element type of the classTotals field */
00172 
00173 
00178 int ExampleGroupStatsGetMostCommonClassForAttVal(ExampleGroupStatsPtr egs,
00179                                                 int att, int val); /* att and val are ints, presumably an attribute index and a value index of a discrete attribute -- TODO confirm */
00180 
00182 int ExampleGroupStatsIsPure(ExampleGroupStatsPtr egs); /* Returns an int, presumably used as a boolean -- TODO confirm */
00183 
00188 float ExampleGroupStatsGetValuePercent(ExampleGroupStatsPtr egs, 
00189                                        int attNum, int valNum); /* Returns a float; NOTE(review): 0-1 fraction versus 0-100 percentage is not visible here -- confirm */
00190 float ExampleGroupStatsGetValueGivenClassPercent(ExampleGroupStatsPtr egs, 
00191                                         int attNum, int valNum, int classNum); /* Same as above with an extra classNum argument; returns a float */
00192 
00202 double ExampleGroupStatsGetValueGivenClassMEstimate(ExampleGroupStatsPtr egs, 
00203                                         int attNum, int valNum, int classNum); /* Returns a double, unlike the float-returning Percent functions above */
00204 
00206 float ExampleGroupStatsGetClassPercent(ExampleGroupStatsPtr egs, int classNum); /* Returns a float for one class -- TODO confirm scale */
00207 
00212 float ExampleGroupStatsGetPercentBelowThreshold(ExampleGroupStatsPtr egs,
00213                                      int attNum, float thresh); /* Same shape as ContinuousTrackerGetPercentBelowThreshold plus attNum; presumably only valid for continuous attributes -- TODO confirm */
00214 
00215 /* for naive bayes */
00216 
00226 double ExampleGroupStatsGetValueGivenClassMEstimateLogP(ExampleGroupStatsPtr 
00227                              egs, int attNum, int valNum, int classNum); /* Returns a double; presumably the logarithm of the m-estimate above -- TODO confirm log base */
00228 
00230 double ExampleGroupStatsGetClassLogP(ExampleGroupStatsPtr egs, int classNum); /* Returns a double; presumably a log-probability of classNum -- TODO confirm log base */
00231 
00232 
00234 float ExampleGroupStatsEntropyTotal(ExampleGroupStatsPtr egs); /* Returns a float; presumably the entropy of the class distribution in egs -- TODO confirm */
00235 
00240 float ExampleGroupStatsEntropyDiscreteAttributeSplit(ExampleGroupStatsPtr egs,
00241                                                            int attNum); /* Returns a single float for attribute attNum; presumably only valid for discrete attributes -- TODO confirm */
00242 
00251 float ExampleGroupStatsEntropyPlusDiscreteAttributeSplit(ExampleGroupStatsPtr 
00252                                  egs, int attNum, float delta); /* Takes an extra float delta; how delta enters the result is not visible in this header */
00253 
00254 
00263 float ExampleGroupStatsEntropyMinusDiscreteAttributeSplit(ExampleGroupStatsPtr 
00264                                  egs, int attNum, float delta); /* Same parameter list as the Plus variant above */
00265 
00266 
00279 void ExampleGroupStatsEntropyContinuousAttributeSplit(ExampleGroupStatsPtr egs, 
00280                  int attNum, float *firstIndex, float *firstThresh,
00281                              float *secondIndex, float *secondThresh); /* Returns void with four float pointers, matching ContinuousTrackerEntropyAttributeSplit; presumably outputs -- TODO confirm */
00282 
00284 float ExampleGroupStatsGiniTotal(ExampleGroupStatsPtr egs); /* Returns a float; presumably the Gini index of the class distribution in egs -- TODO confirm */
00285 
00291 float ExampleGroupStatsGiniDiscreteAttributeSplit(ExampleGroupStatsPtr egs, 
00292                                                       int attNum); /* Same shape as the entropy discrete-split function above */
00293 
00304 void ExampleGroupStatsGiniContinuousAttributeSplit(ExampleGroupStatsPtr egs, 
00305                  int attNum, float *firstIndex, float *firstThresh,
00306                              float *secondIndex, float *secondThresh); /* Returns void with four float pointers, matching ContinuousTrackerGiniAttributeSplit; presumably outputs -- TODO confirm */
00307 
00319 void ExampleGroupStatsIgnoreSplitsWorseThanEntropy(ExampleGroupStatsPtr egs,
00320                                    int attNum, float entropyThresh); /* Same shape as ContinuousTrackerDisableWorseThanEntropy plus attNum; presumably forwards to it -- TODO confirm */
00321 
00333 void ExampleGroupStatsIgnoreSplitsWorseThanGini(ExampleGroupStatsPtr egs,
00334                                                 int attNum, float giniThresh); /* Same shape as ContinuousTrackerDisableWorseThanGini plus attNum; presumably forwards to it -- TODO confirm */
00335 
00336 
00351 int ExampleGroupStatsLimitSplitsEntropy(ExampleGroupStatsPtr egs, int attNum,
00352                                int maxSplits, int pruneDownTo); /* Returns an int and shares maxSplits/pruneDownTo with ContinuousTrackerPruneSplitsEntropy; meaning of the return value is not visible here */
00353 
00358 void ExampleGroupStatsStopAddingSplits(ExampleGroupStatsPtr egs, int attNum); /* presumably clears ContinuousTracker.addingNewBins for attribute attNum -- TODO confirm */
00359 
00364 int ExampleGroupStatsNumSplitThresholds(ExampleGroupStatsPtr egs, int attNum); /* Same shape as ContinuousTrackerNumSplitThresholds plus attNum */
00365 
00370 int ExampleGroupStatsGetMostCommonClassAboveThreshold(ExampleGroupStatsPtr egs,
00371                       int attNum, float threshold); /* Returns an int class; presumably the "above" case of ContinuousTrackerGetMostCommonClassInPartition -- TODO confirm */
00372 int ExampleGroupStatsGetMostCommonClassBelowThreshold(ExampleGroupStatsPtr egs,
00377                       int attNum, float threshold); /* Returns an int class; presumably the "below" case. The listing numbers jump from 00372 to 00377 inside this declaration. */
00378 
00379 /* undocumented, used by CVFDT  */
00380 void ExampleGroupStatsRemoveExample(ExampleGroupStatsPtr egs, ExamplePtr e); /* Same parameter list as ExampleGroupStatsAddExample; presumably reverses its effect on the counts -- TODO confirm */
00381 
00382 #endif /* EXAMPLEGROUPSTATSH */

Generated for VFML by doxygen; hosted by SourceForge.net.