00001 #ifndef EXAMPLEGROUPSTATSH
00002 #define EXAMPLEGROUPSTATSH
00003
00004 #include "vfml.h"
00005
/*
 * Bin -- plain record type; BinPtr is the matching pointer typedef.
 *
 * Only the declaration is visible in this header, so the per-field notes
 * below are read off the names and types and are assumptions to confirm
 * against the implementation.
 */
00021 typedef struct _Bin_ {
00022 float lowerBound, upperBound; /* presumably the value range covered by this bin -- TODO confirm which ends are inclusive */
00023 float *classTotals; /* presumably one tally per class; array length and ownership are not shown here */
00024 float exampleCount; /* NOTE(review): float here, whereas ContinuousTracker.exampleCount is long -- confirm this is intended */
00025 int boundryClass; /* spelling is "boundry" (sic); meaning cannot be determined from this header */
00026 long boundryCount; /* spelling is "boundry" (sic); meaning cannot be determined from this header */
00027 } Bin, *BinPtr;
00028
/*
 * Bin interface. Only prototypes are visible here; the behaviour notes are
 * inferred from names and signatures and should be confirmed in the
 * implementation file.
 *
 *   BinNew   - takes an ExampleSpecPtr, returns a BinPtr (presumably a newly
 *              created Bin; failure behaviour is not shown).
 *   BinFree  - takes a BinPtr, returns nothing (presumably releases a Bin
 *              obtained from BinNew).
 *   BinWrite - takes a BinPtr and a FILE *, returns nothing (presumably
 *              writes a representation of the bin to 'out').
 */
00029 BinPtr BinNew(ExampleSpecPtr spec);
00030 void BinFree(BinPtr bin);
00031 void BinWrite(BinPtr bin, FILE *out);
00032
/*
 * ContinuousTracker -- record type with a matching pointer typedef,
 * ContinuousTrackerPtr. It is the handle taken by every ContinuousTracker*
 * function declared in this header.
 *
 * The field notes are assumptions drawn from names and types; no code that
 * reads or writes these fields is visible here.
 */
00033 typedef struct _ContinuousTracker_ {
00034 ExampleSpecPtr spec; /* presumably the spec passed to ContinuousTrackerNew -- TODO confirm ownership */
00035 float *classTotals; /* presumably one tally per class; length not shown here */
00036 long exampleCount; /* presumably the number of examples added so far */
00037 VoidListPtr bins; /* presumably a list whose elements are BinPtr -- verify against the implementation */
00038 float min, max; /* presumably the smallest / largest value observed */
00039 int initMinMax; /* int used as a flag, judging by the name; exact meaning not shown */
00040 int timesPrunedCount; /* presumably counts prune operations -- TODO confirm */
00041 int addingNewBins; /* int used as a flag, judging by the name; exact meaning not shown */
00042 } ContinuousTracker, *ContinuousTrackerPtr;
00043
/*
 * ContinuousTracker construction, update and query prototypes.
 * Signatures are authoritative; the descriptions are inferred from names
 * and must be confirmed against the implementation.
 *
 *   ContinuousTrackerNew  - takes an ExampleSpecPtr, returns a
 *                           ContinuousTrackerPtr.
 *   ContinuousTrackerFree - takes a tracker, returns nothing.
 *   ContinuousTrackerAddExample
 *                         - takes a tracker, a float value and an int class
 *                           (parameter is spelled 'theclass').
 *   ContinuousTrackerGetPercentBelowThreshold
 *                         - takes a tracker and a float threshold, returns
 *                           a float. NOTE(review): whether the result is a
 *                           fraction in [0,1] or a 0-100 percentage is not
 *                           visible here.
 *   ContinuousTrackerNumSplitThresholds
 *                         - takes a tracker, returns an int count.
 *   ContinuousTrackerGetMostCommonClassInPartition
 *                         - takes a tracker, a float threshold and an int
 *                           'above' (presumably selects the side of the
 *                           threshold -- TODO confirm encoding); returns an
 *                           int, presumably a class index.
 */
00044 ContinuousTrackerPtr ContinuousTrackerNew(ExampleSpecPtr spec);
00045 void ContinuousTrackerFree(ContinuousTrackerPtr ct);
00046 void ContinuousTrackerAddExample(ContinuousTrackerPtr ct,
00047 float value, int theclass);
00048 float ContinuousTrackerGetPercentBelowThreshold(ContinuousTrackerPtr ct,
00049 float thresh);
00050
00051 int ContinuousTrackerNumSplitThresholds(ContinuousTrackerPtr ct);
00052 int ContinuousTrackerGetMostCommonClassInPartition(ContinuousTrackerPtr ct,
00053 float threshold, int above);
00054
/*
 * ContinuousTracker split-evaluation and pruning prototypes.
 *
 * ContinuousTrackerEntropyAttributeSplit and
 * ContinuousTrackerGiniAttributeSplit share one signature: a tracker plus
 * four float pointers (firstIndex, firstThresh, secondIndex, secondThresh)
 * and a void return. The pointers are presumably output parameters for the
 * two best splits (score and threshold each) -- TODO confirm, including
 * what is stored when fewer than two candidate splits exist.
 *
 * ContinuousTrackerDisableWorseThanEntropy / ...Gini take a tracker and a
 * float threshold and return nothing; the effect on the tracker is not
 * visible from this header.
 *
 * ContinuousTrackerPruneSplitsEntropy takes a tracker and two ints
 * (maxSplits, pruneDownTo) and returns an int whose meaning is not shown
 * here -- verify against the implementation.
 */
00055 void ContinuousTrackerEntropyAttributeSplit(ContinuousTrackerPtr ct,
00056 float *firstIndex, float *firstThresh,
00057 float *secondIndex, float *secondThresh);
00058 void ContinuousTrackerGiniAttributeSplit(ContinuousTrackerPtr ct,
00059 float *firstIndex, float *firstThresh,
00060 float *secondIndex, float *secondThresh);
00061
00062 void ContinuousTrackerDisableWorseThanEntropy(ContinuousTrackerPtr ct,
00063 float entropyThresh);
00064 void ContinuousTrackerDisableWorseThanGini(ContinuousTrackerPtr ct,
00065 float giniThresh);
00066 int ContinuousTrackerPruneSplitsEntropy(ContinuousTrackerPtr ct,
00067 int maxSplits, int pruneDownTo);
00068
/*
 * ExampleGroupStats -- record type with a matching pointer typedef,
 * ExampleGroupStatsPtr. It is the handle taken by every ExampleGroupStats*
 * function declared in this header.
 *
 * Field notes are assumptions based on names and types only.
 */
00070 typedef struct _ExampleGroupStats_ {
00071 ExampleSpecPtr spec; /* presumably the spec given to ExampleGroupStatsNew -- TODO confirm ownership */
00072 AttributeTrackerPtr attributeTracker; /* presumably the tracker given to ExampleGroupStatsNew */
00073
00074 long examplesSeen; /* presumably the value reported by ExampleGroupStatsNumExamplesSeen */
00075
00076 long *classTotals; /* long tallies here, while Bin and ContinuousTracker use float *classTotals */
00077
00078
00079
00080
00081
00082
/* NOTE(review): the element type and layout of 'stats' are not visible in
 * this header; consult the implementation before indexing into it. */
00083 VoidAListPtr stats;
00084
00085 } ExampleGroupStats, *ExampleGroupStatsPtr;
00086
00087
/*
 * ExampleGroupStats lifecycle, update and output prototypes.
 * Descriptions are inferred from names and signatures; confirm against the
 * implementation.
 */

/* Takes an ExampleSpecPtr and an AttributeTrackerPtr, returns an
 * ExampleGroupStatsPtr. Ownership of 'es' and 'at' is not shown here. */
00100 ExampleGroupStatsPtr ExampleGroupStatsNew(ExampleSpecPtr es,
00101 AttributeTrackerPtr at);
00102
/* Takes the handle, returns nothing; presumably the counterpart of
 * ExampleGroupStatsNew. */
00104 void ExampleGroupStatsFree(ExampleGroupStatsPtr egs);
00105
/* Takes the handle, returns nothing. What "deactivate" does to the stored
 * statistics is not visible here -- TODO confirm. */
00115 void ExampleGroupStatsDeactivate(ExampleGroupStatsPtr egs);
00116
/* Takes the handle, returns nothing; presumably undoes
 * ExampleGroupStatsDeactivate -- TODO confirm. */
00118 void ExampleGroupStatsReactivate(ExampleGroupStatsPtr egs);
00119
/* Takes the handle and an ExamplePtr, returns nothing; presumably folds the
 * example into the statistics. */
00121 void ExampleGroupStatsAddExample(ExampleGroupStatsPtr egs, ExamplePtr e);
00122
/* Takes the handle and a FILE *, returns nothing; presumably writes the
 * statistics to 'out'. Output format is not shown here. */
00124 void ExampleGroupStatsWrite(ExampleGroupStatsPtr egs, FILE *out);
00125
/*
 * ExampleGroupStats accessors and per-attribute state prototypes.
 * Descriptions are inferred from names and signatures; confirm against the
 * implementation.
 */

/* Returns a long; presumably the struct's examplesSeen field. */
00132 long ExampleGroupStatsNumExamplesSeen(ExampleGroupStatsPtr egs);
00133
00134
/* Returns an AttributeTrackerPtr; presumably the struct's attributeTracker
 * field. Whether the caller may modify or free it is not shown here. */
00136 AttributeTrackerPtr ExampleGroupStatsGetAttributeTracker(
00137 ExampleGroupStatsPtr egs);
00138
/* Takes an int attribute number, returns an int (presumably used as a
 * boolean -- TODO confirm). */
00145 int ExampleGroupStatsIsAttributeActive(ExampleGroupStatsPtr egs, int num);
00146
/* Takes an int attribute number, returns nothing. Its effect on statistics
 * already collected is not visible here. */
00154 void ExampleGroupStatsIgnoreAttribute(ExampleGroupStatsPtr egs, int num);
00155
00156
/*
 * Most-common-class and purity queries. All return types and parameters
 * below are taken from the prototypes; the semantics are inferred from the
 * names and should be confirmed in the implementation (in particular how
 * ties and an empty group are handled).
 */

/* Returns an int (presumably a class index). 'addClass' and 'addCount' are
 * ints, presumably a class and a count added before choosing -- TODO
 * confirm. */
00165 int ExampleGroupStatsGetMostCommonClassLaplace(ExampleGroupStatsPtr egs,
00166 int addClass, int addCount);
/* Returns an int, presumably a class index. */
00168 int ExampleGroupStatsGetMostCommonClass(ExampleGroupStatsPtr egs);
00169
/* Returns a long, presumably the tally for the most common class. */
00171 long ExampleGroupStatsGetMostCommonClassCount(ExampleGroupStatsPtr egs);
00172
00173
/* Takes int attribute and value numbers; returns an int, presumably a class
 * index. */
00178 int ExampleGroupStatsGetMostCommonClassForAttVal(ExampleGroupStatsPtr egs,
00179 int att, int val);
00180
/* Returns an int, presumably used as a boolean -- TODO confirm. */
00182 int ExampleGroupStatsIsPure(ExampleGroupStatsPtr egs);
00183
/*
 * Percentage, m-estimate and log-probability queries.
 *
 * Return types differ within this group: the *Percent functions return
 * float, while the *MEstimate and *LogP functions return double.
 *
 * NOTE(review): not visible from this header -- whether "Percent" values
 * are fractions or 0-100 percentages, which m and prior the m-estimate
 * uses, the logarithm base of the LogP functions, and the result when the
 * relevant count is zero. Confirm each against the implementation.
 */

/* Takes int attribute and value numbers, returns float. */
00188 float ExampleGroupStatsGetValuePercent(ExampleGroupStatsPtr egs,
00189 int attNum, int valNum);
/* Takes int attribute, value and class numbers, returns float. */
00190 float ExampleGroupStatsGetValueGivenClassPercent(ExampleGroupStatsPtr egs,
00191 int attNum, int valNum, int classNum);
00192
/* Takes int attribute, value and class numbers, returns double. */
00202 double ExampleGroupStatsGetValueGivenClassMEstimate(ExampleGroupStatsPtr egs,
00203 int attNum, int valNum, int classNum);
00204
/* Takes an int class number, returns float. */
00206 float ExampleGroupStatsGetClassPercent(ExampleGroupStatsPtr egs, int classNum);
00207
/* Takes an int attribute number and a float threshold, returns float. */
00212 float ExampleGroupStatsGetPercentBelowThreshold(ExampleGroupStatsPtr egs,
00213 int attNum, float thresh);
00214
00215
00216
/* Takes int attribute, value and class numbers, returns double. */
00226 double ExampleGroupStatsGetValueGivenClassMEstimateLogP(ExampleGroupStatsPtr
00227 egs, int attNum, int valNum, int classNum);
00228
/* Takes an int class number, returns double. */
00230 double ExampleGroupStatsGetClassLogP(ExampleGroupStatsPtr egs, int classNum);
00231
00232
/*
 * Entropy queries. Signatures are authoritative; semantics are inferred
 * from names and should be confirmed in the implementation.
 */

/* Returns a float for the whole group. */
00234 float ExampleGroupStatsEntropyTotal(ExampleGroupStatsPtr egs);
00235
/* Takes an int attribute number, returns a float. */
00240 float ExampleGroupStatsEntropyDiscreteAttributeSplit(ExampleGroupStatsPtr egs,
00241 int attNum);
00242
/* Same parameters as the discrete split query plus a float 'delta'.
 * NOTE(review): how 'delta' enters the computation is not visible here. */
00251 float ExampleGroupStatsEntropyPlusDiscreteAttributeSplit(ExampleGroupStatsPtr
00252 egs, int attNum, float delta);
00253
00254
/* Same signature as the "Plus" variant; presumably the opposite-direction
 * adjustment -- TODO confirm. */
00263 float ExampleGroupStatsEntropyMinusDiscreteAttributeSplit(ExampleGroupStatsPtr
00264 egs, int attNum, float delta);
00265
00266
/* Returns nothing; takes an int attribute number and four float pointers
 * (firstIndex, firstThresh, secondIndex, secondThresh). This mirrors the
 * parameter list of ContinuousTrackerEntropyAttributeSplit, so the pointers
 * are presumably outputs for the two best splits -- TODO confirm. */
00279 void ExampleGroupStatsEntropyContinuousAttributeSplit(ExampleGroupStatsPtr egs,
00280 int attNum, float *firstIndex, float *firstThresh,
00281 float *secondIndex, float *secondThresh);
00282
/*
 * Gini queries. The three prototypes parallel the entropy functions
 * ExampleGroupStatsEntropyTotal, ...EntropyDiscreteAttributeSplit and
 * ...EntropyContinuousAttributeSplit in parameters and return types.
 * Semantics are inferred from names; confirm in the implementation.
 */

/* Returns a float for the whole group. */
00284 float ExampleGroupStatsGiniTotal(ExampleGroupStatsPtr egs);
00285
/* Takes an int attribute number, returns a float. */
00291 float ExampleGroupStatsGiniDiscreteAttributeSplit(ExampleGroupStatsPtr egs,
00292 int attNum);
00293
/* Returns nothing; takes an int attribute number and four float pointers,
 * presumably outputs for the two best splits -- TODO confirm. */
00304 void ExampleGroupStatsGiniContinuousAttributeSplit(ExampleGroupStatsPtr egs,
00305 int attNum, float *firstIndex, float *firstThresh,
00306 float *secondIndex, float *secondThresh);
00307
/*
 * Split-threshold management. Every function here takes an int 'attNum'.
 * The parameter lists line up with the ContinuousTracker* functions
 * declared earlier in this header, so these presumably forward to the
 * tracker for that attribute -- TODO confirm, including what happens when
 * 'attNum' names a non-continuous attribute.
 */

/* Takes a float threshold, returns nothing. */
00319 void ExampleGroupStatsIgnoreSplitsWorseThanEntropy(ExampleGroupStatsPtr egs,
00320 int attNum, float entropyThresh);
00321
/* Takes a float threshold, returns nothing. */
00333 void ExampleGroupStatsIgnoreSplitsWorseThanGini(ExampleGroupStatsPtr egs,
00334 int attNum, float giniThresh);
00335
00336
/* Takes two ints (maxSplits, pruneDownTo) and returns an int whose meaning
 * is not visible from this header. */
00351 int ExampleGroupStatsLimitSplitsEntropy(ExampleGroupStatsPtr egs, int attNum,
00352 int maxSplits, int pruneDownTo);
00353
/* Returns nothing; presumably related to ContinuousTracker.addingNewBins --
 * verify against the implementation. */
00358 void ExampleGroupStatsStopAddingSplits(ExampleGroupStatsPtr egs, int attNum);
00359
/* Returns an int count. */
00364 int ExampleGroupStatsNumSplitThresholds(ExampleGroupStatsPtr egs, int attNum);
00365
/* Takes a float threshold, returns an int (presumably a class index). */
00370 int ExampleGroupStatsGetMostCommonClassAboveThreshold(ExampleGroupStatsPtr egs,
00371 int attNum, float threshold);
/* Same signature as the "Above" variant. NOTE(review): the embedded listing
 * numbers jump from 00372 to 00377 inside this declaration, which suggests
 * its original doc comment sat between the two lines. */
00372 int ExampleGroupStatsGetMostCommonClassBelowThreshold(ExampleGroupStatsPtr egs,
00377 int attNum, float threshold);
00378
00379
/* Takes the handle and an ExamplePtr, returns nothing. It has the same
 * parameter list as ExampleGroupStatsAddExample and is presumably its
 * inverse -- TODO confirm, including the behaviour when 'e' was never
 * added. */
00380 void ExampleGroupStatsRemoveExample(ExampleGroupStatsPtr egs, ExamplePtr e);
00381
00382 #endif