00001 //---------------------------------------------------------------------- 00002 // File: ANNperf.h 00003 // Programmer: Sunil Arya and David Mount 00004 // Last modified: 03/04/98 (Release 0.1) 00005 // Description: Include file for ANN performance stats 00006 // 00007 // Some of the code for statistics gathering has been adapted 00008 // from the SmplStat.h package in the g++ library. 00009 //---------------------------------------------------------------------- 00010 // Copyright (c) 1997-2005 University of Maryland and Sunil Arya and 00011 // David Mount. All Rights Reserved. 00012 // 00013 // This software and related documentation is part of the Approximate 00014 // Nearest Neighbor Library (ANN). This software is provided under 00015 // the provisions of the Lesser GNU Public License (LGPL). See the 00016 // file ../ReadMe.txt for further information. 00017 // 00018 // The University of Maryland (U.M.) and the authors make no 00019 // representations about the suitability or fitness of this software for 00020 // any purpose. It is provided "as is" without express or implied 00021 // warranty. 00022 //---------------------------------------------------------------------- 00023 // History: 00024 // Revision 0.1 03/04/98 00025 // Initial release 00026 // Revision 1.0 04/01/05 00027 // Added ANN_ prefix to avoid name conflicts. 00028 //---------------------------------------------------------------------- 00029 00030 #ifndef ANNperf_H 00031 #define ANNperf_H 00032 00033 //---------------------------------------------------------------------- 00034 // basic includes 00035 //---------------------------------------------------------------------- 00036 00037 #include <ANN/ANN.h> // basic ANN includes 00038 00039 //---------------------------------------------------------------------- 00040 // kd-tree stats object 00041 // This object is used for collecting information about a kd-tree 00042 // or bd-tree. 00043 //---------------------------------------------------------------------- 00044 00045 class ANNkdStats { // stats on kd-tree 00046 public: 00047 int dim; // dimension of space 00048 int n_pts; // no. of points 00049 int bkt_size; // bucket size 00050 int n_lf; // no. of leaves (including trivial) 00051 int n_tl; // no. of trivial leaves (no points) 00052 int n_spl; // no. of splitting nodes 00053 int n_shr; // no. of shrinking nodes (for bd-trees) 00054 int depth; // depth of tree 00055 float sum_ar; // sum of leaf aspect ratios 00056 float avg_ar; // average leaf aspect ratio 00057 // 00058 // reset stats 00059 void reset(int d=0, int n=0, int bs=0) 00060 { 00061 dim = d; n_pts = n; bkt_size = bs; 00062 n_lf = n_tl = n_spl = n_shr = depth = 0; 00063 sum_ar = avg_ar = 0.0; 00064 } 00065 00066 ANNkdStats() // basic constructor 00067 { reset(); } 00068 00069 void merge(const ANNkdStats &st); // merge stats from child 00070 }; 00071 00072 //---------------------------------------------------------------------- 00073 // ANNsampStat 00074 // A sample stat collects numeric (double) samples and returns some 00075 // simple statistics. Its main functions are: 00076 // 00077 // reset() Reset to no samples. 00078 // += x Include sample x. 00079 // samples() Return number of samples. 00080 // mean() Return mean of samples. 00081 // stdDev() Return standard deviation 00082 // min() Return minimum of samples. 00083 // max() Return maximum of samples. 00084 //---------------------------------------------------------------------- 00085 class DLL_API ANNsampStat { 00086 int n; // number of samples 00087 double sum; // sum 00088 double sum2; // sum of squares 00089 double minVal, maxVal; // min and max 00090 public : 00091 void reset() // reset everything 00092 { 00093 n = 0; 00094 sum = sum2 = 0; 00095 minVal = ANN_DBL_MAX; 00096 maxVal = -ANN_DBL_MAX; 00097 } 00098 00099 ANNsampStat() { reset(); } // constructor 00100 00101 void operator+=(double x) // add sample 00102 { 00103 n++; sum += x; sum2 += x*x; 00104 if (x < minVal) minVal = x; 00105 if (x > maxVal) maxVal = x; 00106 } 00107 00108 int samples() { return n; } // number of samples 00109 00110 double mean() { return sum/n; } // mean 00111 00112 // standard deviation 00113 double stdDev() { return sqrt((sum2 - (sum*sum)/n)/(n-1));} 00114 00115 double min() { return minVal; } // minimum 00116 double max() { return maxVal; } // maximum 00117 }; 00118 00119 //---------------------------------------------------------------------- 00120 // Operation count updates 00121 //---------------------------------------------------------------------- 00122 00123 #ifdef ANN_PERF 00124 #define ANN_FLOP(n) {ann_Nfloat_ops += (n);} 00125 #define ANN_LEAF(n) {ann_Nvisit_lfs += (n);} 00126 #define ANN_SPL(n) {ann_Nvisit_spl += (n);} 00127 #define ANN_SHR(n) {ann_Nvisit_shr += (n);} 00128 #define ANN_PTS(n) {ann_Nvisit_pts += (n);} 00129 #define ANN_COORD(n) {ann_Ncoord_hts += (n);} 00130 #else 00131 #define ANN_FLOP(n) 00132 #define ANN_LEAF(n) 00133 #define ANN_SPL(n) 00134 #define ANN_SHR(n) 00135 #define ANN_PTS(n) 00136 #define ANN_COORD(n) 00137 #endif 00138 00139 //---------------------------------------------------------------------- 00140 // Performance statistics 00141 // The following data and routines are used for computing performance 00142 // statistics for nearest neighbor searching. Because these routines 00143 // can slow the code down, they can be activated and deactiviated by 00144 // defining the ANN_PERF variable, by compiling with the option: 00145 // -DANN_PERF 00146 //---------------------------------------------------------------------- 00147 00148 //---------------------------------------------------------------------- 00149 // Global counters for performance measurement 00150 // 00151 // visit_lfs The number of leaf nodes visited in the 00152 // tree. 00153 // 00154 // visit_spl The number of splitting nodes visited in the 00155 // tree. 00156 // 00157 // visit_shr The number of shrinking nodes visited in the 00158 // tree. 00159 // 00160 // visit_pts The number of points visited in all the 00161 // leaf nodes visited. Equivalently, this 00162 // is the number of points for which distance 00163 // calculations are performed. 00164 // 00165 // coord_hts The number of times a coordinate of a 00166 // data point is accessed. This is generally 00167 // less than visit_pts*d if partial distance 00168 // calculation is used. This count is low 00169 // in the sense that if a coordinate is hit 00170 // many times in the same routine we may 00171 // count it only once. 00172 // 00173 // float_ops The number of floating point operations. 00174 // This includes all operations in the heap 00175 // as well as distance calculations to boxes. 00176 // 00177 // average_err The average error of each query (the 00178 // error of the reported point to the true 00179 // nearest neighbor). For k nearest neighbors 00180 // the error is computed k times. 00181 // 00182 // rank_err The rank error of each query (the difference 00183 // in the rank of the reported point and its 00184 // true rank). 00185 // 00186 // data_pts The number of data points. This is not 00187 // a counter, but used in stats computation. 00188 //---------------------------------------------------------------------- 00189 00190 extern int ann_Ndata_pts; // number of data points 00191 extern int ann_Nvisit_lfs; // number of leaf nodes visited 00192 extern int ann_Nvisit_spl; // number of splitting nodes visited 00193 extern int ann_Nvisit_shr; // number of shrinking nodes visited 00194 extern int ann_Nvisit_pts; // visited points for one query 00195 extern int ann_Ncoord_hts; // coordinate hits for one query 00196 extern int ann_Nfloat_ops; // floating ops for one query 00197 extern ANNsampStat ann_visit_lfs; // stats on leaf nodes visits 00198 extern ANNsampStat ann_visit_spl; // stats on splitting nodes visits 00199 extern ANNsampStat ann_visit_shr; // stats on shrinking nodes visits 00200 extern ANNsampStat ann_visit_nds; // stats on total nodes visits 00201 extern ANNsampStat ann_visit_pts; // stats on points visited 00202 extern ANNsampStat ann_coord_hts; // stats on coordinate hits 00203 extern ANNsampStat ann_float_ops; // stats on floating ops 00204 //---------------------------------------------------------------------- 00205 // The following need to be part of the public interface, because 00206 // they are accessed outside the DLL in ann_test.cpp. 00207 //---------------------------------------------------------------------- 00208 DLL_API extern ANNsampStat ann_average_err; // average error 00209 DLL_API extern ANNsampStat ann_rank_err; // rank error 00210 00211 //---------------------------------------------------------------------- 00212 // Declaration of externally accessible routines for statistics 00213 //---------------------------------------------------------------------- 00214 00215 DLL_API void annResetStats(int data_size); // reset stats for a set of queries 00216 00217 DLL_API void annResetCounts(); // reset counts for one queries 00218 00219 DLL_API void annUpdateStats(); // update stats with current counts 00220 00221 DLL_API void annPrintStats(ANNbool validate); // print statistics for a run 00222 00223 #endif