00001 //---------------------------------------------------------------------- 00002 // File: kd_tree.cpp 00003 // Programmer: Sunil Arya and David Mount 00004 // Description: Basic methods for kd-trees. 00005 // Last modified: 01/04/05 (Version 1.0) 00006 //---------------------------------------------------------------------- 00007 // Copyright (c) 1997-2005 University of Maryland and Sunil Arya and 00008 // David Mount. All Rights Reserved. 00009 // 00010 // This software and related documentation is part of the Approximate 00011 // Nearest Neighbor Library (ANN). This software is provided under 00012 // the provisions of the Lesser GNU Public License (LGPL). See the 00013 // file ../ReadMe.txt for further information. 00014 // 00015 // The University of Maryland (U.M.) and the authors make no 00016 // representations about the suitability or fitness of this software for 00017 // any purpose. It is provided "as is" without express or implied 00018 // warranty. 00019 //---------------------------------------------------------------------- 00020 // History: 00021 // Revision 0.1 03/04/98 00022 // Initial release 00023 // Revision 1.0 04/01/05 00024 // Increased aspect ratio bound (ANN_AR_TOOBIG) from 100 to 1000. 00025 // Fixed leaf counts to count trivial leaves. 00026 // Added optional pa, pi arguments to Skeleton kd_tree constructor 00027 // for use in load constructor. 00028 // Added annClose() to eliminate KD_TRIVIAL memory leak. 00029 //---------------------------------------------------------------------- 00030 00031 #include "kd_tree.h" // kd-tree declarations 00032 #include "kd_split.h" // kd-tree splitting rules 00033 #include "kd_util.h" // kd-tree utilities 00034 #include <ANN/ANNperf.h> // performance evaluation 00035 00036 //---------------------------------------------------------------------- 00037 // Global data 00038 // 00039 // For some splitting rules, especially with small bucket sizes, 00040 // it is possible to generate a large number of empty leaf nodes. 00041 // To save storage we allocate a single trivial leaf node which 00042 // contains no points. For messy coding reasons it is convenient 00043 // to have it reference a trivial point index. 00044 // 00045 // KD_TRIVIAL is allocated when the first kd-tree is created. It 00046 // must *never* deallocated (since it may be shared by more than 00047 // one tree). 00048 //---------------------------------------------------------------------- 00049 static int IDX_TRIVIAL[] = {0}; // trivial point index 00050 ANNkd_leaf *KD_TRIVIAL = NULL; // trivial leaf node 00051 00052 //---------------------------------------------------------------------- 00053 // Printing the kd-tree 00054 // These routines print a kd-tree in reverse inorder (high then 00055 // root then low). (This is so that if you look at the output 00056 // from the right side it appear from left to right in standard 00057 // inorder.) When outputting leaves we output only the point 00058 // indices rather than the point coordinates. There is an option 00059 // to print the point coordinates separately. 00060 // 00061 // The tree printing routine calls the printing routines on the 00062 // individual nodes of the tree, passing in the level or depth 00063 // in the tree. The level in the tree is used to print indentation 00064 // for readability. 00065 //---------------------------------------------------------------------- 00066 00067 void ANNkd_split::print( // print splitting node 00068 int level, // depth of node in tree 00069 ostream &out) // output stream 00070 { 00071 child[ANN_HI]->print(level+1, out); // print high child 00072 out << " "; 00073 for (int i = 0; i < level; i++) // print indentation 00074 out << ".."; 00075 out << "Split cd=" << cut_dim << " cv=" << cut_val; 00076 out << " lbnd=" << cd_bnds[ANN_LO]; 00077 out << " hbnd=" << cd_bnds[ANN_HI]; 00078 out << "\n"; 00079 child[ANN_LO]->print(level+1, out); // print low child 00080 } 00081 00082 void ANNkd_leaf::print( // print leaf node 00083 int level, // depth of node in tree 00084 ostream &out) // output stream 00085 { 00086 00087 out << " "; 00088 for (int i = 0; i < level; i++) // print indentation 00089 out << ".."; 00090 00091 if (this == KD_TRIVIAL) { // canonical trivial leaf node 00092 out << "Leaf (trivial)\n"; 00093 } 00094 else{ 00095 out << "Leaf n=" << n_pts << " <"; 00096 for (int j = 0; j < n_pts; j++) { 00097 out << bkt[j]; 00098 if (j < n_pts-1) out << ","; 00099 } 00100 out << ">\n"; 00101 } 00102 } 00103 00104 void ANNkd_tree::Print( // print entire tree 00105 ANNbool with_pts, // print points as well? 00106 ostream &out) // output stream 00107 { 00108 out << "ANN Version " << ANNversion << "\n"; 00109 if (with_pts) { // print point coordinates 00110 out << " Points:\n"; 00111 for (int i = 0; i < n_pts; i++) { 00112 out << "\t" << i << ": "; 00113 annPrintPt(pts[i], dim, out); 00114 out << "\n"; 00115 } 00116 } 00117 if (root == NULL) // empty tree? 00118 out << " Null tree.\n"; 00119 else { 00120 root->print(0, out); // invoke printing at root 00121 } 00122 } 00123 00124 //---------------------------------------------------------------------- 00125 // kd_tree statistics (for performance evaluation) 00126 // This routine compute various statistics information for 00127 // a kd-tree. It is used by the implementors for performance 00128 // evaluation of the data structure. 00129 //---------------------------------------------------------------------- 00130 00131 #define MAX(a,b) ((a) > (b) ? (a) : (b)) 00132 00133 void ANNkdStats::merge(const ANNkdStats &st) // merge stats from child 00134 { 00135 n_lf += st.n_lf; n_tl += st.n_tl; 00136 n_spl += st.n_spl; n_shr += st.n_shr; 00137 depth = MAX(depth, st.depth); 00138 sum_ar += st.sum_ar; 00139 } 00140 00141 //---------------------------------------------------------------------- 00142 // Update statistics for nodes 00143 //---------------------------------------------------------------------- 00144 00145 const double ANN_AR_TOOBIG = 1000; // too big an aspect ratio 00146 00147 void ANNkd_leaf::getStats( // get subtree statistics 00148 int dim, // dimension of space 00149 ANNkdStats &st, // stats (modified) 00150 ANNorthRect &bnd_box) // bounding box 00151 { 00152 st.reset(); 00153 st.n_lf = 1; // count this leaf 00154 if (this == KD_TRIVIAL) st.n_tl = 1; // count trivial leaf 00155 double ar = annAspectRatio(dim, bnd_box); // aspect ratio of leaf 00156 // incr sum (ignore outliers) 00157 st.sum_ar += float(ar < ANN_AR_TOOBIG ? ar : ANN_AR_TOOBIG); 00158 } 00159 00160 void ANNkd_split::getStats( // get subtree statistics 00161 int dim, // dimension of space 00162 ANNkdStats &st, // stats (modified) 00163 ANNorthRect &bnd_box) // bounding box 00164 { 00165 ANNkdStats ch_stats; // stats for children 00166 // get stats for low child 00167 ANNcoord hv = bnd_box.hi[cut_dim]; // save box bounds 00168 bnd_box.hi[cut_dim] = cut_val; // upper bound for low child 00169 ch_stats.reset(); // reset 00170 child[ANN_LO]->getStats(dim, ch_stats, bnd_box); 00171 st.merge(ch_stats); // merge them 00172 bnd_box.hi[cut_dim] = hv; // restore bound 00173 // get stats for high child 00174 ANNcoord lv = bnd_box.lo[cut_dim]; // save box bounds 00175 bnd_box.lo[cut_dim] = cut_val; // lower bound for high child 00176 ch_stats.reset(); // reset 00177 child[ANN_HI]->getStats(dim, ch_stats, bnd_box); 00178 st.merge(ch_stats); // merge them 00179 bnd_box.lo[cut_dim] = lv; // restore bound 00180 00181 st.depth++; // increment depth 00182 st.n_spl++; // increment number of splits 00183 } 00184 00185 //---------------------------------------------------------------------- 00186 // getStats 00187 // Collects a number of statistics related to kd_tree or 00188 // bd_tree. 00189 //---------------------------------------------------------------------- 00190 00191 void ANNkd_tree::getStats( // get tree statistics 00192 ANNkdStats &st) // stats (modified) 00193 { 00194 st.reset(dim, n_pts, bkt_size); // reset stats 00195 // create bounding box 00196 ANNorthRect bnd_box(dim, bnd_box_lo, bnd_box_hi); 00197 if (root != NULL) { // if nonempty tree 00198 root->getStats(dim, st, bnd_box); // get statistics 00199 st.avg_ar = st.sum_ar / st.n_lf; // average leaf asp ratio 00200 } 00201 } 00202 00203 //---------------------------------------------------------------------- 00204 // kd_tree destructor 00205 // The destructor just frees the various elements that were 00206 // allocated in the construction process. 00207 //---------------------------------------------------------------------- 00208 00209 ANNkd_tree::~ANNkd_tree() // tree destructor 00210 { 00211 if (root != NULL) delete root; 00212 if (pidx != NULL) delete [] pidx; 00213 if (bnd_box_lo != NULL) annDeallocPt(bnd_box_lo); 00214 if (bnd_box_hi != NULL) annDeallocPt(bnd_box_hi); 00215 } 00216 00217 //---------------------------------------------------------------------- 00218 // This is called with all use of ANN is finished. It eliminates the 00219 // minor memory leak caused by the allocation of KD_TRIVIAL. 00220 //---------------------------------------------------------------------- 00221 void annClose() // close use of ANN 00222 { 00223 if (KD_TRIVIAL != NULL) { 00224 delete KD_TRIVIAL; 00225 KD_TRIVIAL = NULL; 00226 } 00227 } 00228 00229 //---------------------------------------------------------------------- 00230 // kd_tree constructors 00231 // There is a skeleton kd-tree constructor which sets up a 00232 // trivial empty tree. The last optional argument allows 00233 // the routine to be passed a point index array which is 00234 // assumed to be of the proper size (n). Otherwise, one is 00235 // allocated and initialized to the identity. Warning: In 00236 // either case the destructor will deallocate this array. 00237 // 00238 // As a kludge, we need to allocate KD_TRIVIAL if one has not 00239 // already been allocated. (This is because I'm too dumb to 00240 // figure out how to cause a pointer to be allocated at load 00241 // time.) 00242 //---------------------------------------------------------------------- 00243 00244 void ANNkd_tree::SkeletonTree( // construct skeleton tree 00245 int n, // number of points 00246 int dd, // dimension 00247 int bs, // bucket size 00248 ANNpointArray pa, // point array 00249 ANNidxArray pi) // point indices 00250 { 00251 dim = dd; // initialize basic elements 00252 n_pts = n; 00253 bkt_size = bs; 00254 pts = pa; // initialize points array 00255 00256 root = NULL; // no associated tree yet 00257 00258 if (pi == NULL) { // point indices provided? 00259 pidx = new ANNidx[n]; // no, allocate space for point indices 00260 for (int i = 0; i < n; i++) { 00261 pidx[i] = i; // initially identity 00262 } 00263 } 00264 else { 00265 pidx = pi; // yes, use them 00266 } 00267 00268 bnd_box_lo = bnd_box_hi = NULL; // bounding box is nonexistent 00269 if (KD_TRIVIAL == NULL) // no trivial leaf node yet? 00270 KD_TRIVIAL = new ANNkd_leaf(0, IDX_TRIVIAL); // allocate it 00271 } 00272 00273 ANNkd_tree::ANNkd_tree( // basic constructor 00274 int n, // number of points 00275 int dd, // dimension 00276 int bs) // bucket size 00277 { SkeletonTree(n, dd, bs); } // construct skeleton tree 00278 00279 //---------------------------------------------------------------------- 00280 // rkd_tree - recursive procedure to build a kd-tree 00281 // 00282 // Builds a kd-tree for points in pa as indexed through the 00283 // array pidx[0..n-1] (typically a subarray of the array used in 00284 // the top-level call). This routine permutes the array pidx, 00285 // but does not alter pa[]. 00286 // 00287 // The construction is based on a standard algorithm for constructing 00288 // the kd-tree (see Friedman, Bentley, and Finkel, ``An algorithm for 00289 // finding best matches in logarithmic expected time,'' ACM Transactions 00290 // on Mathematical Software, 3(3):209-226, 1977). The procedure 00291 // operates by a simple divide-and-conquer strategy, which determines 00292 // an appropriate orthogonal cutting plane (see below), and splits 00293 // the points. When the number of points falls below the bucket size, 00294 // we simply store the points in a leaf node's bucket. 00295 // 00296 // One of the arguments is a pointer to a splitting routine, 00297 // whose prototype is: 00298 // 00299 // void split( 00300 // ANNpointArray pa, // complete point array 00301 // ANNidxArray pidx, // point array (permuted on return) 00302 // ANNorthRect &bnds, // bounds of current cell 00303 // int n, // number of points 00304 // int dim, // dimension of space 00305 // int &cut_dim, // cutting dimension 00306 // ANNcoord &cut_val, // cutting value 00307 // int &n_lo) // no. of points on low side of cut 00308 // 00309 // This procedure selects a cutting dimension and cutting value, 00310 // partitions pa about these values, and returns the number of 00311 // points on the low side of the cut. 00312 //---------------------------------------------------------------------- 00313 00314 ANNkd_ptr rkd_tree( // recursive construction of kd-tree 00315 ANNpointArray pa, // point array 00316 ANNidxArray pidx, // point indices to store in subtree 00317 int n, // number of points 00318 int dim, // dimension of space 00319 int bsp, // bucket space 00320 ANNorthRect &bnd_box, // bounding box for current node 00321 ANNkd_splitter splitter) // splitting routine 00322 { 00323 if (n <= bsp) { // n small, make a leaf node 00324 if (n == 0) // empty leaf node 00325 return KD_TRIVIAL; // return (canonical) empty leaf 00326 else // construct the node and return 00327 return new ANNkd_leaf(n, pidx); 00328 } 00329 else { // n large, make a splitting node 00330 int cd; // cutting dimension 00331 ANNcoord cv; // cutting value 00332 int n_lo; // number on low side of cut 00333 ANNkd_node *lo, *hi; // low and high children 00334 00335 // invoke splitting procedure 00336 (*splitter)(pa, pidx, bnd_box, n, dim, cd, cv, n_lo); 00337 00338 ANNcoord lv = bnd_box.lo[cd]; // save bounds for cutting dimension 00339 ANNcoord hv = bnd_box.hi[cd]; 00340 00341 bnd_box.hi[cd] = cv; // modify bounds for left subtree 00342 lo = rkd_tree( // build left subtree 00343 pa, pidx, n_lo, // ...from pidx[0..n_lo-1] 00344 dim, bsp, bnd_box, splitter); 00345 bnd_box.hi[cd] = hv; // restore bounds 00346 00347 bnd_box.lo[cd] = cv; // modify bounds for right subtree 00348 hi = rkd_tree( // build right subtree 00349 pa, pidx + n_lo, n-n_lo,// ...from pidx[n_lo..n-1] 00350 dim, bsp, bnd_box, splitter); 00351 bnd_box.lo[cd] = lv; // restore bounds 00352 00353 // create the splitting node 00354 ANNkd_split *ptr = new ANNkd_split(cd, cv, lv, hv, lo, hi); 00355 00356 return ptr; // return pointer to this node 00357 } 00358 } 00359 00360 //---------------------------------------------------------------------- 00361 // kd-tree constructor 00362 // This is the main constructor for kd-trees given a set of points. 00363 // It first builds a skeleton tree, then computes the bounding box 00364 // of the data points, and then invokes rkd_tree() to actually 00365 // build the tree, passing it the appropriate splitting routine. 00366 //---------------------------------------------------------------------- 00367 00368 ANNkd_tree::ANNkd_tree( // construct from point array 00369 ANNpointArray pa, // point array (with at least n pts) 00370 int n, // number of points 00371 int dd, // dimension 00372 int bs, // bucket size 00373 ANNsplitRule split) // splitting method 00374 { 00375 SkeletonTree(n, dd, bs); // set up the basic stuff 00376 pts = pa; // where the points are 00377 if (n == 0) return; // no points--no sweat 00378 00379 ANNorthRect bnd_box(dd); // bounding box for points 00380 annEnclRect(pa, pidx, n, dd, bnd_box);// construct bounding rectangle 00381 // copy to tree structure 00382 bnd_box_lo = annCopyPt(dd, bnd_box.lo); 00383 bnd_box_hi = annCopyPt(dd, bnd_box.hi); 00384 00385 switch (split) { // build by rule 00386 case ANN_KD_STD: // standard kd-splitting rule 00387 root = rkd_tree(pa, pidx, n, dd, bs, bnd_box, kd_split); 00388 break; 00389 case ANN_KD_MIDPT: // midpoint split 00390 root = rkd_tree(pa, pidx, n, dd, bs, bnd_box, midpt_split); 00391 break; 00392 case ANN_KD_FAIR: // fair split 00393 root = rkd_tree(pa, pidx, n, dd, bs, bnd_box, fair_split); 00394 break; 00395 case ANN_KD_SUGGEST: // best (in our opinion) 00396 case ANN_KD_SL_MIDPT: // sliding midpoint split 00397 root = rkd_tree(pa, pidx, n, dd, bs, bnd_box, sl_midpt_split); 00398 break; 00399 case ANN_KD_SL_FAIR: // sliding fair split 00400 root = rkd_tree(pa, pidx, n, dd, bs, bnd_box, sl_fair_split); 00401 break; 00402 default: 00403 annError("Illegal splitting method", ANNabort); 00404 } 00405 }