00001 //---------------------------------------------------------------------- 00002 // File: bd_tree.cpp 00003 // Programmer: David Mount 00004 // Description: Basic methods for bd-trees. 00005 // Last modified: 01/04/05 (Version 1.0) 00006 //---------------------------------------------------------------------- 00007 // Copyright (c) 1997-2005 University of Maryland and Sunil Arya and 00008 // David Mount. All Rights Reserved. 00009 // 00010 // This software and related documentation is part of the Approximate 00011 // Nearest Neighbor Library (ANN). This software is provided under 00012 // the provisions of the Lesser GNU Public License (LGPL). See the 00013 // file ../ReadMe.txt for further information. 00014 // 00015 // The University of Maryland (U.M.) and the authors make no 00016 // representations about the suitability or fitness of this software for 00017 // any purpose. It is provided "as is" without express or implied 00018 // warranty. 00019 //---------------------------------------------------------------------- 00020 // History: 00021 // Revision 0.1 03/04/98 00022 // Initial release 00023 // Revision l.0 04/01/05 00024 // Fixed centroid shrink threshold condition to depend on the 00025 // dimension. 00026 // Moved dump routine to kd_dump.cpp. 00027 //---------------------------------------------------------------------- 00028 00029 #include "bd_tree.h" // bd-tree declarations 00030 #include "kd_util.h" // kd-tree utilities 00031 #include "kd_split.h" // kd-tree splitting rules 00032 00033 #include <ANN/ANNperf.h> // performance evaluation 00034 00035 //---------------------------------------------------------------------- 00036 // Printing a bd-tree 00037 // These routines print a bd-tree. See the analogous procedure 00038 // in kd_tree.cpp for more information. 00039 //---------------------------------------------------------------------- 00040 00041 void ANNbd_shrink::print( // print shrinking node 00042 int level, // depth of node in tree 00043 ostream &out) // output stream 00044 { 00045 child[ANN_OUT]->print(level+1, out); // print out-child 00046 00047 out << " "; 00048 for (int i = 0; i < level; i++) // print indentation 00049 out << ".."; 00050 out << "Shrink"; 00051 for (int j = 0; j < n_bnds; j++) { // print sides, 2 per line 00052 if (j % 2 == 0) { 00053 out << "\n"; // newline and indentation 00054 for (int i = 0; i < level+2; i++) out << " "; 00055 } 00056 out << " ([" << bnds[j].cd << "]" 00057 << (bnds[j].sd > 0 ? ">=" : "< ") 00058 << bnds[j].cv << ")"; 00059 } 00060 out << "\n"; 00061 00062 child[ANN_IN]->print(level+1, out); // print in-child 00063 } 00064 00065 //---------------------------------------------------------------------- 00066 // kd_tree statistics utility (for performance evaluation) 00067 // This routine computes various statistics information for 00068 // shrinking nodes. See file kd_tree.cpp for more information. 00069 //---------------------------------------------------------------------- 00070 00071 void ANNbd_shrink::getStats( // get subtree statistics 00072 int dim, // dimension of space 00073 ANNkdStats &st, // stats (modified) 00074 ANNorthRect &bnd_box) // bounding box 00075 { 00076 ANNkdStats ch_stats; // stats for children 00077 ANNorthRect inner_box(dim); // inner box of shrink 00078 00079 annBnds2Box(bnd_box, // enclosing box 00080 dim, // dimension 00081 n_bnds, // number of bounds 00082 bnds, // bounds array 00083 inner_box); // inner box (modified) 00084 // get stats for inner child 00085 ch_stats.reset(); // reset 00086 child[ANN_IN]->getStats(dim, ch_stats, inner_box); 00087 st.merge(ch_stats); // merge them 00088 // get stats for outer child 00089 ch_stats.reset(); // reset 00090 child[ANN_OUT]->getStats(dim, ch_stats, bnd_box); 00091 st.merge(ch_stats); // merge them 00092 00093 st.depth++; // increment depth 00094 st.n_shr++; // increment number of shrinks 00095 } 00096 00097 //---------------------------------------------------------------------- 00098 // bd-tree constructor 00099 // This is the main constructor for bd-trees given a set of points. 00100 // It first builds a skeleton kd-tree as a basis, then computes the 00101 // bounding box of the data points, and then invokes rbd_tree() to 00102 // actually build the tree, passing it the appropriate splitting 00103 // and shrinking information. 00104 //---------------------------------------------------------------------- 00105 00106 ANNkd_ptr rbd_tree( // recursive construction of bd-tree 00107 ANNpointArray pa, // point array 00108 ANNidxArray pidx, // point indices to store in subtree 00109 int n, // number of points 00110 int dim, // dimension of space 00111 int bsp, // bucket space 00112 ANNorthRect &bnd_box, // bounding box for current node 00113 ANNkd_splitter splitter, // splitting routine 00114 ANNshrinkRule shrink); // shrinking rule 00115 00116 ANNbd_tree::ANNbd_tree( // construct from point array 00117 ANNpointArray pa, // point array (with at least n pts) 00118 int n, // number of points 00119 int dd, // dimension 00120 int bs, // bucket size 00121 ANNsplitRule split, // splitting rule 00122 ANNshrinkRule shrink) // shrinking rule 00123 : ANNkd_tree(n, dd, bs) // build skeleton base tree 00124 { 00125 pts = pa; // where the points are 00126 if (n == 0) return; // no points--no sweat 00127 00128 ANNorthRect bnd_box(dd); // bounding box for points 00129 // construct bounding rectangle 00130 annEnclRect(pa, pidx, n, dd, bnd_box); 00131 // copy to tree structure 00132 bnd_box_lo = annCopyPt(dd, bnd_box.lo); 00133 bnd_box_hi = annCopyPt(dd, bnd_box.hi); 00134 00135 switch (split) { // build by rule 00136 case ANN_KD_STD: // standard kd-splitting rule 00137 root = rbd_tree(pa, pidx, n, dd, bs, bnd_box, kd_split, shrink); 00138 break; 00139 case ANN_KD_MIDPT: // midpoint split 00140 root = rbd_tree(pa, pidx, n, dd, bs, bnd_box, midpt_split, shrink); 00141 break; 00142 case ANN_KD_SUGGEST: // best (in our opinion) 00143 case ANN_KD_SL_MIDPT: // sliding midpoint split 00144 root = rbd_tree(pa, pidx, n, dd, bs, bnd_box, sl_midpt_split, shrink); 00145 break; 00146 case ANN_KD_FAIR: // fair split 00147 root = rbd_tree(pa, pidx, n, dd, bs, bnd_box, fair_split, shrink); 00148 break; 00149 case ANN_KD_SL_FAIR: // sliding fair split 00150 root = rbd_tree(pa, pidx, n, dd, bs, 00151 bnd_box, sl_fair_split, shrink); 00152 break; 00153 default: 00154 annError("Illegal splitting method", ANNabort); 00155 } 00156 } 00157 00158 //---------------------------------------------------------------------- 00159 // Shrinking rules 00160 //---------------------------------------------------------------------- 00161 00162 enum ANNdecomp {SPLIT, SHRINK}; // decomposition methods 00163 00164 //---------------------------------------------------------------------- 00165 // trySimpleShrink - Attempt a simple shrink 00166 // 00167 // We compute the tight bounding box of the points, and compute 00168 // the 2*dim ``gaps'' between the sides of the tight box and the 00169 // bounding box. If any of the gaps is large enough relative to 00170 // the longest side of the tight bounding box, then we shrink 00171 // all sides whose gaps are large enough. (The reason for 00172 // comparing against the tight bounding box, is that after 00173 // shrinking the longest box size will decrease, and if we use 00174 // the standard bounding box, we may decide to shrink twice in 00175 // a row. Since the tight box is fixed, we cannot shrink twice 00176 // consecutively.) 00177 //---------------------------------------------------------------------- 00178 const float BD_GAP_THRESH = 0.5; // gap threshold (must be < 1) 00179 const int BD_CT_THRESH = 2; // min number of shrink sides 00180 00181 ANNdecomp trySimpleShrink( // try a simple shrink 00182 ANNpointArray pa, // point array 00183 ANNidxArray pidx, // point indices to store in subtree 00184 int n, // number of points 00185 int dim, // dimension of space 00186 const ANNorthRect &bnd_box, // current bounding box 00187 ANNorthRect &inner_box) // inner box if shrinking (returned) 00188 { 00189 int i; 00190 // compute tight bounding box 00191 annEnclRect(pa, pidx, n, dim, inner_box); 00192 00193 ANNcoord max_length = 0; // find longest box side 00194 for (i = 0; i < dim; i++) { 00195 ANNcoord length = inner_box.hi[i] - inner_box.lo[i]; 00196 if (length > max_length) { 00197 max_length = length; 00198 } 00199 } 00200 00201 int shrink_ct = 0; // number of sides we shrunk 00202 for (i = 0; i < dim; i++) { // select which sides to shrink 00203 // gap between boxes 00204 ANNcoord gap_hi = bnd_box.hi[i] - inner_box.hi[i]; 00205 // big enough gap to shrink? 00206 if (gap_hi < max_length*BD_GAP_THRESH) 00207 inner_box.hi[i] = bnd_box.hi[i]; // no - expand 00208 else shrink_ct++; // yes - shrink this side 00209 00210 // repeat for high side 00211 ANNcoord gap_lo = inner_box.lo[i] - bnd_box.lo[i]; 00212 if (gap_lo < max_length*BD_GAP_THRESH) 00213 inner_box.lo[i] = bnd_box.lo[i]; // no - expand 00214 else shrink_ct++; // yes - shrink this side 00215 } 00216 00217 if (shrink_ct >= BD_CT_THRESH) // did we shrink enough sides? 00218 return SHRINK; 00219 else return SPLIT; 00220 } 00221 00222 //---------------------------------------------------------------------- 00223 // tryCentroidShrink - Attempt a centroid shrink 00224 // 00225 // We repeatedly apply the splitting rule, always to the larger subset 00226 // of points, until the number of points decreases by the constant 00227 // fraction BD_FRACTION. If this takes more than dim*BD_MAX_SPLIT_FAC 00228 // splits for this to happen, then we shrink to the final inner box 00229 // Otherwise we split. 00230 //---------------------------------------------------------------------- 00231 00232 const float BD_MAX_SPLIT_FAC = 0.5; // maximum number of splits allowed 00233 const float BD_FRACTION = 0.5; // ...to reduce points by this fraction 00234 // ...This must be < 1. 00235 00236 ANNdecomp tryCentroidShrink( // try a centroid shrink 00237 ANNpointArray pa, // point array 00238 ANNidxArray pidx, // point indices to store in subtree 00239 int n, // number of points 00240 int dim, // dimension of space 00241 const ANNorthRect &bnd_box, // current bounding box 00242 ANNkd_splitter splitter, // splitting procedure 00243 ANNorthRect &inner_box) // inner box if shrinking (returned) 00244 { 00245 int n_sub = n; // number of points in subset 00246 int n_goal = (int) (n*BD_FRACTION); // number of point in goal 00247 int n_splits = 0; // number of splits needed 00248 // initialize inner box to bounding box 00249 annAssignRect(dim, inner_box, bnd_box); 00250 00251 while (n_sub > n_goal) { // keep splitting until goal reached 00252 int cd; // cut dim from splitter (ignored) 00253 ANNcoord cv; // cut value from splitter (ignored) 00254 int n_lo; // number of points on low side 00255 // invoke splitting procedure 00256 (*splitter)(pa, pidx, inner_box, n_sub, dim, cd, cv, n_lo); 00257 n_splits++; // increment split count 00258 00259 if (n_lo >= n_sub/2) { // most points on low side 00260 inner_box.hi[cd] = cv; // collapse high side 00261 n_sub = n_lo; // recurse on lower points 00262 } 00263 else { // most points on high side 00264 inner_box.lo[cd] = cv; // collapse low side 00265 pidx += n_lo; // recurse on higher points 00266 n_sub -= n_lo; 00267 } 00268 } 00269 if (n_splits > dim*BD_MAX_SPLIT_FAC)// took too many splits 00270 return SHRINK; // shrink to final subset 00271 else 00272 return SPLIT; 00273 } 00274 00275 //---------------------------------------------------------------------- 00276 // selectDecomp - select which decomposition to use 00277 //---------------------------------------------------------------------- 00278 00279 ANNdecomp selectDecomp( // select decomposition method 00280 ANNpointArray pa, // point array 00281 ANNidxArray pidx, // point indices to store in subtree 00282 int n, // number of points 00283 int dim, // dimension of space 00284 const ANNorthRect &bnd_box, // current bounding box 00285 ANNkd_splitter splitter, // splitting procedure 00286 ANNshrinkRule shrink, // shrinking rule 00287 ANNorthRect &inner_box) // inner box if shrinking (returned) 00288 { 00289 ANNdecomp decomp = SPLIT; // decomposition 00290 00291 switch (shrink) { // check shrinking rule 00292 case ANN_BD_NONE: // no shrinking allowed 00293 decomp = SPLIT; 00294 break; 00295 case ANN_BD_SUGGEST: // author's suggestion 00296 case ANN_BD_SIMPLE: // simple shrink 00297 decomp = trySimpleShrink( 00298 pa, pidx, // points and indices 00299 n, dim, // number of points and dimension 00300 bnd_box, // current bounding box 00301 inner_box); // inner box if shrinking (returned) 00302 break; 00303 case ANN_BD_CENTROID: // centroid shrink 00304 decomp = tryCentroidShrink( 00305 pa, pidx, // points and indices 00306 n, dim, // number of points and dimension 00307 bnd_box, // current bounding box 00308 splitter, // splitting procedure 00309 inner_box); // inner box if shrinking (returned) 00310 break; 00311 default: 00312 annError("Illegal shrinking rule", ANNabort); 00313 } 00314 return decomp; 00315 } 00316 00317 //---------------------------------------------------------------------- 00318 // rbd_tree - recursive procedure to build a bd-tree 00319 // 00320 // This is analogous to rkd_tree, but for bd-trees. See the 00321 // procedure rkd_tree() in kd_split.cpp for more information. 00322 // 00323 // If the number of points falls below the bucket size, then a 00324 // leaf node is created for the points. Otherwise we invoke the 00325 // procedure selectDecomp() which determines whether we are to 00326 // split or shrink. If splitting is chosen, then we essentially 00327 // do exactly as rkd_tree() would, and invoke the specified 00328 // splitting procedure to the points. Otherwise, the selection 00329 // procedure returns a bounding box, from which we extract the 00330 // appropriate shrinking bounds, and create a shrinking node. 00331 // Finally the points are subdivided, and the procedure is 00332 // invoked recursively on the two subsets to form the children. 00333 //---------------------------------------------------------------------- 00334 00335 ANNkd_ptr rbd_tree( // recursive construction of bd-tree 00336 ANNpointArray pa, // point array 00337 ANNidxArray pidx, // point indices to store in subtree 00338 int n, // number of points 00339 int dim, // dimension of space 00340 int bsp, // bucket space 00341 ANNorthRect &bnd_box, // bounding box for current node 00342 ANNkd_splitter splitter, // splitting routine 00343 ANNshrinkRule shrink) // shrinking rule 00344 { 00345 ANNdecomp decomp; // decomposition method 00346 00347 ANNorthRect inner_box(dim); // inner box (if shrinking) 00348 00349 if (n <= bsp) { // n small, make a leaf node 00350 if (n == 0) // empty leaf node 00351 return KD_TRIVIAL; // return (canonical) empty leaf 00352 else // construct the node and return 00353 return new ANNkd_leaf(n, pidx); 00354 } 00355 00356 decomp = selectDecomp( // select decomposition method 00357 pa, pidx, // points and indices 00358 n, dim, // number of points and dimension 00359 bnd_box, // current bounding box 00360 splitter, shrink, // splitting/shrinking methods 00361 inner_box); // inner box if shrinking (returned) 00362 00363 if (decomp == SPLIT) { // split selected 00364 int cd; // cutting dimension 00365 ANNcoord cv; // cutting value 00366 int n_lo; // number on low side of cut 00367 // invoke splitting procedure 00368 (*splitter)(pa, pidx, bnd_box, n, dim, cd, cv, n_lo); 00369 00370 ANNcoord lv = bnd_box.lo[cd]; // save bounds for cutting dimension 00371 ANNcoord hv = bnd_box.hi[cd]; 00372 00373 bnd_box.hi[cd] = cv; // modify bounds for left subtree 00374 ANNkd_ptr lo = rbd_tree( // build left subtree 00375 pa, pidx, n_lo, // ...from pidx[0..n_lo-1] 00376 dim, bsp, bnd_box, splitter, shrink); 00377 bnd_box.hi[cd] = hv; // restore bounds 00378 00379 bnd_box.lo[cd] = cv; // modify bounds for right subtree 00380 ANNkd_ptr hi = rbd_tree( // build right subtree 00381 pa, pidx + n_lo, n-n_lo,// ...from pidx[n_lo..n-1] 00382 dim, bsp, bnd_box, splitter, shrink); 00383 bnd_box.lo[cd] = lv; // restore bounds 00384 // create the splitting node 00385 return new ANNkd_split(cd, cv, lv, hv, lo, hi); 00386 } 00387 else { // shrink selected 00388 int n_in; // number of points in box 00389 int n_bnds; // number of bounding sides 00390 00391 annBoxSplit( // split points around inner box 00392 pa, // points to split 00393 pidx, // point indices 00394 n, // number of points 00395 dim, // dimension 00396 inner_box, // inner box 00397 n_in); // number of points inside (returned) 00398 00399 ANNkd_ptr in = rbd_tree( // build inner subtree pidx[0..n_in-1] 00400 pa, pidx, n_in, dim, bsp, inner_box, splitter, shrink); 00401 ANNkd_ptr out = rbd_tree( // build outer subtree pidx[n_in..n] 00402 pa, pidx+n_in, n - n_in, dim, bsp, bnd_box, splitter, shrink); 00403 00404 ANNorthHSArray bnds = NULL; // bounds (alloc in Box2Bnds and 00405 // ...freed in bd_shrink destroyer) 00406 00407 annBox2Bnds( // convert inner box to bounds 00408 inner_box, // inner box 00409 bnd_box, // enclosing box 00410 dim, // dimension 00411 n_bnds, // number of bounds (returned) 00412 bnds); // bounds array (modified) 00413 00414 // return shrinking node 00415 return new ANNbd_shrink(n_bnds, bnds, in, out); 00416 } 00417 }