bd_tree.cpp

Go to the documentation of this file.
00001 //----------------------------------------------------------------------
00002 // File:                        bd_tree.cpp
00003 // Programmer:          David Mount
00004 // Description:         Basic methods for bd-trees.
00005 // Last modified:       01/04/05 (Version 1.0)
00006 //----------------------------------------------------------------------
00007 // Copyright (c) 1997-2005 University of Maryland and Sunil Arya and
00008 // David Mount.  All Rights Reserved.
00009 // 
00010 // This software and related documentation is part of the Approximate
00011 // Nearest Neighbor Library (ANN).  This software is provided under
00012 // the provisions of the Lesser GNU Public License (LGPL).  See the
00013 // file ../ReadMe.txt for further information.
00014 // 
00015 // The University of Maryland (U.M.) and the authors make no
00016 // representations about the suitability or fitness of this software for
00017 // any purpose.  It is provided "as is" without express or implied
00018 // warranty.
00019 //----------------------------------------------------------------------
00020 // History:
00021 //      Revision 0.1  03/04/98
00022 //              Initial release
00023 //      Revision l.0  04/01/05
00024 //              Fixed centroid shrink threshold condition to depend on the
00025 //                      dimension.
00026 //              Moved dump routine to kd_dump.cpp.
00027 //----------------------------------------------------------------------
00028 
00029 #include "bd_tree.h"                                    // bd-tree declarations
00030 #include "kd_util.h"                                    // kd-tree utilities
00031 #include "kd_split.h"                                   // kd-tree splitting rules
00032 
00033 #include <ANN/ANNperf.h>                                // performance evaluation
00034 
00035 //----------------------------------------------------------------------
00036 //      Printing a bd-tree 
00037 //              These routines print a bd-tree.   See the analogous procedure
00038 //              in kd_tree.cpp for more information.
00039 //----------------------------------------------------------------------
00040 
00041 void ANNbd_shrink::print(                               // print shrinking node
00042                 int level,                                              // depth of node in tree
00043                 ostream &out)                                   // output stream
00044 {
00045         child[ANN_OUT]->print(level+1, out);            // print out-child
00046 
00047         out << "    ";
00048         for (int i = 0; i < level; i++)                         // print indentation
00049                 out << "..";
00050         out << "Shrink";
00051         for (int j = 0; j < n_bnds; j++) {                      // print sides, 2 per line
00052                 if (j % 2 == 0) {
00053                         out << "\n";                                            // newline and indentation
00054                         for (int i = 0; i < level+2; i++) out << "  ";
00055                 }
00056                 out << "  ([" << bnds[j].cd << "]"
00057                          << (bnds[j].sd > 0 ? ">=" : "< ")
00058                          << bnds[j].cv << ")";
00059         }
00060         out << "\n";
00061 
00062         child[ANN_IN]->print(level+1, out);                     // print in-child
00063 }
00064 
00065 //----------------------------------------------------------------------
00066 //      kd_tree statistics utility (for performance evaluation)
00067 //              This routine computes various statistics information for
00068 //              shrinking nodes.  See file kd_tree.cpp for more information.
00069 //----------------------------------------------------------------------
00070 
00071 void ANNbd_shrink::getStats(                                    // get subtree statistics
00072         int                                     dim,                                    // dimension of space
00073         ANNkdStats                      &st,                                    // stats (modified)
00074         ANNorthRect                     &bnd_box)                               // bounding box
00075 {
00076         ANNkdStats ch_stats;                                            // stats for children
00077         ANNorthRect inner_box(dim);                                     // inner box of shrink
00078 
00079         annBnds2Box(bnd_box,                                            // enclosing box
00080                                 dim,                                                    // dimension
00081                                 n_bnds,                                                 // number of bounds
00082                                 bnds,                                                   // bounds array
00083                                 inner_box);                                             // inner box (modified)
00084                                                                                                 // get stats for inner child
00085         ch_stats.reset();                                                       // reset
00086         child[ANN_IN]->getStats(dim, ch_stats, inner_box);
00087         st.merge(ch_stats);                                                     // merge them
00088                                                                                                 // get stats for outer child
00089         ch_stats.reset();                                                       // reset
00090         child[ANN_OUT]->getStats(dim, ch_stats, bnd_box);
00091         st.merge(ch_stats);                                                     // merge them
00092 
00093         st.depth++;                                                                     // increment depth
00094         st.n_shr++;                                                                     // increment number of shrinks
00095 }
00096 
00097 //----------------------------------------------------------------------
00098 // bd-tree constructor
00099 //              This is the main constructor for bd-trees given a set of points.
00100 //              It first builds a skeleton kd-tree as a basis, then computes the
00101 //              bounding box of the data points, and then invokes rbd_tree() to
00102 //              actually build the tree, passing it the appropriate splitting
00103 //              and shrinking information.
00104 //----------------------------------------------------------------------
00105 
00106 ANNkd_ptr rbd_tree(                                             // recursive construction of bd-tree
00107         ANNpointArray           pa,                             // point array
00108         ANNidxArray                     pidx,                   // point indices to store in subtree
00109         int                                     n,                              // number of points
00110         int                                     dim,                    // dimension of space
00111         int                                     bsp,                    // bucket space
00112         ANNorthRect                     &bnd_box,               // bounding box for current node
00113         ANNkd_splitter          splitter,               // splitting routine
00114         ANNshrinkRule           shrink);                // shrinking rule
00115 
00116 ANNbd_tree::ANNbd_tree(                                 // construct from point array
00117         ANNpointArray           pa,                             // point array (with at least n pts)
00118         int                                     n,                              // number of points
00119         int                                     dd,                             // dimension
00120         int                                     bs,                             // bucket size
00121         ANNsplitRule            split,                  // splitting rule
00122         ANNshrinkRule           shrink)                 // shrinking rule
00123         : ANNkd_tree(n, dd, bs)                         // build skeleton base tree
00124 {
00125         pts = pa;                                                       // where the points are
00126         if (n == 0) return;                                     // no points--no sweat
00127 
00128         ANNorthRect bnd_box(dd);                        // bounding box for points
00129                                                                                 // construct bounding rectangle
00130         annEnclRect(pa, pidx, n, dd, bnd_box);
00131                                                                                 // copy to tree structure
00132         bnd_box_lo = annCopyPt(dd, bnd_box.lo);
00133         bnd_box_hi = annCopyPt(dd, bnd_box.hi);
00134 
00135         switch (split) {                                        // build by rule
00136         case ANN_KD_STD:                                        // standard kd-splitting rule
00137                 root = rbd_tree(pa, pidx, n, dd, bs, bnd_box, kd_split, shrink);
00138                 break;
00139         case ANN_KD_MIDPT:                                      // midpoint split
00140                 root = rbd_tree(pa, pidx, n, dd, bs, bnd_box, midpt_split, shrink);
00141                 break;
00142         case ANN_KD_SUGGEST:                            // best (in our opinion)
00143         case ANN_KD_SL_MIDPT:                           // sliding midpoint split
00144                 root = rbd_tree(pa, pidx, n, dd, bs, bnd_box, sl_midpt_split, shrink);
00145                 break;
00146         case ANN_KD_FAIR:                                       // fair split
00147                 root = rbd_tree(pa, pidx, n, dd, bs, bnd_box, fair_split, shrink);
00148                 break;
00149         case ANN_KD_SL_FAIR:                            // sliding fair split
00150                 root = rbd_tree(pa, pidx, n, dd, bs,
00151                                                 bnd_box, sl_fair_split, shrink);
00152                 break;
00153         default:
00154                 annError("Illegal splitting method", ANNabort);
00155         }
00156 }
00157 
00158 //----------------------------------------------------------------------
00159 //      Shrinking rules
00160 //----------------------------------------------------------------------
00161 
00162 enum ANNdecomp {SPLIT, SHRINK};                 // decomposition methods
00163 
00164 //----------------------------------------------------------------------
00165 //      trySimpleShrink - Attempt a simple shrink
00166 //
00167 //              We compute the tight bounding box of the points, and compute
00168 //              the 2*dim ``gaps'' between the sides of the tight box and the
00169 //              bounding box.  If any of the gaps is large enough relative to
00170 //              the longest side of the tight bounding box, then we shrink
00171 //              all sides whose gaps are large enough.  (The reason for
00172 //              comparing against the tight bounding box, is that after
00173 //              shrinking the longest box size will decrease, and if we use
00174 //              the standard bounding box, we may decide to shrink twice in
00175 //              a row.  Since the tight box is fixed, we cannot shrink twice
00176 //              consecutively.)
00177 //----------------------------------------------------------------------
00178 const float BD_GAP_THRESH = 0.5;                // gap threshold (must be < 1)
00179 const int   BD_CT_THRESH  = 2;                  // min number of shrink sides
00180 
00181 ANNdecomp trySimpleShrink(                              // try a simple shrink
00182         ANNpointArray           pa,                             // point array
00183         ANNidxArray                     pidx,                   // point indices to store in subtree
00184         int                                     n,                              // number of points
00185         int                                     dim,                    // dimension of space
00186         const ANNorthRect       &bnd_box,               // current bounding box
00187         ANNorthRect                     &inner_box)             // inner box if shrinking (returned)
00188 {
00189         int i;
00190                                                                                                 // compute tight bounding box
00191         annEnclRect(pa, pidx, n, dim, inner_box);
00192 
00193         ANNcoord max_length = 0;                                        // find longest box side
00194         for (i = 0; i < dim; i++) {
00195                 ANNcoord length = inner_box.hi[i] - inner_box.lo[i];
00196                 if (length > max_length) {
00197                         max_length = length;
00198                 }
00199         }
00200 
00201         int shrink_ct = 0;                                                      // number of sides we shrunk
00202         for (i = 0; i < dim; i++) {                                     // select which sides to shrink
00203                                                                                                 // gap between boxes
00204                 ANNcoord gap_hi = bnd_box.hi[i] - inner_box.hi[i];
00205                                                                                                 // big enough gap to shrink?
00206                 if (gap_hi < max_length*BD_GAP_THRESH)
00207                         inner_box.hi[i] = bnd_box.hi[i];        // no - expand
00208                 else shrink_ct++;                                               // yes - shrink this side
00209 
00210                                                                                                 // repeat for high side
00211                 ANNcoord gap_lo = inner_box.lo[i] - bnd_box.lo[i];
00212                 if (gap_lo < max_length*BD_GAP_THRESH)
00213                         inner_box.lo[i] = bnd_box.lo[i];        // no - expand
00214                 else shrink_ct++;                                               // yes - shrink this side
00215         }
00216 
00217         if (shrink_ct >= BD_CT_THRESH)                          // did we shrink enough sides?
00218                  return SHRINK;
00219         else return SPLIT;
00220 }
00221 
00222 //----------------------------------------------------------------------
00223 //      tryCentroidShrink - Attempt a centroid shrink
00224 //
00225 //      We repeatedly apply the splitting rule, always to the larger subset
00226 //      of points, until the number of points decreases by the constant
00227 //      fraction BD_FRACTION.  If this takes more than dim*BD_MAX_SPLIT_FAC
00228 //      splits for this to happen, then we shrink to the final inner box
00229 //      Otherwise we split.
00230 //----------------------------------------------------------------------
00231 
00232 const float     BD_MAX_SPLIT_FAC = 0.5;         // maximum number of splits allowed
00233 const float     BD_FRACTION = 0.5;                      // ...to reduce points by this fraction
00234                                                                                 // ...This must be < 1.
00235 
00236 ANNdecomp tryCentroidShrink(                    // try a centroid shrink
00237         ANNpointArray           pa,                             // point array
00238         ANNidxArray                     pidx,                   // point indices to store in subtree
00239         int                                     n,                              // number of points
00240         int                                     dim,                    // dimension of space
00241         const ANNorthRect       &bnd_box,               // current bounding box
00242         ANNkd_splitter          splitter,               // splitting procedure
00243         ANNorthRect                     &inner_box)             // inner box if shrinking (returned)
00244 {
00245         int n_sub = n;                                          // number of points in subset
00246         int n_goal = (int) (n*BD_FRACTION); // number of point in goal
00247         int n_splits = 0;                                       // number of splits needed
00248                                                                                 // initialize inner box to bounding box
00249         annAssignRect(dim, inner_box, bnd_box);
00250 
00251         while (n_sub > n_goal) {                        // keep splitting until goal reached
00252                 int cd;                                                 // cut dim from splitter (ignored)
00253                 ANNcoord cv;                                    // cut value from splitter (ignored)
00254                 int n_lo;                                               // number of points on low side
00255                                                                                 // invoke splitting procedure
00256                 (*splitter)(pa, pidx, inner_box, n_sub, dim, cd, cv, n_lo);
00257                 n_splits++;                                             // increment split count
00258 
00259                 if (n_lo >= n_sub/2) {                  // most points on low side
00260                         inner_box.hi[cd] = cv;          // collapse high side
00261                         n_sub = n_lo;                           // recurse on lower points
00262                 }
00263                 else {                                                  // most points on high side
00264                         inner_box.lo[cd] = cv;          // collapse low side
00265                         pidx += n_lo;                           // recurse on higher points
00266                         n_sub -= n_lo;
00267                 }
00268         }
00269     if (n_splits > dim*BD_MAX_SPLIT_FAC)// took too many splits
00270                 return SHRINK;                                  // shrink to final subset
00271         else
00272                 return SPLIT;
00273 }
00274 
00275 //----------------------------------------------------------------------
00276 //      selectDecomp - select which decomposition to use
00277 //----------------------------------------------------------------------
00278 
00279 ANNdecomp selectDecomp(                 // select decomposition method
00280         ANNpointArray           pa,                             // point array
00281         ANNidxArray                     pidx,                   // point indices to store in subtree
00282         int                                     n,                              // number of points
00283         int                                     dim,                    // dimension of space
00284         const ANNorthRect       &bnd_box,               // current bounding box
00285         ANNkd_splitter          splitter,               // splitting procedure
00286         ANNshrinkRule           shrink,                 // shrinking rule
00287         ANNorthRect                     &inner_box)             // inner box if shrinking (returned)
00288 {
00289         ANNdecomp decomp = SPLIT;                       // decomposition
00290 
00291         switch (shrink) {                                       // check shrinking rule
00292         case ANN_BD_NONE:                                       // no shrinking allowed
00293                 decomp = SPLIT;
00294                 break;
00295         case ANN_BD_SUGGEST:                            // author's suggestion
00296         case ANN_BD_SIMPLE:                                     // simple shrink
00297                 decomp = trySimpleShrink(
00298                                 pa, pidx,                               // points and indices
00299                                 n, dim,                                 // number of points and dimension
00300                                 bnd_box,                                // current bounding box
00301                                 inner_box);                             // inner box if shrinking (returned)
00302                 break;
00303         case ANN_BD_CENTROID:                           // centroid shrink
00304                 decomp = tryCentroidShrink(
00305                                 pa, pidx,                               // points and indices
00306                                 n, dim,                                 // number of points and dimension
00307                                 bnd_box,                                // current bounding box
00308                                 splitter,                               // splitting procedure
00309                                 inner_box);                             // inner box if shrinking (returned)
00310                 break;
00311         default:
00312                 annError("Illegal shrinking rule", ANNabort);
00313         }
00314         return decomp;
00315 }
00316 
00317 //----------------------------------------------------------------------
00318 //      rbd_tree - recursive procedure to build a bd-tree
00319 //
00320 //              This is analogous to rkd_tree, but for bd-trees.  See the
00321 //              procedure rkd_tree() in kd_split.cpp for more information.
00322 //
00323 //              If the number of points falls below the bucket size, then a
00324 //              leaf node is created for the points.  Otherwise we invoke the
00325 //              procedure selectDecomp() which determines whether we are to
00326 //              split or shrink.  If splitting is chosen, then we essentially
00327 //              do exactly as rkd_tree() would, and invoke the specified
00328 //              splitting procedure to the points.  Otherwise, the selection
00329 //              procedure returns a bounding box, from which we extract the
00330 //              appropriate shrinking bounds, and create a shrinking node.
00331 //              Finally the points are subdivided, and the procedure is
00332 //              invoked recursively on the two subsets to form the children.
00333 //----------------------------------------------------------------------
00334 
00335 ANNkd_ptr rbd_tree(                             // recursive construction of bd-tree
00336         ANNpointArray           pa,                             // point array
00337         ANNidxArray                     pidx,                   // point indices to store in subtree
00338         int                                     n,                              // number of points
00339         int                                     dim,                    // dimension of space
00340         int                                     bsp,                    // bucket space
00341         ANNorthRect                     &bnd_box,               // bounding box for current node
00342         ANNkd_splitter          splitter,               // splitting routine
00343         ANNshrinkRule           shrink)                 // shrinking rule
00344 {
00345         ANNdecomp decomp;                                       // decomposition method
00346 
00347         ANNorthRect inner_box(dim);                     // inner box (if shrinking)
00348 
00349         if (n <= bsp) {                                         // n small, make a leaf node
00350                 if (n == 0)                                             // empty leaf node
00351                         return KD_TRIVIAL;                      // return (canonical) empty leaf
00352                 else                                                    // construct the node and return
00353                         return new ANNkd_leaf(n, pidx); 
00354         }
00355         
00356         decomp = selectDecomp(                          // select decomposition method
00357                                 pa, pidx,                               // points and indices
00358                                 n, dim,                                 // number of points and dimension
00359                                 bnd_box,                                // current bounding box
00360                                 splitter, shrink,               // splitting/shrinking methods
00361                                 inner_box);                             // inner box if shrinking (returned)
00362         
00363         if (decomp == SPLIT) {                          // split selected
00364                 int cd;                                                 // cutting dimension
00365                 ANNcoord cv;                                    // cutting value
00366                 int n_lo;                                               // number on low side of cut
00367                                                                                 // invoke splitting procedure
00368                 (*splitter)(pa, pidx, bnd_box, n, dim, cd, cv, n_lo);
00369 
00370                 ANNcoord lv = bnd_box.lo[cd];   // save bounds for cutting dimension
00371                 ANNcoord hv = bnd_box.hi[cd];
00372 
00373                 bnd_box.hi[cd] = cv;                    // modify bounds for left subtree
00374                 ANNkd_ptr lo = rbd_tree(                // build left subtree
00375                                 pa, pidx, n_lo,                 // ...from pidx[0..n_lo-1]
00376                                 dim, bsp, bnd_box, splitter, shrink);
00377                 bnd_box.hi[cd] = hv;                    // restore bounds
00378 
00379                 bnd_box.lo[cd] = cv;                    // modify bounds for right subtree
00380                 ANNkd_ptr hi = rbd_tree(                // build right subtree
00381                                 pa, pidx + n_lo, n-n_lo,// ...from pidx[n_lo..n-1]
00382                                 dim, bsp, bnd_box, splitter, shrink);
00383                 bnd_box.lo[cd] = lv;                    // restore bounds
00384                                                                                 // create the splitting node
00385                 return new ANNkd_split(cd, cv, lv, hv, lo, hi);
00386         }
00387         else {                                                          // shrink selected
00388                 int n_in;                                               // number of points in box
00389                 int n_bnds;                                             // number of bounding sides
00390 
00391                 annBoxSplit(                                    // split points around inner box
00392                                 pa,                                             // points to split
00393                                 pidx,                                   // point indices
00394                                 n,                                              // number of points
00395                                 dim,                                    // dimension
00396                                 inner_box,                              // inner box
00397                                 n_in);                                  // number of points inside (returned)
00398 
00399                 ANNkd_ptr in = rbd_tree(                // build inner subtree pidx[0..n_in-1]
00400                                 pa, pidx, n_in, dim, bsp, inner_box, splitter, shrink);
00401                 ANNkd_ptr out = rbd_tree(               // build outer subtree pidx[n_in..n]
00402                                 pa, pidx+n_in, n - n_in, dim, bsp, bnd_box, splitter, shrink);
00403 
00404                 ANNorthHSArray bnds = NULL;             // bounds (alloc in Box2Bnds and
00405                                                                                 // ...freed in bd_shrink destroyer)
00406 
00407                 annBox2Bnds(                                    // convert inner box to bounds
00408                                 inner_box,                              // inner box
00409                                 bnd_box,                                // enclosing box
00410                                 dim,                                    // dimension
00411                                 n_bnds,                                 // number of bounds (returned)
00412                                 bnds);                                  // bounds array (modified)
00413 
00414                                                                                 // return shrinking node
00415                 return new ANNbd_shrink(n_bnds, bnds, in, out);
00416         }
00417 } 

Generated on Wed Nov 23 18:59:57 2011 for FreeCAD by  doxygen 1.6.1