kd_tree.cpp

Go to the documentation of this file.
00001 //----------------------------------------------------------------------
00002 // File:                        kd_tree.cpp
00003 // Programmer:          Sunil Arya and David Mount
00004 // Description:         Basic methods for kd-trees.
00005 // Last modified:       01/04/05 (Version 1.0)
00006 //----------------------------------------------------------------------
00007 // Copyright (c) 1997-2005 University of Maryland and Sunil Arya and
00008 // David Mount.  All Rights Reserved.
00009 // 
00010 // This software and related documentation is part of the Approximate
00011 // Nearest Neighbor Library (ANN).  This software is provided under
00012 // the provisions of the Lesser GNU Public License (LGPL).  See the
00013 // file ../ReadMe.txt for further information.
00014 // 
00015 // The University of Maryland (U.M.) and the authors make no
00016 // representations about the suitability or fitness of this software for
00017 // any purpose.  It is provided "as is" without express or implied
00018 // warranty.
00019 //----------------------------------------------------------------------
00020 // History:
00021 //      Revision 0.1  03/04/98
00022 //              Initial release
00023 //      Revision 1.0  04/01/05
00024 //              Increased aspect ratio bound (ANN_AR_TOOBIG) from 100 to 1000.
00025 //              Fixed leaf counts to count trivial leaves.
00026 //              Added optional pa, pi arguments to Skeleton kd_tree constructor
00027 //                      for use in load constructor.
00028 //              Added annClose() to eliminate KD_TRIVIAL memory leak.
00029 //----------------------------------------------------------------------
00030 
00031 #include "kd_tree.h"                                    // kd-tree declarations
00032 #include "kd_split.h"                                   // kd-tree splitting rules
00033 #include "kd_util.h"                                    // kd-tree utilities
00034 #include <ANN/ANNperf.h>                                // performance evaluation
00035 
00036 //----------------------------------------------------------------------
00037 //      Global data
00038 //
00039 //      For some splitting rules, especially with small bucket sizes,
00040 //      it is possible to generate a large number of empty leaf nodes.
00041 //      To save storage we allocate a single trivial leaf node which
00042 //      contains no points.  For messy coding reasons it is convenient
00043 //      to have it reference a trivial point index.
00044 //
00045 //      KD_TRIVIAL is allocated when the first kd-tree is created.  It
00046 //      must *never* deallocated (since it may be shared by more than
00047 //      one tree).
00048 //----------------------------------------------------------------------
00049 static int                              IDX_TRIVIAL[] = {0};    // trivial point index
00050 ANNkd_leaf                              *KD_TRIVIAL = NULL;             // trivial leaf node
00051 
00052 //----------------------------------------------------------------------
00053 //      Printing the kd-tree 
00054 //              These routines print a kd-tree in reverse inorder (high then
00055 //              root then low).  (This is so that if you look at the output
00056 //              from the right side it appear from left to right in standard
00057 //              inorder.)  When outputting leaves we output only the point
00058 //              indices rather than the point coordinates. There is an option
00059 //              to print the point coordinates separately.
00060 //
00061 //              The tree printing routine calls the printing routines on the
00062 //              individual nodes of the tree, passing in the level or depth
00063 //              in the tree.  The level in the tree is used to print indentation
00064 //              for readability.
00065 //----------------------------------------------------------------------
00066 
00067 void ANNkd_split::print(                                // print splitting node
00068                 int level,                                              // depth of node in tree
00069                 ostream &out)                                   // output stream
00070 {
00071         child[ANN_HI]->print(level+1, out);     // print high child
00072         out << "    ";
00073         for (int i = 0; i < level; i++)         // print indentation
00074                 out << "..";
00075         out << "Split cd=" << cut_dim << " cv=" << cut_val;
00076         out << " lbnd=" << cd_bnds[ANN_LO];
00077         out << " hbnd=" << cd_bnds[ANN_HI];
00078         out << "\n";
00079         child[ANN_LO]->print(level+1, out);     // print low child
00080 }
00081 
00082 void ANNkd_leaf::print(                                 // print leaf node
00083                 int level,                                              // depth of node in tree
00084                 ostream &out)                                   // output stream
00085 {
00086 
00087         out << "    ";
00088         for (int i = 0; i < level; i++)         // print indentation
00089                 out << "..";
00090 
00091         if (this == KD_TRIVIAL) {                       // canonical trivial leaf node
00092                 out << "Leaf (trivial)\n";
00093         }
00094         else{
00095                 out << "Leaf n=" << n_pts << " <";
00096                 for (int j = 0; j < n_pts; j++) {
00097                         out << bkt[j];
00098                         if (j < n_pts-1) out << ",";
00099                 }
00100                 out << ">\n";
00101         }
00102 }
00103 
00104 void ANNkd_tree::Print(                                 // print entire tree
00105                 ANNbool with_pts,                               // print points as well?
00106                 ostream &out)                                   // output stream
00107 {
00108         out << "ANN Version " << ANNversion << "\n";
00109         if (with_pts) {                                         // print point coordinates
00110                 out << "    Points:\n";
00111                 for (int i = 0; i < n_pts; i++) {
00112                         out << "\t" << i << ": ";
00113                         annPrintPt(pts[i], dim, out);
00114                         out << "\n";
00115                 }
00116         }
00117         if (root == NULL)                                       // empty tree?
00118                 out << "    Null tree.\n";
00119         else {
00120                 root->print(0, out);                    // invoke printing at root
00121         }
00122 }
00123 
00124 //----------------------------------------------------------------------
00125 //      kd_tree statistics (for performance evaluation)
00126 //              This routine compute various statistics information for
00127 //              a kd-tree.  It is used by the implementors for performance
00128 //              evaluation of the data structure.
00129 //----------------------------------------------------------------------
00130 
00131 #define MAX(a,b)                ((a) > (b) ? (a) : (b))
00132 
00133 void ANNkdStats::merge(const ANNkdStats &st)    // merge stats from child 
00134 {
00135         n_lf += st.n_lf;                        n_tl += st.n_tl;
00136         n_spl += st.n_spl;                      n_shr += st.n_shr;
00137         depth = MAX(depth, st.depth);
00138         sum_ar += st.sum_ar;
00139 }
00140 
00141 //----------------------------------------------------------------------
00142 //      Update statistics for nodes
00143 //----------------------------------------------------------------------
00144 
00145 const double ANN_AR_TOOBIG = 1000;                              // too big an aspect ratio
00146 
00147 void ANNkd_leaf::getStats(                                              // get subtree statistics
00148         int                                     dim,                                    // dimension of space
00149         ANNkdStats                      &st,                                    // stats (modified)
00150         ANNorthRect                     &bnd_box)                               // bounding box
00151 {
00152         st.reset();
00153         st.n_lf = 1;                                                            // count this leaf
00154         if (this == KD_TRIVIAL) st.n_tl = 1;            // count trivial leaf
00155         double ar = annAspectRatio(dim, bnd_box);       // aspect ratio of leaf
00156                                                                                                 // incr sum (ignore outliers)
00157         st.sum_ar += float(ar < ANN_AR_TOOBIG ? ar : ANN_AR_TOOBIG);
00158 }
00159 
00160 void ANNkd_split::getStats(                                             // get subtree statistics
00161         int                                     dim,                                    // dimension of space
00162         ANNkdStats                      &st,                                    // stats (modified)
00163         ANNorthRect                     &bnd_box)                               // bounding box
00164 {
00165         ANNkdStats ch_stats;                                            // stats for children
00166                                                                                                 // get stats for low child
00167         ANNcoord hv = bnd_box.hi[cut_dim];                      // save box bounds
00168         bnd_box.hi[cut_dim] = cut_val;                          // upper bound for low child
00169         ch_stats.reset();                                                       // reset
00170         child[ANN_LO]->getStats(dim, ch_stats, bnd_box);
00171         st.merge(ch_stats);                                                     // merge them
00172         bnd_box.hi[cut_dim] = hv;                                       // restore bound
00173                                                                                                 // get stats for high child
00174         ANNcoord lv = bnd_box.lo[cut_dim];                      // save box bounds
00175         bnd_box.lo[cut_dim] = cut_val;                          // lower bound for high child
00176         ch_stats.reset();                                                       // reset
00177         child[ANN_HI]->getStats(dim, ch_stats, bnd_box);
00178         st.merge(ch_stats);                                                     // merge them
00179         bnd_box.lo[cut_dim] = lv;                                       // restore bound
00180 
00181         st.depth++;                                                                     // increment depth
00182         st.n_spl++;                                                                     // increment number of splits
00183 }
00184 
00185 //----------------------------------------------------------------------
00186 //      getStats
00187 //              Collects a number of statistics related to kd_tree or
00188 //              bd_tree.
00189 //----------------------------------------------------------------------
00190 
00191 void ANNkd_tree::getStats(                                              // get tree statistics
00192         ANNkdStats                      &st)                                    // stats (modified)
00193 {
00194         st.reset(dim, n_pts, bkt_size);                         // reset stats
00195                                                                                                 // create bounding box
00196         ANNorthRect bnd_box(dim, bnd_box_lo, bnd_box_hi);
00197         if (root != NULL) {                                                     // if nonempty tree
00198                 root->getStats(dim, st, bnd_box);               // get statistics
00199                 st.avg_ar = st.sum_ar / st.n_lf;                // average leaf asp ratio
00200         }
00201 }
00202 
00203 //----------------------------------------------------------------------
00204 //      kd_tree destructor
00205 //              The destructor just frees the various elements that were
00206 //              allocated in the construction process.
00207 //----------------------------------------------------------------------
00208 
00209 ANNkd_tree::~ANNkd_tree()                               // tree destructor
00210 {
00211         if (root != NULL) delete root;
00212         if (pidx != NULL) delete [] pidx;
00213         if (bnd_box_lo != NULL) annDeallocPt(bnd_box_lo);
00214         if (bnd_box_hi != NULL) annDeallocPt(bnd_box_hi);
00215 }
00216 
00217 //----------------------------------------------------------------------
00218 //      This is called with all use of ANN is finished.  It eliminates the
00219 //      minor memory leak caused by the allocation of KD_TRIVIAL.
00220 //----------------------------------------------------------------------
00221 void annClose()                         // close use of ANN
00222 {
00223         if (KD_TRIVIAL != NULL) {
00224                 delete KD_TRIVIAL;
00225                 KD_TRIVIAL = NULL;
00226         }
00227 }
00228 
00229 //----------------------------------------------------------------------
00230 //      kd_tree constructors
00231 //              There is a skeleton kd-tree constructor which sets up a
00232 //              trivial empty tree.      The last optional argument allows
00233 //              the routine to be passed a point index array which is
00234 //              assumed to be of the proper size (n).  Otherwise, one is
00235 //              allocated and initialized to the identity.      Warning: In
00236 //              either case the destructor will deallocate this array.
00237 //
00238 //              As a kludge, we need to allocate KD_TRIVIAL if one has not
00239 //              already been allocated.  (This is because I'm too dumb to
00240 //              figure out how to cause a pointer to be allocated at load
00241 //              time.)
00242 //----------------------------------------------------------------------
00243 
00244 void ANNkd_tree::SkeletonTree(                  // construct skeleton tree
00245                 int n,                                                  // number of points
00246                 int dd,                                                 // dimension
00247                 int bs,                                                 // bucket size
00248                 ANNpointArray pa,                               // point array
00249                 ANNidxArray pi)                                 // point indices
00250 {
00251         dim = dd;                                                       // initialize basic elements
00252         n_pts = n;
00253         bkt_size = bs;
00254         pts = pa;                                                       // initialize points array
00255 
00256         root = NULL;                                            // no associated tree yet
00257 
00258         if (pi == NULL) {                                       // point indices provided?
00259                 pidx = new ANNidx[n];                   // no, allocate space for point indices
00260                 for (int i = 0; i < n; i++) {
00261                         pidx[i] = i;                            // initially identity
00262                 }
00263         }
00264         else {
00265                 pidx = pi;                                              // yes, use them
00266         }
00267 
00268         bnd_box_lo = bnd_box_hi = NULL;         // bounding box is nonexistent
00269         if (KD_TRIVIAL == NULL)                         // no trivial leaf node yet?
00270                 KD_TRIVIAL = new ANNkd_leaf(0, IDX_TRIVIAL);    // allocate it
00271 }
00272 
00273 ANNkd_tree::ANNkd_tree(                                 // basic constructor
00274                 int n,                                                  // number of points
00275                 int dd,                                                 // dimension
00276                 int bs)                                                 // bucket size
00277 {  SkeletonTree(n, dd, bs);  }                  // construct skeleton tree
00278 
00279 //----------------------------------------------------------------------
00280 //      rkd_tree - recursive procedure to build a kd-tree
00281 //
00282 //              Builds a kd-tree for points in pa as indexed through the
00283 //              array pidx[0..n-1] (typically a subarray of the array used in
00284 //              the top-level call).  This routine permutes the array pidx,
00285 //              but does not alter pa[].
00286 //
00287 //              The construction is based on a standard algorithm for constructing
00288 //              the kd-tree (see Friedman, Bentley, and Finkel, ``An algorithm for
00289 //              finding best matches in logarithmic expected time,'' ACM Transactions
00290 //              on Mathematical Software, 3(3):209-226, 1977).  The procedure
00291 //              operates by a simple divide-and-conquer strategy, which determines
00292 //              an appropriate orthogonal cutting plane (see below), and splits
00293 //              the points.  When the number of points falls below the bucket size,
00294 //              we simply store the points in a leaf node's bucket.
00295 //
00296 //              One of the arguments is a pointer to a splitting routine,
00297 //              whose prototype is:
00298 //              
00299 //                              void split(
00300 //                                              ANNpointArray pa,  // complete point array
00301 //                                              ANNidxArray pidx,  // point array (permuted on return)
00302 //                                              ANNorthRect &bnds, // bounds of current cell
00303 //                                              int n,                     // number of points
00304 //                                              int dim,                   // dimension of space
00305 //                                              int &cut_dim,      // cutting dimension
00306 //                                              ANNcoord &cut_val, // cutting value
00307 //                                              int &n_lo)                 // no. of points on low side of cut
00308 //
00309 //              This procedure selects a cutting dimension and cutting value,
00310 //              partitions pa about these values, and returns the number of
00311 //              points on the low side of the cut.
00312 //----------------------------------------------------------------------
00313 
00314 ANNkd_ptr rkd_tree(                             // recursive construction of kd-tree
00315         ANNpointArray           pa,                             // point array
00316         ANNidxArray                     pidx,                   // point indices to store in subtree
00317         int                                     n,                              // number of points
00318         int                                     dim,                    // dimension of space
00319         int                                     bsp,                    // bucket space
00320         ANNorthRect                     &bnd_box,               // bounding box for current node
00321         ANNkd_splitter          splitter)               // splitting routine
00322 {
00323         if (n <= bsp) {                                         // n small, make a leaf node
00324                 if (n == 0)                                             // empty leaf node
00325                         return KD_TRIVIAL;                      // return (canonical) empty leaf
00326                 else                                                    // construct the node and return
00327                         return new ANNkd_leaf(n, pidx); 
00328         }
00329         else {                                                          // n large, make a splitting node
00330                 int cd;                                                 // cutting dimension
00331                 ANNcoord cv;                                    // cutting value
00332                 int n_lo;                                               // number on low side of cut
00333                 ANNkd_node *lo, *hi;                    // low and high children
00334 
00335                                                                                 // invoke splitting procedure
00336                 (*splitter)(pa, pidx, bnd_box, n, dim, cd, cv, n_lo);
00337 
00338                 ANNcoord lv = bnd_box.lo[cd];   // save bounds for cutting dimension
00339                 ANNcoord hv = bnd_box.hi[cd];
00340 
00341                 bnd_box.hi[cd] = cv;                    // modify bounds for left subtree
00342                 lo = rkd_tree(                                  // build left subtree
00343                                 pa, pidx, n_lo,                 // ...from pidx[0..n_lo-1]
00344                                 dim, bsp, bnd_box, splitter);
00345                 bnd_box.hi[cd] = hv;                    // restore bounds
00346 
00347                 bnd_box.lo[cd] = cv;                    // modify bounds for right subtree
00348                 hi = rkd_tree(                                  // build right subtree
00349                                 pa, pidx + n_lo, n-n_lo,// ...from pidx[n_lo..n-1]
00350                                 dim, bsp, bnd_box, splitter);
00351                 bnd_box.lo[cd] = lv;                    // restore bounds
00352 
00353                                                                                 // create the splitting node
00354                 ANNkd_split *ptr = new ANNkd_split(cd, cv, lv, hv, lo, hi);
00355 
00356                 return ptr;                                             // return pointer to this node
00357         }
00358 } 
00359 
00360 //----------------------------------------------------------------------
00361 // kd-tree constructor
00362 //              This is the main constructor for kd-trees given a set of points.
00363 //              It first builds a skeleton tree, then computes the bounding box
00364 //              of the data points, and then invokes rkd_tree() to actually
00365 //              build the tree, passing it the appropriate splitting routine.
00366 //----------------------------------------------------------------------
00367 
00368 ANNkd_tree::ANNkd_tree(                                 // construct from point array
00369         ANNpointArray           pa,                             // point array (with at least n pts)
00370         int                                     n,                              // number of points
00371         int                                     dd,                             // dimension
00372         int                                     bs,                             // bucket size
00373         ANNsplitRule            split)                  // splitting method
00374 {
00375         SkeletonTree(n, dd, bs);                        // set up the basic stuff
00376         pts = pa;                                                       // where the points are
00377         if (n == 0) return;                                     // no points--no sweat
00378 
00379         ANNorthRect bnd_box(dd);                        // bounding box for points
00380         annEnclRect(pa, pidx, n, dd, bnd_box);// construct bounding rectangle
00381                                                                                 // copy to tree structure
00382         bnd_box_lo = annCopyPt(dd, bnd_box.lo);
00383         bnd_box_hi = annCopyPt(dd, bnd_box.hi);
00384 
00385         switch (split) {                                        // build by rule
00386         case ANN_KD_STD:                                        // standard kd-splitting rule
00387                 root = rkd_tree(pa, pidx, n, dd, bs, bnd_box, kd_split);
00388                 break;
00389         case ANN_KD_MIDPT:                                      // midpoint split
00390                 root = rkd_tree(pa, pidx, n, dd, bs, bnd_box, midpt_split);
00391                 break;
00392         case ANN_KD_FAIR:                                       // fair split
00393                 root = rkd_tree(pa, pidx, n, dd, bs, bnd_box, fair_split);
00394                 break;
00395         case ANN_KD_SUGGEST:                            // best (in our opinion)
00396         case ANN_KD_SL_MIDPT:                           // sliding midpoint split
00397                 root = rkd_tree(pa, pidx, n, dd, bs, bnd_box, sl_midpt_split);
00398                 break;
00399         case ANN_KD_SL_FAIR:                            // sliding fair split
00400                 root = rkd_tree(pa, pidx, n, dd, bs, bnd_box, sl_fair_split);
00401                 break;
00402         default:
00403                 annError("Illegal splitting method", ANNabort);
00404         }
00405 }

Generated on Wed Nov 23 19:00:20 2011 for FreeCAD by  doxygen 1.6.1