kd_split.cpp

Go to the documentation of this file.
00001 //----------------------------------------------------------------------
00002 // File:                        kd_split.cpp
00003 // Programmer:          Sunil Arya and David Mount
00004 // Description:         Methods for splitting kd-trees
00005 // Last modified:       01/04/05 (Version 1.0)
00006 //----------------------------------------------------------------------
00007 // Copyright (c) 1997-2005 University of Maryland and Sunil Arya and
00008 // David Mount.  All Rights Reserved.
00009 // 
00010 // This software and related documentation is part of the Approximate
00011 // Nearest Neighbor Library (ANN).  This software is provided under
00012 // the provisions of the Lesser GNU Public License (LGPL).  See the
00013 // file ../ReadMe.txt for further information.
00014 // 
00015 // The University of Maryland (U.M.) and the authors make no
00016 // representations about the suitability or fitness of this software for
00017 // any purpose.  It is provided "as is" without express or implied
00018 // warranty.
00019 //----------------------------------------------------------------------
00020 // History:
00021 //      Revision 0.1  03/04/98
00022 //              Initial release
00023 //      Revision 1.0  04/01/05
00024 //----------------------------------------------------------------------
00025 
00026 #include "kd_tree.h"                                    // kd-tree definitions
00027 #include "kd_util.h"                                    // kd-tree utilities
00028 #include "kd_split.h"                                   // splitting functions
00029 
00030 //----------------------------------------------------------------------
00031 //      Constants
00032 //----------------------------------------------------------------------
00033 
00034 const double ERR = 0.001;                               // a small value
00035 const double FS_ASPECT_RATIO = 3.0;             // maximum allowed aspect ratio
00036                                                                                 // in fair split. Must be >= 2.
00037 
00038 //----------------------------------------------------------------------
00039 //      kd_split - Bentley's standard splitting routine for kd-trees
00040 //              Find the dimension of the greatest spread, and split
00041 //              just before the median point along this dimension.
00042 //----------------------------------------------------------------------
00043 
00044 void kd_split(
00045         ANNpointArray           pa,                             // point array (permuted on return)
00046         ANNidxArray                     pidx,                   // point indices
00047         const ANNorthRect       &bnds,                  // bounding rectangle for cell
00048         int                                     n,                              // number of points
00049         int                                     dim,                    // dimension of space
00050         int                                     &cut_dim,               // cutting dimension (returned)
00051         ANNcoord                        &cut_val,               // cutting value (returned)
00052         int                                     &n_lo)                  // num of points on low side (returned)
00053 {
00054                                                                                 // find dimension of maximum spread
00055         cut_dim = annMaxSpread(pa, pidx, n, dim);
00056         n_lo = n/2;                                                     // median rank
00057                                                                                 // split about median
00058         annMedianSplit(pa, pidx, n, cut_dim, cut_val, n_lo);
00059 }
00060 
00061 //----------------------------------------------------------------------
00062 //      midpt_split - midpoint splitting rule for box-decomposition trees
00063 //
00064 //              This is the simplest splitting rule that guarantees boxes
00065 //              of bounded aspect ratio.  It simply cuts the box with the
00066 //              longest side through its midpoint.  If there are ties, it
00067 //              selects the dimension with the maximum point spread.
00068 //
00069 //              WARNING: This routine (while simple) doesn't seem to work
00070 //              well in practice in high dimensions, because it tends to
00071 //              generate a large number of trivial and/or unbalanced splits.
00072 //              Either kd_split(), sl_midpt_split(), or fair_split() are
00073 //              recommended, instead.
00074 //----------------------------------------------------------------------
00075 
00076 void midpt_split(
00077         ANNpointArray           pa,                             // point array
00078         ANNidxArray                     pidx,                   // point indices (permuted on return)
00079         const ANNorthRect       &bnds,                  // bounding rectangle for cell
00080         int                                     n,                              // number of points
00081         int                                     dim,                    // dimension of space
00082         int                                     &cut_dim,               // cutting dimension (returned)
00083         ANNcoord                        &cut_val,               // cutting value (returned)
00084         int                                     &n_lo)                  // num of points on low side (returned)
00085 {
00086         int d;
00087 
00088         ANNcoord max_length = bnds.hi[0] - bnds.lo[0];
00089         for (d = 1; d < dim; d++) {                     // find length of longest box side
00090                 ANNcoord length = bnds.hi[d] - bnds.lo[d];
00091                 if (length > max_length) {
00092                         max_length = length;
00093                 }
00094         }
00095         ANNcoord max_spread = -1;                       // find long side with most spread
00096         for (d = 0; d < dim; d++) {
00097                                                                                 // is it among longest?
00098                 if (double(bnds.hi[d] - bnds.lo[d]) >= (1-ERR)*max_length) {
00099                                                                                 // compute its spread
00100                         ANNcoord spr = annSpread(pa, pidx, n, d);
00101                         if (spr > max_spread) {         // is it max so far?
00102                                 max_spread = spr;
00103                                 cut_dim = d;
00104                         }
00105                 }
00106         }
00107                                                                                 // split along cut_dim at midpoint
00108         cut_val = (bnds.lo[cut_dim] + bnds.hi[cut_dim]) / 2;
00109                                                                                 // permute points accordingly
00110         int br1, br2;
00111         annPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2);
00112         //------------------------------------------------------------------
00113         //      On return:              pa[0..br1-1] < cut_val
00114         //                                      pa[br1..br2-1] == cut_val
00115         //                                      pa[br2..n-1] > cut_val
00116         //
00117         //      We can set n_lo to any value in the range [br1..br2].
00118         //      We choose split so that points are most evenly divided.
00119         //------------------------------------------------------------------
00120         if (br1 > n/2) n_lo = br1;
00121         else if (br2 < n/2) n_lo = br2;
00122         else n_lo = n/2;
00123 }
00124 
00125 //----------------------------------------------------------------------
00126 //      sl_midpt_split - sliding midpoint splitting rule
00127 //
00128 //              This is a modification of midpt_split, which has the nonsensical
00129 //              name "sliding midpoint".  The idea is that we try to use the
00130 //              midpoint rule, by bisecting the longest side.  If there are
00131 //              ties, the dimension with the maximum spread is selected.  If,
00132 //              however, the midpoint split produces a trivial split (no points
00133 //              on one side of the splitting plane) then we slide the splitting
00134 //              (maintaining its orientation) until it produces a nontrivial
00135 //              split. For example, if the splitting plane is along the x-axis,
00136 //              and all the data points have x-coordinate less than the x-bisector,
00137 //              then the split is taken along the maximum x-coordinate of the
00138 //              data points.
00139 //
00140 //              Intuitively, this rule cannot generate trivial splits, and
00141 //              hence avoids midpt_split's tendency to produce trees with
00142 //              a very large number of nodes.
00143 //
00144 //----------------------------------------------------------------------
00145 
00146 void sl_midpt_split(
00147         ANNpointArray           pa,                             // point array
00148         ANNidxArray                     pidx,                   // point indices (permuted on return)
00149         const ANNorthRect       &bnds,                  // bounding rectangle for cell
00150         int                                     n,                              // number of points
00151         int                                     dim,                    // dimension of space
00152         int                                     &cut_dim,               // cutting dimension (returned)
00153         ANNcoord                        &cut_val,               // cutting value (returned)
00154         int                                     &n_lo)                  // num of points on low side (returned)
00155 {
00156         int d;
00157 
00158         ANNcoord max_length = bnds.hi[0] - bnds.lo[0];
00159         for (d = 1; d < dim; d++) {                     // find length of longest box side
00160                 ANNcoord length = bnds.hi[d] - bnds.lo[d];
00161                 if (length > max_length) {
00162                         max_length = length;
00163                 }
00164         }
00165         ANNcoord max_spread = -1;                       // find long side with most spread
00166         for (d = 0; d < dim; d++) {
00167                                                                                 // is it among longest?
00168                 if ((bnds.hi[d] - bnds.lo[d]) >= (1-ERR)*max_length) {
00169                                                                                 // compute its spread
00170                         ANNcoord spr = annSpread(pa, pidx, n, d);
00171                         if (spr > max_spread) {         // is it max so far?
00172                                 max_spread = spr;
00173                                 cut_dim = d;
00174                         }
00175                 }
00176         }
00177                                                                                 // ideal split at midpoint
00178         ANNcoord ideal_cut_val = (bnds.lo[cut_dim] + bnds.hi[cut_dim])/2;
00179 
00180         ANNcoord min, max;
00181         annMinMax(pa, pidx, n, cut_dim, min, max);      // find min/max coordinates
00182 
00183         if (ideal_cut_val < min)                        // slide to min or max as needed
00184                 cut_val = min;
00185         else if (ideal_cut_val > max)
00186                 cut_val = max;
00187         else
00188                 cut_val = ideal_cut_val;
00189 
00190                                                                                 // permute points accordingly
00191         int br1, br2;
00192         annPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2);
00193         //------------------------------------------------------------------
00194         //      On return:              pa[0..br1-1] < cut_val
00195         //                                      pa[br1..br2-1] == cut_val
00196         //                                      pa[br2..n-1] > cut_val
00197         //
00198         //      We can set n_lo to any value in the range [br1..br2] to satisfy
00199         //      the exit conditions of the procedure.
00200         //
00201         //      if ideal_cut_val < min (implying br2 >= 1),
00202         //                      then we select n_lo = 1 (so there is one point on left) and
00203         //      if ideal_cut_val > max (implying br1 <= n-1),
00204         //                      then we select n_lo = n-1 (so there is one point on right).
00205         //      Otherwise, we select n_lo as close to n/2 as possible within
00206         //                      [br1..br2].
00207         //------------------------------------------------------------------
00208         if (ideal_cut_val < min) n_lo = 1;
00209         else if (ideal_cut_val > max) n_lo = n-1;
00210         else if (br1 > n/2) n_lo = br1;
00211         else if (br2 < n/2) n_lo = br2;
00212         else n_lo = n/2;
00213 }
00214 
00215 //----------------------------------------------------------------------
00216 //      fair_split - fair-split splitting rule
00217 //
00218 //              This is a compromise between the kd-tree splitting rule (which
00219 //              always splits data points at their median) and the midpoint
00220 //              splitting rule (which always splits a box through its center.
00221 //              The goal of this procedure is to achieve both nicely balanced
00222 //              splits, and boxes of bounded aspect ratio.
00223 //
00224 //              A constant FS_ASPECT_RATIO is defined. Given a box, those sides
00225 //              which can be split so that the ratio of the longest to shortest
00226 //              side does not exceed ASPECT_RATIO are identified.  Among these
00227 //              sides, we select the one in which the points have the largest
00228 //              spread. We then split the points in a manner which most evenly
00229 //              distributes the points on either side of the splitting plane,
00230 //              subject to maintaining the bound on the ratio of long to short
00231 //              sides. To determine that the aspect ratio will be preserved,
00232 //              we determine the longest side (other than this side), and
00233 //              determine how narrowly we can cut this side, without causing the
00234 //              aspect ratio bound to be exceeded (small_piece).
00235 //
00236 //              This procedure is more robust than either kd_split or midpt_split,
00237 //              but is more complicated as well.  When point distribution is
00238 //              extremely skewed, this degenerates to midpt_split (actually
00239 //              1/3 point split), and when the points are most evenly distributed,
00240 //              this degenerates to kd-split.
00241 //----------------------------------------------------------------------
00242 
00243 void fair_split(
00244         ANNpointArray           pa,                             // point array
00245         ANNidxArray                     pidx,                   // point indices (permuted on return)
00246         const ANNorthRect       &bnds,                  // bounding rectangle for cell
00247         int                                     n,                              // number of points
00248         int                                     dim,                    // dimension of space
00249         int                                     &cut_dim,               // cutting dimension (returned)
00250         ANNcoord                        &cut_val,               // cutting value (returned)
00251         int                                     &n_lo)                  // num of points on low side (returned)
00252 {
00253         int d;
00254         ANNcoord max_length = bnds.hi[0] - bnds.lo[0];
00255         cut_dim = 0;
00256         for (d = 1; d < dim; d++) {                     // find length of longest box side
00257                 ANNcoord length = bnds.hi[d] - bnds.lo[d];
00258                 if (length > max_length) {
00259                         max_length = length;
00260                         cut_dim = d;
00261                 }
00262         }
00263 
00264         ANNcoord max_spread = 0;                        // find legal cut with max spread
00265         cut_dim = 0;
00266         for (d = 0; d < dim; d++) {
00267                 ANNcoord length = bnds.hi[d] - bnds.lo[d];
00268                                                                                 // is this side midpoint splitable
00269                                                                                 // without violating aspect ratio?
00270                 if (((double) max_length)*2.0/((double) length) <= FS_ASPECT_RATIO) {
00271                                                                                 // compute spread along this dim
00272                         ANNcoord spr = annSpread(pa, pidx, n, d);
00273                         if (spr > max_spread) {         // best spread so far
00274                                 max_spread = spr;
00275                                 cut_dim = d;                    // this is dimension to cut
00276                         }
00277                 }
00278         }
00279 
00280         max_length = 0;                                         // find longest side other than cut_dim
00281         for (d = 0; d < dim; d++) {
00282                 ANNcoord length = bnds.hi[d] - bnds.lo[d];
00283                 if (d != cut_dim && length > max_length)
00284                         max_length = length;
00285         }
00286                                                                                 // consider most extreme splits
00287         ANNcoord small_piece = max_length / FS_ASPECT_RATIO;
00288         ANNcoord lo_cut = bnds.lo[cut_dim] + small_piece;// lowest legal cut
00289         ANNcoord hi_cut = bnds.hi[cut_dim] - small_piece;// highest legal cut
00290 
00291         int br1, br2;
00292                                                                                 // is median below lo_cut ?
00293         if (annSplitBalance(pa, pidx, n, cut_dim, lo_cut) >= 0) {
00294                 cut_val = lo_cut;                               // cut at lo_cut
00295                 annPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2);
00296                 n_lo = br1;
00297         }
00298                                                                                 // is median above hi_cut?
00299         else if (annSplitBalance(pa, pidx, n, cut_dim, hi_cut) <= 0) {
00300                 cut_val = hi_cut;                               // cut at hi_cut
00301                 annPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2);
00302                 n_lo = br2;
00303         }
00304         else {                                                          // median cut preserves asp ratio
00305                 n_lo = n/2;                                             // split about median
00306                 annMedianSplit(pa, pidx, n, cut_dim, cut_val, n_lo);
00307         }
00308 }
00309 
00310 //----------------------------------------------------------------------
00311 //      sl_fair_split - sliding fair split splitting rule
00312 //
00313 //              Sliding fair split is a splitting rule that combines the
00314 //              strengths of both fair split with sliding midpoint split.
00315 //              Fair split tends to produce balanced splits when the points
00316 //              are roughly uniformly distributed, but it can produce many
00317 //              trivial splits when points are highly clustered.  Sliding
00318 //              midpoint never produces trivial splits, and shrinks boxes
00319 //              nicely if points are highly clustered, but it may produce
00320 //              rather unbalanced splits when points are unclustered but not
00321 //              quite uniform.
00322 //
00323 //              Sliding fair split is based on the theory that there are two
00324 //              types of splits that are "good": balanced splits that produce
00325 //              fat boxes, and unbalanced splits provided the cell with fewer
00326 //              points is fat.
00327 //
00328 //              This splitting rule operates by first computing the longest
00329 //              side of the current bounding box.  Then it asks which sides
00330 //              could be split (at the midpoint) and still satisfy the aspect
00331 //              ratio bound with respect to this side.  Among these, it selects
00332 //              the side with the largest spread (as fair split would).  It
00333 //              then considers the most extreme cuts that would be allowed by
00334 //              the aspect ratio bound.  This is done by dividing the longest
00335 //              side of the box by the aspect ratio bound.      If the median cut
00336 //              lies between these extreme cuts, then we use the median cut.
00337 //              If not, then consider the extreme cut that is closer to the
00338 //              median.  If all the points lie to one side of this cut, then
00339 //              we slide the cut until it hits the first point.  This may
00340 //              violate the aspect ratio bound, but will never generate empty
00341 //              cells.  However the sibling of every such skinny cell is fat,
00342 //              and hence packing arguments still apply.
00343 //
00344 //----------------------------------------------------------------------
00345 
00346 void sl_fair_split(
00347         ANNpointArray           pa,                             // point array
00348         ANNidxArray                     pidx,                   // point indices (permuted on return)
00349         const ANNorthRect       &bnds,                  // bounding rectangle for cell
00350         int                                     n,                              // number of points
00351         int                                     dim,                    // dimension of space
00352         int                                     &cut_dim,               // cutting dimension (returned)
00353         ANNcoord                        &cut_val,               // cutting value (returned)
00354         int                                     &n_lo)                  // num of points on low side (returned)
00355 {
00356         int d;
00357         ANNcoord min, max;                                      // min/max coordinates
00358         int br1, br2;                                           // split break points
00359 
00360         ANNcoord max_length = bnds.hi[0] - bnds.lo[0];
00361         cut_dim = 0;
00362         for (d = 1; d < dim; d++) {                     // find length of longest box side
00363                 ANNcoord length = bnds.hi[d] - bnds.lo[d];
00364                 if (length      > max_length) {
00365                         max_length = length;
00366                         cut_dim = d;
00367                 }
00368         }
00369 
00370         ANNcoord max_spread = 0;                        // find legal cut with max spread
00371         cut_dim = 0;
00372         for (d = 0; d < dim; d++) {
00373                 ANNcoord length = bnds.hi[d] - bnds.lo[d];
00374                                                                                 // is this side midpoint splitable
00375                                                                                 // without violating aspect ratio?
00376                 if (((double) max_length)*2.0/((double) length) <= FS_ASPECT_RATIO) {
00377                                                                                 // compute spread along this dim
00378                         ANNcoord spr = annSpread(pa, pidx, n, d);
00379                         if (spr > max_spread) {         // best spread so far
00380                                 max_spread = spr;
00381                                 cut_dim = d;                    // this is dimension to cut
00382                         }
00383                 }
00384         }
00385 
00386         max_length = 0;                                         // find longest side other than cut_dim
00387         for (d = 0; d < dim; d++) {
00388                 ANNcoord length = bnds.hi[d] - bnds.lo[d];
00389                 if (d != cut_dim && length > max_length)
00390                         max_length = length;
00391         }
00392                                                                                 // consider most extreme splits
00393         ANNcoord small_piece = max_length / FS_ASPECT_RATIO;
00394         ANNcoord lo_cut = bnds.lo[cut_dim] + small_piece;// lowest legal cut
00395         ANNcoord hi_cut = bnds.hi[cut_dim] - small_piece;// highest legal cut
00396                                                                                 // find min and max along cut_dim
00397         annMinMax(pa, pidx, n, cut_dim, min, max);
00398                                                                                 // is median below lo_cut?
00399         if (annSplitBalance(pa, pidx, n, cut_dim, lo_cut) >= 0) {
00400                 if (max > lo_cut) {                             // are any points above lo_cut?
00401                         cut_val = lo_cut;                       // cut at lo_cut
00402                         annPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2);
00403                         n_lo = br1;                                     // balance if there are ties
00404                 }
00405                 else {                                                  // all points below lo_cut
00406                         cut_val = max;                          // cut at max value
00407                         annPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2);
00408                         n_lo = n-1;
00409                 }
00410         }
00411                                                                                 // is median above hi_cut?
00412         else if (annSplitBalance(pa, pidx, n, cut_dim, hi_cut) <= 0) {
00413                 if (min < hi_cut) {                             // are any points below hi_cut?
00414                         cut_val = hi_cut;                       // cut at hi_cut
00415                         annPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2);
00416                         n_lo = br2;                                     // balance if there are ties
00417                 }
00418                 else {                                                  // all points above hi_cut
00419                         cut_val = min;                          // cut at min value
00420                         annPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2);
00421                         n_lo = 1;
00422                 }
00423         }
00424         else {                                                          // median cut is good enough
00425                 n_lo = n/2;                                             // split about median
00426                 annMedianSplit(pa, pidx, n, cut_dim, cut_val, n_lo);
00427         }
00428 }

Generated on Wed Nov 23 19:00:20 2011 for FreeCAD by  doxygen 1.6.1