00001 //---------------------------------------------------------------------- 00002 // File: kd_util.cpp 00003 // Programmer: Sunil Arya and David Mount 00004 // Description: Common utilities for kd-trees 00005 // Last modified: 01/04/05 (Version 1.0) 00006 //---------------------------------------------------------------------- 00007 // Copyright (c) 1997-2005 University of Maryland and Sunil Arya and 00008 // David Mount. All Rights Reserved. 00009 // 00010 // This software and related documentation is part of the Approximate 00011 // Nearest Neighbor Library (ANN). This software is provided under 00012 // the provisions of the Lesser GNU Public License (LGPL). See the 00013 // file ../ReadMe.txt for further information. 00014 // 00015 // The University of Maryland (U.M.) and the authors make no 00016 // representations about the suitability or fitness of this software for 00017 // any purpose. It is provided "as is" without express or implied 00018 // warranty. 00019 //---------------------------------------------------------------------- 00020 // History: 00021 // Revision 0.1 03/04/98 00022 // Initial release 00023 //---------------------------------------------------------------------- 00024 00025 #include "kd_util.h" // kd-utility declarations 00026 00027 #include <ANN/ANNperf.h> // performance evaluation 00028 00029 //---------------------------------------------------------------------- 00030 // The following routines are utility functions for manipulating 00031 // points sets, used in determining splitting planes for kd-tree 00032 // construction. 00033 //---------------------------------------------------------------------- 00034 00035 //---------------------------------------------------------------------- 00036 // NOTE: Virtually all point indexing is done through an index (i.e. 00037 // permutation) array pidx. Consequently, a reference to the d-th 00038 // coordinate of the i-th point is pa[pidx[i]][d]. The macro PA(i,d) 00039 // is a shorthand for this. 00040 //---------------------------------------------------------------------- 00041 // standard 2-d indirect indexing 00042 #define PA(i,d) (pa[pidx[(i)]][(d)]) 00043 // accessing a single point 00044 #define PP(i) (pa[pidx[(i)]]) 00045 00046 //---------------------------------------------------------------------- 00047 // annAspectRatio 00048 // Compute the aspect ratio (ratio of longest to shortest side) 00049 // of a rectangle. 00050 //---------------------------------------------------------------------- 00051 00052 double annAspectRatio( 00053 int dim, // dimension 00054 const ANNorthRect &bnd_box) // bounding cube 00055 { 00056 ANNcoord length = bnd_box.hi[0] - bnd_box.lo[0]; 00057 ANNcoord min_length = length; // min side length 00058 ANNcoord max_length = length; // max side length 00059 for (int d = 0; d < dim; d++) { 00060 length = bnd_box.hi[d] - bnd_box.lo[d]; 00061 if (length < min_length) min_length = length; 00062 if (length > max_length) max_length = length; 00063 } 00064 return max_length/min_length; 00065 } 00066 00067 //---------------------------------------------------------------------- 00068 // annEnclRect, annEnclCube 00069 // These utilities compute the smallest rectangle and cube enclosing 00070 // a set of points, respectively. 00071 //---------------------------------------------------------------------- 00072 00073 void annEnclRect( 00074 ANNpointArray pa, // point array 00075 ANNidxArray pidx, // point indices 00076 int n, // number of points 00077 int dim, // dimension 00078 ANNorthRect &bnds) // bounding cube (returned) 00079 { 00080 for (int d = 0; d < dim; d++) { // find smallest enclosing rectangle 00081 ANNcoord lo_bnd = PA(0,d); // lower bound on dimension d 00082 ANNcoord hi_bnd = PA(0,d); // upper bound on dimension d 00083 for (int i = 0; i < n; i++) { 00084 if (PA(i,d) < lo_bnd) lo_bnd = PA(i,d); 00085 else if (PA(i,d) > hi_bnd) hi_bnd = PA(i,d); 00086 } 00087 bnds.lo[d] = lo_bnd; 00088 bnds.hi[d] = hi_bnd; 00089 } 00090 } 00091 00092 void annEnclCube( // compute smallest enclosing cube 00093 ANNpointArray pa, // point array 00094 ANNidxArray pidx, // point indices 00095 int n, // number of points 00096 int dim, // dimension 00097 ANNorthRect &bnds) // bounding cube (returned) 00098 { 00099 int d; 00100 // compute smallest enclosing rect 00101 annEnclRect(pa, pidx, n, dim, bnds); 00102 00103 ANNcoord max_len = 0; // max length of any side 00104 for (d = 0; d < dim; d++) { // determine max side length 00105 ANNcoord len = bnds.hi[d] - bnds.lo[d]; 00106 if (len > max_len) { // update max_len if longest 00107 max_len = len; 00108 } 00109 } 00110 for (d = 0; d < dim; d++) { // grow sides to match max 00111 ANNcoord len = bnds.hi[d] - bnds.lo[d]; 00112 ANNcoord half_diff = (max_len - len) / 2; 00113 bnds.lo[d] -= half_diff; 00114 bnds.hi[d] += half_diff; 00115 } 00116 } 00117 00118 //---------------------------------------------------------------------- 00119 // annBoxDistance - utility routine which computes distance from point to 00120 // box (Note: most distances to boxes are computed using incremental 00121 // distance updates, not this function.) 00122 //---------------------------------------------------------------------- 00123 00124 ANNdist annBoxDistance( // compute distance from point to box 00125 const ANNpoint q, // the point 00126 const ANNpoint lo, // low point of box 00127 const ANNpoint hi, // high point of box 00128 int dim) // dimension of space 00129 { 00130 register ANNdist dist = 0.0; // sum of squared distances 00131 register ANNdist t; 00132 00133 for (register int d = 0; d < dim; d++) { 00134 if (q[d] < lo[d]) { // q is left of box 00135 t = ANNdist(lo[d]) - ANNdist(q[d]); 00136 dist = ANN_SUM(dist, ANN_POW(t)); 00137 } 00138 else if (q[d] > hi[d]) { // q is right of box 00139 t = ANNdist(q[d]) - ANNdist(hi[d]); 00140 dist = ANN_SUM(dist, ANN_POW(t)); 00141 } 00142 } 00143 ANN_FLOP(4*dim) // increment floating op count 00144 00145 return dist; 00146 } 00147 00148 //---------------------------------------------------------------------- 00149 // annSpread - find spread along given dimension 00150 // annMinMax - find min and max coordinates along given dimension 00151 // annMaxSpread - find dimension of max spread 00152 //---------------------------------------------------------------------- 00153 00154 ANNcoord annSpread( // compute point spread along dimension 00155 ANNpointArray pa, // point array 00156 ANNidxArray pidx, // point indices 00157 int n, // number of points 00158 int d) // dimension to check 00159 { 00160 ANNcoord min = PA(0,d); // compute max and min coords 00161 ANNcoord max = PA(0,d); 00162 for (int i = 1; i < n; i++) { 00163 ANNcoord c = PA(i,d); 00164 if (c < min) min = c; 00165 else if (c > max) max = c; 00166 } 00167 return (max - min); // total spread is difference 00168 } 00169 00170 void annMinMax( // compute min and max coordinates along dim 00171 ANNpointArray pa, // point array 00172 ANNidxArray pidx, // point indices 00173 int n, // number of points 00174 int d, // dimension to check 00175 ANNcoord &min, // minimum value (returned) 00176 ANNcoord &max) // maximum value (returned) 00177 { 00178 min = PA(0,d); // compute max and min coords 00179 max = PA(0,d); 00180 for (int i = 1; i < n; i++) { 00181 ANNcoord c = PA(i,d); 00182 if (c < min) min = c; 00183 else if (c > max) max = c; 00184 } 00185 } 00186 00187 int annMaxSpread( // compute dimension of max spread 00188 ANNpointArray pa, // point array 00189 ANNidxArray pidx, // point indices 00190 int n, // number of points 00191 int dim) // dimension of space 00192 { 00193 int max_dim = 0; // dimension of max spread 00194 ANNcoord max_spr = 0; // amount of max spread 00195 00196 if (n == 0) return max_dim; // no points, who cares? 00197 00198 for (int d = 0; d < dim; d++) { // compute spread along each dim 00199 ANNcoord spr = annSpread(pa, pidx, n, d); 00200 if (spr > max_spr) { // bigger than current max 00201 max_spr = spr; 00202 max_dim = d; 00203 } 00204 } 00205 return max_dim; 00206 } 00207 00208 //---------------------------------------------------------------------- 00209 // annMedianSplit - split point array about its median 00210 // Splits a subarray of points pa[0..n] about an element of given 00211 // rank (median: n_lo = n/2) with respect to dimension d. It places 00212 // the element of rank n_lo-1 correctly (because our splitting rule 00213 // takes the mean of these two). On exit, the array is permuted so 00214 // that: 00215 // 00216 // pa[0..n_lo-2][d] <= pa[n_lo-1][d] <= pa[n_lo][d] <= pa[n_lo+1..n-1][d]. 00217 // 00218 // The mean of pa[n_lo-1][d] and pa[n_lo][d] is returned as the 00219 // splitting value. 00220 // 00221 // All indexing is done indirectly through the index array pidx. 00222 // 00223 // This function uses the well known selection algorithm due to 00224 // C.A.R. Hoare. 00225 //---------------------------------------------------------------------- 00226 00227 // swap two points in pa array 00228 #define PASWAP(a,b) { int tmp = pidx[a]; pidx[a] = pidx[b]; pidx[b] = tmp; } 00229 00230 void annMedianSplit( 00231 ANNpointArray pa, // points to split 00232 ANNidxArray pidx, // point indices 00233 int n, // number of points 00234 int d, // dimension along which to split 00235 ANNcoord &cv, // cutting value 00236 int n_lo) // split into n_lo and n-n_lo 00237 { 00238 int l = 0; // left end of current subarray 00239 int r = n-1; // right end of current subarray 00240 while (l < r) { 00241 register int i = (r+l)/2; // select middle as pivot 00242 register int k; 00243 00244 if (PA(i,d) > PA(r,d)) // make sure last > pivot 00245 PASWAP(i,r) 00246 PASWAP(l,i); // move pivot to first position 00247 00248 ANNcoord c = PA(l,d); // pivot value 00249 i = l; 00250 k = r; 00251 for(;;) { // pivot about c 00252 while (PA(++i,d) < c) ; 00253 while (PA(--k,d) > c) ; 00254 if (i < k) PASWAP(i,k) else break; 00255 } 00256 PASWAP(l,k); // pivot winds up in location k 00257 00258 if (k > n_lo) r = k-1; // recurse on proper subarray 00259 else if (k < n_lo) l = k+1; 00260 else break; // got the median exactly 00261 } 00262 if (n_lo > 0) { // search for next smaller item 00263 ANNcoord c = PA(0,d); // candidate for max 00264 int k = 0; // candidate's index 00265 for (int i = 1; i < n_lo; i++) { 00266 if (PA(i,d) > c) { 00267 c = PA(i,d); 00268 k = i; 00269 } 00270 } 00271 PASWAP(n_lo-1, k); // max among pa[0..n_lo-1] to pa[n_lo-1] 00272 } 00273 // cut value is midpoint value 00274 cv = (PA(n_lo-1,d) + PA(n_lo,d))/2.0; 00275 } 00276 00277 //---------------------------------------------------------------------- 00278 // annPlaneSplit - split point array about a cutting plane 00279 // Split the points in an array about a given plane along a 00280 // given cutting dimension. On exit, br1 and br2 are set so 00281 // that: 00282 // 00283 // pa[ 0 ..br1-1] < cv 00284 // pa[br1..br2-1] == cv 00285 // pa[br2.. n -1] > cv 00286 // 00287 // All indexing is done indirectly through the index array pidx. 00288 // 00289 //---------------------------------------------------------------------- 00290 00291 void annPlaneSplit( // split points by a plane 00292 ANNpointArray pa, // points to split 00293 ANNidxArray pidx, // point indices 00294 int n, // number of points 00295 int d, // dimension along which to split 00296 ANNcoord cv, // cutting value 00297 int &br1, // first break (values < cv) 00298 int &br2) // second break (values == cv) 00299 { 00300 int l = 0; 00301 int r = n-1; 00302 for(;;) { // partition pa[0..n-1] about cv 00303 while (l < n && PA(l,d) < cv) l++; 00304 while (r >= 0 && PA(r,d) >= cv) r--; 00305 if (l > r) break; 00306 PASWAP(l,r); 00307 l++; r--; 00308 } 00309 br1 = l; // now: pa[0..br1-1] < cv <= pa[br1..n-1] 00310 r = n-1; 00311 for(;;) { // partition pa[br1..n-1] about cv 00312 while (l < n && PA(l,d) <= cv) l++; 00313 while (r >= br1 && PA(r,d) > cv) r--; 00314 if (l > r) break; 00315 PASWAP(l,r); 00316 l++; r--; 00317 } 00318 br2 = l; // now: pa[br1..br2-1] == cv < pa[br2..n-1] 00319 } 00320 00321 00322 //---------------------------------------------------------------------- 00323 // annBoxSplit - split point array about a orthogonal rectangle 00324 // Split the points in an array about a given orthogonal 00325 // rectangle. On exit, n_in is set to the number of points 00326 // that are inside (or on the boundary of) the rectangle. 00327 // 00328 // All indexing is done indirectly through the index array pidx. 00329 // 00330 //---------------------------------------------------------------------- 00331 00332 void annBoxSplit( // split points by a box 00333 ANNpointArray pa, // points to split 00334 ANNidxArray pidx, // point indices 00335 int n, // number of points 00336 int dim, // dimension of space 00337 ANNorthRect &box, // the box 00338 int &n_in) // number of points inside (returned) 00339 { 00340 int l = 0; 00341 int r = n-1; 00342 for(;;) { // partition pa[0..n-1] about box 00343 while (l < n && box.inside(dim, PP(l))) l++; 00344 while (r >= 0 && !box.inside(dim, PP(r))) r--; 00345 if (l > r) break; 00346 PASWAP(l,r); 00347 l++; r--; 00348 } 00349 n_in = l; // now: pa[0..n_in-1] inside and rest outside 00350 } 00351 00352 //---------------------------------------------------------------------- 00353 // annSplitBalance - compute balance factor for a given plane split 00354 // Balance factor is defined as the number of points lying 00355 // below the splitting value minus n/2 (median). Thus, a 00356 // median split has balance 0, left of this is negative and 00357 // right of this is positive. (The points are unchanged.) 00358 //---------------------------------------------------------------------- 00359 00360 int annSplitBalance( // determine balance factor of a split 00361 ANNpointArray pa, // points to split 00362 ANNidxArray pidx, // point indices 00363 int n, // number of points 00364 int d, // dimension along which to split 00365 ANNcoord cv) // cutting value 00366 { 00367 int n_lo = 0; 00368 for(int i = 0; i < n; i++) { // count number less than cv 00369 if (PA(i,d) < cv) n_lo++; 00370 } 00371 return n_lo - n/2; 00372 } 00373 00374 //---------------------------------------------------------------------- 00375 // annBox2Bnds - convert bounding box to list of bounds 00376 // Given two boxes, an inner box enclosed within a bounding 00377 // box, this routine determines all the sides for which the 00378 // inner box is strictly contained with the bounding box, 00379 // and adds an appropriate entry to a list of bounds. Then 00380 // we allocate storage for the final list of bounds, and return 00381 // the resulting list and its size. 00382 //---------------------------------------------------------------------- 00383 00384 void annBox2Bnds( // convert inner box to bounds 00385 const ANNorthRect &inner_box, // inner box 00386 const ANNorthRect &bnd_box, // enclosing box 00387 int dim, // dimension of space 00388 int &n_bnds, // number of bounds (returned) 00389 ANNorthHSArray &bnds) // bounds array (returned) 00390 { 00391 int i; 00392 n_bnds = 0; // count number of bounds 00393 for (i = 0; i < dim; i++) { 00394 if (inner_box.lo[i] > bnd_box.lo[i]) // low bound is inside 00395 n_bnds++; 00396 if (inner_box.hi[i] < bnd_box.hi[i]) // high bound is inside 00397 n_bnds++; 00398 } 00399 00400 bnds = new ANNorthHalfSpace[n_bnds]; // allocate appropriate size 00401 00402 int j = 0; 00403 for (i = 0; i < dim; i++) { // fill the array 00404 if (inner_box.lo[i] > bnd_box.lo[i]) { 00405 bnds[j].cd = i; 00406 bnds[j].cv = inner_box.lo[i]; 00407 bnds[j].sd = +1; 00408 j++; 00409 } 00410 if (inner_box.hi[i] < bnd_box.hi[i]) { 00411 bnds[j].cd = i; 00412 bnds[j].cv = inner_box.hi[i]; 00413 bnds[j].sd = -1; 00414 j++; 00415 } 00416 } 00417 } 00418 00419 //---------------------------------------------------------------------- 00420 // annBnds2Box - convert list of bounds to bounding box 00421 // Given an enclosing box and a list of bounds, this routine 00422 // computes the corresponding inner box. It is assumed that 00423 // the box points have been allocated already. 00424 //---------------------------------------------------------------------- 00425 00426 void annBnds2Box( 00427 const ANNorthRect &bnd_box, // enclosing box 00428 int dim, // dimension of space 00429 int n_bnds, // number of bounds 00430 ANNorthHSArray bnds, // bounds array 00431 ANNorthRect &inner_box) // inner box (returned) 00432 { 00433 annAssignRect(dim, inner_box, bnd_box); // copy bounding box to inner 00434 00435 for (int i = 0; i < n_bnds; i++) { 00436 bnds[i].project(inner_box.lo); // project each endpoint 00437 bnds[i].project(inner_box.hi); 00438 } 00439 }