00001 //---------------------------------------------------------------------- 00002 // File: kd_search.cpp 00003 // Programmer: Sunil Arya and David Mount 00004 // Description: Standard kd-tree search 00005 // Last modified: 01/04/05 (Version 1.0) 00006 //---------------------------------------------------------------------- 00007 // Copyright (c) 1997-2005 University of Maryland and Sunil Arya and 00008 // David Mount. All Rights Reserved. 00009 // 00010 // This software and related documentation is part of the Approximate 00011 // Nearest Neighbor Library (ANN). This software is provided under 00012 // the provisions of the Lesser GNU Public License (LGPL). See the 00013 // file ../ReadMe.txt for further information. 00014 // 00015 // The University of Maryland (U.M.) and the authors make no 00016 // representations about the suitability or fitness of this software for 00017 // any purpose. It is provided "as is" without express or implied 00018 // warranty. 00019 //---------------------------------------------------------------------- 00020 // History: 00021 // Revision 0.1 03/04/98 00022 // Initial release 00023 // Revision 1.0 04/01/05 00024 // Changed names LO, HI to ANN_LO, ANN_HI 00025 //---------------------------------------------------------------------- 00026 00027 #include "kd_search.h" // kd-search declarations 00028 00029 //---------------------------------------------------------------------- 00030 // Approximate nearest neighbor searching by kd-tree search 00031 // The kd-tree is searched for an approximate nearest neighbor. 00032 // The point is returned through one of the arguments, and the 00033 // distance returned is the squared distance to this point. 00034 // 00035 // The method used for searching the kd-tree is an approximate 00036 // adaptation of the search algorithm described by Friedman, 00037 // Bentley, and Finkel, ``An algorithm for finding best matches 00038 // in logarithmic expected time,'' ACM Transactions on Mathematical 00039 // Software, 3(3):209-226, 1977). 00040 // 00041 // The algorithm operates recursively. When first encountering a 00042 // node of the kd-tree we first visit the child which is closest to 00043 // the query point. On return, we decide whether we want to visit 00044 // the other child. If the box containing the other child exceeds 00045 // 1/(1+eps) times the current best distance, then we skip it (since 00046 // any point found in this child cannot be closer to the query point 00047 // by more than this factor.) Otherwise, we visit it recursively. 00048 // The distance between a box and the query point is computed exactly 00049 // (not approximated as is often done in kd-tree), using incremental 00050 // distance updates, as described by Arya and Mount in ``Algorithms 00051 // for fast vector quantization,'' Proc. of DCC '93: Data Compression 00052 // Conference, eds. J. A. Storer and M. Cohn, IEEE Press, 1993, 00053 // 381-390. 00054 // 00055 // The main entry points is annkSearch() which sets things up and 00056 // then call the recursive routine ann_search(). This is a recursive 00057 // routine which performs the processing for one node in the kd-tree. 00058 // There are two versions of this virtual procedure, one for splitting 00059 // nodes and one for leaves. When a splitting node is visited, we 00060 // determine which child to visit first (the closer one), and visit 00061 // the other child on return. When a leaf is visited, we compute 00062 // the distances to the points in the buckets, and update information 00063 // on the closest points. 00064 // 00065 // Some trickery is used to incrementally update the distance from 00066 // a kd-tree rectangle to the query point. This comes about from 00067 // the fact that which each successive split, only one component 00068 // (along the dimension that is split) of the squared distance to 00069 // the child rectangle is different from the squared distance to 00070 // the parent rectangle. 00071 //---------------------------------------------------------------------- 00072 00073 //---------------------------------------------------------------------- 00074 // To keep argument lists short, a number of global variables 00075 // are maintained which are common to all the recursive calls. 00076 // These are given below. 00077 //---------------------------------------------------------------------- 00078 00079 int ANNkdDim; // dimension of space 00080 ANNpoint ANNkdQ; // query point 00081 double ANNkdMaxErr; // max tolerable squared error 00082 ANNpointArray ANNkdPts; // the points 00083 ANNmin_k *ANNkdPointMK; // set of k closest points 00084 00085 //---------------------------------------------------------------------- 00086 // annkSearch - search for the k nearest neighbors 00087 //---------------------------------------------------------------------- 00088 00089 void ANNkd_tree::annkSearch( 00090 ANNpoint q, // the query point 00091 int k, // number of near neighbors to return 00092 ANNidxArray nn_idx, // nearest neighbor indices (returned) 00093 ANNdistArray dd, // the approximate nearest neighbor 00094 double eps) // the error bound 00095 { 00096 00097 ANNkdDim = dim; // copy arguments to static equivs 00098 ANNkdQ = q; 00099 ANNkdPts = pts; 00100 ANNptsVisited = 0; // initialize count of points visited 00101 00102 if (k > n_pts) { // too many near neighbors? 00103 annError("Requesting more near neighbors than data points", ANNabort); 00104 } 00105 00106 ANNkdMaxErr = ANN_POW(1.0 + eps); 00107 ANN_FLOP(2) // increment floating op count 00108 00109 ANNkdPointMK = new ANNmin_k(k); // create set for closest k points 00110 // search starting at the root 00111 root->ann_search(annBoxDistance(q, bnd_box_lo, bnd_box_hi, dim)); 00112 00113 for (int i = 0; i < k; i++) { // extract the k-th closest points 00114 dd[i] = ANNkdPointMK->ith_smallest_key(i); 00115 nn_idx[i] = ANNkdPointMK->ith_smallest_info(i); 00116 } 00117 delete ANNkdPointMK; // deallocate closest point set 00118 } 00119 00120 //---------------------------------------------------------------------- 00121 // kd_split::ann_search - search a splitting node 00122 //---------------------------------------------------------------------- 00123 00124 void ANNkd_split::ann_search(ANNdist box_dist) 00125 { 00126 // check dist calc term condition 00127 if (ANNmaxPtsVisited != 0 && ANNptsVisited > ANNmaxPtsVisited) return; 00128 00129 // distance to cutting plane 00130 ANNcoord cut_diff = ANNkdQ[cut_dim] - cut_val; 00131 00132 if (cut_diff < 0) { // left of cutting plane 00133 child[ANN_LO]->ann_search(box_dist);// visit closer child first 00134 00135 ANNcoord box_diff = cd_bnds[ANN_LO] - ANNkdQ[cut_dim]; 00136 if (box_diff < 0) // within bounds - ignore 00137 box_diff = 0; 00138 // distance to further box 00139 box_dist = (ANNdist) ANN_SUM(box_dist, 00140 ANN_DIFF(ANN_POW(box_diff), ANN_POW(cut_diff))); 00141 00142 // visit further child if close enough 00143 if (box_dist * ANNkdMaxErr < ANNkdPointMK->max_key()) 00144 child[ANN_HI]->ann_search(box_dist); 00145 00146 } 00147 else { // right of cutting plane 00148 child[ANN_HI]->ann_search(box_dist);// visit closer child first 00149 00150 ANNcoord box_diff = ANNkdQ[cut_dim] - cd_bnds[ANN_HI]; 00151 if (box_diff < 0) // within bounds - ignore 00152 box_diff = 0; 00153 // distance to further box 00154 box_dist = (ANNdist) ANN_SUM(box_dist, 00155 ANN_DIFF(ANN_POW(box_diff), ANN_POW(cut_diff))); 00156 00157 // visit further child if close enough 00158 if (box_dist * ANNkdMaxErr < ANNkdPointMK->max_key()) 00159 child[ANN_LO]->ann_search(box_dist); 00160 00161 } 00162 ANN_FLOP(10) // increment floating ops 00163 ANN_SPL(1) // one more splitting node visited 00164 } 00165 00166 //---------------------------------------------------------------------- 00167 // kd_leaf::ann_search - search points in a leaf node 00168 // Note: The unreadability of this code is the result of 00169 // some fine tuning to replace indexing by pointer operations. 00170 //---------------------------------------------------------------------- 00171 00172 void ANNkd_leaf::ann_search(ANNdist box_dist) 00173 { 00174 register ANNdist dist; // distance to data point 00175 register ANNcoord* pp; // data coordinate pointer 00176 register ANNcoord* qq; // query coordinate pointer 00177 register ANNdist min_dist; // distance to k-th closest point 00178 register ANNcoord t; 00179 register int d; 00180 00181 min_dist = ANNkdPointMK->max_key(); // k-th smallest distance so far 00182 00183 for (int i = 0; i < n_pts; i++) { // check points in bucket 00184 00185 pp = ANNkdPts[bkt[i]]; // first coord of next data point 00186 qq = ANNkdQ; // first coord of query point 00187 dist = 0; 00188 00189 for(d = 0; d < ANNkdDim; d++) { 00190 ANN_COORD(1) // one more coordinate hit 00191 ANN_FLOP(4) // increment floating ops 00192 00193 t = *(qq++) - *(pp++); // compute length and adv coordinate 00194 // exceeds dist to k-th smallest? 00195 if( (dist = ANN_SUM(dist, ANN_POW(t))) > min_dist) { 00196 break; 00197 } 00198 } 00199 00200 if (d >= ANNkdDim && // among the k best? 00201 (ANN_ALLOW_SELF_MATCH || dist!=0)) { // and no self-match problem 00202 // add it to the list 00203 ANNkdPointMK->insert(dist, bkt[i]); 00204 min_dist = ANNkdPointMK->max_key(); 00205 } 00206 } 00207 ANN_LEAF(1) // one more leaf node visited 00208 ANN_PTS(n_pts) // increment points visited 00209 ANNptsVisited += n_pts; // increment number of points visited 00210 }