ANN.h

Go to the documentation of this file.
00001 //----------------------------------------------------------------------
00002 // File:                        ANN.h
00003 // Programmer:          Sunil Arya and David Mount
00004 // Last modified:       05/03/05 (Release 1.1)
00005 // Description:         Basic include file for approximate nearest
00006 //                                      neighbor searching.
00007 //----------------------------------------------------------------------
00008 // Copyright (c) 1997-2005 University of Maryland and Sunil Arya and
00009 // David Mount.  All Rights Reserved.
00010 // 
00011 // This software and related documentation is part of the Approximate
00012 // Nearest Neighbor Library (ANN).  This software is provided under
00013 // the provisions of the Lesser GNU Public License (LGPL).  See the
00014 // file ../ReadMe.txt for further information.
00015 // 
00016 // The University of Maryland (U.M.) and the authors make no
00017 // representations about the suitability or fitness of this software for
00018 // any purpose.  It is provided "as is" without express or implied
00019 // warranty.
00020 //----------------------------------------------------------------------
00021 // History:
00022 //      Revision 0.1  03/04/98
00023 //              Initial release
00024 //      Revision 1.0  04/01/05
00025 //              Added copyright and revision information
00026 //              Added ANNcoordPrec for coordinate precision.
00027 //              Added methods theDim, nPoints, maxPoints, thePoints to ANNpointSet.
00028 //              Cleaned up C++ structure for modern compilers
00029 //      Revision 1.1  05/03/05
00030 //              Added fixed-radius k-NN searching
00031 //----------------------------------------------------------------------
00032 
00033 //----------------------------------------------------------------------
00034 // ANN - approximate nearest neighbor searching
00035 //      ANN is a library for approximate nearest neighbor searching,
00036 //      based on the use of standard and priority search in kd-trees
00037 //      and balanced box-decomposition (bbd) trees. Here are some
00038 //      references to the main algorithmic techniques used here:
00039 //
00040 //              kd-trees:
00041 //                      Friedman, Bentley, and Finkel, ``An algorithm for finding
00042 //                              best matches in logarithmic expected time,'' ACM
00043 //                              Transactions on Mathematical Software, 3(3):209-226, 1977.
00044 //
00045 //              Priority search in kd-trees:
00046 //                      Arya and Mount, ``Algorithms for fast vector quantization,''
00047 //                              Proc. of DCC '93: Data Compression Conference, eds. J. A.
00048 //                              Storer and M. Cohn, IEEE Press, 1993, 381-390.
00049 //
00050 //              Approximate nearest neighbor search and bbd-trees:
00051 //                      Arya, Mount, Netanyahu, Silverman, and Wu, ``An optimal
00052 //                              algorithm for approximate nearest neighbor searching,''
00053 //                              5th Ann. ACM-SIAM Symposium on Discrete Algorithms,
00054 //                              1994, 573-582.
00055 //----------------------------------------------------------------------
00056 
00057 #ifndef ANN_H
00058 #define ANN_H
00059 
00060 #ifdef WIN32
00061   //----------------------------------------------------------------------
00062   // For Microsoft Visual C++, externally accessible symbols must be
00063   // explicitly indicated with DLL_API, which is somewhat like "extern."
00064   //
00065   // The following ifdef block is the standard way of creating macros
00066   // which make exporting from a DLL simpler. All files within this DLL
00067   // are compiled with the DLL_EXPORTS preprocessor symbol defined on the
00068   // command line. In contrast, projects that use (or import) the DLL
00069   // objects do not define the DLL_EXPORTS symbol. This way any other
00070   // project whose source files include this file see DLL_API functions as
00071   // being imported from a DLL, wheras this DLL sees symbols defined with
00072   // this macro as being exported.
00073   //----------------------------------------------------------------------
00074   #ifdef DLL_EXPORTS
00075          #define DLL_API __declspec(dllexport)
00076   #else
00077         #define DLL_API __declspec(dllimport)
00078   #endif
00079   //----------------------------------------------------------------------
00080   // DLL_API is ignored for all other systems
00081   //----------------------------------------------------------------------
00082 #else
00083   #define DLL_API
00084 #endif
00085 
00086 //----------------------------------------------------------------------
00087 //  basic includes
00088 //----------------------------------------------------------------------
00089 
00090 #include <cmath>                        // math includes
00091 #include <iostream>                     // I/O streams
00092 
00093 //----------------------------------------------------------------------
00094 // Limits
00095 // There are a number of places where we use the maximum double value as
00096 // default initializers (and others may be used, depending on the
00097 // data/distance representation). These can usually be found in limits.h
00098 // (as LONG_MAX, INT_MAX) or in float.h (as DBL_MAX, FLT_MAX).
00099 //
00100 // Not all systems have these files.  If you are using such a system,
00101 // you should set the preprocessor symbol ANN_NO_LIMITS_H when
00102 // compiling, and modify the statements below to generate the
00103 // appropriate value. For practical purposes, this does not need to be
00104 // the maximum double value. It is sufficient that it be at least as
00105 // large than the maximum squared distance between between any two
00106 // points.
00107 //----------------------------------------------------------------------
00108 #ifdef ANN_NO_LIMITS_H                                  // limits.h unavailable
00109   #include <cvalues>                                    // replacement for limits.h
00110   const double ANN_DBL_MAX = MAXDOUBLE; // insert maximum double
00111 #else
00112   #include <climits>
00113   #include <cfloat>
00114   const double ANN_DBL_MAX = DBL_MAX;
00115 #endif
00116 
00117 #define ANNversion              "1.1.1"                 // ANN version and information
00118 #define ANNversionCmt   ""
00119 #define ANNcopyright    "David M. Mount and Sunil Arya"
00120 #define ANNlatestRev    "Aug 4, 2006"
00121 
00122 //----------------------------------------------------------------------
00123 //      ANNbool
00124 //      This is a simple boolean type. Although ANSI C++ is supposed
00125 //      to support the type bool, some compilers do not have it.
00126 //----------------------------------------------------------------------
00127 
00128 enum ANNbool {ANNfalse = 0, ANNtrue = 1}; // ANN boolean type (non ANSI C++)
00129 
00130 //----------------------------------------------------------------------
00131 //      ANNcoord, ANNdist
00132 //              ANNcoord and ANNdist are the types used for representing
00133 //              point coordinates and distances.  They can be modified by the
00134 //              user, with some care.  It is assumed that they are both numeric
00135 //              types, and that ANNdist is generally of an equal or higher type
00136 //              from ANNcoord.  A variable of type ANNdist should be large
00137 //              enough to store the sum of squared components of a variable
00138 //              of type ANNcoord for the number of dimensions needed in the
00139 //              application.  For example, the following combinations are
00140 //              legal:
00141 //
00142 //              ANNcoord                ANNdist
00143 //              ---------               -------------------------------
00144 //              short                   short, int, long, float, double
00145 //              int                             int, long, float, double
00146 //              long                    long, float, double
00147 //              float                   float, double
00148 //              double                  double
00149 //
00150 //              It is the user's responsibility to make sure that overflow does
00151 //              not occur in distance calculation.
00152 //----------------------------------------------------------------------
00153 
00154 typedef double  ANNcoord;                               // coordinate data type
00155 typedef double  ANNdist;                                // distance data type
00156 
00157 //----------------------------------------------------------------------
00158 //      ANNidx
00159 //              ANNidx is a point index.  When the data structure is built, the
00160 //              points are given as an array.  Nearest neighbor results are
00161 //              returned as an integer index into this array.  To make it
00162 //              clearer when this is happening, we define the integer type
00163 //              ANNidx.  Indexing starts from 0.
00164 //              
00165 //              For fixed-radius near neighbor searching, it is possible that
00166 //              there are not k nearest neighbors within the search radius.  To
00167 //              indicate this, the algorithm returns ANN_NULL_IDX as its result.
00168 //              It should be distinguishable from any valid array index.
00169 //----------------------------------------------------------------------
00170 
00171 typedef int             ANNidx;                                 // point index
00172 const ANNidx    ANN_NULL_IDX = -1;              // a NULL point index
00173 
00174 //----------------------------------------------------------------------
00175 //      Infinite distance:
00176 //              The code assumes that there is an "infinite distance" which it
00177 //              uses to initialize distances before performing nearest neighbor
00178 //              searches.  It should be as larger or larger than any legitimate
00179 //              nearest neighbor distance.
00180 //
00181 //              On most systems, these should be found in the standard include
00182 //              file <limits.h> or possibly <float.h>.  If you do not have these
00183 //              file, some suggested values are listed below, assuming 64-bit
00184 //              long, 32-bit int and 16-bit short.
00185 //
00186 //              ANNdist ANN_DIST_INF    Values (see <limits.h> or <float.h>)
00187 //              ------- ------------    ------------------------------------
00188 //              double  DBL_MAX                 1.79769313486231570e+308
00189 //              float   FLT_MAX                 3.40282346638528860e+38
00190 //              long    LONG_MAX                0x7fffffffffffffff
00191 //              int             INT_MAX                 0x7fffffff
00192 //              short   SHRT_MAX                0x7fff
00193 //----------------------------------------------------------------------
00194 
00195 const ANNdist   ANN_DIST_INF = ANN_DBL_MAX;
00196 
00197 //----------------------------------------------------------------------
00198 //      Significant digits for tree dumps:
00199 //              When floating point coordinates are used, the routine that dumps
00200 //              a tree needs to know roughly how many significant digits there
00201 //              are in a ANNcoord, so it can output points to full precision.
00202 //              This is defined to be ANNcoordPrec.  On most systems these
00203 //              values can be found in the standard include files <limits.h> or
00204 //              <float.h>.  For integer types, the value is essentially ignored.
00205 //
00206 //              ANNcoord ANNcoordPrec   Values (see <limits.h> or <float.h>)
00207 //              -------- ------------   ------------------------------------
00208 //              double   DBL_DIG                15
00209 //              float    FLT_DIG                6
00210 //              long     doesn't matter 19
00211 //              int              doesn't matter 10
00212 //              short    doesn't matter 5
00213 //----------------------------------------------------------------------
00214 
00215 #ifdef DBL_DIG                                                  // number of sig. bits in ANNcoord
00216         const int        ANNcoordPrec   = DBL_DIG;
00217 #else
00218         const int        ANNcoordPrec   = 15;   // default precision
00219 #endif
00220 
00221 //----------------------------------------------------------------------
00222 // Self match?
00223 //      In some applications, the nearest neighbor of a point is not
00224 //      allowed to be the point itself. This occurs, for example, when
00225 //      computing all nearest neighbors in a set.  By setting the
00226 //      parameter ANN_ALLOW_SELF_MATCH to ANNfalse, the nearest neighbor
00227 //      is the closest point whose distance from the query point is
00228 //      strictly positive.
00229 //----------------------------------------------------------------------
00230 
00231 const ANNbool   ANN_ALLOW_SELF_MATCH    = ANNtrue;
00232 
00233 //----------------------------------------------------------------------
00234 //      Norms and metrics:
00235 //              ANN supports any Minkowski norm for defining distance.  In
00236 //              particular, for any p >= 1, the L_p Minkowski norm defines the
00237 //              length of a d-vector (v0, v1, ..., v(d-1)) to be
00238 //
00239 //                              (|v0|^p + |v1|^p + ... + |v(d-1)|^p)^(1/p),
00240 //
00241 //              (where ^ denotes exponentiation, and |.| denotes absolute
00242 //              value).  The distance between two points is defined to be the
00243 //              norm of the vector joining them.  Some common distance metrics
00244 //              include
00245 //
00246 //                              Euclidean metric                p = 2
00247 //                              Manhattan metric                p = 1
00248 //                              Max metric                              p = infinity
00249 //
00250 //              In the case of the max metric, the norm is computed by taking
00251 //              the maxima of the absolute values of the components.  ANN is
00252 //              highly "coordinate-based" and does not support general distances
00253 //              functions (e.g. those obeying just the triangle inequality).  It
00254 //              also does not support distance functions based on
00255 //              inner-products.
00256 //
00257 //              For the purpose of computing nearest neighbors, it is not
00258 //              necessary to compute the final power (1/p).  Thus the only
00259 //              component that is used by the program is |v(i)|^p.
00260 //
00261 //              ANN parameterizes the distance computation through the following
00262 //              macros.  (Macros are used rather than procedures for
00263 //              efficiency.) Recall that the distance between two points is
00264 //              given by the length of the vector joining them, and the length
00265 //              or norm of a vector v is given by formula:
00266 //
00267 //                              |v| = ROOT(POW(v0) # POW(v1) # ... # POW(v(d-1)))
00268 //
00269 //              where ROOT, POW are unary functions and # is an associative and
00270 //              commutative binary operator mapping the following types:
00271 //
00272 //                      **      POW:    ANNcoord                                --> ANNdist
00273 //                      **      #:              ANNdist x ANNdist               --> ANNdist
00274 //                      **      ROOT:   ANNdist (>0)                    --> double
00275 //
00276 //              For early termination in distance calculation (partial distance
00277 //              calculation) we assume that POW and # together are monotonically
00278 //              increasing on sequences of arguments, meaning that for all
00279 //              v0..vk and y:
00280 //
00281 //              POW(v0) #...# POW(vk) <= (POW(v0) #...# POW(vk)) # POW(y).
00282 //
00283 //      Incremental Distance Calculation:
00284 //              The program uses an optimized method of computing distances for
00285 //              kd-trees and bd-trees, called incremental distance calculation.
00286 //              It is used when distances are to be updated when only a single
00287 //              coordinate of a point has been changed.  In order to use this,
00288 //              we assume that there is an incremental update function DIFF(x,y)
00289 //              for #, such that if:
00290 //
00291 //                                      s = x0 # ... # xi # ... # xk 
00292 //
00293 //              then if s' is equal to s but with xi replaced by y, that is, 
00294 //              
00295 //                                      s' = x0 # ... # y # ... # xk
00296 //
00297 //              then the length of s' can be computed by:
00298 //
00299 //                                      |s'| = |s| # DIFF(xi,y).
00300 //
00301 //              Thus, if # is + then DIFF(xi,y) is (yi-x).  For the L_infinity
00302 //              norm we make use of the fact that in the program this function
00303 //              is only invoked when y > xi, and hence DIFF(xi,y)=y.
00304 //
00305 //              Finally, for approximate nearest neighbor queries we assume
00306 //              that POW and ROOT are related such that
00307 //
00308 //                                      v*ROOT(x) = ROOT(POW(v)*x)
00309 //
00310 //              Here are the values for the various Minkowski norms:
00311 //
00312 //              L_p:    p even:                                                 p odd:
00313 //                              -------------------------               ------------------------
00314 //                              POW(v)                  = v^p                   POW(v)                  = |v|^p
00315 //                              ROOT(x)                 = x^(1/p)               ROOT(x)                 = x^(1/p)
00316 //                              #                               = +                             #                               = +
00317 //                              DIFF(x,y)               = y - x                 DIFF(x,y)               = y - x 
00318 //
00319 //              L_inf:
00320 //                              POW(v)                  = |v|
00321 //                              ROOT(x)                 = x
00322 //                              #                               = max
00323 //                              DIFF(x,y)               = y
00324 //
00325 //              By default the Euclidean norm is assumed.  To change the norm,
00326 //              uncomment the appropriate set of macros below.
00327 //----------------------------------------------------------------------
00328 
00329 //----------------------------------------------------------------------
00330 //      Use the following for the Euclidean norm
00331 //----------------------------------------------------------------------
00332 #define ANN_POW(v)                      ((v)*(v))
00333 #define ANN_ROOT(x)                     sqrt(x)
00334 #define ANN_SUM(x,y)            ((x) + (y))
00335 #define ANN_DIFF(x,y)           ((y) - (x))
00336 
00337 //----------------------------------------------------------------------
00338 //      Use the following for the L_1 (Manhattan) norm
00339 //----------------------------------------------------------------------
00340 // #define ANN_POW(v)           fabs(v)
00341 // #define ANN_ROOT(x)          (x)
00342 // #define ANN_SUM(x,y)         ((x) + (y))
00343 // #define ANN_DIFF(x,y)        ((y) - (x))
00344 
00345 //----------------------------------------------------------------------
00346 //      Use the following for a general L_p norm
00347 //----------------------------------------------------------------------
00348 // #define ANN_POW(v)           pow(fabs(v),p)
00349 // #define ANN_ROOT(x)          pow(fabs(x),1/p)
00350 // #define ANN_SUM(x,y)         ((x) + (y))
00351 // #define ANN_DIFF(x,y)        ((y) - (x))
00352 
00353 //----------------------------------------------------------------------
00354 //      Use the following for the L_infinity (Max) norm
00355 //----------------------------------------------------------------------
00356 // #define ANN_POW(v)           fabs(v)
00357 // #define ANN_ROOT(x)          (x)
00358 // #define ANN_SUM(x,y)         ((x) > (y) ? (x) : (y))
00359 // #define ANN_DIFF(x,y)        (y)
00360 
00361 //----------------------------------------------------------------------
00362 //      Array types
00363 //              The following array types are of basic interest.  A point is
00364 //              just a dimensionless array of coordinates, a point array is a
00365 //              dimensionless array of points.  A distance array is a
00366 //              dimensionless array of distances and an index array is a
00367 //              dimensionless array of point indices.  The latter two are used
00368 //              when returning the results of k-nearest neighbor queries.
00369 //----------------------------------------------------------------------
00370 
00371 typedef ANNcoord* ANNpoint;                     // a point
00372 typedef ANNpoint* ANNpointArray;        // an array of points 
00373 typedef ANNdist*  ANNdistArray;         // an array of distances 
00374 typedef ANNidx*   ANNidxArray;          // an array of point indices
00375 
00376 //----------------------------------------------------------------------
00377 //      Basic point and array utilities:
00378 //              The following procedures are useful supplements to ANN's nearest
00379 //              neighbor capabilities.
00380 //
00381 //              annDist():
00382 //                      Computes the (squared) distance between a pair of points.
00383 //                      Note that this routine is not used internally by ANN for
00384 //                      computing distance calculations.  For reasons of efficiency
00385 //                      this is done using incremental distance calculation.  Thus,
00386 //                      this routine cannot be modified as a method of changing the
00387 //                      metric.
00388 //
00389 //              Because points (somewhat like strings in C) are stored as
00390 //              pointers.  Consequently, creating and destroying copies of
00391 //              points may require storage allocation.  These procedures do
00392 //              this.
00393 //
00394 //              annAllocPt() and annDeallocPt():
00395 //                              Allocate a deallocate storage for a single point, and
00396 //                              return a pointer to it.  The argument to AllocPt() is
00397 //                              used to initialize all components.
00398 //
00399 //              annAllocPts() and annDeallocPts():
00400 //                              Allocate and deallocate an array of points as well a
00401 //                              place to store their coordinates, and initializes the
00402 //                              points to point to their respective coordinates.  It
00403 //                              allocates point storage in a contiguous block large
00404 //                              enough to store all the points.  It performs no
00405 //                              initialization.
00406 //
00407 //              annCopyPt():
00408 //                              Creates a copy of a given point, allocating space for
00409 //                              the new point.  It returns a pointer to the newly
00410 //                              allocated copy.
00411 //----------------------------------------------------------------------
00412    
00413 DLL_API ANNdist annDist(
00414         int                             dim,            // dimension of space
00415         ANNpoint                p,                      // points
00416         ANNpoint                q);
00417 
00418 DLL_API ANNpoint annAllocPt(
00419         int                             dim,            // dimension
00420         ANNcoord                c = 0);         // coordinate value (all equal)
00421 
00422 DLL_API ANNpointArray annAllocPts(
00423         int                             n,                      // number of points
00424         int                             dim);           // dimension
00425 
00426 DLL_API void annDeallocPt(
00427         ANNpoint                &p);            // deallocate 1 point
00428    
00429 DLL_API void annDeallocPts(
00430         ANNpointArray   &pa);           // point array
00431 
00432 DLL_API ANNpoint annCopyPt(
00433         int                             dim,            // dimension
00434         ANNpoint                source);        // point to copy
00435 
00436 //----------------------------------------------------------------------
00437 //Overall structure: ANN supports a number of different data structures
00438 //for approximate and exact nearest neighbor searching.  These are:
00439 //
00440 //              ANNbruteForce   A simple brute-force search structure.
00441 //              ANNkd_tree              A kd-tree tree search structure.  ANNbd_tree
00442 //              A bd-tree tree search structure (a kd-tree with shrink
00443 //              capabilities).
00444 //
00445 //              At a minimum, each of these data structures support k-nearest
00446 //              neighbor queries.  The nearest neighbor query, annkSearch,
00447 //              returns an integer identifier and the distance to the nearest
00448 //              neighbor(s) and annRangeSearch returns the nearest points that
00449 //              lie within a given query ball.
00450 //
00451 //              Each structure is built by invoking the appropriate constructor
00452 //              and passing it (at a minimum) the array of points, the total
00453 //              number of points and the dimension of the space.  Each structure
00454 //              is also assumed to support a destructor and member functions
00455 //              that return basic information about the point set.
00456 //
00457 //              Note that the array of points is not copied by the data
00458 //              structure (for reasons of space efficiency), and it is assumed
00459 //              to be constant throughout the lifetime of the search structure.
00460 //
00461 //              The search algorithm, annkSearch, is given the query point (q),
00462 //              and the desired number of nearest neighbors to report (k), and
00463 //              the error bound (eps) (whose default value is 0, implying exact
00464 //              nearest neighbors).  It returns two arrays which are assumed to
00465 //              contain at least k elements: one (nn_idx) contains the indices
00466 //              (within the point array) of the nearest neighbors and the other
00467 //              (dd) contains the squared distances to these nearest neighbors.
00468 //
00469 //              The search algorithm, annkFRSearch, is a fixed-radius kNN
00470 //              search.  In addition to a query point, it is given a (squared)
00471 //              radius bound.  (This is done for consistency, because the search
00472 //              returns distances as squared quantities.) It does two things.
00473 //              First, it computes the k nearest neighbors within the radius
00474 //              bound, and second, it returns the total number of points lying
00475 //              within the radius bound. It is permitted to set k = 0, in which
00476 //              case it effectively answers a range counting query.  If the
00477 //              error bound epsilon is positive, then the search is approximate
00478 //              in the sense that it is free to ignore any point that lies
00479 //              outside a ball of radius r/(1+epsilon), where r is the given
00480 //              (unsquared) radius bound.
00481 //
00482 //              The generic object from which all the search structures are
00483 //              dervied is given below.  It is a virtual object, and is useless
00484 //              by itself.
00485 //----------------------------------------------------------------------
00486 
00487 class DLL_API ANNpointSet {
00488 public:
00489         virtual ~ANNpointSet() {}                       // virtual distructor
00490 
00491         virtual void annkSearch(                        // approx k near neighbor search
00492                 ANNpoint                q,                              // query point
00493                 int                             k,                              // number of near neighbors to return
00494                 ANNidxArray             nn_idx,                 // nearest neighbor array (modified)
00495                 ANNdistArray    dd,                             // dist to near neighbors (modified)
00496                 double                  eps=0.0                 // error bound
00497                 ) = 0;                                                  // pure virtual (defined elsewhere)
00498 
00499         virtual int annkFRSearch(                       // approx fixed-radius kNN search
00500                 ANNpoint                q,                              // query point
00501                 ANNdist                 sqRad,                  // squared radius
00502                 int                             k = 0,                  // number of near neighbors to return
00503                 ANNidxArray             nn_idx = NULL,  // nearest neighbor array (modified)
00504                 ANNdistArray    dd = NULL,              // dist to near neighbors (modified)
00505                 double                  eps=0.0                 // error bound
00506                 ) = 0;                                                  // pure virtual (defined elsewhere)
00507 
00508         virtual int theDim() = 0;                       // return dimension of space
00509         virtual int nPoints() = 0;                      // return number of points
00510                                                                                 // return pointer to points
00511         virtual ANNpointArray thePoints() = 0;
00512 };
00513 
00514 //----------------------------------------------------------------------
00515 //      Brute-force nearest neighbor search:
00516 //              The brute-force search structure is very simple but inefficient.
00517 //              It has been provided primarily for the sake of comparison with
00518 //              and validation of the more complex search structures.
00519 //
00520 //              Query processing is the same as described above, but the value
00521 //              of epsilon is ignored, since all distance calculations are
00522 //              performed exactly.
00523 //
00524 //              WARNING: This data structure is very slow, and should not be
00525 //              used unless the number of points is very small.
00526 //
00527 //              Internal information:
00528 //              ---------------------
00529 //              This data structure bascially consists of the array of points
00530 //              (each a pointer to an array of coordinates).  The search is
00531 //              performed by a simple linear scan of all the points.
00532 //----------------------------------------------------------------------
00533 
00534 class DLL_API ANNbruteForce: public ANNpointSet {
00535         int                             dim;                            // dimension
00536         int                             n_pts;                          // number of points
00537         ANNpointArray   pts;                            // point array
00538 public:
00539         ANNbruteForce(                                          // constructor from point array
00540                 ANNpointArray   pa,                             // point array
00541                 int                             n,                              // number of points
00542                 int                             dd);                    // dimension
00543 
00544         ~ANNbruteForce();                                       // destructor
00545 
00546         void annkSearch(                                        // approx k near neighbor search
00547                 ANNpoint                q,                              // query point
00548                 int                             k,                              // number of near neighbors to return
00549                 ANNidxArray             nn_idx,                 // nearest neighbor array (modified)
00550                 ANNdistArray    dd,                             // dist to near neighbors (modified)
00551                 double                  eps=0.0);               // error bound
00552 
00553         int annkFRSearch(                                       // approx fixed-radius kNN search
00554                 ANNpoint                q,                              // query point
00555                 ANNdist                 sqRad,                  // squared radius
00556                 int                             k = 0,                  // number of near neighbors to return
00557                 ANNidxArray             nn_idx = NULL,  // nearest neighbor array (modified)
00558                 ANNdistArray    dd = NULL,              // dist to near neighbors (modified)
00559                 double                  eps=0.0);               // error bound
00560 
00561         int theDim()                                            // return dimension of space
00562                 { return dim; }
00563 
00564         int nPoints()                                           // return number of points
00565                 { return n_pts; }
00566 
00567         ANNpointArray thePoints()                       // return pointer to points
00568                 {  return pts;  }
00569 };
00570 
00571 //----------------------------------------------------------------------
00572 // kd- and bd-tree splitting and shrinking rules
00573 //              kd-trees supports a collection of different splitting rules.
00574 //              In addition to the standard kd-tree splitting rule proposed
00575 //              by Friedman, Bentley, and Finkel, we have introduced a
00576 //              number of other splitting rules, which seem to perform
00577 //              as well or better (for the distributions we have tested).
00578 //
00579 //              The splitting methods given below allow the user to tailor
00580 //              the data structure to the particular data set.  They are
00581 //              are described in greater details in the kd_split.cc source
00582 //              file.  The method ANN_KD_SUGGEST is the method chosen (rather
00583 //              subjectively) by the implementors as the one giving the
00584 //              fastest performance, and is the default splitting method.
00585 //
00586 //              As with splitting rules, there are a number of different
00587 //              shrinking rules.  The shrinking rule ANN_BD_NONE does no
00588 //              shrinking (and hence produces a kd-tree tree).  The rule
00589 //              ANN_BD_SUGGEST uses the implementors favorite rule.
00590 //----------------------------------------------------------------------
00591 
00592 enum ANNsplitRule {
00593                 ANN_KD_STD                              = 0,    // the optimized kd-splitting rule
00594                 ANN_KD_MIDPT                    = 1,    // midpoint split
00595                 ANN_KD_FAIR                             = 2,    // fair split
00596                 ANN_KD_SL_MIDPT                 = 3,    // sliding midpoint splitting method
00597                 ANN_KD_SL_FAIR                  = 4,    // sliding fair split method
00598                 ANN_KD_SUGGEST                  = 5};   // the authors' suggestion for best
00599 const int ANN_N_SPLIT_RULES             = 6;    // number of split rules
00600 
00601 enum ANNshrinkRule {
00602                 ANN_BD_NONE                             = 0,    // no shrinking at all (just kd-tree)
00603                 ANN_BD_SIMPLE                   = 1,    // simple splitting
00604                 ANN_BD_CENTROID                 = 2,    // centroid splitting
00605                 ANN_BD_SUGGEST                  = 3};   // the authors' suggested choice
00606 const int ANN_N_SHRINK_RULES    = 4;    // number of shrink rules
00607 
00608 //----------------------------------------------------------------------
00609 //      kd-tree:
00610 //              The main search data structure supported by ANN is a kd-tree.
00611 //              The main constructor is given a set of points and a choice of
00612 //              splitting method to use in building the tree.
00613 //
00614 //              Construction:
00615 //              -------------
00616 //              The constructor is given the point array, number of points,
00617 //              dimension, bucket size (default = 1), and the splitting rule
00618 //              (default = ANN_KD_SUGGEST).  The point array is not copied, and
00619 //              is assumed to be kept constant throughout the lifetime of the
00620 //              search structure.  There is also a "load" constructor that
00621 //              builds a tree from a file description that was created by the
00622 //              Dump operation.
00623 //
00624 //              Search:
00625 //              -------
00626 //              There are two search methods:
00627 //
00628 //                      Standard search (annkSearch()):
00629 //                              Searches nodes in tree-traversal order, always visiting
00630 //                              the closer child first.
00631 //                      Priority search (annkPriSearch()):
00632 //                              Searches nodes in order of increasing distance of the
00633 //                              associated cell from the query point.  For many
00634 //                              distributions the standard search seems to work just
00635 //                              fine, but priority search is safer for worst-case
00636 //                              performance.
00637 //
00638 //              Printing:
00639 //              ---------
00640 //              There are two methods provided for printing the tree.  Print()
00641 //              is used to produce a "human-readable" display of the tree, with
00642 //              indenation, which is handy for debugging.  Dump() produces a
00643 //              format that is suitable reading by another program.  There is a
00644 //              "load" constructor, which constructs a tree which is assumed to
00645 //              have been saved by the Dump() procedure.
00646 //              
00647 //              Performance and Structure Statistics:
00648 //              -------------------------------------
00649 //              The procedure getStats() collects statistics information on the
00650 //              tree (its size, height, etc.)  See ANNperf.h for information on
00651 //              the stats structure it returns.
00652 //
00653 //              Internal information:
00654 //              ---------------------
00655 //              The data structure consists of three major chunks of storage.
00656 //              The first (implicit) storage are the points themselves (pts),
00657 //              which have been provided by the users as an argument to the
00658 //              constructor, or are allocated dynamically if the tree is built
00659 //              using the load constructor).  These should not be changed during
00660 //              the lifetime of the search structure.  It is the user's
00661 //              responsibility to delete these after the tree is destroyed.
00662 //
00663 //              The second is the tree itself (which is dynamically allocated in
00664 //              the constructor) and is given as a pointer to its root node
00665 //              (root).  These nodes are automatically deallocated when the tree
00666 //              is deleted.  See the file src/kd_tree.h for further information
00667 //              on the structure of the tree nodes.
00668 //
00669 //              Each leaf of the tree does not contain a pointer directly to a
00670 //              point, but rather contains a pointer to a "bucket", which is an
00671 //              array consisting of point indices.  The third major chunk of
00672 //              storage is an array (pidx), which is a large array in which all
00673 //              these bucket subarrays reside.  (The reason for storing them
00674 //              separately is the buckets are typically small, but of varying
00675 //              sizes.  This was done to avoid fragmentation.)  This array is
00676 //              also deallocated when the tree is deleted.
00677 //
00678 //              In addition to this, the tree consists of a number of other
00679 //              pieces of information which are used in searching and for
00680 //              subsequent tree operations.  These consist of the following:
00681 //
00682 //              dim                                             Dimension of space
00683 //              n_pts                                   Number of points currently in the tree
00684 //              n_max                                   Maximum number of points that are allowed
00685 //                                                              in the tree
00686 //              bkt_size                                Maximum bucket size (no. of points per leaf)
00687 //              bnd_box_lo                              Bounding box low point
00688 //              bnd_box_hi                              Bounding box high point
00689 //              splitRule                               Splitting method used
00690 //
00691 //----------------------------------------------------------------------
00692 
00693 //----------------------------------------------------------------------
00694 // Some types and objects used by kd-tree functions
00695 // See src/kd_tree.h and src/kd_tree.cpp for definitions
00696 //----------------------------------------------------------------------
00697 class ANNkdStats;                               // stats on kd-tree
00698 class ANNkd_node;                               // generic node in a kd-tree
00699 typedef ANNkd_node*     ANNkd_ptr;      // pointer to a kd-tree node
00700 
00701 class DLL_API ANNkd_tree: public ANNpointSet {
00702 protected:
00703         int                             dim;                            // dimension of space
00704         int                             n_pts;                          // number of points in tree
00705         int                             bkt_size;                       // bucket size
00706         ANNpointArray   pts;                            // the points
00707         ANNidxArray             pidx;                           // point indices (to pts array)
00708         ANNkd_ptr               root;                           // root of kd-tree
00709         ANNpoint                bnd_box_lo;                     // bounding box low point
00710         ANNpoint                bnd_box_hi;                     // bounding box high point
00711 
00712         void SkeletonTree(                                      // construct skeleton tree
00713                 int                             n,                              // number of points
00714                 int                             dd,                             // dimension
00715                 int                             bs,                             // bucket size
00716                 ANNpointArray pa = NULL,                // point array (optional)
00717                 ANNidxArray pi = NULL);                 // point indices (optional)
00718 
00719 public:
00720         ANNkd_tree(                                                     // build skeleton tree
00721                 int                             n = 0,                  // number of points
00722                 int                             dd = 0,                 // dimension
00723                 int                             bs = 1);                // bucket size
00724 
00725         ANNkd_tree(                                                     // build from point array
00726                 ANNpointArray   pa,                             // point array
00727                 int                             n,                              // number of points
00728                 int                             dd,                             // dimension
00729                 int                             bs = 1,                 // bucket size
00730                 ANNsplitRule    split = ANN_KD_SUGGEST);        // splitting method
00731 
00732         ANNkd_tree(                                                     // build from dump file
00733                 std::istream&   in);                    // input stream for dump file
00734 
00735         ~ANNkd_tree();                                          // tree destructor
00736 
00737         void annkSearch(                                        // approx k near neighbor search
00738                 ANNpoint                q,                              // query point
00739                 int                             k,                              // number of near neighbors to return
00740                 ANNidxArray             nn_idx,                 // nearest neighbor array (modified)
00741                 ANNdistArray    dd,                             // dist to near neighbors (modified)
00742                 double                  eps=0.0);               // error bound
00743 
00744         void annkPriSearch(                             // priority k near neighbor search
00745                 ANNpoint                q,                              // query point
00746                 int                             k,                              // number of near neighbors to return
00747                 ANNidxArray             nn_idx,                 // nearest neighbor array (modified)
00748                 ANNdistArray    dd,                             // dist to near neighbors (modified)
00749                 double                  eps=0.0);               // error bound
00750 
00751         int annkFRSearch(                                       // approx fixed-radius kNN search
00752                 ANNpoint                q,                              // the query point
00753                 ANNdist                 sqRad,                  // squared radius of query ball
00754                 int                             k,                              // number of neighbors to return
00755                 ANNidxArray             nn_idx = NULL,  // nearest neighbor array (modified)
00756                 ANNdistArray    dd = NULL,              // dist to near neighbors (modified)
00757                 double                  eps=0.0);               // error bound
00758 
00759         int theDim()                                            // return dimension of space
00760                 { return dim; }
00761 
00762         int nPoints()                                           // return number of points
00763                 { return n_pts; }
00764 
00765         ANNpointArray thePoints()                       // return pointer to points
00766                 {  return pts;  }
00767 
00768         virtual void Print(                                     // print the tree (for debugging)
00769                 ANNbool                 with_pts,               // print points as well?
00770                 std::ostream&   out);                   // output stream
00771 
00772         virtual void Dump(                                      // dump entire tree
00773                 ANNbool                 with_pts,               // print points as well?
00774                 std::ostream&   out);                   // output stream
00775                                                                 
00776         virtual void getStats(                          // compute tree statistics
00777                 ANNkdStats&             st);                    // the statistics (modified)
00778 };                                                              
00779 
00780 //----------------------------------------------------------------------
00781 //      Box decomposition tree (bd-tree)
00782 //              The bd-tree is inherited from a kd-tree.  The main difference
00783 //              in the bd-tree and the kd-tree is a new type of internal node
00784 //              called a shrinking node (in the kd-tree there is only one type
00785 //              of internal node, a splitting node).  The shrinking node
00786 //              makes it possible to generate balanced trees in which the
00787 //              cells have bounded aspect ratio, by allowing the decomposition
00788 //              to zoom in on regions of dense point concentration.  Although
00789 //              this is a nice idea in theory, few point distributions are so
00790 //              densely clustered that this is really needed.
00791 //----------------------------------------------------------------------
00792 
00793 class DLL_API ANNbd_tree: public ANNkd_tree {
00794 public:
00795         ANNbd_tree(                                                     // build skeleton tree
00796                 int                             n,                              // number of points
00797                 int                             dd,                             // dimension
00798                 int                             bs = 1)                 // bucket size
00799                 : ANNkd_tree(n, dd, bs) {}              // build base kd-tree
00800 
00801         ANNbd_tree(                                                     // build from point array
00802                 ANNpointArray   pa,                             // point array
00803                 int                             n,                              // number of points
00804                 int                             dd,                             // dimension
00805                 int                             bs = 1,                 // bucket size
00806                 ANNsplitRule    split  = ANN_KD_SUGGEST,        // splitting rule
00807                 ANNshrinkRule   shrink = ANN_BD_SUGGEST);       // shrinking rule
00808 
00809         ANNbd_tree(                                                     // build from dump file
00810                 std::istream&   in);                    // input stream for dump file
00811 };
00812 
00813 //----------------------------------------------------------------------
00814 //      Other functions
00815 //      annMaxPtsVisit          Sets a limit on the maximum number of points
00816 //                                              to visit in the search.
00817 //  annClose                    Can be called when all use of ANN is finished.
00818 //                                              It clears up a minor memory leak.
00819 //----------------------------------------------------------------------
00820 
00821 DLL_API void annMaxPtsVisit(    // max. pts to visit in search
00822         int                             maxPts);        // the limit
00823 
00824 DLL_API void annClose();                // called to end use of ANN
00825 
00826 #endif