GeographicLib 2.1.2
NearestNeighbor.hpp
Go to the documentation of this file.
1/**
2 * \file NearestNeighbor.hpp
3 * \brief Header for GeographicLib::NearestNeighbor class
4 *
5 * Copyright (c) Charles Karney (2016-2020) <charles@karney.com> and licensed
6 * under the MIT/X11 License. For more information, see
7 * https://geographiclib.sourceforge.io/
8 **********************************************************************/
9
10#if !defined(GEOGRAPHICLIB_NEARESTNEIGHBOR_HPP)
11#define GEOGRAPHICLIB_NEARESTNEIGHBOR_HPP 1
12
13#include <algorithm> // for nth_element, max_element, etc.
14#include <vector>
15#include <queue> // for priority_queue
16#include <utility> // for swap + pair
17#include <cstring>
18#include <limits>
19#include <cmath>
20#include <iostream>
21#include <sstream>
22// Only for GeographicLib::GeographicErr
24
25#if defined(GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION) && \
26 GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION
27#include <boost/serialization/nvp.hpp>
28#include <boost/serialization/split_member.hpp>
29#include <boost/serialization/array.hpp>
30#include <boost/serialization/vector.hpp>
31#endif
32
33#if defined(_MSC_VER)
34// Squelch warnings about constant conditional expressions
35# pragma warning (push)
36# pragma warning (disable: 4127)
37#endif
38
39namespace GeographicLib {
40
41 /**
42 * \brief Nearest-neighbor calculations
43 *
44 * This class solves the nearest-neighbor problm using a vantage-point tree
45 * as described in \ref nearest.
46 *
47 * This class is templated so that it can handle arbitrary metric spaces as
48 * follows:
49 *
50 * @tparam dist_t the type used for measuring distances; it can be a real or
51 * signed integer type; in typical geodetic applications, \e dist_t might
52 * be <code>double</code>.
53 * @tparam pos_t the type for specifying the positions of points; geodetic
54 * application might bundle the latitude and longitude into a
55 * <code>std::pair<dist_t, dist_t></code>.
56 * @tparam distfun_t the type of a function object which takes takes two
57 * positions (of type \e pos_t) and returns the distance (of type \e
58 * dist_t); in geodetic applications, this might be a class which is
59 * constructed with a Geodesic object and which implements a member
60 * function with a signature <code>dist_t operator() (const pos_t&, const
61 * pos_t&) const</code>, which returns the geodesic distance between two
62 * points.
63 *
64 * \note The distance measure must satisfy the triangle inequality, \f$
65 * d(a,c) \le d(a,b) + d(b,c) \f$ for all points \e a, \e b, \e c. The
66 * geodesic distance (given by Geodesic::Inverse) does, while the great
67 * ellipse distance and the rhumb line distance <i>do not</i>. If you use
68 * the ordinary Euclidean distance, i.e., \f$ \sqrt{(x_a-x_b)^2 +
69 * (y_a-y_b)^2} \f$ for two dimensions, don't be tempted to leave out the
70 * square root in the interests of "efficiency"; the squared distance does
71 * not satisfy the triangle inequality!
72 *
73 * \note This is a "header-only" implementation and, as such, depends in a
74 * minimal way on the rest of GeographicLib (the only dependency is through
75 * the use of GeographicLib::GeographicErr for handling and run-time
76 * exceptions). Therefore, it is easy to extract this class from the rest of
77 * GeographicLib and use it as a stand-alone facility.
78 *
79 * The \e dist_t type must support numeric_limits queries (specifically:
80 * is_signed, is_integer, max(), digits).
81 *
82 * The NearestNeighbor object is constructed with a vector of points (type \e
83 * pos_t) and a distance function (type \e distfun_t). However the object
84 * does \e not store the points. When querying the object with Search(),
85 * it's necessary to supply the same vector of points and the same distance
86 * function.
87 *
88 * There's no capability in this implementation to add or remove points from
89 * the set. Instead Initialize() should be called to re-initialize the
90 * object with the modified vector of points.
91 *
92 * Because of the overhead in constructing a NearestNeighbor object for a
93 * large set of points, functions Save() and Load() are provided to save the
94 * object to an external file. operator<<(), operator>>() and <a
95 * href="https://www.boost.org/libs/serialization/doc"> Boost
96 * serialization</a> can also be used to save and restore a NearestNeighbor
97 * object. This is illustrated in the example.
98 *
99 * Example of use:
100 * \include example-NearestNeighbor.cpp
101 **********************************************************************/
102 template<typename dist_t, typename pos_t, class distfun_t>
104 // For tracking changes to the I/O format
105 static const int version = 1;
106 // This is what we get "free"; but if sizeof(dist_t) = 1 (unlikely), allow
107 // 4 slots (and this accommodates the default value bucket = 4).
108 static const int maxbucket =
109 (2 + ((4 * sizeof(dist_t)) / sizeof(int) >= 2 ?
110 (4 * sizeof(dist_t)) / sizeof(int) : 2));
111 public:
112
113 /**
114 * Default constructor for NearestNeighbor.
115 *
116 * This is equivalent to specifying an empty set of points.
117 **********************************************************************/
118 NearestNeighbor() : _numpoints(0), _bucket(0), _cost(0) {}
119
120 /**
121 * Constructor for NearestNeighbor.
122 *
123 * @param[in] pts a vector of points to include in the set.
124 * @param[in] dist the distance function object.
125 * @param[in] bucket the size of the buckets at the leaf nodes; this must
126 * lie in [0, 2 + 4*sizeof(dist_t)/sizeof(int)] (default 4).
127 * @exception GeographicErr if the value of \e bucket is out of bounds or
128 * the size of \e pts is too big for an int.
129 * @exception std::bad_alloc if memory for the tree can't be allocated.
130 *
131 * \e pts may contain coincident points (i.e., the distance between them
132 * vanishes); these are treated as distinct.
133 *
134 * The choice of \e bucket is a tradeoff between space and efficiency. A
135 * larger \e bucket decreases the size of the NearestNeighbor object which
136 * scales as pts.size() / max(1, bucket) and reduces the number of distance
137 * calculations to construct the object by log2(bucket) * pts.size().
138 * However each search then requires about bucket additional distance
139 * calculations.
140 *
141 * \warning The distances computed by \e dist must satisfy the standard
142 * metric conditions. If not, the results are undefined. Neither the data
143 * in \e pts nor the query points should contain NaNs or infinities because
144 * such data violates the metric conditions.
145 *
146 * \warning The same arguments \e pts and \e dist must be provided
147 * to the Search() function.
148 **********************************************************************/
149 NearestNeighbor(const std::vector<pos_t>& pts, const distfun_t& dist,
150 int bucket = 4) {
151 Initialize(pts, dist, bucket);
152 }
153
154 /**
155 * Initialize or re-initialize NearestNeighbor.
156 *
157 * @param[in] pts a vector of points to include in the tree.
158 * @param[in] dist the distance function object.
159 * @param[in] bucket the size of the buckets at the leaf nodes; this must
160 * lie in [0, 2 + 4*sizeof(dist_t)/sizeof(int)] (default 4).
161 * @exception GeographicErr if the value of \e bucket is out of bounds or
162 * the size of \e pts is too big for an int.
163 * @exception std::bad_alloc if memory for the tree can't be allocated.
164 *
165 * See also the documentation on the constructor.
166 *
167 * If an exception is thrown, the state of the NearestNeighbor is
168 * unchanged.
169 **********************************************************************/
170 void Initialize(const std::vector<pos_t>& pts, const distfun_t& dist,
171 int bucket = 4) {
172 static_assert(std::numeric_limits<dist_t>::is_signed,
173 "dist_t must be a signed type");
174 if (!( 0 <= bucket && bucket <= maxbucket ))
176 ("bucket must lie in [0, 2 + 4*sizeof(dist_t)/sizeof(int)]");
177 if (pts.size() > size_t(std::numeric_limits<int>::max()))
178 throw GeographicLib::GeographicErr("pts array too big");
179 // the pair contains distance+id
180 std::vector<item> ids(pts.size());
181 for (int k = int(ids.size()); k--;)
182 ids[k] = std::make_pair(dist_t(0), k);
183 int cost = 0;
184 std::vector<Node> tree;
185 init(pts, dist, bucket, tree, ids, cost,
186 0, int(ids.size()), int(ids.size()/2));
187 _tree.swap(tree);
188 _numpoints = int(pts.size());
189 _bucket = bucket;
190 _mc = _sc = 0;
191 _cost = cost; _c1 = _k = _cmax = 0;
192 _cmin = std::numeric_limits<int>::max();
193 }
194
195 /**
196 * Search the NearestNeighbor.
197 *
198 * @param[in] pts the vector of points used for initialization.
199 * @param[in] dist the distance function object used for initialization.
200 * @param[in] query the query point.
201 * @param[out] ind a vector of indices to the closest points found.
202 * @param[in] k the number of points to search for (default = 1).
203 * @param[in] maxdist only return points with distances of \e maxdist or
204 * less from \e query (default is the maximum \e dist_t).
205 * @param[in] mindist only return points with distances of more than
206 * \e mindist from \e query (default = &minus;1).
207 * @param[in] exhaustive whether to do an exhaustive search (default true).
208 * @param[in] tol the tolerance on the results (default 0).
209 * @return the distance to the closest point found (&minus;1 if no points
210 * are found).
211 * @exception GeographicErr if \e pts has a different size from that used
212 * to construct the object.
213 *
214 * The indices returned in \e ind are sorted by distance from \e query
215 * (closest first).
216 *
217 * The simplest invocation is with just the 4 non-optional arguments. This
218 * returns the closest distance and the index to the closest point in
219 * <i>ind</i><sub>0</sub>. If there are several points equally close, then
220 * <i>ind</i><sub>0</sub> gives the index of an arbirary one of them. If
221 * there's no closest point (because the set of points is empty), then \e
222 * ind is empty and &minus;1 is returned.
223 *
224 * With \e exhaustive = true and \e tol = 0 (their default values), this
225 * finds the indices of \e k closest neighbors to \e query whose distances
226 * to \e query are in (\e mindist, \e maxdist]. If \e mindist and \e
227 * maxdist have their default values, then these bounds have no effect. If
228 * \e query is one of the points in the tree, then set \e mindist = 0 to
229 * prevent this point (and other coincident points) from being returned.
230 *
231 * If \e exhaustive = false, exit as soon as \e k results satisfying the
232 * distance criteria are found. If less than \e k results are returned
233 * then the search was exhaustive even if \e exhaustive = false.
234 *
235 * If \e tol is positive, do an approximate search; in this case the
236 * results are to be interpreted as follows: if the <i>k</i>'th distance is
237 * \e dk, then all results with distances less than or equal \e dk &minus;
238 * \e tol are correct; all others are suspect &mdash; there may be other
239 * closer results with distances greater or equal to \e dk &minus; \e tol.
240 * If less than \e k results are found, then the search is exact.
241 *
242 * \e mindist should be used to exclude a "small" neighborhood of the query
243 * point (relative to the average spacing of the data). If \e mindist is
244 * large, the efficiency of the search deteriorates.
245 *
246 * \note Only the shortest distance is returned (as as the function value).
247 * The distances to other points (indexed by <i>ind</i><sub><i>j</i></sub>
248 * for \e j > 0) can be found by invoking \e dist again.
249 *
250 * \warning The arguments \e pts and \e dist must be identical to those
251 * used to initialize the NearestNeighbor; if not, this function will
252 * return some meaningless result (however, if the size of \e pts is wrong,
253 * this function throw an exception).
254 *
255 * \warning The query point cannot be a NaN or infinite because then the
256 * metric conditions are violated.
257 **********************************************************************/
258 dist_t Search(const std::vector<pos_t>& pts, const distfun_t& dist,
259 const pos_t& query,
260 std::vector<int>& ind,
261 int k = 1,
262 dist_t maxdist = std::numeric_limits<dist_t>::max(),
263 dist_t mindist = -1,
264 bool exhaustive = true,
265 dist_t tol = 0) const {
266 if (_numpoints != int(pts.size()))
267 throw GeographicLib::GeographicErr("pts array has wrong size");
268 std::priority_queue<item> results;
269 if (_numpoints > 0 && k > 0 && maxdist > mindist) {
270 // distance to the kth closest point so far
271 dist_t tau = maxdist;
272 // first is negative of how far query is outside boundary of node
273 // +1 if on boundary or inside
274 // second is node index
275 std::priority_queue<item> todo;
276 todo.push(std::make_pair(dist_t(1), int(_tree.size()) - 1));
277 int c = 0;
278 while (!todo.empty()) {
279 int n = todo.top().second;
280 dist_t d = -todo.top().first;
281 todo.pop();
282 dist_t tau1 = tau - tol;
283 // compare tau and d again since tau may have become smaller.
284 if (!( n >= 0 && tau1 >= d )) continue;
285 const Node& current = _tree[n];
286 dist_t dst = 0; // to suppress warning about uninitialized variable
287 bool exitflag = false, leaf = current.index < 0;
288 for (int i = 0; i < (leaf ? _bucket : 1); ++i) {
289 int index = leaf ? current.leaves[i] : current.index;
290 if (index < 0) break;
291 dst = dist(pts[index], query);
292 ++c;
293
294 if (dst > mindist && dst <= tau) {
295 if (int(results.size()) == k) results.pop();
296 results.push(std::make_pair(dst, index));
297 if (int(results.size()) == k) {
298 if (exhaustive)
299 tau = results.top().first;
300 else {
301 exitflag = true;
302 break;
303 }
304 if (tau <= tol) {
305 exitflag = true;
306 break;
307 }
308 }
309 }
310 }
311 if (exitflag) break;
312
313 if (current.index < 0) continue;
314 tau1 = tau - tol;
315 for (int l = 0; l < 2; ++l) {
316 if (current.data.child[l] >= 0 &&
317 dst + current.data.upper[l] >= mindist) {
318 if (dst < current.data.lower[l]) {
319 d = current.data.lower[l] - dst;
320 if (tau1 >= d)
321 todo.push(std::make_pair(-d, current.data.child[l]));
322 } else if (dst > current.data.upper[l]) {
323 d = dst - current.data.upper[l];
324 if (tau1 >= d)
325 todo.push(std::make_pair(-d, current.data.child[l]));
326 } else
327 todo.push(std::make_pair(dist_t(1), current.data.child[l]));
328 }
329 }
330 }
331 ++_k;
332 _c1 += c;
333 double omc = _mc;
334 _mc += (c - omc) / _k;
335 _sc += (c - omc) * (c - _mc);
336 if (c > _cmax) _cmax = c;
337 if (c < _cmin) _cmin = c;
338 }
339
340 dist_t d = -1;
341 ind.resize(results.size());
342
343 for (int i = int(ind.size()); i--;) {
344 ind[i] = int(results.top().second);
345 if (i == 0) d = results.top().first;
346 results.pop();
347 }
348 return d;
349
350 }
351
352 /**
353 * @return the total number of points in the set.
354 **********************************************************************/
355 int NumPoints() const { return _numpoints; }
356
357 /**
358 * Write the object to an I/O stream.
359 *
360 * @param[in,out] os the stream to write to.
361 * @param[in] bin if true (the default) save in binary mode.
362 * @exception std::bad_alloc if memory for the string representation of the
363 * object can't be allocated.
364 *
365 * The counters tracking the statistics of searches are not saved; however
366 * the initializtion cost is saved. The format of the binary saves is \e
367 * not portable.
368 *
369 * \note <a href="https://www.boost.org/libs/serialization/doc">
370 * Boost serialization</a> can also be used to save and restore a
371 * NearestNeighbor object. This requires that the
372 * GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION macro be defined.
373 **********************************************************************/
374 void Save(std::ostream& os, bool bin = true) const {
375 int realspec = std::numeric_limits<dist_t>::digits *
376 (std::numeric_limits<dist_t>::is_integer ? -1 : 1);
377 if (bin) {
378 char id[] = "NearestNeighbor_";
379 os.write(id, 16);
380 int buf[6];
381 buf[0] = version;
382 buf[1] = realspec;
383 buf[2] = _bucket;
384 buf[3] = _numpoints;
385 buf[4] = int(_tree.size());
386 buf[5] = _cost;
387 os.write(reinterpret_cast<const char *>(buf), 6 * sizeof(int));
388 for (int i = 0; i < int(_tree.size()); ++i) {
389 const Node& node = _tree[i];
390 os.write(reinterpret_cast<const char *>(&node.index), sizeof(int));
391 if (node.index >= 0) {
392 os.write(reinterpret_cast<const char *>(node.data.lower),
393 2 * sizeof(dist_t));
394 os.write(reinterpret_cast<const char *>(node.data.upper),
395 2 * sizeof(dist_t));
396 os.write(reinterpret_cast<const char *>(node.data.child),
397 2 * sizeof(int));
398 } else {
399 os.write(reinterpret_cast<const char *>(node.leaves),
400 _bucket * sizeof(int));
401 }
402 }
403 } else {
404 std::stringstream ostring;
405 // Ensure enough precision for type dist_t. With C++11, max_digits10
406 // can be used instead.
407 if (!std::numeric_limits<dist_t>::is_integer) {
408 static const int prec
409 = int(std::ceil(std::numeric_limits<dist_t>::digits *
410 std::log10(2.0) + 1));
411 ostring.precision(prec);
412 }
413 ostring << version << " " << realspec << " " << _bucket << " "
414 << _numpoints << " " << _tree.size() << " " << _cost;
415 for (int i = 0; i < int(_tree.size()); ++i) {
416 const Node& node = _tree[i];
417 ostring << "\n" << node.index;
418 if (node.index >= 0) {
419 for (int l = 0; l < 2; ++l)
420 ostring << " " << node.data.lower[l] << " " << node.data.upper[l]
421 << " " << node.data.child[l];
422 } else {
423 for (int l = 0; l < _bucket; ++l)
424 ostring << " " << node.leaves[l];
425 }
426 }
427 os << ostring.str();
428 }
429 }
430
431 /**
432 * Read the object from an I/O stream.
433 *
434 * @param[in,out] is the stream to read from
435 * @param[in] bin if true (the default) load in binary mode.
436 * @exception GeographicErr if the state read from \e is is illegal.
437 * @exception std::bad_alloc if memory for the tree can't be allocated.
438 *
439 * The counters tracking the statistics of searches are reset by this
440 * operation. Binary data must have been saved on a machine with the same
441 * architecture. If an exception is thrown, the state of the
442 * NearestNeighbor is unchanged.
443 *
444 * \note <a href="https://www.boost.org/libs/serialization/doc">
445 * Boost serialization</a> can also be used to save and restore a
446 * NearestNeighbor object. This requires that the
447 * GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION macro be defined.
448 *
449 * \warning The same arguments \e pts and \e dist used for
450 * initialization must be provided to the Search() function.
451 **********************************************************************/
452 void Load(std::istream& is, bool bin = true) {
453 int version1, realspec, bucket, numpoints, treesize, cost;
454 if (bin) {
455 char id[17];
456 is.read(id, 16);
457 id[16] = '\0';
458 if (!(std::strcmp(id, "NearestNeighbor_") == 0))
459 throw GeographicLib::GeographicErr("Bad ID");
460 is.read(reinterpret_cast<char *>(&version1), sizeof(int));
461 is.read(reinterpret_cast<char *>(&realspec), sizeof(int));
462 is.read(reinterpret_cast<char *>(&bucket), sizeof(int));
463 is.read(reinterpret_cast<char *>(&numpoints), sizeof(int));
464 is.read(reinterpret_cast<char *>(&treesize), sizeof(int));
465 is.read(reinterpret_cast<char *>(&cost), sizeof(int));
466 } else {
467 if (!( is >> version1 >> realspec >> bucket >> numpoints >> treesize
468 >> cost ))
469 throw GeographicLib::GeographicErr("Bad header");
470 }
471 if (!( version1 == version ))
472 throw GeographicLib::GeographicErr("Incompatible version");
473 if (!( realspec == std::numeric_limits<dist_t>::digits *
474 (std::numeric_limits<dist_t>::is_integer ? -1 : 1) ))
475 throw GeographicLib::GeographicErr("Different dist_t types");
476 if (!( 0 <= bucket && bucket <= maxbucket ))
477 throw GeographicLib::GeographicErr("Bad bucket size");
478 if (!( 0 <= treesize && treesize <= numpoints ))
479 throw
480 GeographicLib::GeographicErr("Bad number of points or tree size");
481 if (!( 0 <= cost ))
482 throw GeographicLib::GeographicErr("Bad value for cost");
483 std::vector<Node> tree;
484 tree.reserve(treesize);
485 for (int i = 0; i < treesize; ++i) {
486 Node node;
487 if (bin) {
488 is.read(reinterpret_cast<char *>(&node.index), sizeof(int));
489 if (node.index >= 0) {
490 is.read(reinterpret_cast<char *>(node.data.lower),
491 2 * sizeof(dist_t));
492 is.read(reinterpret_cast<char *>(node.data.upper),
493 2 * sizeof(dist_t));
494 is.read(reinterpret_cast<char *>(node.data.child),
495 2 * sizeof(int));
496 } else {
497 is.read(reinterpret_cast<char *>(node.leaves),
498 bucket * sizeof(int));
499 for (int l = bucket; l < maxbucket; ++l)
500 node.leaves[l] = 0;
501 }
502 } else {
503 if (!( is >> node.index ))
504 throw GeographicLib::GeographicErr("Bad index");
505 if (node.index >= 0) {
506 for (int l = 0; l < 2; ++l) {
507 if (!( is >> node.data.lower[l] >> node.data.upper[l]
508 >> node.data.child[l] ))
509 throw GeographicLib::GeographicErr("Bad node data");
510 }
511 } else {
512 // Must be at least one valid leaf followed by a sequence end
513 // markers (-1).
514 for (int l = 0; l < bucket; ++l) {
515 if (!( is >> node.leaves[l] ))
516 throw GeographicLib::GeographicErr("Bad leaf data");
517 }
518 for (int l = bucket; l < maxbucket; ++l)
519 node.leaves[l] = 0;
520 }
521 }
522 node.Check(numpoints, treesize, bucket);
523 tree.push_back(node);
524 }
525 _tree.swap(tree);
526 _numpoints = numpoints;
527 _bucket = bucket;
528 _mc = _sc = 0;
529 _cost = cost; _c1 = _k = _cmax = 0;
530 _cmin = std::numeric_limits<int>::max();
531 }
532
533 /**
534 * Write the object to stream \e os as text.
535 *
536 * @param[in,out] os the output stream.
537 * @param[in] t the NearestNeighbor object to be saved.
538 * @exception std::bad_alloc if memory for the string representation of the
539 * object can't be allocated.
540 **********************************************************************/
541 friend std::ostream& operator<<(std::ostream& os, const NearestNeighbor& t)
542 { t.Save(os, false); return os; }
543
544 /**
545 * Read the object from stream \e is as text.
546 *
547 * @param[in,out] is the input stream.
548 * @param[out] t the NearestNeighbor object to be loaded.
549 * @exception GeographicErr if the state read from \e is is illegal.
550 * @exception std::bad_alloc if memory for the tree can't be allocated.
551 **********************************************************************/
552 friend std::istream& operator>>(std::istream& is, NearestNeighbor& t)
553 { t.Load(is, false); return is; }
554
555 /**
556 * Swap with another NearestNeighbor object.
557 *
558 * @param[in,out] t the NearestNeighbor object to swap with.
559 **********************************************************************/
561 std::swap(_numpoints, t._numpoints);
562 std::swap(_bucket, t._bucket);
563 std::swap(_cost, t._cost);
564 _tree.swap(t._tree);
565 std::swap(_mc, t._mc);
566 std::swap(_sc, t._sc);
567 std::swap(_c1, t._c1);
568 std::swap(_k, t._k);
569 std::swap(_cmin, t._cmin);
570 std::swap(_cmax, t._cmax);
571 }
572
573 /**
574 * The accumulated statistics on the searches so far.
575 *
576 * @param[out] setupcost the cost of initializing the NearestNeighbor.
577 * @param[out] numsearches the number of calls to Search().
578 * @param[out] searchcost the total cost of the calls to Search().
579 * @param[out] mincost the minimum cost of a Search().
580 * @param[out] maxcost the maximum cost of a Search().
581 * @param[out] mean the mean cost of a Search().
582 * @param[out] sd the standard deviation in the cost of a Search().
583 *
584 * Here "cost" measures the number of distance calculations needed. Note
585 * that the accumulation of statistics is \e not thread safe.
586 **********************************************************************/
587 void Statistics(int& setupcost, int& numsearches, int& searchcost,
588 int& mincost, int& maxcost,
589 double& mean, double& sd) const {
590 setupcost = _cost; numsearches = _k; searchcost = _c1;
591 mincost = _cmin; maxcost = _cmax;
592 mean = _mc; sd = std::sqrt(_sc / (_k - 1));
593 }
594
595 /**
596 * Reset the counters for the accumulated statistics on the searches so
597 * far.
598 **********************************************************************/
599 void ResetStatistics() const {
600 _mc = _sc = 0;
601 _c1 = _k = _cmax = 0;
602 _cmin = std::numeric_limits<int>::max();
603 }
604
605 private:
606 // Package up a dist_t and an int. We will want to sort on the dist_t so
607 // put it first.
608 typedef std::pair<dist_t, int> item;
609 // \cond SKIP
610 class Node {
611 public:
612 struct bounds {
613 dist_t lower[2], upper[2]; // bounds on inner/outer distances
614 int child[2];
615 };
616 union {
617 bounds data;
618 int leaves[maxbucket];
619 };
620 int index;
621
622 Node()
623 : index(-1)
624 {
625 for (int i = 0; i < 2; ++i) {
626 data.lower[i] = data.upper[i] = 0;
627 data.child[i] = -1;
628 }
629 }
630
631 // Sanity check on a Node
632 void Check(int numpoints, int treesize, int bucket) const {
633 if (!( -1 <= index && index < numpoints ))
634 throw GeographicLib::GeographicErr("Bad index");
635 if (index >= 0) {
636 if (!( -1 <= data.child[0] && data.child[0] < treesize &&
637 -1 <= data.child[1] && data.child[1] < treesize ))
638 throw GeographicLib::GeographicErr("Bad child pointers");
639 if (!( 0 <= data.lower[0] && data.lower[0] <= data.upper[0] &&
640 data.upper[0] <= data.lower[1] &&
641 data.lower[1] <= data.upper[1] ))
642 throw GeographicLib::GeographicErr("Bad bounds");
643 } else {
644 // Must be at least one valid leaf followed by a sequence end markers
645 // (-1).
646 bool start = true;
647 for (int l = 0; l < bucket; ++l) {
648 if (!( (start ?
649 ((l == 0 ? 0 : -1) <= leaves[l] && leaves[l] < numpoints) :
650 leaves[l] == -1) ))
651 throw GeographicLib::GeographicErr("Bad leaf data");
652 start = leaves[l] >= 0;
653 }
654 for (int l = bucket; l < maxbucket; ++l) {
655 if (leaves[l] != 0)
656 throw GeographicLib::GeographicErr("Bad leaf data");
657 }
658 }
659 }
660
661#if defined(GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION) && \
662 GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION
663 friend class boost::serialization::access;
664 template<class Archive>
665 void save(Archive& ar, const unsigned int) const {
666 ar & boost::serialization::make_nvp("index", index);
667 if (index < 0)
668 ar & boost::serialization::make_nvp("leaves", leaves);
669 else
670 ar & boost::serialization::make_nvp("lower", data.lower)
671 & boost::serialization::make_nvp("upper", data.upper)
672 & boost::serialization::make_nvp("child", data.child);
673 }
674 template<class Archive>
675 void load(Archive& ar, const unsigned int) {
676 ar & boost::serialization::make_nvp("index", index);
677 if (index < 0)
678 ar & boost::serialization::make_nvp("leaves", leaves);
679 else
680 ar & boost::serialization::make_nvp("lower", data.lower)
681 & boost::serialization::make_nvp("upper", data.upper)
682 & boost::serialization::make_nvp("child", data.child);
683 }
684 template<class Archive>
685 void serialize(Archive& ar, const unsigned int file_version)
686 { boost::serialization::split_member(ar, *this, file_version); }
687#endif
688 };
689 // \endcond
690#if defined(GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION) && \
691 GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION
692 friend class boost::serialization::access;
693 template<class Archive> void save(Archive& ar, const unsigned) const {
694 int realspec = std::numeric_limits<dist_t>::digits *
695 (std::numeric_limits<dist_t>::is_integer ? -1 : 1);
696 // Need to use version1, otherwise load error in debug mode on Linux:
697 // undefined reference to GeographicLib::NearestNeighbor<...>::version.
698 int version1 = version;
699 ar & boost::serialization::make_nvp("version", version1)
700 & boost::serialization::make_nvp("realspec", realspec)
701 & boost::serialization::make_nvp("bucket", _bucket)
702 & boost::serialization::make_nvp("numpoints", _numpoints)
703 & boost::serialization::make_nvp("cost", _cost)
704 & boost::serialization::make_nvp("tree", _tree);
705 }
706 template<class Archive> void load(Archive& ar, const unsigned) {
707 int version1, realspec, bucket, numpoints, cost;
708 ar & boost::serialization::make_nvp("version", version1);
709 if (version1 != version)
710 throw GeographicLib::GeographicErr("Incompatible version");
711 std::vector<Node> tree;
712 ar & boost::serialization::make_nvp("realspec", realspec);
713 if (!( realspec == std::numeric_limits<dist_t>::digits *
714 (std::numeric_limits<dist_t>::is_integer ? -1 : 1) ))
715 throw GeographicLib::GeographicErr("Different dist_t types");
716 ar & boost::serialization::make_nvp("bucket", bucket);
717 if (!( 0 <= bucket && bucket <= maxbucket ))
718 throw GeographicLib::GeographicErr("Bad bucket size");
719 ar & boost::serialization::make_nvp("numpoints", numpoints)
720 & boost::serialization::make_nvp("cost", cost)
721 & boost::serialization::make_nvp("tree", tree);
722 if (!( 0 <= int(tree.size()) && int(tree.size()) <= numpoints ))
723 throw
724 GeographicLib::GeographicErr("Bad number of points or tree size");
725 for (int i = 0; i < int(tree.size()); ++i)
726 tree[i].Check(numpoints, int(tree.size()), bucket);
727 _tree.swap(tree);
728 _numpoints = numpoints;
729 _bucket = bucket;
730 _mc = _sc = 0;
731 _cost = cost; _c1 = _k = _cmax = 0;
732 _cmin = std::numeric_limits<int>::max();
733 }
734 template<class Archive>
735 void serialize(Archive& ar, const unsigned int file_version)
736 { boost::serialization::split_member(ar, *this, file_version); }
737#endif
738
739 int _numpoints, _bucket, _cost;
740 std::vector<Node> _tree;
741 // Counters to track stastistics on the cost of searches
742 mutable double _mc, _sc;
743 mutable int _c1, _k, _cmin, _cmax;
744
745 int init(const std::vector<pos_t>& pts, const distfun_t& dist, int bucket,
746 std::vector<Node>& tree, std::vector<item>& ids, int& cost,
747 int l, int u, int vp) {
748
749 if (u == l)
750 return -1;
751 Node node;
752
753 if (u - l > (bucket == 0 ? 1 : bucket)) {
754
755 // choose a vantage point and move it to the start
756 int i = vp;
757 std::swap(ids[l], ids[i]);
758
759 int m = (u + l + 1) / 2;
760
761 for (int k = l + 1; k < u; ++k) {
762 ids[k].first = dist(pts[ids[l].second], pts[ids[k].second]);
763 ++cost;
764 }
765 // partition around the median distance
766 std::nth_element(ids.begin() + l + 1,
767 ids.begin() + m,
768 ids.begin() + u);
769 node.index = ids[l].second;
770 if (m > l + 1) { // node.child[0] is possibly empty
771 typename std::vector<item>::iterator
772 t = std::min_element(ids.begin() + l + 1, ids.begin() + m);
773 node.data.lower[0] = t->first;
774 t = std::max_element(ids.begin() + l + 1, ids.begin() + m);
775 node.data.upper[0] = t->first;
776 // Use point with max distance as vantage point; this point act as a
777 // "corner" point and leads to a good partition.
778 node.data.child[0] = init(pts, dist, bucket, tree, ids, cost,
779 l + 1, m, int(t - ids.begin()));
780 }
781 typename std::vector<item>::iterator
782 t = std::max_element(ids.begin() + m, ids.begin() + u);
783 node.data.lower[1] = ids[m].first;
784 node.data.upper[1] = t->first;
785 // Use point with max distance as vantage point here too
786 node.data.child[1] = init(pts, dist, bucket, tree, ids, cost,
787 m, u, int(t - ids.begin()));
788 } else {
789 if (bucket == 0)
790 node.index = ids[l].second;
791 else {
792 node.index = -1;
793 // Sort the bucket entries so that the tree is independent of the
794 // implementation of nth_element.
795 std::sort(ids.begin() + l, ids.begin() + u);
796 for (int i = l; i < u; ++i)
797 node.leaves[i-l] = ids[i].second;
798 for (int i = u - l; i < bucket; ++i)
799 node.leaves[i] = -1;
800 for (int i = bucket; i < maxbucket; ++i)
801 node.leaves[i] = 0;
802 }
803 }
804
805 tree.push_back(node);
806 return int(tree.size()) - 1;
807 }
808
809 };
810
811} // namespace GeographicLib
812
813namespace std {
814
815 /**
816 * Swap two GeographicLib::NearestNeighbor objects.
817 *
818 * @tparam dist_t the type used for measuring distances.
819 * @tparam pos_t the type for specifying the positions of points.
820 * @tparam distfun_t the type for a function object which calculates
821 * distances between points.
822 * @param[in,out] a the first GeographicLib::NearestNeighbor to swap.
823 * @param[in,out] b the second GeographicLib::NearestNeighbor to swap.
824 **********************************************************************/
825 template<typename dist_t, typename pos_t, class distfun_t>
828 a.swap(b);
829 }
830
831} // namespace std
832
833#if defined(_MSC_VER)
834# pragma warning (pop)
835#endif
836
837#endif // GEOGRAPHICLIB_NEARESTNEIGHBOR_HPP
Header for GeographicLib::Constants class.
Exception handling for GeographicLib.
Definition: Constants.hpp:316
Nearest-neighbor calculations.
void Statistics(int &setupcost, int &numsearches, int &searchcost, int &mincost, int &maxcost, double &mean, double &sd) const
friend std::istream & operator>>(std::istream &is, NearestNeighbor &t)
dist_t Search(const std::vector< pos_t > &pts, const distfun_t &dist, const pos_t &query, std::vector< int > &ind, int k=1, dist_t maxdist=std::numeric_limits< dist_t >::max(), dist_t mindist=-1, bool exhaustive=true, dist_t tol=0) const
void Initialize(const std::vector< pos_t > &pts, const distfun_t &dist, int bucket=4)
void Load(std::istream &is, bool bin=true)
void Save(std::ostream &os, bool bin=true) const
NearestNeighbor(const std::vector< pos_t > &pts, const distfun_t &dist, int bucket=4)
void swap(NearestNeighbor &t)
friend std::ostream & operator<<(std::ostream &os, const NearestNeighbor &t)
Namespace for GeographicLib.
Definition: Accumulator.cpp:12
void swap(GeographicLib::NearestNeighbor< dist_t, pos_t, distfun_t > &a, GeographicLib::NearestNeighbor< dist_t, pos_t, distfun_t > &b)