# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""
This module contains functions for matching coordinate catalogs.
"""
import numpy as np
from astropy import units as u
from . import Angle
from .representation import UnitSphericalRepresentation
from .sky_coordinate import SkyCoord
__all__ = [
"match_coordinates_3d",
"match_coordinates_sky",
"search_around_3d",
"search_around_sky",
]
[docs]def match_coordinates_3d(
matchcoord, catalogcoord, nthneighbor=1, storekdtree="kdtree_3d"
):
"""
Finds the nearest 3-dimensional matches of a coordinate or coordinates in
a set of catalog coordinates.
This finds the 3-dimensional closest neighbor, which is only different
from the on-sky distance if ``distance`` is set in either ``matchcoord``
or ``catalogcoord``.
Parameters
----------
matchcoord : `~astropy.coordinates.BaseCoordinateFrame` or `~astropy.coordinates.SkyCoord`
The coordinate(s) to match to the catalog.
catalogcoord : `~astropy.coordinates.BaseCoordinateFrame` or `~astropy.coordinates.SkyCoord`
The base catalog in which to search for matches. Typically this will
be a coordinate object that is an array (i.e.,
``catalogcoord.isscalar == False``)
nthneighbor : int, optional
Which closest neighbor to search for. Typically ``1`` is desired here,
as that is correct for matching one set of coordinates to another.
The next likely use case is ``2``, for matching a coordinate catalog
against *itself* (``1`` is inappropriate because each point will find
itself as the closest match).
storekdtree : bool or str, optional
If a string, will store the KD-Tree used for the computation
in the ``catalogcoord``, as in ``catalogcoord.cache`` with the
provided name. This dramatically speeds up subsequent calls with the
same catalog. If False, the KD-Tree is discarded after use.
Returns
-------
idx : int array
Indices into ``catalogcoord`` to get the matched points for each
``matchcoord``. Shape matches ``matchcoord``.
sep2d : `~astropy.coordinates.Angle`
The on-sky separation between the closest match for each ``matchcoord``
and the ``matchcoord``. Shape matches ``matchcoord``.
dist3d : `~astropy.units.Quantity` ['length']
The 3D distance between the closest match for each ``matchcoord`` and
the ``matchcoord``. Shape matches ``matchcoord``.
Notes
-----
This function requires `SciPy <https://www.scipy.org/>`_ to be installed
or it will fail.
"""
if catalogcoord.isscalar or len(catalogcoord) < 1:
raise ValueError(
"The catalog for coordinate matching cannot be a scalar or length-0."
)
kdt = _get_cartesian_kdtree(catalogcoord, storekdtree)
# make sure coordinate systems match
if isinstance(matchcoord, SkyCoord):
matchcoord = matchcoord.transform_to(catalogcoord, merge_attributes=False)
else:
matchcoord = matchcoord.transform_to(catalogcoord)
# make sure units match
catunit = catalogcoord.cartesian.x.unit
matchxyz = matchcoord.cartesian.xyz.to(catunit)
matchflatxyz = matchxyz.reshape((3, np.prod(matchxyz.shape) // 3))
# Querying NaN returns garbage
if np.isnan(matchflatxyz.value).any():
raise ValueError("Matching coordinates cannot contain NaN entries.")
dist, idx = kdt.query(matchflatxyz.T, nthneighbor)
if nthneighbor > 1: # query gives 1D arrays if k=1, 2D arrays otherwise
dist = dist[:, -1]
idx = idx[:, -1]
sep2d = catalogcoord[idx].separation(matchcoord)
return (
idx.reshape(matchxyz.shape[1:]),
sep2d,
dist.reshape(matchxyz.shape[1:]) * catunit,
)
[docs]def match_coordinates_sky(
matchcoord, catalogcoord, nthneighbor=1, storekdtree="kdtree_sky"
):
"""
Finds the nearest on-sky matches of a coordinate or coordinates in
a set of catalog coordinates.
This finds the on-sky closest neighbor, which is only different from the
3-dimensional match if ``distance`` is set in either ``matchcoord``
or ``catalogcoord``.
Parameters
----------
matchcoord : `~astropy.coordinates.BaseCoordinateFrame` or `~astropy.coordinates.SkyCoord`
The coordinate(s) to match to the catalog.
catalogcoord : `~astropy.coordinates.BaseCoordinateFrame` or `~astropy.coordinates.SkyCoord`
The base catalog in which to search for matches. Typically this will
be a coordinate object that is an array (i.e.,
``catalogcoord.isscalar == False``)
nthneighbor : int, optional
Which closest neighbor to search for. Typically ``1`` is desired here,
as that is correct for matching one set of coordinates to another.
The next likely use case is ``2``, for matching a coordinate catalog
against *itself* (``1`` is inappropriate because each point will find
itself as the closest match).
storekdtree : bool or str, optional
If a string, will store the KD-Tree used for the computation
in the ``catalogcoord`` in ``catalogcoord.cache`` with the
provided name. This dramatically speeds up subsequent calls with the
same catalog. If False, the KD-Tree is discarded after use.
Returns
-------
idx : int array
Indices into ``catalogcoord`` to get the matched points for each
``matchcoord``. Shape matches ``matchcoord``.
sep2d : `~astropy.coordinates.Angle`
The on-sky separation between the closest match for each
``matchcoord`` and the ``matchcoord``. Shape matches ``matchcoord``.
dist3d : `~astropy.units.Quantity` ['length']
The 3D distance between the closest match for each ``matchcoord`` and
the ``matchcoord``. Shape matches ``matchcoord``. If either
``matchcoord`` or ``catalogcoord`` don't have a distance, this is the 3D
distance on the unit sphere, rather than a true distance.
Notes
-----
This function requires `SciPy <https://www.scipy.org/>`_ to be installed
or it will fail.
"""
if catalogcoord.isscalar or len(catalogcoord) < 1:
raise ValueError(
"The catalog for coordinate matching cannot be a scalar or length-0."
)
# send to catalog frame
if isinstance(matchcoord, SkyCoord):
newmatch = matchcoord.transform_to(catalogcoord, merge_attributes=False)
else:
newmatch = matchcoord.transform_to(catalogcoord)
# strip out distance info
match_urepr = newmatch.data.represent_as(UnitSphericalRepresentation)
newmatch_u = newmatch.realize_frame(match_urepr)
cat_urepr = catalogcoord.data.represent_as(UnitSphericalRepresentation)
newcat_u = catalogcoord.realize_frame(cat_urepr)
# Check for a stored KD-tree on the passed-in coordinate. Normally it will
# have a distinct name from the "3D" one, so it's safe to use even though
# it's based on UnitSphericalRepresentation.
storekdtree = catalogcoord.cache.get(storekdtree, storekdtree)
idx, sep2d, sep3d = match_coordinates_3d(
newmatch_u, newcat_u, nthneighbor, storekdtree
)
# sep3d is *wrong* above, because the distance information was removed,
# unless one of the catalogs doesn't have a real distance
if not (
isinstance(catalogcoord.data, UnitSphericalRepresentation)
or isinstance(newmatch.data, UnitSphericalRepresentation)
):
sep3d = catalogcoord[idx].separation_3d(newmatch)
# update the kdtree on the actual passed-in coordinate
if isinstance(storekdtree, str):
catalogcoord.cache[storekdtree] = newcat_u.cache[storekdtree]
elif storekdtree is True:
# the old backwards-compatible name
catalogcoord.cache["kdtree"] = newcat_u.cache["kdtree"]
return idx, sep2d, sep3d
[docs]def search_around_3d(coords1, coords2, distlimit, storekdtree="kdtree_3d"):
"""
Searches for pairs of points that are at least as close as a specified
distance in 3D space.
This is intended for use on coordinate objects with arrays of coordinates,
not scalars. For scalar coordinates, it is better to use the
``separation_3d`` methods.
Parameters
----------
coords1 : `~astropy.coordinates.BaseCoordinateFrame` or `~astropy.coordinates.SkyCoord`
The first set of coordinates, which will be searched for matches from
``coords2`` within ``seplimit``. Cannot be a scalar coordinate.
coords2 : `~astropy.coordinates.BaseCoordinateFrame` or `~astropy.coordinates.SkyCoord`
The second set of coordinates, which will be searched for matches from
``coords1`` within ``seplimit``. Cannot be a scalar coordinate.
distlimit : `~astropy.units.Quantity` ['length']
The physical radius to search within.
storekdtree : bool or str, optional
If a string, will store the KD-Tree used in the search with the name
``storekdtree`` in ``coords2.cache``. This speeds up subsequent calls
to this function. If False, the KD-Trees are not saved.
Returns
-------
idx1 : int array
Indices into ``coords1`` that matches to the corresponding element of
``idx2``. Shape matches ``idx2``.
idx2 : int array
Indices into ``coords2`` that matches to the corresponding element of
``idx1``. Shape matches ``idx1``.
sep2d : `~astropy.coordinates.Angle`
The on-sky separation between the coordinates. Shape matches ``idx1``
and ``idx2``.
dist3d : `~astropy.units.Quantity` ['length']
The 3D distance between the coordinates. Shape matches ``idx1`` and
``idx2``. The unit is that of ``coords1``.
Notes
-----
This function requires `SciPy <https://www.scipy.org/>`_
to be installed or it will fail.
If you are using this function to search in a catalog for matches around
specific points, the convention is for ``coords2`` to be the catalog, and
``coords1`` are the points to search around. While these operations are
mathematically the same if ``coords1`` and ``coords2`` are flipped, some of
the optimizations may work better if this convention is obeyed.
In the current implementation, the return values are always sorted in the
same order as the ``coords1`` (so ``idx1`` is in ascending order). This is
considered an implementation detail, though, so it could change in a future
release.
"""
if not distlimit.isscalar:
raise ValueError("distlimit must be a scalar in search_around_3d")
if coords1.isscalar or coords2.isscalar:
raise ValueError(
"One of the inputs to search_around_3d is a scalar. search_around_3d is"
" intended for use with array coordinates, not scalars. Instead, use"
" ``coord1.separation_3d(coord2) < distlimit`` to find the coordinates near"
" a scalar coordinate."
)
if len(coords1) == 0 or len(coords2) == 0:
# Empty array input: return empty match
return (
np.array([], dtype=int),
np.array([], dtype=int),
Angle([], u.deg),
u.Quantity([], coords1.distance.unit),
)
kdt2 = _get_cartesian_kdtree(coords2, storekdtree)
cunit = coords2.cartesian.x.unit
# we convert coord1 to match coord2's frame. We do it this way
# so that if the conversion does happen, the KD tree of coord2 at least gets
# saved. (by convention, coord2 is the "catalog" if that makes sense)
coords1 = coords1.transform_to(coords2)
kdt1 = _get_cartesian_kdtree(coords1, storekdtree, forceunit=cunit)
# this is the *cartesian* 3D distance that corresponds to the given angle
d = distlimit.to_value(cunit)
idxs1 = []
idxs2 = []
for i, matches in enumerate(kdt1.query_ball_tree(kdt2, d)):
for match in matches:
idxs1.append(i)
idxs2.append(match)
idxs1 = np.array(idxs1, dtype=int)
idxs2 = np.array(idxs2, dtype=int)
if idxs1.size == 0:
d2ds = Angle([], u.deg)
d3ds = u.Quantity([], coords1.distance.unit)
else:
d2ds = coords1[idxs1].separation(coords2[idxs2])
d3ds = coords1[idxs1].separation_3d(coords2[idxs2])
return idxs1, idxs2, d2ds, d3ds
[docs]def search_around_sky(coords1, coords2, seplimit, storekdtree="kdtree_sky"):
"""
Searches for pairs of points that have an angular separation at least as
close as a specified angle.
This is intended for use on coordinate objects with arrays of coordinates,
not scalars. For scalar coordinates, it is better to use the ``separation``
methods.
Parameters
----------
coords1 : coordinate-like
The first set of coordinates, which will be searched for matches from
``coords2`` within ``seplimit``. Cannot be a scalar coordinate.
coords2 : coordinate-like
The second set of coordinates, which will be searched for matches from
``coords1`` within ``seplimit``. Cannot be a scalar coordinate.
seplimit : `~astropy.units.Quantity` ['angle']
The on-sky separation to search within.
storekdtree : bool or str, optional
If a string, will store the KD-Tree used in the search with the name
``storekdtree`` in ``coords2.cache``. This speeds up subsequent calls
to this function. If False, the KD-Trees are not saved.
Returns
-------
idx1 : int array
Indices into ``coords1`` that matches to the corresponding element of
``idx2``. Shape matches ``idx2``.
idx2 : int array
Indices into ``coords2`` that matches to the corresponding element of
``idx1``. Shape matches ``idx1``.
sep2d : `~astropy.coordinates.Angle`
The on-sky separation between the coordinates. Shape matches ``idx1``
and ``idx2``.
dist3d : `~astropy.units.Quantity` ['length']
The 3D distance between the coordinates. Shape matches ``idx1``
and ``idx2``; the unit is that of ``coords1``.
If either ``coords1`` or ``coords2`` don't have a distance,
this is the 3D distance on the unit sphere, rather than a
physical distance.
Notes
-----
This function requires `SciPy <https://www.scipy.org/>`_
to be installed or it will fail.
In the current implementation, the return values are always sorted in the
same order as the ``coords1`` (so ``idx1`` is in ascending order). This is
considered an implementation detail, though, so it could change in a future
release.
"""
if not seplimit.isscalar:
raise ValueError("seplimit must be a scalar in search_around_sky")
if coords1.isscalar or coords2.isscalar:
raise ValueError(
"One of the inputs to search_around_sky is a scalar. search_around_sky is"
" intended for use with array coordinates, not scalars. Instead, use"
" ``coord1.separation(coord2) < seplimit`` to find the coordinates near a"
" scalar coordinate."
)
if len(coords1) == 0 or len(coords2) == 0:
# Empty array input: return empty match
if coords2.distance.unit == u.dimensionless_unscaled:
distunit = u.dimensionless_unscaled
else:
distunit = coords1.distance.unit
return (
np.array([], dtype=int),
np.array([], dtype=int),
Angle([], u.deg),
u.Quantity([], distunit),
)
# we convert coord1 to match coord2's frame. We do it this way
# so that if the conversion does happen, the KD tree of coord2 at least gets
# saved. (by convention, coord2 is the "catalog" if that makes sense)
coords1 = coords1.transform_to(coords2)
# strip out distance info
urepr1 = coords1.data.represent_as(UnitSphericalRepresentation)
ucoords1 = coords1.realize_frame(urepr1)
kdt1 = _get_cartesian_kdtree(ucoords1, storekdtree)
if storekdtree and coords2.cache.get(storekdtree):
# just use the stored KD-Tree
kdt2 = coords2.cache[storekdtree]
else:
# strip out distance info
urepr2 = coords2.data.represent_as(UnitSphericalRepresentation)
ucoords2 = coords2.realize_frame(urepr2)
kdt2 = _get_cartesian_kdtree(ucoords2, storekdtree)
if storekdtree:
# save the KD-Tree in coords2, *not* ucoords2
coords2.cache["kdtree" if storekdtree is True else storekdtree] = kdt2
# this is the *cartesian* 3D distance that corresponds to the given angle
r = (2 * np.sin(Angle(seplimit) / 2.0)).value
idxs1 = []
idxs2 = []
for i, matches in enumerate(kdt1.query_ball_tree(kdt2, r)):
for match in matches:
idxs1.append(i)
idxs2.append(match)
idxs1 = np.array(idxs1, dtype=int)
idxs2 = np.array(idxs2, dtype=int)
if idxs1.size == 0:
if coords2.distance.unit == u.dimensionless_unscaled:
distunit = u.dimensionless_unscaled
else:
distunit = coords1.distance.unit
d2ds = Angle([], u.deg)
d3ds = u.Quantity([], distunit)
else:
d2ds = coords1[idxs1].separation(coords2[idxs2])
try:
d3ds = coords1[idxs1].separation_3d(coords2[idxs2])
except ValueError:
# they don't have distances, so we just fall back on the cartesian
# distance, computed from d2ds
d3ds = 2 * np.sin(d2ds / 2.0)
return idxs1, idxs2, d2ds, d3ds
def _get_cartesian_kdtree(coord, attrname_or_kdt="kdtree", forceunit=None):
"""
This is a utility function to retrieve (and build/cache, if necessary)
a 3D cartesian KD-Tree from various sorts of astropy coordinate objects.
Parameters
----------
coord : `~astropy.coordinates.BaseCoordinateFrame` or `~astropy.coordinates.SkyCoord`
The coordinates to build the KD-Tree for.
attrname_or_kdt : bool or str or KDTree
If a string, will store the KD-Tree used for the computation in the
``coord``, in ``coord.cache`` with the provided name. If given as a
KD-Tree, it will just be used directly.
forceunit : unit or None
If a unit, the cartesian coordinates will convert to that unit before
being put in the KD-Tree. If None, whatever unit it's already in
will be used
Returns
-------
kdt : `~scipy.spatial.cKDTree` or `~scipy.spatial.KDTree`
The KD-Tree representing the 3D cartesian representation of the input
coordinates.
"""
from warnings import warn
# without scipy this will immediately fail
from scipy import spatial
try:
KDTree = spatial.cKDTree
except Exception:
warn(
"C-based KD tree not found, falling back on (much slower) "
"python implementation"
)
KDTree = spatial.KDTree
if attrname_or_kdt is True: # backwards compatibility for pre v0.4
attrname_or_kdt = "kdtree"
# figure out where any cached KDTree might be
if isinstance(attrname_or_kdt, str):
kdt = coord.cache.get(attrname_or_kdt, None)
if kdt is not None and not isinstance(kdt, KDTree):
raise TypeError(
f'The `attrname_or_kdt` "{attrname_or_kdt}" is not a scipy KD tree!'
)
elif isinstance(attrname_or_kdt, KDTree):
kdt = attrname_or_kdt
attrname_or_kdt = None
elif not attrname_or_kdt:
kdt = None
else:
raise TypeError(
"Invalid `attrname_or_kdt` argument for KD-Tree:" + str(attrname_or_kdt)
)
if kdt is None:
# need to build the cartesian KD-tree for the catalog
if forceunit is None:
cartxyz = coord.cartesian.xyz
else:
cartxyz = coord.cartesian.xyz.to(forceunit)
flatxyz = cartxyz.reshape((3, np.prod(cartxyz.shape) // 3))
# There should be no NaNs in the kdtree data.
if np.isnan(flatxyz.value).any():
raise ValueError("Catalog coordinates cannot contain NaN entries.")
try:
# Set compact_nodes=False, balanced_tree=False to use
# "sliding midpoint" rule, which is much faster than standard for
# many common use cases
kdt = KDTree(flatxyz.value.T, compact_nodes=False, balanced_tree=False)
except TypeError:
# Python implementation does not take compact_nodes and balanced_tree
# as arguments. However, it uses sliding midpoint rule by default
kdt = KDTree(flatxyz.value.T)
if attrname_or_kdt:
# cache the kdtree in `coord`
coord.cache[attrname_or_kdt] = kdt
return kdt