# Source code for astropy.stats.circstats

```
# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""
This module contains simple functions for dealing with circular statistics, for
instance, mean, variance, standard deviation, correlation coefficient, and so
on. This module also cover tests of uniformity, e.g., the Rayleigh and V tests.
The Maximum Likelihood Estimator for the Von Mises distribution along with the
Cramer-Rao Lower Bounds are also implemented. Almost all of the implementations
are based on reference [1]_, which is also the basis for the R package
'CircStats' [2]_.
"""
import numpy as np
from astropy.units import Quantity
__all__ = ['circmean', 'circstd', 'circvar', 'circmoment', 'circcorrcoef',
'rayleightest', 'vtest', 'vonmisesmle']
__doctest_requires__ = {'vtest': ['scipy']}
def _components(data, p=1, phi=0.0, axis=None, weights=None):
# Utility function for computing the generalized rectangular components
# of the circular data.
if weights is None:
weights = np.ones((1,))
try:
weights = np.broadcast_to(weights, data.shape)
except ValueError:
raise ValueError('Weights and data have inconsistent shape.')
C = np.sum(weights * np.cos(p * (data - phi)), axis)/np.sum(weights, axis)
S = np.sum(weights * np.sin(p * (data - phi)), axis)/np.sum(weights, axis)
return C, S
def _angle(data, p=1, phi=0.0, axis=None, weights=None):
# Utility function for computing the generalized sample mean angle
C, S = _components(data, p, phi, axis, weights)
# theta will be an angle in the interval [-np.pi, np.pi)
# [-180, 180)*u.deg in case data is a Quantity
theta = np.arctan2(S, C)
if isinstance(data, Quantity):
theta = theta.to(data.unit)
return theta
def _length(data, p=1, phi=0.0, axis=None, weights=None):
# Utility function for computing the generalized sample length
C, S = _components(data, p, phi, axis, weights)
return np.hypot(S, C)
[docs]def circmean(data, axis=None, weights=None):
""" Computes the circular mean angle of an array of circular data.
Parameters
----------
data : numpy.ndarray or Quantity
Array of circular (directional) data, which is assumed to be in
radians whenever ``data`` is ``numpy.ndarray``.
axis : int, optional
Axis along which circular means are computed. The default is to compute
the mean of the flattened array.
weights : numpy.ndarray, optional
In case of grouped data, the i-th element of ``weights`` represents a
weighting factor for each group such that ``sum(weights, axis)``
equals the number of observations. See [1]_, remark 1.4, page 22, for
detailed explanation.
Returns
-------
circmean : numpy.ndarray or Quantity
Circular mean.
Examples
--------
>>> import numpy as np
>>> from astropy.stats import circmean
>>> from astropy import units as u
>>> data = np.array([51, 67, 40, 109, 31, 358])*u.deg
>>> circmean(data) # doctest: +FLOAT_CMP
<Quantity 48.62718088722989 deg>
References
----------
.. [1] S. R. Jammalamadaka, A. SenGupta. "Topics in Circular Statistics".
Series on Multivariate Analysis, Vol. 5, 2001.
.. [2] C. Agostinelli, U. Lund. "Circular Statistics from 'Topics in
Circular Statistics (2001)'". 2015.
<https://cran.r-project.org/web/packages/CircStats/CircStats.pdf>
"""
return _angle(data, 1, 0.0, axis, weights)
[docs]def circvar(data, axis=None, weights=None):
""" Computes the circular variance of an array of circular data.
There are some concepts for defining measures of dispersion for circular
data. The variance implemented here is based on the definition given by
[1]_, which is also the same used by the R package 'CircStats' [2]_.
Parameters
----------
data : numpy.ndarray or dimensionless Quantity
Array of circular (directional) data, which is assumed to be in
radians whenever ``data`` is ``numpy.ndarray``.
axis : int, optional
Axis along which circular variances are computed. The default is to
compute the variance of the flattened array.
weights : numpy.ndarray, optional
In case of grouped data, the i-th element of ``weights`` represents a
weighting factor for each group such that ``sum(weights, axis)``
equals the number of observations. See [1]_, remark 1.4, page 22,
for detailed explanation.
Returns
-------
circvar : numpy.ndarray or dimensionless Quantity
Circular variance.
Examples
--------
>>> import numpy as np
>>> from astropy.stats import circvar
>>> from astropy import units as u
>>> data = np.array([51, 67, 40, 109, 31, 358])*u.deg
>>> circvar(data) # doctest: +FLOAT_CMP
<Quantity 0.16356352748437508>
References
----------
.. [1] S. R. Jammalamadaka, A. SenGupta. "Topics in Circular Statistics".
Series on Multivariate Analysis, Vol. 5, 2001.
.. [2] C. Agostinelli, U. Lund. "Circular Statistics from 'Topics in
Circular Statistics (2001)'". 2015.
<https://cran.r-project.org/web/packages/CircStats/CircStats.pdf>
Notes
-----
The definition used here differs from the one in scipy.stats.circvar.
Precisely, Scipy circvar uses an approximation based on the limit of small
angles which approaches the linear variance.
"""
return 1.0 - _length(data, 1, 0.0, axis, weights)
[docs]def circstd(data, axis=None, weights=None, method='angular'):
""" Computes the circular standard deviation of an array of circular data.
The standard deviation implemented here is based on the definitions given
by [1]_, which is also the same used by the R package 'CirStat' [2]_.
Two methods are implemented: 'angular' and 'circular'. The former is
defined as sqrt(2 * (1 - R)) and it is bounded in [0, 2*Pi]. The
latter is defined as sqrt(-2 * ln(R)) and it is bounded in [0, inf].
Following 'CircStat' the default method used to obtain the standard
deviation is 'angular'.
Parameters
----------
data : numpy.ndarray or dimensionless Quantity
Array of circular (directional) data, which is assumed to be in
radians whenever ``data`` is ``numpy.ndarray``.
axis : int, optional
Axis along which circular variances are computed. The default is to
compute the variance of the flattened array.
weights : numpy.ndarray, optional
In case of grouped data, the i-th element of ``weights`` represents a
weighting factor for each group such that ``sum(weights, axis)``
equals the number of observations. See [3]_, remark 1.4, page 22,
for detailed explanation.
method : str, optional
The method used to estimate the standard deviation:
- 'angular' : obtains the angular deviation
- 'circular' : obtains the circular deviation
Returns
-------
circstd : numpy.ndarray or dimensionless Quantity
Angular or circular standard deviation.
Examples
--------
>>> import numpy as np
>>> from astropy.stats import circstd
>>> from astropy import units as u
>>> data = np.array([51, 67, 40, 109, 31, 358])*u.deg
>>> circstd(data) # doctest: +FLOAT_CMP
<Quantity 0.57195022>
Alternatively, using the 'circular' method:
>>> import numpy as np
>>> from astropy.stats import circstd
>>> from astropy import units as u
>>> data = np.array([51, 67, 40, 109, 31, 358])*u.deg
>>> circstd(data, method='circular') # doctest: +FLOAT_CMP
<Quantity 0.59766999>
References
----------
.. [1] P. Berens. "CircStat: A MATLAB Toolbox for Circular Statistics".
Journal of Statistical Software, vol 31, issue 10, 2009.
.. [2] C. Agostinelli, U. Lund. "Circular Statistics from 'Topics in
Circular Statistics (2001)'". 2015.
<https://cran.r-project.org/web/packages/CircStats/CircStats.pdf>
.. [3] S. R. Jammalamadaka, A. SenGupta. "Topics in Circular Statistics".
Series on Multivariate Analysis, Vol. 5, 2001.
"""
if method not in ('angular', 'circular'):
raise ValueError("method should be either 'angular' or 'circular'")
if method == 'angular':
return np.sqrt(2. * (1. - _length(data, 1, 0.0, axis, weights)))
else:
return np.sqrt(-2. * np.log(_length(data, 1, 0.0, axis, weights)))
[docs]def circmoment(data, p=1.0, centered=False, axis=None, weights=None):
""" Computes the ``p``-th trigonometric circular moment for an array
of circular data.
Parameters
----------
data : numpy.ndarray or Quantity
Array of circular (directional) data, which is assumed to be in
radians whenever ``data`` is ``numpy.ndarray``.
p : float, optional
Order of the circular moment.
centered : bool, optional
If ``True``, central circular moments are computed. Default value is
``False``.
axis : int, optional
Axis along which circular moments are computed. The default is to
compute the circular moment of the flattened array.
weights : numpy.ndarray, optional
In case of grouped data, the i-th element of ``weights`` represents a
weighting factor for each group such that ``sum(weights, axis)``
equals the number of observations. See [1]_, remark 1.4, page 22,
for detailed explanation.
Returns
-------
circmoment : numpy.ndarray or Quantity
The first and second elements correspond to the direction and length of
the ``p``-th circular moment, respectively.
Examples
--------
>>> import numpy as np
>>> from astropy.stats import circmoment
>>> from astropy import units as u
>>> data = np.array([51, 67, 40, 109, 31, 358])*u.deg
>>> circmoment(data, p=2) # doctest: +FLOAT_CMP
(<Quantity 90.99263082432564 deg>, <Quantity 0.48004283892950717>)
References
----------
.. [1] S. R. Jammalamadaka, A. SenGupta. "Topics in Circular Statistics".
Series on Multivariate Analysis, Vol. 5, 2001.
.. [2] C. Agostinelli, U. Lund. "Circular Statistics from 'Topics in
Circular Statistics (2001)'". 2015.
<https://cran.r-project.org/web/packages/CircStats/CircStats.pdf>
"""
if centered:
phi = circmean(data, axis, weights)
else:
phi = 0.0
return _angle(data, p, phi, axis, weights), _length(data, p, phi, axis,
weights)
[docs]def circcorrcoef(alpha, beta, axis=None, weights_alpha=None,
weights_beta=None):
""" Computes the circular correlation coefficient between two array of
circular data.
Parameters
----------
alpha : numpy.ndarray or Quantity
Array of circular (directional) data, which is assumed to be in
radians whenever ``data`` is ``numpy.ndarray``.
beta : numpy.ndarray or Quantity
Array of circular (directional) data, which is assumed to be in
radians whenever ``data`` is ``numpy.ndarray``.
axis : int, optional
Axis along which circular correlation coefficients are computed.
The default is the compute the circular correlation coefficient of the
flattened array.
weights_alpha : numpy.ndarray, optional
In case of grouped data, the i-th element of ``weights_alpha``
represents a weighting factor for each group such that
``sum(weights_alpha, axis)`` equals the number of observations.
See [1]_, remark 1.4, page 22, for detailed explanation.
weights_beta : numpy.ndarray, optional
See description of ``weights_alpha``.
Returns
-------
rho : numpy.ndarray or dimensionless Quantity
Circular correlation coefficient.
Examples
--------
>>> import numpy as np
>>> from astropy.stats import circcorrcoef
>>> from astropy import units as u
>>> alpha = np.array([356, 97, 211, 232, 343, 292, 157, 302, 335, 302,
... 324, 85, 324, 340, 157, 238, 254, 146, 232, 122,
... 329])*u.deg
>>> beta = np.array([119, 162, 221, 259, 270, 29, 97, 292, 40, 313, 94,
... 45, 47, 108, 221, 270, 119, 248, 270, 45, 23])*u.deg
>>> circcorrcoef(alpha, beta) # doctest: +FLOAT_CMP
<Quantity 0.2704648826748831>
References
----------
.. [1] S. R. Jammalamadaka, A. SenGupta. "Topics in Circular Statistics".
Series on Multivariate Analysis, Vol. 5, 2001.
.. [2] C. Agostinelli, U. Lund. "Circular Statistics from 'Topics in
Circular Statistics (2001)'". 2015.
<https://cran.r-project.org/web/packages/CircStats/CircStats.pdf>
"""
if(np.size(alpha, axis) != np.size(beta, axis)):
raise ValueError("alpha and beta must be arrays of the same size")
mu_a = circmean(alpha, axis, weights_alpha)
mu_b = circmean(beta, axis, weights_beta)
sin_a = np.sin(alpha - mu_a)
sin_b = np.sin(beta - mu_b)
rho = np.sum(sin_a*sin_b)/np.sqrt(np.sum(sin_a*sin_a)*np.sum(sin_b*sin_b))
return rho
[docs]def rayleightest(data, axis=None, weights=None):
""" Performs the Rayleigh test of uniformity.
This test is used to identify a non-uniform distribution, i.e. it is
designed for detecting an unimodal deviation from uniformity. More
precisely, it assumes the following hypotheses:
- H0 (null hypothesis): The population is distributed uniformly around the
circle.
- H1 (alternative hypothesis): The population is not distributed uniformly
around the circle.
Small p-values suggest to reject the null hypothesis.
Parameters
----------
data : numpy.ndarray or Quantity
Array of circular (directional) data, which is assumed to be in
radians whenever ``data`` is ``numpy.ndarray``.
axis : int, optional
Axis along which the Rayleigh test will be performed.
weights : numpy.ndarray, optional
In case of grouped data, the i-th element of ``weights`` represents a
weighting factor for each group such that ``np.sum(weights, axis)``
equals the number of observations.
See [1]_, remark 1.4, page 22, for detailed explanation.
Returns
-------
p-value : float or dimensionless Quantity
p-value.
Examples
--------
>>> import numpy as np
>>> from astropy.stats import rayleightest
>>> from astropy import units as u
>>> data = np.array([130, 90, 0, 145])*u.deg
>>> rayleightest(data) # doctest: +FLOAT_CMP
<Quantity 0.2563487733797317>
References
----------
.. [1] S. R. Jammalamadaka, A. SenGupta. "Topics in Circular Statistics".
Series on Multivariate Analysis, Vol. 5, 2001.
.. [2] C. Agostinelli, U. Lund. "Circular Statistics from 'Topics in
Circular Statistics (2001)'". 2015.
<https://cran.r-project.org/web/packages/CircStats/CircStats.pdf>
.. [3] M. Chirstman., C. Miller. "Testing a Sample of Directions for
Uniformity." Lecture Notes, STA 6934/5805. University of Florida, 2007.
.. [4] D. Wilkie. "Rayleigh Test for Randomness of Circular Data". Applied
Statistics. 1983.
<http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.211.4762>
"""
n = np.size(data, axis=axis)
Rbar = _length(data, 1, 0.0, axis, weights)
z = n*Rbar*Rbar
# see [3] and [4] for the formulae below
tmp = 1.0
if(n < 50):
tmp = 1.0 + (2.0*z - z*z)/(4.0*n) - (24.0*z - 132.0*z**2.0 +
76.0*z**3.0 - 9.0*z**4.0)/(288.0 *
n * n)
p_value = np.exp(-z)*tmp
return p_value
[docs]def vtest(data, mu=0.0, axis=None, weights=None):
""" Performs the Rayleigh test of uniformity where the alternative
hypothesis H1 is assumed to have a known mean angle ``mu``.
Parameters
----------
data : numpy.ndarray or Quantity
Array of circular (directional) data, which is assumed to be in
radians whenever ``data`` is ``numpy.ndarray``.
mu : float or Quantity, optional
Mean angle. Assumed to be known.
axis : int, optional
Axis along which the V test will be performed.
weights : numpy.ndarray, optional
In case of grouped data, the i-th element of ``weights`` represents a
weighting factor for each group such that ``sum(weights, axis)``
equals the number of observations. See [1]_, remark 1.4, page 22,
for detailed explanation.
Returns
-------
p-value : float or dimensionless Quantity
p-value.
Examples
--------
>>> import numpy as np
>>> from astropy.stats import vtest
>>> from astropy import units as u
>>> data = np.array([130, 90, 0, 145])*u.deg
>>> vtest(data) # doctest: +FLOAT_CMP
<Quantity 0.6223678199713766>
References
----------
.. [1] S. R. Jammalamadaka, A. SenGupta. "Topics in Circular Statistics".
Series on Multivariate Analysis, Vol. 5, 2001.
.. [2] C. Agostinelli, U. Lund. "Circular Statistics from 'Topics in
Circular Statistics (2001)'". 2015.
<https://cran.r-project.org/web/packages/CircStats/CircStats.pdf>
.. [3] M. Chirstman., C. Miller. "Testing a Sample of Directions for
Uniformity." Lecture Notes, STA 6934/5805. University of Florida, 2007.
"""
from scipy.stats import norm
if weights is None:
weights = np.ones((1,))
try:
weights = np.broadcast_to(weights, data.shape)
except ValueError:
raise ValueError('Weights and data have inconsistent shape.')
n = np.size(data, axis=axis)
R0bar = np.sum(weights * np.cos(data - mu), axis)/np.sum(weights, axis)
z = np.sqrt(2.0 * n) * R0bar
pz = norm.cdf(z)
fz = norm.pdf(z)
# see reference [3]
p_value = 1 - pz + fz*((3*z - z**3)/(16.0*n) +
(15*z + 305*z**3 - 125*z**5 + 9*z**7)/(4608.0*n*n))
return p_value
def _A1inv(x):
# Approximation for _A1inv(x) according R Package 'CircStats'
# See http://www.scienceasia.org/2012.38.n1/scias38_118.pdf, equation (4)
if 0 <= x < 0.53:
return 2.0*x + x*x*x + (5.0*x**5)/6.0
elif x < 0.85:
return -0.4 + 1.39*x + 0.43/(1.0 - x)
else:
return 1.0/(x*x*x - 4.0*x*x + 3.0*x)
[docs]def vonmisesmle(data, axis=None):
""" Computes the Maximum Likelihood Estimator (MLE) for the parameters of
the von Mises distribution.
Parameters
----------
data : numpy.ndarray or Quantity
Array of circular (directional) data, which is assumed to be in
radians whenever ``data`` is ``numpy.ndarray``.
axis : int, optional
Axis along which the mle will be computed.
Returns
-------
mu : float or Quantity
the mean (aka location parameter).
kappa : float or dimensionless Quantity
the concentration parameter.
Examples
--------
>>> import numpy as np
>>> from astropy.stats import vonmisesmle
>>> from astropy import units as u
>>> data = np.array([130, 90, 0, 145])*u.deg
>>> vonmisesmle(data) # doctest: +FLOAT_CMP
(<Quantity 101.16894320013179 deg>, <Quantity 1.49358958737054>)
References
----------
.. [1] S. R. Jammalamadaka, A. SenGupta. "Topics in Circular Statistics".
Series on Multivariate Analysis, Vol. 5, 2001.
.. [2] C. Agostinelli, U. Lund. "Circular Statistics from 'Topics in
Circular Statistics (2001)'". 2015.
<https://cran.r-project.org/web/packages/CircStats/CircStats.pdf>
"""
mu = circmean(data, axis=None)
kappa = _A1inv(np.mean(np.cos(data - mu), axis))
return mu, kappa
```