-
Notifications
You must be signed in to change notification settings - Fork 44
Description
The current implementation of the circular median only returns correct results for well-behaved data. In particular, for an even number of data points, the line 141
min_idx = np.argsort(dm)[:2]
will take both points having the smallest left-to-right unbalance without checking that (1) these points are direct neighbours, (2) their unbalances cancel each other, (3) the unbalance is 1 for each, (4) their circular mean is really a median (depending on what is present on the opposite side of the circle, there could be a whole zoology of behaviours between two balancing points).
In addition, the condition
if m > 1:
warnings.warn('Ties detected in median computation')
doesn't make sense. You can have cases with m>1 without ties, e.g. [ 0, 1, 1, 2, 2, 3 ], and cases with m==1 and ties, e.g. [ 1, 1.2, 2, 2.2, 1+pi, 1.2+pi, 2+pi, 2.2+pi ].
I have tried to rewrite this function in a simple 1D-arrays case. It works for most cases and returns NaN when facing an unexpected case. The multiple case handling makes it messy though...
def delta_angle(alpha, beta):
"""
Difference (alpha - beta) around the circle. Input angles do not need to
be normalized in a given angle interval. Output is in (-pi,pi)
"""
return np.angle(np.exp(1j * alpha) / np.exp(1j * beta))
def circmedian( alpha ):
"""
Computes a median direction for circular data. alpha: 1-d array, in radians.
The algorithm is based
on finding the diameter that splits the point distribution in two (50% of
points on each side). The side of the diameter closest to the mean of the
data is returned. For even data sets, one doesn't guarantee that the returned
value will fall in the middle of two data points.
Output in (-pi,pi)
"""
# Checks
if alpha.ndim > 1:
raise Exception( 'circmedian only handles 1D arrays' )
# inits
alpha = alpha.ravel()
n = alpha.size
is_odd = (n % 2 == 1)
mean_alpha = circmean( alpha, low=-np.pi, high=np.pi, axis=0 ) # return values in [-pi,pi]
blnUseClosest = False # use candidate closest to mean if True
blnUseEps = False # use candidate +- eps if True
dd = delta_angle( alpha[ :, np.newaxis ], alpha[ np.newaxis, : ] ) # angular distance between all pair of points, in [-pi,pi]
m1 = np.sum( dd >= 0, 0 ) # For each point, number of points on one side of the point (itself included)
m2 = np.sum( dd <= 0, 0 ) # number of points on the other side of the point (itself included)
dm = m1 - m2 # signed unbalance between sides
dmabs = np.abs( dm )
min_dm = np.min( dmabs )
min_ind = np.argwhere( dmabs == min_dm ).squeeze() # 1D array
if is_odd:
if min_ind.size > 1:
# multiple solutions
blnUseClosest = True
if min_dm == 1:
blnUseEps = True
elif min_dm > 1:
return np.nan # hopeless
else:
if min_ind.size == 1:
# unpaired solution
if min_dm == 1:
blnUseEps = True
elif min_dm > 1:
return np.nan # hopeless
elif min_ind.size == 2 and np.sum( dm[ min_ind ] ) != 0:
# unpaired solution
blnUseClosest = True
elif min_ind.size > 2:
# multiple solutions
blnUseClosest = True
if blnUseClosest:
# Take the closest value to the circular mean.
# NB: for even number of points, one cannot be certain that the returned solution
# will be the closest one (only 1 point of the pair of points defining the median will be closest to the mean)
phase_shift = [ 0, np.pi ] # account for +-pi degeneracy of the median
ind_closest = [] # index of min_ind
dist_closest = []
for shift in phase_shift:
dist_to_mean = np.abs( delta_angle( alpha[ min_ind ], mean_alpha + shift ) )
ind_closest.append( np.argmin( dist_to_mean ) )
dist_closest.append( dist_to_mean[ ind_closest[-1] ] )
min_ind_closest = min_ind[ ind_closest[ np.argmin( dist_closest ) ] ]
if is_odd:
min_ind = [ min_ind_closest ]
elif dm[ min_ind_closest ] == 0:
# the closest point is already a solution, although the number of
# points is even. That means his partner point shares the same value (degenerate)
min_ind = [ min_ind_closest ]
else:
# Need to identify the 2nd (non-degenerate) side of the pair.
side1 = alpha[ min_ind_closest ]
delta_to_side1 = delta_angle( alpha, side1 )
sort_ind = np.argsort( delta_to_side1 )
sorted_delta_to_side1 = delta_to_side1[ sort_ind ]
ind_next_above = np.argmax( sorted_delta_to_side1 > 0 )
ind_next_below = np.argmin( sorted_delta_to_side1 < 0 ) - 1
if dm[ sort_ind[ ind_next_above ] ] == -dm[ min_ind_closest ]: # non-degenerate partner must have opposite sign of unbalance
min_ind = [ min_ind_closest, sort_ind[ ind_next_above ] ]
elif dm[ sort_ind[ ind_next_below ] ] == -dm[ min_ind_closest ]:
min_ind = [ sort_ind[ ind_next_below ], min_ind_closest ]
elif dmabs[ min_ind_closest ] == 1:
# Direct neighbours are not balancing partners, but a small
# shift would be enough
blnUseEps = True
min_ind = [ min_ind_closest ]
else:
# One may have a balancing pair further away from the mean
# (would need to find it) but min_dm > 1 anyway...
# A (costly) bisection algorithm could be used if necessary
return np.nan
if blnUseEps:
# If points are not close enough or grouped enough, one
# might have the case that consecutive balancing partners don't exist!
# Here, take eps left or right of point (so we get a median, but not halfway between 2 neighbours)
dd_eps = delta_angle( alpha, alpha[ min_ind ] + np.finfo(float).eps )
dm_eps = np.sum( dd_eps >= 0 ) - np.sum( dd_eps <= 0 ) # signed unbalance between sides
if dm_eps == 0:
med = alpha[ min_ind ] + np.finfo(float).eps
else:
med = alpha[ min_ind ] - np.finfo(float).eps
else:
if len( min_ind ) == 1:
# check that we really have the median
if dm[ min_ind[0] ] == 0:
med = alpha[ min_ind[0] ]
else:
return np.nan
else:
# compute the median and check it, because it could not be correct
# (e.g. if there are intermediate points, if min_dm > 1, ...)
med = circmean( alpha[ min_ind ] )
dd_med = delta_angle( alpha, med )
dm_med = np.sum( dd_med >= 0 ) - np.sum( dd_med <= 0 ) # signed unbalance between sides
if dm_med != 0:
return np.nan
# Remove +-pi degeneracy of median by taking solution closest to mean
if np.abs( delta_angle( med, mean_alpha ) ) > np.abs( delta_angle( med + np.pi, mean_alpha ) ):
med += np.pi
return ( med + np.pi ) % ( 2. * np.pi ) - np.pi # convert to (-pi,pi). NB: % 2pi returns in [0,2pi]