import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
def fractile(l, p, C):
    """Find the p'th fractile of l for various interpolation schemes.

    All schemes interpolate linearly between neighbouring values of the
    sorted sample; they vary only in their definition of the range of the
    CDF, which is controlled by ``C``.

    Args:
        l (array-like): The numbers to find fractiles for. The input is not
            modified; multi-dimensional input is flattened and treated as a
            single sample.
        p (float or array-like): The fractiles of interest (as floats
            between 0 and 1). Fractiles that fall outside the range covered
            by the chosen scheme yield the sample minimum or maximum.
        C (float): Interpolation scheme, given as a float between 0 and 1.
            0 corresponds to what Excel's PERCENTILE.EXC calculates.
            1/2 corresponds to what MATLAB does.
            1 corresponds to what NumPy/Excel's PERCENTILE.INC calculates.

    Returns:
        The interpolated fractile(s): a NumPy scalar when ``p`` is a scalar,
        otherwise an array with the same shape as ``p``.

    Raises:
        ValueError: If ``l`` contains no values.
    """
    def get_index(N, p, C):
        # Fractional, zero-based position of each fractile in the sorted
        # sample. np.asarray/np.clip (rather than the ndarray .clip method)
        # let plain Python scalars and lists be passed as p.
        the_index = (N + 1 - 2*C)*np.asarray(p, dtype=float) + C - 1
        # Positions before the first or after the last sample are pinned to
        # the ends, so out-of-range fractiles return the minimum/maximum.
        return np.clip(the_index, 0, N - 1)

    s = np.sort(l, axis=None)
    if s.size == 0:
        raise ValueError("cannot compute fractiles of an empty array")

    x = get_index(s.size, p, C)
    lower = np.floor(x).astype(np.intp)  # index of the left neighbour
    weight = x % 1                       # distance from the left neighbour

    # When x == N-1 the right neighbour does not exist; the dummy value keeps
    # the lookup in bounds and is always multiplied by a weight of zero.
    s = np.append(s, 0)
    return s[lower]*(1 - weight) + s[lower + 1]*weight
# Demo: plot the percentile curve of a small sample under each of the three
# interpolation schemes and save the comparison as a PNG.
x = np.array([15, 20, 35, 40, 50])
p = np.arange(0, 100.1, 0.1)  # percent ranks from 0 to 100 in steps of 0.1

ax = plt.gca()

# One curve per scheme; fractile() takes fractions, so percents are divided
# by 100. The plotting order must match the legend labels below.
for v in (0, 0.5, 1):
    ax.plot(p, fractile(x, p/100, v))

ax.set_title('Three different linear interpolation schemes')
ax.set_xlabel('Percent rank')
ax.set_ylabel('Percentile value')
ax.set_xlim(0, 100)
ax.set_ylim(14, 51)
ax.legend(['$C = 0$', '$C = 1/2$', '$C = 1$'], loc='lower right')

plt.savefig('percentile_interpolation.png', dpi=400)