NEW ANSWER
from numba import njit
import pandas as pd
import numpy as np
@njit
def mean1(a):
    """Return the mean of every element ``a[i]`` along the first axis.

    The result is a 1-D float array of length ``len(a)`` whose ``i``-th
    entry is the mean of ``a[i]``.
    """
    count = len(a)
    means = np.empty(count)
    for idx in range(count):
        means[idx] = np.mean(a[idx])
    return means
@njit
def std1(a):
    """Return the standard deviation of every element ``a[i]`` along the first axis.

    The result is a 1-D float array of length ``len(a)`` whose ``i``-th
    entry is ``a[i].std()``.
    """
    count = len(a)
    spreads = np.empty(count)
    for idx in range(count):
        spreads[idx] = np.std(a[idx])
    return spreads
@njit
def c(a, b):
    """Correlation of every row of ``a`` with every row of ``b``.

    ``a`` has shape ``(n, k)`` and ``b`` has shape ``(m, k)``; the result has
    shape ``(n, m)`` with entry ``[i, j]`` equal to the mean product of the
    centered rows ``a[i]`` and ``b[j]`` divided by both rows' standard
    deviations.
    """
    rows_a, width = a.shape
    # The divisor below uses the width read from ``b`` (the later assignment).
    rows_b, width = b.shape

    mean_a = mean1(a)
    mean_b = mean1(b)
    std_a = std1(a)
    std_b = std1(b)

    result = np.empty((rows_a, rows_b))
    for row in range(rows_a):
        # Centering a[row] does not depend on the inner index; do it once.
        centered_a = a[row] - mean_a[row]
        for col in range(rows_b):
            centered_b = b[col] - mean_b[col]
            result[row, col] = (
                centered_a @ centered_b / width / std_a[row] / std_b[col]
            )
    return result
# Ranks of ``df_test`` computed along axis 1, as a plain NumPy array.
# NOTE(review): ``df_test`` is not defined in this snippet; it must already exist.
r = df_test.rank(1).values
# Element-wise comparison of pandas' Spearman correlation (on the transposed
# frame) with the output of ``c`` applied to the ranks.
df_test.T.corr('spearman') == c(r, r)
OLD ANSWER
Doing a Spearman Rank correlation is simply doing a correlation of the ranks.
Rank
We can leverage `argsort` to get ranks. Although the `argsort` of the `argsort` does give us the ranks, we can limit ourselves to a single sort by using the sort order as an assignment index.
def rank(a):
    """Return the zero-based rank of each value within its row.

    ``a`` must be 2-D. The result has the same shape as ``a`` and the integer
    dtype produced by ``argsort``; ``result[r, c]`` is the position that
    ``a[r, c]`` takes when row ``r`` is sorted.
    """
    n_rows, n_cols = a.shape
    order = a.argsort(1)
    ranks = np.empty_like(order)
    # Scatter 0..n_cols-1 into each row at the positions given by the sort
    # order; this inverts the permutation without a second sort.
    row_index = np.arange(n_rows)[:, None]
    ranks[row_index, order] = np.arange(n_cols)
    return ranks
Correlation
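When correlating ranks, the means and standard deviations are all predetermined by the size of the second dimension of the array: for ranks 0, 1, ..., k - 1 with no ties, the mean is (k - 1) / 2 and the standard deviation is sqrt((k - 1)(k + 1) / 12).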
You can accomplish this same thing without numba, but I'm assuming you want it.
from numba import njit
@njit
def c(a, b):
    """Correlate rows of rank array ``a`` with rows of rank array ``b``.

    ``a`` has shape ``(n, k)`` and ``b`` has shape ``(m, k)``; the result has
    shape ``(n, m)``. The mean and standard deviation are not measured from
    the data: they are computed from ``k`` alone as ``(k - 1) / 2`` and
    ``sqrt((k - 1) * (k + 1) / 12)``.
    """
    rows_a, k = a.shape
    rows_b, k = b.shape

    mu = (k - 1) / 2
    sig = ((k - 1) * (k + 1) / 12) ** .5
    variance = sig ** 2

    centered_a = a - mu
    centered_b = b - mu

    result = np.empty((rows_a, rows_b))
    for row in range(rows_a):
        for col in range(rows_b):
            result[row, col] = centered_a[row] @ centered_b[col] / k / variance
    return result
For posterity, we could avoid the internal loop altogether but this might have memory issues.
@njit
def c1(a, b):
    """Loop-free variant: correlate rank rows via one matrix product.

    ``a`` has shape ``(n, k)`` and ``b`` has shape ``(m, k)``; the result has
    shape ``(n, m)``. The mean and standard deviation are computed from ``k``
    alone as ``(k - 1) / 2`` and ``sqrt((k - 1) * (k + 1) / 12)``.
    """
    rows_a, k = a.shape
    rows_b, k = b.shape

    mu = (k - 1) / 2
    sig = ((k - 1) * (k + 1) / 12) ** .5

    centered_a = a - mu
    centered_b = b - mu
    # The full (n, m) product is materialized in one step.
    return centered_a @ centered_b.T / k / sig ** 2
Demonstration
# Fix the seed so the random draws below are reproducible.
np.random.seed([3, 1415])
# Two independent 2 x 10 samples; the order of these calls affects the values drawn.
a = np.random.randn(2, 10)
b = np.random.randn(2, 10)
# Convert each row to zero-based ranks.
rank_a = rank(a)
rank_b = rank(b)
# Correlate every row of rank_a with every row of rank_b (2 x 2 result).
c(rank_a, rank_b)
array([[0.32121212, 0.01818182],
[0.13939394, 0.55151515]])
If you are working with DataFrames:
# Wrap the sample arrays in DataFrames.
da = pd.DataFrame(a)
db = pd.DataFrame(b)
# Rank the underlying arrays, correlate them, and label the result with
# da's index as rows and db's index as columns.
pd.DataFrame(c(rank(da.values), rank(db.values)), da.index, db.index)
0 1
0 0.321212 0.018182
1 0.139394 0.551515
Validation
We can do a quick validation using pandas.DataFrame.corr
# Element-wise comparison of pandas' Spearman correlation of the columns of
# a.T (i.e. the rows of a) with c applied to the ranks of a.
pd.DataFrame(a.T).corr('spearman') == c(rank_a, rank_a)
0 1
0 True True
1 True True