I tried to implement Spearman's rank correlation coefficient (wiki) as a custom objective function for XGBoost. I'm using Google's fast-soft-sort package (github) for the differentiable ranking and TensorFlow to calculate the gradients automatically. You can find the code below:
from fast_soft_sort.tf_ops import soft_rank
import tensorflow as tf
import numpy as np
def pearson_corr(x, y):
    """Return the Pearson correlation between ``x`` and ``y`` as a scalar tensor.

    ``x`` and ``y`` are concatenated along axis 0 and treated as the rows of
    one data matrix; the result is read off the correlation matrix of those
    rows.

    NOTE(review): the ``axis=1`` mean, the ``x.shape[1]`` denominator and the
    final reduction assume each input is a single row of shape ``(1, n)``,
    as in the random test in this file -- confirm before using other shapes.
    """
    stacked = tf.concat([x, y], axis=0)
    centered = stacked - tf.reduce_mean(stacked, axis=1, keepdims=True)
    # Covariance of the rows, normalised by n - 1.
    covariance = (centered @ tf.transpose(centered)) / (x.shape[1] - 1)
    # Diagonal matrix holding 1 / sqrt(variance) for each row; multiplying
    # from both sides rescales the covariance into a correlation matrix.
    inv_std = tf.linalg.diag(1 / tf.sqrt(tf.linalg.diag_part(covariance)))
    correlation = inv_std @ covariance @ inv_std
    # For a symmetric 2x2 matrix [[1, r], [r, 1]] the mean is (1 + r) / 2,
    # so mean * 2 - 1 recovers r, i.e. element [0][1].
    return tf.reduce_mean(correlation) * 2 - 1
def spearman_corr(x, y, regularization_strength=0.1):
    """Return a differentiable Spearman-style correlation between ``x`` and ``y``.

    ``x`` is replaced by its soft ranks (``soft_rank``) and the result is
    passed, together with ``y``, to ``pearson_corr``.

    Args:
        x: values to be soft-ranked.
        y: values correlated against the soft ranks of ``x``. ``y`` is passed
            through as-is and is NOT ranked here.
        regularization_strength: forwarded unchanged to ``soft_rank``.
            Defaults to 0.1, the value that used to be hard-coded.

    Returns:
        The value of ``pearson_corr(soft_rank(x), y)``.

    NOTE(review): a textbook Spearman coefficient ranks both inputs; callers
    presumably pass labels that are already ranks -- confirm.

    NOTE(review): the example output in this file shows a usable first-order
    gradient but a ``None`` second-order result for this function. Presumably
    ``soft_rank``'s backward pass is a NumPy-backed custom gradient that is
    not itself recorded on the tape -- verify against fast_soft_sort.tf_ops.
    """
    soft_ranks = soft_rank(x, regularization_strength=regularization_strength)
    return pearson_corr(soft_ranks, y)
def get_value_grad_and_hess(x, y, f):
    """Evaluate ``f(x, y)`` and differentiate it with respect to ``x``.

    ``x`` and ``y`` are wrapped in float32 ``tf.Variable`` objects before
    ``f`` is called.

    Returns:
        A ``(value, gradient, hessian)`` tuple where ``gradient`` is
        ``d value / d x`` and ``hessian`` is the Jacobian of that gradient
        with respect to ``x``. Either derivative may be ``None``, as the
        example output in this file shows for the Spearman case.
    """
    x_var = tf.Variable(x, dtype=tf.float32)
    y_var = tf.Variable(y, dtype=tf.float32)
    with tf.GradientTape() as outer_tape:
        with tf.GradientTape() as inner_tape:
            value = f(x_var, y_var)
        # Taken while the outer tape is still recording, so the gradient
        # computation itself can be differentiated afterwards.
        gradient = inner_tape.gradient(value, x_var)
    hessian = outer_tape.jacobian(gradient, x_var)
    return value, gradient, hessian
# Smoke test on random input: run both objectives on the same data and print
# the value, gradient and Hessian for each.
x = np.random.rand(1, 10)  # predictions
y = np.random.rand(1, 10)  # labels

for title, objective in (('pearson:', pearson_corr), ('spearman:', spearman_corr)):
    print(title)
    val, grad, hess = get_value_grad_and_hess(x, y, objective)
    print(' value:', val)
    print(' gradient:', grad)
    print(' hessian:', hess)
Example output:
pearson:
value: tf.Tensor(-0.3348779, shape=(), dtype=float32)
gradient: tf.Tensor(
[[ 0.21893269 0.16921082 0.19409613 -0.00321923 0.07347419 0.29004234
-0.07947832 -0.7088071 0.29586902 -0.4501205 ]], shape=(1, 10), dtype=float32)
hessian: tf.Tensor(
[[[[ 0.04441248 -0.03097764 0.02028688 -0.20294864 -0.22516166
-0.09771542 -0.06334648 0.42131865 -0.02681065 0.16094248]]
[[-0.03097765 0.40132353 0.04399774 -0.07797898 -0.05632872
0.04975905 -0.07172927 -0.17790946 0.06856277 -0.14871901]]
[[ 0.02028689 0.04399772 0.44207606 -0.06522453 -0.03210837
0.0911998 -0.07974204 -0.30411014 0.10508882 -0.22146425]]
[[-0.20294863 -0.077979 -0.06522458 0.27985442 -0.12591925
-0.13325104 -0.02723934 0.31153008 -0.10839472 0.14957213]]
[[-0.22516167 -0.05632871 -0.03210838 -0.12591931 0.23029271
-0.10794277 -0.04108595 0.30121914 -0.07069567 0.12773061]]
[[-0.09771542 0.04975905 0.0911998 -0.13325103 -0.10794276
0.4497667 -0.09163402 -0.12746409 0.11477053 -0.14748882]]
[[-0.06334649 -0.07172926 -0.07974204 -0.02723937 -0.04108596
-0.09163402 0.35762674 0.07487351 -0.09705587 0.03933275]]
[[ 0.4213187 -0.17790946 -0.3041101 0.31153005 0.3012191
-0.12746407 0.07487351 -0.09769349 -0.2807703 -0.12099396]]
[[-0.02681071 0.06856281 0.1050889 -0.10839473 -0.07069571
0.11477058 -0.0970559 -0.28077024 0.5259669 -0.23066193]]
[[ 0.1609425 -0.14871901 -0.22146428 0.1495721 0.12773061
-0.14748883 0.03933276 -0.12099396 -0.23066193 0.39175004]]]], shape=(1, 10, 1, 10), dtype=float32)
spearman:
value: tf.Tensor(-0.3408205, shape=(), dtype=float32)
gradient: tf.Tensor(
[[ 0.13679196 0.13627169 0.15643153 -0.10963751 -0.02715444 0.2698098
0.20591483 -0.8303905 0.26787752 -0.20591483]], shape=(1, 10), dtype=float32)
hessian: None
As you can see, the code above yields both the gradient and the Hessian for the Pearson correlation function, but for the Spearman correlation the Hessian is None.
Does anyone have an idea why the Hessian is None for the Spearman correlation?