0

I have this script that draws a seaborn pairplot of a dataframe. Instead of displaying Pearson's r, I'd like to square it and display r².

I'd also like to display the linear regression (`linregress`) equation on each plot.

import numpy as np
from lecture_fichier import lire_tableau
np.set_printoptions(precision=4, suppress=True)
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
from scipy.stats import pearsonr

##pearsonr
def r2(x, y, ax=None, **kws):
    """Annotate a plot with the coefficient of determination of x and y.

    The Pearson correlation coefficient r is computed with
    ``scipy.stats.pearsonr`` and its square is written near the top left
    corner of the axes, in axes-fraction coordinates.

    Parameters
    ----------
    x, y : array-like
        The two samples to correlate.
    ax : matplotlib axes, optional
        Axes to annotate; the current axes are used when omitted.
    **kws
        Accepted and ignored, so the function can be used as a seaborn
        grid-mapping callback, which passes extra keyword arguments.
    """
    r, _ = pearsonr(x, y)
    ax = ax or plt.gca()
    # The label reads "r²", so display the squared coefficient rather than
    # r itself (which would show a negative value for anti-correlated data).
    ax.annotate(f'r² = {r ** 2:.2f}', xy=(.1, .9), xycoords=ax.transAxes)

## Read every measurement table and flatten it into a plain Python list
E = lire_tableau("data/Mv.txt")
liste1 = np.array(E).reshape(-1).tolist()
F = lire_tableau("data/Es.txt")
liste2 = np.array(F).reshape(-1).tolist()
G = lire_tableau("data/Ed.txt")
liste3 = np.array(G).reshape(-1).tolist()
T = lire_tableau("data/Ef.txt")
liste4 = np.array(T).reshape(-1).tolist()

## One dataframe column per measurement; zip stops at the shortest list
df = pd.DataFrame(
    data=list(zip(liste1, liste2, liste3, liste4)),
    columns=['Mv', 'f stat', 't dyn', 'f dyn'],
)

## Regression pairplot with KDE plots on the diagonal
g = sn.pairplot(df, kind='reg', diag_kind='kde', height=1.5)
# Annotate each panel below the diagonal
g.map_lower(r2)
# Hide every panel strictly above the diagonal
for upper_ax in g.axes[np.triu_indices_from(g.axes, k=1)]:
    upper_ax.set_visible(False)
plt.show()
JohanC
  • 71,591
  • 8
  • 33
  • 66
Spero42
  • 3
  • 3

1 Answer

1

Following the ideas of "How to get the numerical fitting results when plotting a regression?", you could calculate the slope, intercept and r value with scipy.stats.linregress().

Optionally, the text can be displayed with a contrasting color and a semi-transparent background.

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import linregress

def r2(x, y, ax=None, **kws):
    """Write r² and the fitted line equation in the top left of a plot.

    A least-squares line is fitted to ``x`` and ``y`` with
    ``scipy.stats.linregress``; the squared r value and the equation
    ``slope * x + intercept`` are then drawn as a two-line annotation in
    dark red on a semi-transparent white background.

    ``ax`` defaults to the current axes. Extra keyword arguments are
    accepted and ignored.
    """
    fit = linregress(x, y)
    r_squared = fit.rvalue ** 2
    target = ax if ax else plt.gca()
    label = f'$r^2 = {r_squared:.2f}$\nEq: ${fit.slope:.2f}x{fit.intercept:+.2f}$'
    target.annotate(
        label,
        xy=(.05, .95),
        xycoords=target.transAxes,
        fontsize=8,
        color='darkred',
        backgroundcolor='#FFFFFF99',
        ha='left',
        va='top',
    )

# Dummy test data: four flattened random walks of equal length
liste1 = np.cumsum(np.random.randn(100, 5), axis=0).ravel()
liste2 = np.cumsum(np.random.randn(100, 5), axis=0).ravel()
liste3 = np.cumsum(np.random.randn(100, 5), axis=0).ravel()
liste4 = np.cumsum(np.random.randn(100, 5), axis=0).ravel()

# All four arrays have the same length, so a column mapping builds the
# same frame as zipping them row by row.
df = pd.DataFrame({
    'Mv': liste1,
    'f stat': liste2,
    't dyn': liste3,
    'f dyn': liste4,
})

# Regression pairplot with black fit lines and KDE plots on the diagonal
g = sns.pairplot(
    df,
    kind='reg',
    diag_kind='kde',
    height=2,
    plot_kws={'line_kws': {'color': 'black'}},
)
# Annotate each panel below the diagonal
g.map_lower(r2)
# Hide every panel strictly above the diagonal
for upper_ax in g.axes[np.triu_indices_from(g.axes, k=1)]:
    upper_ax.set_visible(False)
plt.show()

seaborn pairplot regression with pearson r, slope and intercept

JohanC
  • 71,591
  • 8
  • 33
  • 66