I have a dataset (included at the end of the question) consisting of 2 groups (group1, group2), each with 4 series of scatter measurements (R1, R2, R3, R4). I want to plot:
- Scatter of each measurement (R1, R2, R3, R4)
- The Trendline of each measurement (R1,R2, R3, R4)
- The Average line for each group (group1, group2)
- Filled colored error area for each average line (average line +- stddev)
Here is what the plot should look like
As you can see from the plot above, I have successfully plotted it (dataset and working example below). However, the code is very long; is there a simpler way to do this in plotly? I think that "error area and trendlines for 2 groups" is something that a lot of people would want to do.
Code
import pandas as pd
#model fitting
from scipy import stats
import numpy as np
#plotting
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots
# Import the data. sep=None asks pandas to sniff the delimiter, which only the
# Python parsing engine supports; naming the engine explicitly avoids the
# ParserWarning pandas emits when it has to fall back from the default C engine.
df = pd.read_csv("/content/dfs.csv", sep=None, engine="python")
#1 Fun: fit the data
def model(df_):
    """Fit a straight line through the "x"/"y" columns of ``df_``.

    Rows in which either coordinate is NaN are excluded from the fit.

    Args:
        df_: DataFrame with numeric "x" and "y" columns.

    Returns:
        The result object returned by ``scipy.stats.linregress``.
    """
    xs = df_["x"].values
    ys = df_["y"].values
    # A pair is usable only when both of its coordinates are present.
    has_nan = np.isnan(xs) | np.isnan(ys)
    usable = ~has_nan
    return stats.linregress(xs[usable], ys[usable])
#2 Fun: make average fit and upper lower bounds
def get_average_line(df_, results_):
    """Build the average fitted line of one group with a +/- one-std band.

    The group is identified by the first "Group" value in ``df_``. The slopes
    and intercepts of that group's fits are averaged, and the upper/lower
    lines are obtained by shifting both slope and intercept by one sample
    standard deviation.

    Args:
        df_: Rows of a single group; must contain "Group" and "x" columns.
        results_: One row per fit, with a "Group" column, the slope in column
            "m" and the intercept in column "x".

    Returns:
        DataFrame with columns "x", "y", "y_upper" and "y_lower", one row per
        distinct non-NaN x value of the group, sorted by ascending x.
    """
    name_ = df_["Group"].iloc[0]
    group_fits = results_[results_["Group"] == name_]
    # Explicit mean/std of the two fit parameters (std uses ddof=1, the same
    # value describe() reports).
    summary = group_fits[["m", "x"]].agg(["mean", "std"])
    slope_mean = summary.loc["mean", "m"]
    slope_std = summary.loc["std", "m"]
    intercept_mean = summary.loc["mean", "x"]
    intercept_std = summary.loc["std", "x"]

    # unique() keeps order of appearance; sort so that line traces and the
    # filled band built from these rows run left-to-right without backtracking.
    x = np.sort(df_["x"].dropna().unique())

    y = slope_mean * x + intercept_mean
    y_upper = (slope_mean + slope_std) * x + (intercept_mean + intercept_std)
    y_lower = (slope_mean - slope_std) * x + (intercept_mean - intercept_std)
    return pd.DataFrame(
        {"x": x, "y": y, "y_upper": y_upper, "y_lower": y_lower}
    )
#5 plot the upper and lower bound
def plot_avearge_lines(df_, name_line="Group", row=1, col=1):
    """Add one group's average line and its shaded error band to ``fig``.

    Three traces are added, in this order, to the module-level figure ``fig``
    (which must already exist): the average line, an invisible upper-bound
    line, and an invisible lower-bound line filled up to the trace added just
    before it (``fill='tonexty'``), which produces the shaded band.

    Args:
        df_: DataFrame with columns "Group", "x", "y", "y_upper", "y_lower".
            The first "Group" value is used in the trace names and as the
            legend group.
        name_line: Not used by this function; kept so existing callers that
            pass it keep working.
        row: Subplot row the traces are added to.
        col: Subplot column the traces are added to.
    """
    # Suffix appended to every trace name; also used as the legend group so
    # the three traces toggle together.
    name = " " + df_["Group"].iloc[0]

    # Average line.
    fig.add_trace(
        go.Scatter(
            name='Average' + name,
            x=df_["x"],
            y=df_["y"],
            mode='lines',
            line=dict(color='rgb(31, 119, 180)'),
            legendgroup=name,
        ),
        row=row,
        col=col,
    )

    # Upper bound: zero-width line that only serves as the fill target.
    fig.add_trace(
        go.Scatter(
            name='Upper Bound' + name,
            x=df_["x"],
            y=df_['y_upper'],
            mode='lines',
            marker=dict(color="#444"),
            line=dict(width=0),
            showlegend=False,
            legendgroup=name,
        ),
        row=row,
        col=col,
    )

    # Lower bound: fill='tonexty' fills up to the trace added just before it,
    # so it must go into the same subplot cell as the upper bound (the
    # original passed col=row here, which is only correct when row == col).
    fig.add_trace(
        go.Scatter(
            name='Lower Bound' + name,
            x=df_["x"],
            y=df_['y_lower'],
            marker=dict(color="#444"),
            line=dict(width=0),
            mode='lines',
            fillcolor='rgba(68, 68, 68, 0.3)',
            fill='tonexty',
            showlegend=False,
            legendgroup=name,
        ),
        row=row,
        col=col,
    )
#scatter of each line (R1, R2, R3, R4)
def plot_scatter_and_fit(df_, color="name", row=1, col=1):
    """Add the scatter points and OLS trendlines of ``df_`` to ``fig``.

    A temporary plotly express figure is built from ``df_`` (columns "x" and
    "y", colored by the ``color`` column, with ``trendline="ols"``) and every
    trace it contains is copied into the module-level figure ``fig``, which
    must already exist.

    Args:
        df_: DataFrame with "x", "y" and the column named by ``color``.
        color: Column used to split and color the series.
        row: Subplot row the traces are added to.
        col: Subplot column the traces are added to.
    """
    scatter_fig = px.scatter(
        df_, x="x", y="y", color=color, title='', trendline="ols"
    )
    for trace in scatter_fig.data:
        # Honor the requested cell; the original always used row=1, col=1 and
        # silently ignored the row/col arguments.
        fig.add_trace(trace, row=row, col=col)
##### MAIN ##########
# 1 Fit the data: one linear regression per (Group, Mes) series.
df_grouped_Group_mes = df.groupby(["Group", "Mes"])
results = (
    df_grouped_Group_mes.apply(model)
    .reset_index()
    .rename(columns={0: 'fit_results'})
)
# Unpack every regression result into plain columns. NOTE: the column named
# "x" holds the intercept and "m" the slope; get_average_line reads them
# under exactly these names.
fit_columns = ["x", "m", "r-value", "p-value", "y-std"]
fit_values = pd.DataFrame(
    [
        (reg.intercept, reg.slope, reg.rvalue, reg.pvalue, reg.stderr)
        for reg in results["fit_results"]
    ],
    columns=fit_columns,
)
results = pd.concat([results, fit_values], axis=1)

# 2 Make the average fit and its upper/lower bounds for every group.
# The groups are iterated explicitly instead of going through groupby.apply:
# the helpers below read df_["Group"], and pandas 2.2 deprecates handing the
# grouping columns to applied functions. Iteration always yields complete
# sub-frames. Grouping by the scalar "Group" keeps the group keys plain
# strings.
df_grouped_Group = df.groupby("Group")
avearge_lines = pd.concat(
    {
        group_name: get_average_line(group_df, results_=results)
        for group_name, group_df in df_grouped_Group
    },
    names=["Group"],
).reset_index()  # columns: Group, level_1, x, y, y_upper, y_lower

# 3 Plot a figure with a single scatter subplot.
fig = make_subplots(
    rows=1,
    cols=1,
    shared_xaxes=False,
    vertical_spacing=0.03,
    specs=[[{"type": "scatter"}]],
)

# 4 Scatter and trendline of each series (R1, R2, R3, R4), group by group.
# These helpers only add traces to fig and return nothing, so a plain loop
# states the intent better than apply.
for _, group_df in df_grouped_Group:
    plot_scatter_and_fit(group_df, color="name", row=1, col=1)

# 5 Plot the average line with its upper and lower bound for every group.
averge_line_grouped_name = avearge_lines.groupby("Group")
for _, line_df in averge_line_grouped_name:
    plot_avearge_lines(line_df, name_line="name", row=1, col=1)

# Layout: axis titles, figure size, and one tick per measured x value.
fig.update_layout(
    yaxis_title='y',
    xaxis_title="x",
    title='error area and trendlines for 2 groups of scatters series',
    width=800,
    height=800,
    hovermode="x",
    xaxis=dict(
        tickmode='array',
        tickvals=df["x"],
    ),
)
fig.show()
Dataset
Group Mes name x y
group1 R1 group1-R1 2.00 3.0
group1 R1 group1-R1 13.00 50.0
group1 R1 group1-R1 25.00 78.0
group1 R1 group1-R1 37.00 130.0
group2 R1 group2-R1 2.00 35.0
group2 R1 group2-R1 6.00 63.0
group2 R1 group2-R1 10.00 93.0
group2 R1 group2-R1 22.00 323.0
group2 R1 group2-R1 34.00 455.0
group1 R2 group1-R2 5.00 16.0
group1 R2 group1-R2 16.00 76.0
group1 R2 group1-R2 28.00 110.0
group1 R2 group1-R2 40.00 153.0
group2 R2 group2-R2 3.00 47.0
group2 R2 group2-R2 7.13 68.0
group2 R2 group2-R2 13.00 111.0
group2 R2 group2-R2 25.00 353.0
group2 R2 group2-R2 37.00 493.0
group1 R3 group1-R3 8.00 45.0
group1 R3 group1-R3 19.00 82.0
group1 R3 group1-R3 31.00 128.0
group2 R3 group2-R3 0.00 12.0
group2 R3 group2-R3 4.00 53.0
group2 R3 group2-R3 8.00 80.0
group2 R3 group2-R3 16.00 121.0
group2 R3 group2-R3 28.00 394.0
group2 R3 group2-R3 40.00 521.0
group1 R4 group1-R4 11.00 45.0
group1 R4 group1-R4 22.00 90.0
group1 R4 group1-R4 34.00 128.0
group2 R4 group2-R4 1.00 13.0
group2 R4 group2-R4 5.00 71.0
group2 R4 group2-R4 9.00 NaN
group2 R4 group2-R4 19.00 139.0
group2 R4 group2-R4 31.00 400.0