community, I have done some copy-&-paste and came up with the code below. Nevertheless the scientific notation for the numbers in the y-axis and also the ANOVA test is not working. Any ideas on why? I think it might be some redundancy or conflict, but I can't find it myself. I'm running the code below in jupyter notebook, but assuming it wouldn't affect the code to work.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from scipy.stats import f_oneway
import string
import matplotlib.ticker as mticker
# Create the new dataframe
data = {
'a': [533330.45, 488550.63, 433338.27, 63335.47, 613338.78, 658029.33, 423276.31, 486775.20, 465138.94],
'b': [103336.32, 93337.35, 93336.19, 233305.77, 283336.51, 152925.39, 331697.83, 299425.14, 274197.59],
'c': [252919.78, 193330.26, 205333.11, 134333.34, 123332.30, 123336.85, 153332.33, 195452.55, 167399.20],
'd': [44133.07, 43963.73, 35705.67, 135454.11, 137261.84, 152548.42, 233336.48, 213787.00, 257127.76],
'e': [38697.59, 43337.02, 33331.53, 34333.28, 63332.51, 43338.29, 34333.43, 42793.33, 63334.84]
}
df = pd.DataFrame(data)
color_palette = {
'a': 'green',
'b': 'lightgreen',
'c': 'yellow',
'd': 'lightyellow',
'e': 'red'
}
# Create a figure with desired dimensions
plt.figure(figsize=(14, 10))
# Create the box plot using Seaborn with color code
ax = sns.boxplot(data=df, palette=color_palette)
# Add stripplot to display all data points
sns.stripplot(data=df, color="black", ax=ax, jitter=True, dodge=True, alpha=0.5)
# Add faint horizontal grid lines
ax.yaxis.grid(True, linestyle='dotted', linewidth=0.5, color='lightgray')
# Increase font size of y- and x-axis labels
ax.set_ylabel('values', fontsize=18)
ax.yaxis.set_major_formatter(mticker.ScalarFormatter(useMathText=True))
ax.yaxis.get_offset_text().set_fontsize(12) # Set font size for the offset (e.g., '1e6')
# Get the p-value from ANOVA test
_, p_value = f_oneway(df['a'], df['b'], df['c'], df['d'], df['e'])
# Add ANOVA test result above the plot
significance = "*" if p_value < 0.05 else "ns"
y_position = df.values.max() + 50000 # Adjust y position
ax.annotate(f'ANOVA p-value: {p_value:.4f} {significance}', xy=(0.5, y_position), fontsize=12, ha='center')
# Set the x-axis tick labels
ax.set_xticklabels(['a', 'b', 'c', 'd', 'e'], fontsize=20)
# plt.tight_layout()
# Save the figure
# plt.savefig('box_plot_with_anova.png', dpi=360)
plt.show()