0

Community, I have done some copy-and-paste and come up with the code below. However, the scientific notation for the numbers on the y-axis and the ANOVA test are not working. Any ideas why? I think it might be some redundancy or conflict, but I can't find it myself. I'm running the code below in a Jupyter notebook, but I assume that doesn't affect how the code works.

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from scipy.stats import f_oneway
import string
import matplotlib.ticker as mticker

# Question script: draws a box plot with an overlaid strip plot for five
# groups, then tries to show y ticks in scientific notation and to annotate
# the figure with a one-way ANOVA p-value.

# Create the new dataframe
# One column per group ('a'..'e'), nine observations in each.
data = {
    'a': [533330.45, 488550.63, 433338.27, 63335.47, 613338.78, 658029.33, 423276.31, 486775.20, 465138.94],
    'b': [103336.32, 93337.35, 93336.19, 233305.77, 283336.51, 152925.39, 331697.83, 299425.14, 274197.59],
    'c': [252919.78, 193330.26, 205333.11, 134333.34, 123332.30, 123336.85, 153332.33, 195452.55, 167399.20],
    'd': [44133.07, 43963.73, 35705.67, 135454.11, 137261.84, 152548.42, 233336.48, 213787.00, 257127.76],
    'e': [38697.59, 43337.02, 33331.53, 34333.28, 63332.51, 43338.29, 34333.43, 42793.33, 63334.84]
}

df = pd.DataFrame(data)

# Box colour for each group, keyed by column name.
color_palette = {
    'a': 'green',
    'b': 'lightgreen',
    'c': 'yellow',
    'd': 'lightyellow',
    'e': 'red'
}

# Create a figure with desired dimensions
plt.figure(figsize=(14, 10))

# Create the box plot using Seaborn with color code
ax = sns.boxplot(data=df, palette=color_palette)

# Add stripplot to display all data points
# NOTE(review): some seaborn releases reportedly emit a FutureWarning for
# color= here and recommend palette='dark:black' instead - confirm for the
# installed version.
sns.stripplot(data=df, color="black", ax=ax, jitter=True, dodge=True, alpha=0.5)


# Add faint horizontal grid lines
ax.yaxis.grid(True, linestyle='dotted', linewidth=0.5, color='lightgray')



# Increase font size of y- and x-axis labels
ax.set_ylabel('values', fontsize=18)
# NOTE(review): ScalarFormatter only switches to exponent/offset notation once
# the tick values fall outside its power limits; the largest value here is
# below 1e6, which is presumably why plain numbers are still shown - confirm
# against the matplotlib ScalarFormatter documentation.
ax.yaxis.set_major_formatter(mticker.ScalarFormatter(useMathText=True))
ax.yaxis.get_offset_text().set_fontsize(12)  # Set font size for the offset (e.g., '1e6')


# Get the p-value from ANOVA test
# The F statistic is discarded; only the p-value is used below.
_, p_value = f_oneway(df['a'], df['b'], df['c'], df['d'], df['e'])

# Add ANOVA test result above the plot
significance = "*" if p_value < 0.05 else "ns"
y_position = df.values.max() + 50000  # Adjust y position
# NOTE(review): xy is given in data coordinates and y_position lies above the
# largest data value. annotate() may skip drawing an annotation whose point is
# outside the axes area (see its annotation_clip parameter), which is a likely
# reason the text never appears - confirm.
ax.annotate(f'ANOVA p-value: {p_value:.4f} {significance}', xy=(0.5, y_position), fontsize=12, ha='center')

# Set the x-axis tick labels
ax.set_xticklabels(['a', 'b', 'c', 'd', 'e'], fontsize=20)

# plt.tight_layout()

# Save the figure
# plt.savefig('box_plot_with_anova.png', dpi=360)

plt.show()

1 Answer

0

Scientific notation can be added by grabbing the tick information and formatting it the way you want, following the Python String Format Cookbook examples. I adjusted the section of your code named '# Increase font size of y- and x-axis labels' so that it builds the new tick labels in exponent notation. With that new code, I don't see a need for the line ax.yaxis.set_major_formatter(mticker.ScalarFormatter(useMathText=True)) or the one following it. I also made the y-axis label itself larger.

I've used ax.text() with Seaborn and it works for annotation.

Finally, I was seeing a warning about the use of color="black" for the stripplot, so I updated that to the recommended usage as well.

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from scipy.stats import f_oneway
import string
import matplotlib.ticker as mticker

# Answer script: box plot with an overlaid strip plot for five groups, y ticks
# shown in exponent notation, and a one-way ANOVA p-value written above the
# plot.

# Create the new dataframe: one column per group, nine observations in each
data = {
    'a': [533330.45, 488550.63, 433338.27, 63335.47, 613338.78, 658029.33, 423276.31, 486775.20, 465138.94],
    'b': [103336.32, 93337.35, 93336.19, 233305.77, 283336.51, 152925.39, 331697.83, 299425.14, 274197.59],
    'c': [252919.78, 193330.26, 205333.11, 134333.34, 123332.30, 123336.85, 153332.33, 195452.55, 167399.20],
    'd': [44133.07, 43963.73, 35705.67, 135454.11, 137261.84, 152548.42, 233336.48, 213787.00, 257127.76],
    'e': [38697.59, 43337.02, 33331.53, 34333.28, 63332.51, 43338.29, 34333.43, 42793.33, 63334.84]
}

df = pd.DataFrame(data)

# Box colour for each group, keyed by column name
color_palette = {
    'a': 'green',
    'b': 'lightgreen',
    'c': 'yellow',
    'd': 'lightyellow',
    'e': 'red'
}

# Create a figure with desired dimensions
plt.figure(figsize=(14, 10))

# Create the box plot using Seaborn with color code
ax = sns.boxplot(data=df, palette=color_palette)

# Add stripplot to display all data points.
# palette='dark:black' is used instead of color="black", which raised
# `FutureWarning: Setting a gradient palette using color= is deprecated and
# will be removed in version 0.13. Set `palette='dark:black'` for same effect.`
sns.stripplot(data=df, palette='dark:black', ax=ax, jitter=True, dodge=True, alpha=0.5)

# Add faint horizontal grid lines
ax.yaxis.grid(True, linestyle='dotted', linewidth=0.5, color='lightgray')

# Increase font size of y- and x-axis labels
ax.set_ylabel('values', fontsize=22)
# Format every y tick in exponent notation (e.g. 1.0E+05) with a formatter
# callback. Unlike fixed tick labels, the callback is re-evaluated whenever
# the tick locations change, so labels stay correct if the limits change.
ax.yaxis.set_major_formatter(mticker.FuncFormatter(lambda value, _pos: f"{value:.1E}"))
ax.tick_params(axis='y', labelsize=14)

# Get the p-value from ANOVA test, using every column of df as one group;
# the F statistic is not needed.
_, p_value = f_oneway(*(df[column] for column in df.columns))

# Add ANOVA test result above the plot
significance = "*" if p_value < 0.05 else "ns"
y_position = df.values.max() + 50000  # Adjust y position
# ax.text() is used rather than ax.annotate(): y_position sits above the data,
# and annotate() by default does not draw an annotation whose data-coordinate
# point lies outside the axes area.
ax.text(0.5, y_position, f'ANOVA p-value: {p_value:.4f} {significance}', fontsize=16, ha='center')

# Enlarge the x-axis tick labels; seaborn already labels them with the
# dataframe's column names, so only the font size is set here.
ax.tick_params(axis='x', labelsize=20)

# plt.tight_layout()

# Save the figure
# plt.savefig('box_plot_with_anova.png', dpi=360)

plt.show()
Wayne
  • 6,607
  • 8
  • 36
  • 93