You have mentioned wanting to plot the evolution of the gases with time, but in the code sample you have given, you use wind_dir
as the x variable. In this answer, I disregard this and use time as the x variable instead.
Looking at your code, I understand that you want to create two different figures made of small multiples, one for gas concentrations and one for gas fluxes. For this kind of plot, I recommend using pandas or seaborn so that you can plot all the variables contained in a pandas dataframe at once. Here I share an example using pandas.
Because you want to plot different measurements of the same substances, I recommend creating a table that lists the names of the variables and units associated with each unique substance (see df_subs
below). I create one using code to extract the units and share it here, but this is easier to do with spreadsheet software.
Having a table like that makes it easier to create a plotting function that selects the group of variables you want to plot from the ec_top
dataframe. You can then use the pandas plotting function like this: df.plot(subplots=True).
Most of the code shown below is to create some sample data based on your code to make it possible for you to recreate exactly what I show here and for anyone else who would like to give this a try. If you want to use this solution, you can skip most of it: all you need to do is create the substances table your way and then adjust the plotting function to fit your preferences.
Create sample dataset
import io # from Python v 3.8.5
import numpy as np # v 1.19.2
import pandas as pd # v 1.1.3
import matplotlib.pyplot as plt # v 3.3.2
import matplotlib.dates as mdates
# Limit how many columns pandas prints so wide tables stay readable
pd.set_option("display.max_columns", 6)
# Seeded random number generator so the sample data is reproducible
rng = np.random.default_rng(seed=1)

# Variable names copied from the sample given in the question, one quoted
# name per line
var_strings = '''
"M 33(ppbv)"
"M 39(ncps)"
"M 45(ppbv)"
"M 59(ppbv)"
"M 69(ppbv)"
"M 71(ppbv)"
"M 81(ppbv)"
"M 137(ppbv)"
"M 87(ppbv)"
"M 47(ppbv)"
"M 61(ppbv)"
"Flux_M 33"
"Flux_M 45"
"Flux_M 59"
"Flux_M 69"
"Flux_M 71"
"Flux_M 81"
"Flux_M 137"
"Flux_M 87"
"Flux_M 47"
"Flux_M 61"
'''

# Parse the quoted names into a single-column table, then keep that column
# as a Series of variable names
var_buffer = io.StringIO(var_strings)
var_table = pd.read_csv(var_buffer, header=None, names=['var'])
variables = var_table['var']
# Datetime index: one timestamp per minute, starting at noon
nperiods = 60
time = pd.date_range('2021-01-15 12:00', periods=nperiods, freq='min')

# Evenly spaced angles over one full period, used to compute the sine waves
# that give the fake data its shape
x = np.linspace(0, 2*np.pi, nperiods)

# Gas concentrations: the variables whose name carries a unit in parentheses.
# Each column is a shared sine wave plus normally distributed noise whose
# mean is drawn from an exponential distribution and whose spread is a tenth
# of that mean.
var_conc = np.array([name for name in variables if '(' in name])
conc_sine_wave = np.sin(x).reshape(len(x), 1)
loc = rng.exponential(scale=10, size=var_conc.size)
scale = loc/10
var_conc_noise = rng.normal(loc, scale, size=(x.size, var_conc.size))
data_conc = conc_sine_wave + var_conc_noise + 2
df_conc = pd.DataFrame(data_conc, index=time, columns=var_conc)

# Gas fluxes: the variables whose name contains 'Flux', built the same way
# but on a squared sine wave
var_flux = np.array([name for name in variables if 'Flux' in name])
flux_sine_wave = (np.sin(x)**2).reshape(len(x), 1)
loc = rng.exponential(scale=10, size=var_flux.size)
scale = loc/10
var_flux_noise = rng.normal(loc, scale, size=(x.size, var_flux.size))
data_flux = flux_sine_wave + var_flux_noise + 1
df_flux = pd.DataFrame(data_flux, index=time, columns=var_flux)

# Combine concentrations and fluxes side by side, aligned on the shared
# datetime index
ec_top = pd.merge(df_conc, df_flux, how='outer',
                  left_index=True, right_index=True)
ec_top.head()
# M 33(ppbv) M 39(ncps) M 45(ppbv) ... Flux_M 87 Flux_M 47 Flux_M 61
# 2021-01-15 12:00:00 11.940054 5.034281 53.162767 ... 8.079255 2.402073 31.383911
# 2021-01-15 12:01:00 13.916828 4.354558 45.706391 ... 10.229084 2.494649 26.816754
# 2021-01-15 12:02:00 13.635604 5.500438 53.202743 ... 12.772899 2.441369 33.219213
# 2021-01-15 12:03:00 13.146823 5.409585 53.346907 ... 11.373669 2.817323 33.409331
# 2021-01-15 12:04:00 14.124752 5.491555 49.455010 ... 11.827497 2.939942 28.639749
Create substances table containing variable names and units
The substances are shown in the figure subplots in the order that they are listed here. Information from this table is used to create the labels and titles of the subplots.
# Substance codes and names copied from the sample given in the question:
# one substance per line, the code first, then the quoted name
subs_strings = """
M33 "Methanol"
M39 "Water cluster"
M45 "Acetaldehyde"
M47 "Unknown"
M59 "Acetone"
M61 "Unknown"
M69 "Isoprene"
M71 "Methyl vinyl, ketone and methacrolein"
M81 "Fragment of monoterpenes"
M87 "Methylbutenols"
M137 "Monoterpenes"
"""
# Create dataframe containing substance codes and names. Fields are split on
# runs of whitespace; the double quotes keep multi-word names together as a
# single field. sep=r'\s+' is used instead of delim_whitespace=True because
# the latter is deprecated in recent pandas versions.
df_subs = pd.read_csv(io.StringIO(subs_strings), header=None,
                      names=['subs', 'subs_name'], index_col=False,
                      sep=r'\s+')
# Add units and variable names matching the substance codes.
# The numeric part of every substance code ('M33' -> '33') is computed once,
# so each variable can be matched against the whole column with a boolean
# mask. Selecting rows with a mask also avoids relying on the row labels of
# df_subs being 0, 1, 2, ...
subs_nums = df_subs['subs'].str.lstrip('M')

# Do this for gas concentrations: names look like 'M 33(ppbv)', so the text
# before '(' holds the substance number and the text after it holds the unit
for var in var_conc:
    var_subs, var_unit_raw = var.split('(')
    var_subs_num = var_subs.lstrip('M ')
    var_unit = var_unit_raw.rstrip(')')
    is_match = subs_nums == var_subs_num
    df_subs.loc[is_match, 'conc_unit'] = var_unit
    df_subs.loc[is_match, 'conc_var'] = var

# Do this for gas fluxes: names look like 'Flux_M 33' and carry no unit, so
# a placeholder unit is drawn at random for each flux variable
for var in var_flux:
    var_subs_num = var.split('M')[1].lstrip()
    var_unit = rng.choice(['unit_a', 'unit_b', 'unit_c'])
    is_match = subs_nums == var_subs_num
    df_subs.loc[is_match, 'flux_unit'] = var_unit
    df_subs.loc[is_match, 'flux_var'] = var
df_subs
# subs subs_name conc_unit conc_var flux_unit flux_var
# 0 M33 Methanol ppbv M 33(ppbv) unit_c Flux_M 33
# 1 M39 Water cluster ncps M 39(ncps) NaN NaN
# 2 M45 Acetaldehyde ppbv M 45(ppbv) unit_a Flux_M 45
# 3 M47 Unknown ppbv M 47(ppbv) unit_b Flux_M 47
# 4 M59 Acetone ppbv M 59(ppbv) unit_a Flux_M 59
# 5 M61 Unknown ppbv M 61(ppbv) unit_c Flux_M 61
# 6 M69 Isoprene ppbv M 69(ppbv) unit_a Flux_M 69
# 7 M71 Methyl vinyl, ketone and methacrolein ppbv M 71(ppbv) unit_a Flux_M 71
# 8 M81 Fragment of monoterpenes ppbv M 81(ppbv) unit_c Flux_M 81
# 9 M87 Methylbutenols ppbv M 87(ppbv) unit_c Flux_M 87
# 10 M137 Monoterpenes ppbv M 137(ppbv) unit_b Flux_M 137
Create plotting function based on pandas
Here is one way of creating a plotting function that lets you select the variables for the plot with the graph_type
argument. It works by selecting the relevant variables from the substances table using the if/elif
statement. This and the ec_top[variables].plot(...)
function are all that is really necessary to create the plot; the rest is all for formatting the figure. The variables are plotted in the order of the variables
list. I draw only two columns of subplots because of width constraints here (max 10 inches width to get a sharp image on Stack Overflow).
# Create plotting function that creates a single figure showing all
# variables of the chosen type
def plot_grid(graph_type):
    """Draw one figure of small multiples for the chosen type of variable.

    The variables to plot, and the text used for the legends, y labels and
    subplot titles, are looked up in the module-level substances table
    ``df_subs``; the data are read from the module-level dataframe
    ``ec_top``. The figure is displayed with ``plt.show()``.

    Parameters
    ----------
    graph_type : str
        Any string containing 'conc' (gas concentrations) or 'flux' (gas
        fluxes). 'conc' takes precedence if both are present.

    Raises
    ------
    ValueError
        If graph_type contains neither 'conc' nor 'flux'.
    """
    # Set the type of variables and units to fetch in df_subs: using
    # substring checks lets you use a variety of strings
    if 'conc' in graph_type:
        var_type, unit_type = 'conc_var', 'conc_unit'
        ylabel_type, title_type = 'Concentration', 'concentrations'
    elif 'flux' in graph_type:
        var_type, unit_type = 'flux_var', 'flux_unit'
        ylabel_type, title_type = 'Flux', 'fluxes'
    else:
        # Raise instead of returning a message: a returned string is
        # silently discarded when the function is called from a script
        raise ValueError(
            f'"{graph_type}" is not a valid string, '
            'it must contain "conc" or "flux".'
        )

    # Keep the substances that have a variable of this type and index them
    # by variable name, so that each subplot can fetch its labels with a
    # single lookup. The variables are plotted in the order of this table.
    subs_info = df_subs.dropna(subset=[var_type]).set_index(var_type)
    variables = list(subs_info.index)

    # Set parameters for figure dimensions
    nvar = len(variables)
    cols = 2
    rows = int(np.ceil(nvar/cols))
    width = 10/cols
    height = 3

    # Draw grid of line plots: note that x_compat is used to override the
    # default x-axis time labels, remove it if you do not want to use custom
    # tick locators and formatters like the ones created in the loop below
    grid = ec_top[variables].plot(subplots=True,
                                  figsize=(cols*width, rows*height),
                                  layout=(rows, cols), marker='.',
                                  linestyle='', xlabel='Time', x_compat=True)

    # The code in the following loop is optional formatting, if you remove
    # it the plot should still look ok but with fewer informative labels and
    # the legends may not all be in the same place.
    # Loop through the subplots that hold a variable to edit the format,
    # including creating labels and titles based on the information in the
    # substances table (df_subs):
    for ax in grid.flatten()[:nvar]:
        # Edit tick locations and format
        plt.setp(ax.get_xticklabels(which='both'), fontsize=8, rotation=0,
                 ha='center')
        locator = mdates.AutoDateLocator()
        ax.xaxis.set_major_locator(locator)
        ax.set_xticks([], minor=True)
        formatter = mdates.ConciseDateFormatter(locator, show_offset=False)
        ax.xaxis.set_major_formatter(formatter)

        # Each subplot holds exactly one line, whose legend label is the
        # variable name: use it to fetch the matching row of the table
        handles, (var_name,) = ax.get_legend_handles_labels()
        info = subs_info.loc[var_name]

        # Edit legend to show the substance code instead of the variable name
        ax.legend(handles, [info['subs']], loc='upper right')
        # Add y label
        ax.set_ylabel(f'{ylabel_type} [{info[unit_type]}]')
        # Add title
        ax.set_title(info['subs_name'])

    # Edit figure format: the date and time range shown in the title are
    # read from the index of the plotted data
    fig = grid.flat[0].get_figure()
    start, end = ec_top.index[0], ec_top.index[-1]
    date = start.strftime('%b %d %Y')
    fig.suptitle(f'BVOCs {title_type} on {date} '
                 f'from {start.strftime("%H:%M")} to {end.strftime("%H:%M")}',
                 y=0.93, fontsize=15)
    fig.subplots_adjust(wspace=0.3, hspace=0.4)
    plt.show()
# Draw the concentrations figure, then the fluxes figure: any kind of string
# works if it contains 'conc' or 'flux'
for graph_type in ('conc', 'graph fluxes'):
    plot_grid(graph_type)

Documentation: matplotlib date ticks