0

I am able to plot a seaborn barplot and lineplot on the same plot with the axes aligned at 0 and the same number of tickers. However, I cannot cut off any empty spaces dynamically. The code for the plot can be found below. What I would like to end up with is the same graph I have attached, but with the tickers ending at -49.3 and -13.63 (to get rid of any blank spaces). I do not want to hard-code it; it needs to be dynamic so that it is able to adjust for any input data. Also, both axes still need to align at 0 and have the same number of tickers.

import numpy as np
import pandas as pd
import math
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as md
import matplotlib.ticker as ticker
from datetime import datetime, timedelta

# Build two synthetic daily integer series that share one date axis
date_today = datetime.now()
days = pd.date_range(start=date_today, end=date_today + timedelta(days=1000), freq='D')

# Fixed seed so both random series are the same on every run
np.random.seed(seed=1111)
n_days = len(days)
data_a = np.random.randint(-10, high=20, size=n_days)
data_b = np.random.randint(-30, high=70, size=n_days)

a = pd.DataFrame({'date': days, 'a': data_a}).set_index('date')
b = pd.DataFrame({'date': days, 'b': data_b}).set_index('date')

# Frame used for plotting: columns 'a' and 'b', indexed by plain dates
# (the time-of-day part of each timestamp is dropped)
result = (
    pd.concat([a, b], axis=1)
    .reset_index()
    .assign(date=lambda frame: frame['date'].dt.date)
    .set_index('date')
)

# Figure with two y-axes sharing the same x-axis
matplotlib.rc_file_defaults()
sns.set_style(style=None, rc=None)
fig, ax1 = plt.subplots(figsize=(12, 6))
ax2 = ax1.twinx()

# Same data with 'date' as an ordinary column (columns: date, a, b)
result_date = result.reset_index()

# Bars for series 'b' on the left axis
b_plot = sns.barplot(
    data=result_date,
    x=result_date['date'],
    y=result_date['b'],
    ax=ax1,
)

# Points/line for series 'a' on the right axis
a_plot = sns.pointplot(
    data=result,
    x=result.index,
    y=result['a'],
    color="black",
    ax=ax2,
    markers='o',
    scale=0.4,
)

# Label the x positions with the year of each date, then let matplotlib
# choose which of them to display
year_labels = [str(day.year) for day in result_date['date']]
ax1.set_xticks(np.arange(len(result_date)))
ax1.set_xticklabels(year_labels)
ax1.xaxis.set_major_locator(ticker.AutoLocator())

# Align both y-axes at 0: each axis gets `nticks` evenly spaced ticks over a
# range that is symmetric around zero (odd count so that 0 is itself a tick)
nticks = 7
max1 = np.nanmax(np.abs(ax1.get_ybound()))  # nanmax in case of NaN bounds
max2 = np.nanmax(np.abs(ax2.get_ybound()))
ax1.set_yticks(np.linspace(-max1, max1, nticks))
ax2.set_yticks(np.linspace(-max2, max2, nticks))

plot

EDIT: Here is another method I tried using answers from other posts, but it still does not achieve what I want:

import numpy as np
import pandas as pd
import math
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as md
import matplotlib.ticker as ticker
from datetime import datetime, timedelta

# Build two synthetic daily integer series that share one date axis
date_today = datetime.now()
days = pd.date_range(start=date_today, end=date_today + timedelta(days=1000), freq='D')

# Fixed seed so both random series are the same on every run
np.random.seed(seed=1111)
n_days = len(days)
data_a = np.random.randint(-10, high=20, size=n_days)
data_b = np.random.randint(-30, high=70, size=n_days)

a = pd.DataFrame({'date': days, 'a': data_a}).set_index('date')
b = pd.DataFrame({'date': days, 'b': data_b}).set_index('date')

# Frame used for plotting: columns 'a' and 'b', indexed by plain dates
result = (
    pd.concat([a, b], axis=1)
    .reset_index()
    .assign(date=lambda frame: frame['date'].dt.date)
    .set_index('date')
)

# Figure with two y-axes sharing the same x-axis
matplotlib.rc_file_defaults()
sns.set_style(style=None, rc=None)
fig, ax1 = plt.subplots(figsize=(12, 6))
ax2 = ax1.twinx()

# Same data with 'date' as an ordinary column (columns: date, a, b)
result_date = result.reset_index()

# Bars for series 'b' on the left axis
b_plot = sns.barplot(
    data=result_date,
    x=result_date['date'],
    y=result_date['b'],
    ax=ax1,
)

# Points/line for series 'a' on the right axis
a_plot = sns.pointplot(
    data=result,
    x=result.index,
    y=result['a'],
    color="black",
    ax=ax2,
    markers='o',
    scale=0.4,
)

# Label the x positions with the year of each date, then let matplotlib
# choose which of them to display
year_labels = [str(day.year) for day in result_date['date']]
ax1.set_xticks(np.arange(len(result_date)))
ax1.set_xticklabels(year_labels)
ax1.xaxis.set_major_locator(ticker.AutoLocator())

# y-limits chosen by the plotting calls, and the lowest/highest ratio of each
ax1_ylims = ax1.get_ylim()
ax1_yratio = ax1_ylims[0] / ax1_ylims[1]

ax2_ylims = ax2.get_ylim()
ax2_yratio = ax2_ylims[0] / ax2_ylims[1]

# The axis with the smaller ratio is the reference. The other axis gets a new
# bottom limit so that its lowest/highest ratio matches the reference, and is
# then given as many evenly spaced ticks as the reference axis has.
if ax1_yratio < ax2_yratio:
    reference_ax, adjusted_ax = ax1, ax2
    adjusted_top, target_ratio = ax2_ylims[1], ax1_yratio
else:
    reference_ax, adjusted_ax = ax2, ax1
    adjusted_top, target_ratio = ax1_ylims[1], ax2_yratio

adjusted_ax.set_ylim(bottom=adjusted_top * target_ratio)
nticks = len(reference_ax.yaxis.get_ticklabels())  # tick count of the reference axis
new_bottom, new_top = adjusted_ax.get_ylim()
adjusted_ax.set_yticks(np.linspace(new_bottom, new_top, nticks))

plot 2

galaxy_d
  • 23
  • 1
  • 9
  • The discussions at your previous questions solve everything you need; you just need to combine things. You can e.g. use [this approach](https://stackoverflow.com/a/65824524/12046409) which sets the bottom limit, and apply that same approach also for the top limit. You can just keep everything else from your current solution, maybe with more ticks, as some ticks will be cut away by moving either the top or the bottom limit. Note that most people prefer "nice" ticks instead of the fractions that come out of `np.linspace`. – JohanC Mar 11 '22 at 16:21
  • Did you test out the library mentioned in [this answer](https://stackoverflow.com/a/64956389/12046409)? Didn't it work for you? – JohanC Mar 11 '22 at 16:23
  • @JohanC as per the discussions in my previous question, combining the solutions doesn't work - it gets rid of the blank spaces but won't keep the same number of tickers – galaxy_d Mar 11 '22 at 16:41
  • If you just keep `ax1.set_yticks(np.linspace(-max1, max1, nticks))` and `ax2.set_yticks(np.linspace(-max2, max2, nticks))` into the code, without changes, both y-axes will have the same ticks. Afterwards, changing the top or bottom limits will cut away a few of those ticks, but there still will be the same number. If you really want to, you could then count how many ticks stay visible and increase the `nticks` parameter. Note that just writing `"combining the solutions doesn't work"` without showing your exact code, isn't very helpful for people trying to help you. – JohanC Mar 11 '22 at 17:15
  • @JohanC As per my previous question (where I had also added an edit to show why combining doesn't work), I had added an edit to show what I mean – galaxy_d Mar 11 '22 at 17:32
  • @JohanC if there is an actual segment of code you would like to suggest, I would be happy to try it out. However, I have already gone through answers on other pages and tried changing/combining etc without getting what I am trying to achieve. Hence this question – galaxy_d Mar 11 '22 at 17:38

2 Answers

1

Here is the solution I have come up with which works for datasets with different ranges. It also ensures the tickers have equal spaces between them. Please let me know if there is a way to make it more concise.

import numpy as np
import pandas as pd
import math
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as md
import matplotlib.ticker as ticker
from datetime import datetime, timedelta

# create dataframes that will be used
date_today = datetime.now()
days = pd.date_range(date_today, date_today + timedelta(1000), freq='D')

np.random.seed(seed=1111)
data_a = np.random.randint(-9, high=30, size=len(days))
data_b = np.random.randint(-20, high=70, size=len(days))

a = pd.DataFrame({'date': days, 'a': data_a}).set_index('date')
b = pd.DataFrame({'date': days, 'b': data_b}).set_index('date')

# result dataframe which will be used for the plotting
result = pd.concat([a, b], axis=1)

# make sure only the dates are being used
result = result.reset_index()
result['date'] = result['date'].dt.date
result = result.set_index('date')

# set-up for the plot
matplotlib.rc_file_defaults()
sns.set_style(style=None, rc=None)
fig, ax1 = plt.subplots(figsize=(12, 6))
ax2 = ax1.twinx()

# bar plot (columns of result_date: date, a, b)
result_date = result.reset_index()

b_plot = sns.barplot(data=result_date, x=result_date['date'], y=result_date['b'], ax=ax1)

# pointplot
a_plot = sns.pointplot(data=result, x=result.index, y=result['a'], color="black", ax=ax2, markers='o', scale=0.4)

# set the x tickers to be those of the bar plot
ax1.set_xticks(np.arange(len(result_date)))
ax1.set_xticklabels(result_date['date'].apply(lambda x: str(x.year)))
ax1.xaxis.set_major_locator(ticker.AutoLocator())

# get the max and min values for both plot datasets
max_value_1 = result_date['b'].max()
min_value_1 = result_date['b'].min()
max_value_2 = result['a'].max()
min_value_2 = result['a'].min()

# to align the axes and make them start at 0: give both axes the same number
# of evenly spaced ticks over a range that is symmetric around zero
max1 = np.nanmax(np.abs(ax1.get_ybound()))  # in case you have nan values
max2 = np.nanmax(np.abs(ax2.get_ybound()))
nticks = 7  # or other odd number, so that 0 is itself a tick
ax1.set_yticks(np.linspace(-max1, max1, nticks))
ax2.set_yticks(np.linspace(-max2, max2, nticks))

# y axis value arrays going from -ve to +ve values
ax1_y_ticks = ax1.get_yticks()
ax2_y_ticks = ax2.get_yticks()


def _covering_tick_indices(ticks, data_min, data_max):
    """Return the indexes of the tightest pair of ticks that enclose the data.

    ``ticks`` must be sorted in ascending order. The first index is that of
    the last tick at or below ``data_min``; the second is that of the first
    tick at or above ``data_max``. If no tick lies on the required side, the
    outermost tick is used instead of failing.
    """
    last = len(ticks) - 1
    lower = max(int(np.searchsorted(ticks, data_min, side='right')) - 1, 0)
    upper = min(int(np.searchsorted(ticks, data_max, side='left')), last)
    return lower, upper


# get the indexes for the upper and lower limits of each y-axis
ax1_lower_ticker_element, ax1_upper_ticker_element = _covering_tick_indices(
    ax1_y_ticks, min_value_1, max_value_1)
ax2_lower_ticker_element, ax2_upper_ticker_element = _covering_tick_indices(
    ax2_y_ticks, min_value_2, max_value_2)

# use the widest window of tick indexes so that neither dataset is cut off;
# applying the SAME index window to both axes keeps the tick count equal and
# keeps the zero tick at the same height on both sides
lower_ticker_element = min(ax1_lower_ticker_element, ax2_lower_ticker_element)
upper_ticker_element = max(ax1_upper_ticker_element, ax2_upper_ticker_element)
tick_window = slice(lower_ticker_element, upper_ticker_element + 1)

# setting y-axis for ax1
ax1_rng = ax1_y_ticks[tick_window]
ax1.set_yticks(ax1_rng)
ax1.set_ylim(bottom=ax1_rng[0], top=ax1_rng[-1])

# setting y-axis for ax2
ax2_rng = ax2_y_ticks[tick_window]
ax2.set_yticks(ax2_rng)
ax2.set_ylim(bottom=ax2_rng[0], top=ax2_rng[-1])

Here is the final plot

enter image description here

Mr. T
  • 11,960
  • 10
  • 32
  • 54
galaxy_d
  • 23
  • 1
  • 9
0

What about:

import numpy as np
import pandas as pd
import math
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as md
import matplotlib.ticker as ticker
from datetime import datetime, timedelta

# create dataframes that will be used
date_today = datetime.now()
days = pd.date_range(date_today, date_today + timedelta(1000), freq='D')

np.random.seed(seed=1111)
data_a = np.random.randint(-10, high=20, size=len(days))
data_b = np.random.randint(-30, high=70, size=len(days))

a = pd.DataFrame({'date': days, 'a': data_a})
a = a.set_index('date')

b = pd.DataFrame({'date': days, 'b': data_b})
b = b.set_index('date')

# result dataframe which will be used for the plotting
result = pd.concat([a, b], axis=1)

# make sure only the dates are being used
result = result.reset_index()
result['date'] = result['date'].dt.date
result = result.set_index('date')

# set-up for the plot
matplotlib.rc_file_defaults()
sns.set_style(style=None, rc=None)
fig, ax1 = plt.subplots(figsize=(12, 6))
ax2 = ax1.twinx()

# bar plot (columns of result_date: date, a, b)
result_date = result.reset_index()

b_plot = sns.barplot(data=result_date, x=result_date.iloc[:, 0], y=result_date.iloc[:, 2], ax=ax1)

# pointplot
a_plot = sns.pointplot(data=result, x=result.index, y=result.iloc[:, 0], color="black", ax=ax2, markers='o', scale=0.4)

# set the x tickers to be those of the bar plot
ax1.set_xticks(np.arange(len(result_date)))
ax1.set_xticklabels(result_date.date.apply(lambda x: str(x.year)))
ax1.xaxis.set_major_locator(ticker.AutoLocator())

# get the original ylims (before setting the yticks, which can widen them)
ax1_ylims = ax1.get_ylim()
ax1_yratio = ax1_ylims[0] / ax1_ylims[1]  # ratio of lowest limit to highest limit
ax2_ylims = ax2.get_ylim()
ax2_yratio = ax2_ylims[0] / ax2_ylims[1]  # ratio of lowest limit to highest limit

# to align the axes and make them start at 0: same number of evenly spaced
# ticks on both axes, over a range that is symmetric around zero
max1 = np.nanmax(np.abs(ax1.get_ybound()))  # in case you have nan values
max2 = np.nanmax(np.abs(ax2.get_ybound()))
nticks = 7  # or other odd number, so that 0 is itself a tick
ax1.set_yticks(np.linspace(-max1, max1, nticks))
ax2.set_yticks(np.linspace(-max2, max2, nticks))

# The axis with the smaller lowest/highest ratio keeps its original limits.
# The other axis keeps its original top and gets a new bottom so that both
# axes end up with the same ratio, which puts their zeros at the same height.
# The top is passed explicitly: set_yticks above can have widened the view to
# +/-max, and setting only `bottom` would leave that widened top in place,
# breaking the alignment whenever the original |bottom| exceeded the top.
if ax1_yratio < ax2_yratio:
    ax2.set_ylim(bottom=ax2_ylims[1] * ax1_yratio, top=ax2_ylims[1])
    ax1.set_ylim(ax1_ylims)
else:
    ax1.set_ylim(bottom=ax1_ylims[1] * ax2_yratio, top=ax1_ylims[1])
    ax2.set_ylim(ax2_ylims)
plt.show()

adjusting limits

JohanC
  • 71,591
  • 8
  • 33
  • 66
  • Thank you @JohanC ! I had tried this but in a different order (I had the aligning at 0 code before the getting the original limits code) which is why it hadn't worked. It makes sense why adding it after that, and before the if statement works :) thanks again – galaxy_d Mar 14 '22 at 09:13
  • @Mr.T Unfortunately it doesn't work in all cases. It worked for this specific dataset. I am trying to adjust the solution so that it works in all scenarios – galaxy_d Mar 14 '22 at 11:53