0

Thanks to @Trenton McKinney, I know how to plot daily data against a 24-hour axis (00:00 - 23:59:59) from this question. In the following dataset, when I apply the custom sort (the custom_date_sorter function), the plot does not order the x-axis the way custom_date_sorter orders the rows. I want the x-axis to start at 12:00:00, pass through 00:00:00, and end at 11:59:59:

import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.ticker as tkr
from datetime import time as dtime
random.seed(0)

# Build the timestamps first so every randomly generated column is sized from
# the index itself rather than from a hard-coded row count.
timestamps = pd.date_range('2022-11-01', '2022-11-06 23:00:00', freq='20min')
n_rows = len(timestamps)

# NOTE: the order of the random draws (ID, then VALUE1, then VALUE2) is kept
# so the seeded data stays reproducible.
df = pd.DataFrame({'DATE_TIME': timestamps,
                   'ID': [random.randrange(1, 3) for _ in range(n_rows)]})
df['VALUE1'] = [random.uniform(110, 160) for _ in range(n_rows)]
df['VALUE2'] = [random.uniform(50, 80) for _ in range(n_rows)]
# INSPECTION is the day of the month of each timestamp.
df['INSPECTION'] = df['DATE_TIME'].dt.day
# df['INSPECTION'] = df['INSPECTION'].replace(6, 1)
# df['INSPECTION'] = df['INSPECTION'].replace(3, 1)

# MODE is 'A' for inspection 1, 'B' for inspections 2 and 3, 'C' otherwise.
df['MODE'] = np.select([df['INSPECTION'] == 1, df['INSPECTION'].isin([2, 3])], ['A', 'B'], 'C')
# Time of day as a string.
df['TIME'] = df['DATE_TIME'].dt.time.astype('str')

# Minutes elapsed since the previous row (0 for the first row).
# `.astype('timedelta64[m]')` is not supported by pandas >= 2.0, so the gap is
# converted through total_seconds() instead.
df['TIMEINTERVAL'] = df['DATE_TIME'].diff().dt.total_seconds().div(60)
df['TIMEINTERVAL'] = df['TIMEINTERVAL'].fillna(0)


def to_day_period(s):
    """Label each time-of-day value as 'Daytime' or 'Nighttime'.

    ``s`` is converted with ``pd.to_timedelta`` and binned into left-closed
    intervals: [00:00, 06:00) and [18:00, 24:00) are 'Nighttime', while
    [06:00, 18:00) is 'Daytime'. The result is whatever ``pd.cut`` returns
    for that input, with unordered categories.
    """
    edges = ['0', '06:00:00', '13:00:00', '18:00:00', '23:00:00', '24:00:00']
    names = ['Nighttime', 'Daytime', 'Daytime', 'Nighttime', 'Nighttime']

    elapsed = pd.to_timedelta(s)
    cut_points = [pd.Timedelta(edge) for edge in edges]

    # The labels repeat, which pd.cut only accepts with ordered=False.
    return pd.cut(elapsed, bins=cut_points, labels=names, right=False, ordered=False)


# Tag every row as 'Daytime' or 'Nighttime' based on its TIME string.
df['TIME_OF_DAY'] = to_day_period(df['TIME'])

# ++++++++++++++++++++++++++++++++ sns plot ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Keep only the rows for ID 1 that belong to inspections 1, 2 or 3.
is_first_id = df['ID'].eq(1)
in_plotted_inspections = df['INSPECTION'].isin([1, 2, 3])
df = df[is_first_id & in_plotted_inspections]

# +++++++++++++ CUSTOM SORTING +++++++++++++ 
def custom_date_sorter(s):
    """Return the sort rank of each timestamp for a 12:00-to-11:59 day.

    Values are ordered by calendar day first and, within a day, by the time
    of day after shifting the clock back 12 hours, so 12:00:00 ranks before
    00:00:00 of the same date. The returned array holds, for every element
    of ``s``, its position in that ordering (suitable as a ``sort_values``
    key).
    """
    stamps = pd.to_datetime(s)
    shifted_clock = (stamps - pd.Timedelta('12h')).dt.time
    calendar_day = stamps.dt.normalize()

    # lexsort treats the LAST key as the primary one: day first, then clock.
    order = np.lexsort((shifted_clock, calendar_day))
    # argsort of a permutation turns "order of indices" into per-row ranks.
    return np.argsort(order)

# NOTE: this only reorders the rows of df. seaborn.lineplot sorts the
# observations by x by default (sort=True), so the row order produced here
# does not move the origin of the plotted x-axis.
df = df.sort_values(by='DATE_TIME', key=custom_date_sorter)
# +++++++++++++ ++++++++++++ +++++++++++++

# sns.set() is an alias of set_theme() and applies the 'darkgrid' style
# itself, so a single call covers both the style and the rc override.
sns.set_theme(style='darkgrid', rc={'figure.figsize': (14, 8)})

# add a column for total seconds since midnight of each row's own date
# (vectorized; normalize() zeroes the whole time part of the timestamp)
df['total_seconds'] = (df['DATE_TIME'] - df['DATE_TIME'].dt.normalize()).dt.total_seconds()

# Tick positions and labels are identical for every figure, so build them once.
# One tick per hour from 00:00 up to and including 24:00; the final tick gets
# an empty label. Positions are stated explicitly instead of being sliced out
# of a locator's output, which depended on how many extra ticks it padded.
SECONDS_PER_HOUR = 3600
SECONDS_PER_DAY = 24 * SECONDS_PER_HOUR
hour_ticks = list(range(0, SECONDS_PER_DAY + 1, SECONDS_PER_HOUR))
hours = [dtime(i).strftime('%H:%M') for i in range(24)] + ['']

# iterate through each ID, drawing one figure per ID
for id_ in sorted(df.ID.unique()):
    # select the data for the given id_
    data = df[df.ID.eq(id_)]

    # create a figure with a single axes and draw on it explicitly
    fig, ax = plt.subplots(figsize=(10, 6))

    # plot the data, one line per INSPECTION value
    sns.lineplot(data=data, x='total_seconds', y='VALUE1', hue='INSPECTION',
                 palette='viridis', legend='full', ax=ax)

    # set the title and labels
    ax.set(title=f'ID: {id_}', xlabel='TIME', ylabel='VALUE1')

    # move the legend outside the axes, to the right
    sns.move_legend(ax, bbox_to_anchor=(1.0, 0.5), loc='center left', frameon=False)

    # constrain the x-axis limits to the number of seconds in a day
    ax.set_xlim(0, SECONDS_PER_DAY)

    # place a labelled tick at every hour
    ax.set_xticks(ticks=hour_ticks, labels=hours, rotation=90)

    # remove spines
    ax.spines[['top', 'right']].set_visible(False)
plt.show()

How can I implement custom sorting when plotting daily data against the 24-hour axis shown above, so that the x-axis starts at 12:00:00 and ends at 11:59:59? Please note that the data must still be reflected correctly: shifting the time labels without shifting the data along with them would not be helpful.

dsapprentice
  • 114
  • 12

1 Answer

0

Instead of DATE_TIME, I should have used TIME for the custom sorting! So, adding these lines:

def custom_time_sorter(s):
    """Return the sort rank of each time value for a 12:00-to-11:59 day.

    ``s`` is parsed with ``pd.to_datetime``; values are ordered by the
    parsed date first and then by the time of day after shifting the clock
    back 12 hours, so 12:00:00 ranks before 00:00:00. The returned array
    holds each element's position in that ordering (suitable as a
    ``sort_values`` key).
    """
    parsed = pd.to_datetime(s)
    shifted_clock = (parsed - pd.Timedelta('12h')).dt.time
    parsed_day = parsed.dt.normalize()

    # lexsort treats the LAST key as the primary one: day first, then clock.
    order = np.lexsort((shifted_clock, parsed_day))
    # argsort of a permutation turns "order of indices" into per-row ranks.
    return np.argsort(order)

# Reorder the rows by TIME using the 12-hour-shifted ranking from
# custom_time_sorter, so rows at 12:00:00 come first.
# NOTE(review): seaborn.lineplot sorts by x by default (sort=True) - confirm
# that this row order alone changes the plotted x-axis.
df = df.sort_values(by='TIME', key=custom_time_sorter)

after

df = df[(df['ID'] == 1) & (df['INSPECTION'].isin([1, 2, 3]))]

solves the issue.

dsapprentice
  • 114
  • 12