I think the reason you're getting this error is because the data_df
is an empty dataframe due to no rows satisfy the condition data_df['hour_local'].isin(target_hours)
, causing all hour_shift
column values to be NaT
, making all rows to be dropped at data_df = data_df.dropna(subset=['hour_shift'])
. You can test this by using the sample data that has hour_local
values that satisfy the condition vs that doesn't
Satisfy condition:
from datetime import datetime
from datetime import timedelta
import time
import pandas as pd
data_df = pd.DataFrame({'local_time': [datetime.strptime("08:30:00",'%H:%M:%S'), datetime.strptime("08:24:00",'%H:%M:%S')], 'product_name': ['A', 'B']})
delta = timedelta(minutes=5)
# Start time
start_time = datetime.strptime("08:20:00",'%H:%M:%S')
cur_time = start_time
target_hours = []
while cur_time.date() <= start_time.date():
target_hours.append(cur_time.time())
cur_time += delta
data_df['hour_local'] = pd.to_datetime(data_df["local_time"].astype(str)).dt.time
data_df = data_df.drop(columns=['hour_shift'], errors='ignore')
data_df.loc[data_df['hour_local'].isin(target_hours),'hour_shift'] = data_df['local_time']
data_df = data_df.sort_values(by=['local_time'])
data_df['hour_shift'] = data_df['hour_shift'].ffill()
data_df = data_df.dropna(subset=['hour_shift'])
# This will print dataframe with one row
print(data_df)
data_df['id_cont'] = data_df.apply(lambda row:row['product_name']+'- '+row['hour_shift'].strftime('%Y-%m-%d %H:%M:%S'),axis=1)
print(data_df)
Not satisfy condition:
from datetime import datetime
from datetime import timedelta
import time
import pandas as pd
# NOTE: no data satisfy the below condition
data_df = pd.DataFrame({'local_time': [datetime.strptime("08:31:00",'%H:%M:%S'), datetime.strptime("08:24:00",'%H:%M:%S')], 'product_name': ['A', 'B']})
delta = timedelta(minutes=5)
# Start time
start_time = datetime.strptime("08:20:00",'%H:%M:%S')
cur_time = start_time
target_hours = []
while cur_time.date() <= start_time.date():
target_hours.append(cur_time.time())
cur_time += delta
data_df['hour_local'] = pd.to_datetime(data_df["local_time"].astype(str)).dt.time
data_df = data_df.drop(columns=['hour_shift'], errors='ignore')
data_df.loc[data_df['hour_local'].isin(target_hours),'hour_shift'] = data_df['local_time']
data_df = data_df.sort_values(by=['local_time'])
data_df['hour_shift'] = data_df['hour_shift'].ffill()
data_df = data_df.dropna(subset=['hour_shift'])
# This will print empty dataframe
print(data_df)
data_df['id_cont'] = data_df.apply(lambda row:row['product_name']+'- '+row['hour_shift'].strftime('%Y-%m-%d %H:%M:%S'),axis=1)
One way I think you can avoid this error is the add a check to only run the apply line if the dataframe is not empty
if len(data_df):
data_df['id_cont'] = data_df.apply(lambda row:row['product_name']+'- '+row['hour_shift'].strftime('%Y-%m-%d %H:%M:%S'),axis=1)
print(data_df)