Can someone help me with this function to make dummies:
def make_dummies(df):
# Create dummies for all hours of the day
hours = pd.get_dummies(df.index.hour, prefix='hour')
# Create columns for hour, day of week, weekend, and month
df['hour'] = df.index.strftime('%H')
df['day_of_week'] = df.index.dayofweek
df['weekend'] = np.where(df['day_of_week'].isin([5,6]), 1, 0)
df['month'] = df.index.month
# Create dummies for hours of the day
hour_dummies = pd.get_dummies(df['hour'], prefix='hour')
# Create dummies for all days of the week
day_mapping = {0: 'monday', 1: 'tuesday', 2: 'wednesday', 3: 'thursday', 4: 'friday', 5: 'saturday', 6: 'sunday'}
all_days = pd.Categorical(df['day_of_week'].map(day_mapping), categories=day_mapping.values())
day_dummies = pd.get_dummies(all_days)
# Create dummies for all months of the year
month_mapping = {1: 'jan', 2: 'feb', 3: 'mar', 4: 'apr', 5: 'may', 6: 'jun', 7: 'jul',
8: 'aug', 9: 'sep', 10: 'oct', 11: 'nov', 12: 'dec'}
all_months = pd.Categorical(df['month'].map(month_mapping), categories=month_mapping.values())
month_dummies = pd.get_dummies(all_months)
# Merge all dummies with original DataFrame
df = pd.concat([df, hours, hour_dummies, day_dummies, month_dummies], axis=1)
# Drop redundant columns
df = df.drop(['hour', 'day_of_week', 'month'], axis=1)
return df
On a small dataset like this:
import pandas as pd
import numpy as np
data = {"temp":[53.13,52.93,52.56,51.58,47.57],
"Date":["2023-04-07 15:00:00-05:00","2023-04-07 16:00:00-05:00","2023-04-07 17:00:00-05:00","2023-04-07 18:00:00-05:00","2023-04-07 19:00:00-05:00"]
}
df = pd.DataFrame(data).set_index("Date")
# Converting the index as date
df.index = pd.to_datetime(df.index)
df = make_dummies(df)
print(df)
This wont merge the data correctly. I apologize for the screenshot but the function is just stacking dummy variables beneath where what I was hoping for is ALL dummy variables would be added to the df and not stacked beneath. Hopefully this makes sense, was hoping to make a function that creates all dummy variables for each hour, month, and day type.