I want to drop rows before current week from my dataframe. The intended code is not working though, as the single digit weeks are still showing up. Is there a better way?
import pandas as pd
import numpy as np
from datetime import date, datetime, timedelta
data = {
"Year": [2019, 2020, 2020, 2020, 2020, 2020, 2020],
"Week": [40, 8, 9, 10, 11, 12, 13]
}
df = pd.DataFrame(data)
# Current YearWeek
year_week = datetime.now().strftime("%Y/W%V")
print(year_week)
df["Year/Week"] = pd.to_datetime(
(df["Year"].astype(str) + "/W" + df["Week"].astype(str)),
format="%Y/W%V",
errors="ignore")
# Drop rows that have Year-Week value less than current Year-Week
df["Exclude Rows"] = np.where(
pd.to_datetime(
(df["Year"].astype(str) + "/W" + df["Week"].astype(str)),
format="%Y/W%V",
errors="ignore",
) < year_week, "Yes", "No")
# Drop rows
df.drop(df.loc[df["Exclude Rows"] == "Yes"].index, inplace=True)
print(df)
The output I am getting:
Year Week Year/Week Exclude Rows
1 2020 8 2020/W8 No
2 2020 9 2020/W9 No
5 2020 12 2020/W12 No
6 2020 13 2020/W13 No