EDIT: Another version that includes months:
import re
import pandas as pd
units = ("sec", "min", "hour", "day", "week", "month", "year")
re_offset = re.compile("|".join(r"\d+\s*" + f"{unit}s?" for unit in units))
re_num_unit = re.compile(r"(\d+)\s*(\w+)")
def offset(s):
keys = {
"sec": "seconds", "secs": "seconds",
"min": "minutes", "mins": "minutes",
"hour": "hours", "day": "days", "week": "weeks",
"month": "months", "year": "years"
}
values = {}
for num_unit in re_offset.findall(s):
num_str, unit = re_num_unit.match(num_unit).groups()
unit = keys.get(unit, unit)
values[unit] = int(num_str)
return pd.DateOffset(**values)
df["DATE"] = (dt.datetime.now() - df.AGE.apply(offset)).dt.strftime("%Y-%m-%d %H:%M")
or shorter in case the strings behave really well:
def offset(lst):
keys = {
"sec": "seconds", "secs": "seconds",
"min": "minutes", "mins": "minutes",
"hour": "hours", "day": "days", "week": "weeks",
"month": "months", "year": "years"
}
return pd.DateOffset(
**{keys.get(unit, unit): int(num_str) for num_str, unit in lst}
)
df["DATE"] = (dt.datetime.now()
- df.AGE.str.findall(r"(\d+)\s*(\w+)")
.map(offset)).dt.strftime("%Y-%m-%d %H:%M")
Old version:
import re
import datetime as dt
units = ("sec", "min", "hour", "day", "week", "year")
re_timedelta = re.compile("|".join(r"\d+\s*" + f"{unit}s?" for unit in units))
re_num_unit = re.compile(r"(\d+)\s*(\w+)")
def ago(s, now):
keys = {
"sec": "seconds", "secs": "seconds",
"min": "minutes", "mins": "minutes",
"hour": "hours", "day": "days", "week": "weeks", "year": "years"
}
values = {"seconds": 0, "minutes": 0, "hours": 0, "days": 0, "weeks": 0}
for num_unit in re_timedelta.findall(s):
num_str, unit = re_num_unit.match(num_unit).groups()
unit = keys.get(unit, unit)
if unit == "years":
years = int(num_str)
if now.month == 2 and now.day == 29 and years%4:
ago_date = dt.date(now.year - years, 3, 1)
else:
ago_date = dt.date(now.year - years, now.month, now.day)
values["days"] += (now.date() - ago_date).days
else:
values[unit] += int(num_str)
return (now - dt.timedelta(**values)).strftime("%Y-%m-%d %H:%M")
With that preparation you can do (df
your dataframe):
now = dt.datetime.now()
df["DATE"] = df.AGE.apply(ago, args=[now])
Result:
AGE DATE
0 1 years 2 days ago 2020-07-29 17:51
1 3 hours 4 mins ago 2021-07-31 14:47
2 5 mins 6 secs ago 2021-07-31 17:46