I am trying to demonstrate my problem. I really do not understand why the native Python `<class 'datetime.datetime'>` object is replaced with the pandas-specific object `<class 'pandas._libs.tslibs.timestamps.Timestamp'>`.
import typing
from dateutil.parser import parse
def _normalize_users_dataframe(row: pd.core.series.Series) -> pd.core.series.Series:
    """Parse the ``last_seen`` entry of a single row into a ``datetime``.

    Written to be passed to ``DataFrame.apply(..., axis=1)``, so ``row`` is a
    Series holding the values of one row.

    Args:
        row: One row of the users data; may carry a ``last_seen`` entry.

    Returns:
        The same ``row`` object. A non-empty string ``last_seen`` is replaced
        by the value returned from ``dateutil.parser.parse``; any other value
        (missing, ``None``, empty, already a datetime) is left untouched.
    """
    last_seen: typing.Union[str, datetime.datetime, None] = row.get('last_seen', '')
    # Only non-empty strings are parsed: ``parse`` raises TypeError for
    # non-string input such as an already-converted datetime.
    if isinstance(last_seen, str) and last_seen:
        row['last_seen'] = parse(last_seen)
    # Debug output: ``row['last_seen']`` is a scalar here, so inspect the value
    # itself; subscripting it with ``[0]`` would raise TypeError.
    print(type(row.get('last_seen')).__mro__)  # This shows me that, it is <class 'datetime.datetime'> object, which is PyNative datetime.
    return row
def process_users_dataframe(filepath: str) -> pd.core.frame.DataFrame:
    """Load a tab-separated users file and normalize each of its rows.

    Steps: read the file, rename columns via the module-level ``mapping``,
    replace NaN with ``None``, then run ``_normalize_users_dataframe`` on
    every row.

    Args:
        filepath: Path of the tab-separated file to read.

    Returns:
        The object returned by ``DataFrame.apply(..., axis=1)``.
    """
    df: pd.core.frame.DataFrame = pd.read_csv(filepath, sep='\t')
    df.rename(columns=mapping, inplace=True)
    df.replace({np.nan: None}, inplace=True)
    df = df.apply(_normalize_users_dataframe, axis=1)
    # Debug output. The original referenced ``row``, which does not exist in
    # this scope (NameError); the frame itself has to be inspected. ``iloc``
    # is positional, and the guard avoids failing on an empty result or a
    # missing column.
    # NOTE(review): presumably the Timestamp appears because pandas infers a
    # datetime64[ns] column dtype when apply() reassembles the rows, and
    # element access boxes such values as pandas.Timestamp - confirm.
    if 'last_seen' in df and not df.empty:
        print(type(df['last_seen'].iloc[0]).__mro__)  # This shows me that, it is <class 'pandas._libs.tslibs.timestamps.Timestamp'>, which is `Pandas` specific object.
    return df
def main() -> None:
    """Entry point: run the users-file processing on the placeholder path."""
    users_file = '<dir>'
    process_users_dataframe(users_file)
Inside the `_normalize_users_dataframe()` function, when I print the `last_seen` column series, it shows me that the dtype is `<class 'datetime.datetime'>`, which is fine. But after running the `apply()` method on the `DataFrame`, which returns a new `DataFrame` object, the `last_seen` dtype has become `<class 'pandas._libs.tslibs.timestamps.Timestamp'>`.
How does this happen? Is it maybe a deep implementation detail?