I have the following code that is working for cross joining two Pandas dataframes. Is this the best way to do it? Can this be done in a faster and efficient way?
# Cross join in Pandas
import pandas as pd
import numpy as np
d1 = {'Year': [2019, 2019, 2019, 2019, 2019, 2019],
'Week': [1, 2, 3, 5, 5, 6],
'Part': ['A', 'A', 'A', 'A', 'B', 'B'],
'Plant': [100, 100, 200, 200, 100, 100],
'Static': [20, 20, 20, 20, 40, 40],
'Value': [np.nan, 10, np.nan, 50, 30, np.nan]}
d2 = {'Year': [2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019],
'Week': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}
df1 = pd.DataFrame(d1)
df2 = pd.DataFrame(d2)
df3 = (df2.assign(Key=1)
.merge(pd.DataFrame({'Part': df1['Part'].unique(), 'Key': 1}), on='Key')
.drop('Key', 1)
)
df4 = (df3.assign(Key=1)
.merge(pd.DataFrame({'Plant': df1['Plant'].unique(), 'Key': 1}), on='Key')
.drop('Key', 1)
)
df5 = df4.merge(df1, on=['Year', 'Week', 'Part', 'Plant'], how='left')
df5 = df5.sort_values(by=['Part', 'Plant', 'Year', 'Week'])
df5.reset_index()
print(df5)