I created two DataFrames called "ee" and "dd" and tried to compute the column-wise correlation between them using corrwith(). I can't figure out why I am getting a NaN value in the result.
import pandas as pd
# Build two monthly metric tables (dd and ee) and correlate them column by
# column with DataFrame.corrwith.

# DataFrame dd values
dd_data = {
    'TOT_SALES': [
        7.9, 6.8, 8.8, 3.9, 24.2, 4.9, 83.0, 69.8, 44.7, 35.4, 71.2, 61.4,
        53.8,
    ],
    'nCustomers': [2, 2, 1, 1, 4, 2, 16, 12, 11, 9, 12, 9, 9],
    'nTxnPerCust': [
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0625, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
    ],
    'nChipsPerTxn': [
        1.500000, 1.500000, 2.000000, 1.000000, 1.750000, 1.000000, 1.562500,
        1.666667, 1.454545, 1.222222, 1.666667, 1.666667, 1.777778,
    ],
    'avgPricePerUnit': [
        2.633333, 2.266667, 4.400000, 3.900000, 3.457143, 2.450000, 3.320000,
        3.490000, 2.793750, 3.218182, 3.560000, 4.093333, 3.362500,
    ],
}
# The labels are parsed as two-digit year + month ("1801" -> January 2018).
# Without an explicit format a bare four-digit string is liable to be read as
# a calendar year (1801, 1802, ...), which is not a monthly series.
# NOTE(review): the yymm reading is inferred from the 13 consecutive labels
# "1801".."1812", "1901" - confirm against the data source.
dd_index = pd.to_datetime(
    ['1801', '1802', '1803', '1804', '1805', '1806', '1807', '1808', '1809',
     '1810', '1811', '1812', '1901'],
    format='%y%m',
)
dd = pd.DataFrame(dd_data, index=dd_index)

# DataFrame ee values
ee_data = {
    'TOT_SALES': [
        14.3, 8.7, 15.7, 16.6, 17.2, 7.6, 38.8, 41.9, 59.6, 54.3, 49.2, 51.6,
        60.2,
    ],
    'nCustomers': [3, 2, 3, 2, 4, 1, 9, 10, 12, 11, 9, 9, 15],
    # Every value is 1.0, so this column has zero variance (see the note on
    # the correlation below).
    'nTxnPerCust': [
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
    ],
    'nChipsPerTxn': [
        1.333333, 1.0, 1.666667, 2.0, 1.25, 2.0, 1.555556, 1.3, 1.5, 1.454545,
        1.555556, 1.444444, 1.333333,
    ],
    'avgPricePerUnit': [
        3.575000, 4.350000, 3.140000, 4.150000, 3.440000, 3.800000, 2.771429,
        3.223077, 3.311111, 3.393750, 3.514286, 3.969231, 3.010000,
    ],
}
# ee covers exactly the same periods as dd; derive its index from dd_index so
# the two frames always align row for row.
ee_index = dd_index.copy()
ee = pd.DataFrame(ee_data, index=ee_index)

# Correlation calculation
# corrwith pairs the columns of dd and ee by name and computes a Pearson
# correlation for each pair over the shared index.
#
# Why 'nTxnPerCust' comes out as NaN: Pearson's r is
#     cov(x, y) / (std(x) * std(y)).
# ee['nTxnPerCust'] is constant, so std(y) == 0 and the ratio is 0 / 0.
# The correlation with a constant series is mathematically undefined, and
# pandas reports that as NaN. This is expected, not a data or alignment error.
# Use correlation.dropna() if only the defined values are wanted.
correlation = dd.corrwith(ee, axis=0)
print(correlation)
The resulting output:
TOT_SALES 0.773883
nCustomers 0.811135
nTxnPerCust NaN
nChipsPerTxn -0.542081
avgPricePerUnit -0.235673
dtype: float64