I have a dataframe with 5 columns and 100 values. I am trying to do a basic descriptive analysis on the data before I go further. I am doing this by creating a function where the different descriptive information is found and stored in a new dataframe, but I am getting an element error.
import numpy as np
import pandas as pd
from IPython.display import display
df = pd.read_excel('cmc_data.xlsx', engine='openpyxl')
print(df)
def detailed_analysis(data, pred = None):
obs = data.shape[0]
types = data.dtypes
counts = data.apply(lambda x: x.count())
uniques = data.apply(lambda x: [x.unique])
nulls = data.apply(lambda x: x.isnull().sum())
distincts = data.apply(lambda x: x.unique().shape[0])
missing_ratio = (nulls / obs)*100
skewness = data.skew()
kurtosis = data.kurt()
print('Data shape:', data.shape)
if pred is None:
cols = ['types', 'counts', 'nulls', 'distincts', 'missing ratio', 'uniques', 'skewness', 'kurtosis']
details = pd.concat([types, counts, nulls, distincts, missing_ratio, uniques, skewness, kurtosis], axis=1)
else:
corr = data.corr()[pred]
details = pd.concat([types, counts, nulls, distincts, missing_ratio, uniques, skewness, kurtosis, corr], axis=1
, sort=False)
corr_col = 'corr' + pred
cols = ['types', 'counts', 'nulls', 'distincts', 'missing ratio', 'uniques', 'skewness', 'kurtosis', corr_col]
details.columns = cols
dtypes = details.types.value_counts()
print('____________________________\nData types:\n', dtypes)
print('____________________________')
return details
details = detailed_analysis(df)
display(details)
My error:
Traceback (most recent call last):
File "C:/Users/Ejer/PycharmProjects/pythonProject/CMC_3.py", line 38, in <module>
details = detailed_analysis(df)
File "C:/Users/Ejer/PycharmProjects/pythonProject/CMC_3.py", line 32, in detailed_analysis
details.columns = cols
File "C:\Users\Ejer\anaconda3\envs\pythonProject\lib\site-packages\pandas\core\generic.py", line 5152, in __setattr__
return object.__setattr__(self, name, value)
File "pandas\_libs\properties.pyx", line 66, in pandas._libs.properties.AxisProperty.__set__
File "C:\Users\Ejer\anaconda3\envs\pythonProject\lib\site-packages\pandas\core\generic.py", line 564, in _set_axis
self._mgr.set_axis(axis, labels)
File "C:\Users\Ejer\anaconda3\envs\pythonProject\lib\site-packages\pandas\core\internals\managers.py", line 226, in set_axis
raise ValueError(
ValueError: Length mismatch: Expected axis has 12 elements, new values have 8 elements
Process finished with exit code 1
ValueError: Length mismatch: Expected axis has 12 elements, new values have 8 elements