I am writing a program to determine the expectation value, expectation of the X^2 and E(X - X_avg)^2. I have written a program like so:
# program : expectation value
import csv
import pandas as pd
import numpy as np
from scipy.stats import chi2_contingency
import seaborn as sns
import matplotlib.pyplot as plt
import logging
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
# Step 1: read csv
probabilityCSV = open('probability.csv')
df = pd.read_csv(probabilityCSV)
logging.debug(df['X'])
logging.debug(df['P'])
logging.debug(type(df['X']))
logging.debug(type(df['P']))
# Step 2: convert dataframe to ndarry
# https://stackoverflow.com/questions/13187778/convert-pandas-dataframe-to-numpy-array
X = df['X'].to_numpy()
p = df['P'].to_numpy()
logging.debug(f'X={X}')
logging.debug(f'p={p}')
# Step 3: calculate E(X)
# https://www.statology.org/expected-value-in-python/
def expected_value(values, weights):
return np.sum((np.dot(values,weights))) / np.sum(weights)
logging.debug('Step 3: calculate E(X)')
expectation = expected_value(X,p)
logging.debug(f'E(X)={expectation}')
# Step 4: calculate E(X^2)
logging.debug('Step 4: calculate E(X^2)')
# add normalize='index'
contingency_pct = pd.crosstab(df['Observed'],df['Expected'],normalize='index')
logging.debug(f'contingency_pct:{contingency_pct}')
# Step 5: calculate E(X - X_avg)^2
logging.debug('Step 5: calculate E(X - X_avg)^2')
The dataset that I am using is:
X,P
8,1/8
12,1/6
16,3/8
20,1/4
24,1/12
Expected:
E(X) = 16 E(X^2) = 276 E(X- X_avg)^2 =20
Actual:
Traceback (most recent call last):
File "/Users/evangertis/development/PythonAutomation/Statistics/expectation.py", line 35, in <module>
expectation = expected_value(X,p)
File "/Users/evangertis/development/PythonAutomation/Statistics/expectation.py", line 32, in expected_value
return np.sum((np.dot(values,weights))) / np.sum(weights)
File "<__array_function__ internals>", line 5, in sum
File "/usr/local/lib/python3.9/site-packages/numpy/core/fromnumeric.py", line 2259, in sum
return _wrapreduction(a, np.add, 'sum', axis, dtype, out, keepdims=keepdims,
File "/usr/local/lib/python3.9/site-packages/numpy/core/fromnumeric.py", line 86, in _wrapreduction
return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
TypeError: cannot perform reduce with flexible type