I am new to Python. I have written Python code that calculates a Gaussian distribution and predicts the label for a set of values. This was a class assignment for which I received good marks. Now I would like to know whether my code is correct in a more Pythonic sense. Are there any further improvements I can make to the code to make it more precise and more "Pythonic"?
import math
import operator
# Function to get the mean and variance of the data points of one class. Input parameters
# are the feature to calculate (height, weight, age) and the label (M or W).
# samples is the number of data points that carry the given label
def getMean(trainingSet, parameter1, parameter2):
    """Return Gaussian densities of one feature under one class's distribution.

    The mean and population variance of column ``parameter1`` are estimated
    from the rows whose label (index 3) equals ``parameter2``.  The Gaussian
    density with those parameters is then evaluated at that column's value in
    *every* row of ``trainingSet``, whatever its label.

    Args:
        trainingSet: Sequence of rows; each row is indexable and holds its
            class label at index 3.
        parameter1: Index of the feature column to use.
        parameter2: Class label that selects the rows used for the estimate.

    Returns:
        A list with one density value per row of ``trainingSet``, in row order.

    Raises:
        ZeroDivisionError: If no row carries ``parameter2`` (division by a
            sample count of zero), or if the selected values have zero
            variance (raised inside ``calculateGuassian``).
    """
    # Feature values of the rows that belong to the requested class.
    classValues = [row[parameter1] for row in trainingSet
                   if row[3] == parameter2]
    samples = len(classValues)

    finalMean = sum(classValues) / samples
    # Population variance: divide by N, not N - 1.
    finalVariance = sum((value - finalMean) ** 2
                        for value in classValues) / samples

    # The density is evaluated for all rows, not only those of the class.
    return [calculateGuassian(finalMean, finalVariance, row[parameter1])
            for row in trainingSet]
# Function to calculate the Gaussian distribution points
def calculateGuassian(meanVal, varianceVal, feature1):
    """Return the Gaussian (normal) probability density at ``feature1``.

    Computes ``exp(-(x - mean)**2 / (2 * var)) / sqrt(2 * pi * var)``.

    Args:
        meanVal: Mean of the distribution.
        varianceVal: Variance of the distribution; must be positive.
        feature1: Point at which the density is evaluated.

    Returns:
        The density value as a float.

    Raises:
        ZeroDivisionError: If ``varianceVal`` is zero.
        ValueError: If ``varianceVal`` is negative (square root of a
            negative number).
    """
    # math.pi instead of the literal 3.14, which slightly skewed every result.
    normaliser = 1 / math.sqrt(2 * math.pi * varianceVal)
    exponent = -((feature1 - meanVal) ** 2) / (2 * varianceVal)
    return normaliser * math.exp(exponent)
def finalProduct(multiplyer):
    """Multiply together every value in ``multiplyer``.

    Args:
        multiplyer: Iterable of values to multiply.

    Returns:
        The product of all values, or ``1`` when the iterable is empty.
    """
    running = 1
    for factor in multiplyer:
        running = operator.mul(running, factor)
    return running
def arrayMultiply(arr1, arr2):
    """Return the element-wise product of ``arr1`` and ``arr2``.

    Args:
        arr1: Sequence whose length determines the length of the result.
        arr2: Indexable sequence with at least as many items as ``arr1``;
            any extra items are ignored.

    Returns:
        A new list where item ``i`` is ``arr1[i] * arr2[i]``.

    Raises:
        IndexError: If ``arr2`` is shorter than ``arr1``.
    """
    # arr2 is indexed rather than zipped so that a too-short arr2 still
    # raises IndexError instead of silently truncating the result.
    return [left * arr2[index] for index, left in enumerate(arr1)]
# Main function, where every feature is calculated and multiplied, and the
# result is shown
def main():
    """Score the module-level ``testData`` for labels 'M' and 'W' and print the result.

    For each label and each of the three feature columns, the densities
    returned by ``getMean`` are multiplied into one number.  The three
    per-feature numbers are multiplied element-wise with ``testData``, the
    results are multiplied together, and the total is scaled by 0.50.  Both
    scores are printed, followed by the label with the larger score; nothing
    further is printed when the scores are equal.
    """
    # NOTE(review): the score is built from densities evaluated at the
    # training points and then multiplied by the raw test feature values;
    # presumably the density at the test point was intended - confirm.
    classWeight = 0.50
    featureColumns = (0, 1, 2)  # height, weight, age

    scores = {}
    for label in ('M', 'W'):
        perFeature = [finalProduct(getMean(trainSet, column, label))
                      for column in featureColumns]
        weighted = arrayMultiply(perFeature, testData)
        scores[label] = finalProduct(weighted) * classWeight

    print(scores['M'])
    print(scores['W'])

    if scores['M'] < scores['W']:
        print("The Class Label Is W")
    elif scores['M'] > scores['W']:
        print("The Class Label Is M")
# Training data: one row per sample as [height, weight, age, label], where the
# label is 'M' or 'W'. The column order matches the indices main() passes to
# getMean (0, 1, 2) and the label index (3) that getMean reads.
trainSet = [[170, 57, 32, 'W'],
[192, 95, 28, 'M'],
[150, 45, 30, 'W'],
[170, 65, 29, 'M'],
[175, 78, 35, 'M'],
[185, 90, 32, 'M'],
[170, 65, 28, 'W'],
[155, 48, 31, 'W'],
[160, 55, 30, 'W'],
[182, 80, 30, 'M'],
[175, 69, 28, 'W'],
[180, 80, 27, 'M'],
[160, 50, 31, 'W'],
[175, 72, 30, 'M']]
# Feature values (height, weight, age) of the sample to classify.
testData = (175, 70, 35)
# Called unconditionally at module level, so it runs on execution and on import.
main()
Any suggestions are most welcome. Thank you in advance.