I am having trouble getting my code to run. I keep getting the error that my x variable such as 'hsGPA' is not defined. Below is my code. Ive tried the solutions posted on the pother thread and none have helped so please don't mark this as a duplicate. THANKS!
def readData(fileName):
hsGPA = [] #High School GPA
mathSAT = [] #Math SAT scores
crSAT = [] #Verbal SAT scores
collegeGPA = [] #College GPA
FullList=[]
inputFile = open(fileName, 'r', encoding = 'utf-8')
for line in inputFile:
FullList=line.split(',')
hsGPA.append(float(FullList[0]))
mathSAT.append(int(FullList[1]))
crSAT.append(int(FullList[2]))
collegeGPA.append(float(FullList[3]))
return hsGPA, mathSAT, crSAT, collegeGPA
def plotData(hsGPA, mathSAT, crSAT, collegeGPA):
GPA1 = [] #High School GPA
Score1 = [] #Math SAT scores
Score2= [] #Verbal SAT scores
GPA2 = [] #College GPA
hsGPA, mathGPA, crSAT, collegeGPA = readData('SAT.txt')
pyplot.figure(1)
pyplot.subplot(4,1,1)
for line in range(len(hsGPA)):
GPA1.append(line)
pyplot.plot(GPA1,hsGPA)
pyplot.subplot(4,1,2)
for line in range(len(mathSAT)):
Score1.append(line)
pyplot.plot(Score1,mathSAT)
pyplot.subplot(4,1,3)
for line in range(len(crSAT)):
Score2.append(line)
pyplot.plot(Score2,crSAT)
pyplot.subplot(4,1,4)
for line in range(len(collegeGPA)):
GPA2.append(line)
pyplot.plot(GPA2,collegeGPA)
pyplot.show()
def LinearRegression(xList, yList):
'''
This function finds the constants in the y = mx+b, or linear regression
forumula
xList - a list of the x values
yList - a list of the y values
m - the slope f the line
b - where the line intercepts the y axis
'''
n = len(xList)
sumX = 0
sumXX = 0
sumXY = 0
sumY = 0
for index in range(n):
sumX += xList[index]
sumXY += xList[index] * yList[index]
sumXX += xList[index]**2
sumY += yList[index]
#the components needed to find m and b
m = (n*(sumXY - (sumX*sumY)))/(n*(sumXX - (sumX**2)))
b = (sumY - (m*sumX))/n
#actually implements formula
return m, b
def plotRegression(x,y, xLabel, yLabel):
ScoreT = []
pyplot.scatter(x,y)
m,b = linearRegression(xList,yList)
minX = min(x)
maxX = max(x)
pyplot.plot([minX, maxX], [m * minX + b, m * maxX + b], color ='red')
pyplot.xlabel(xLabel)
pyplot.ylabel(yLabel)
pyplot.show()
for index in range(len(mathSAT)):
sumscore = mathSAT[index] + crSAT[index]
ScoreT.append(sumscore)
return ScoreT
def rSquared(x,y,m,b):
n = len(x)
R=0
sumS=0
sumT=0
sumY=0
for index in range(n):
a=(y[index]-((m*x[index])+b))**2
sumS = sumS+a
for index in range(len(y)):
sumY = sumY= y[index]
MeanY= sumY/(len(y))
e=(y[index]-MeanY)**2
sumT = sumT+e
m,b= LinearRegression(xList, yList)
RG=1-(sumS/sumT)
def main():
print(readData('SAT.txt'))
plotData(*readData('SAT.txt'))
plotRegression(hsGPA,collegeGPA, 'highGPA', 'collegeGPA')
plotRegression(mathSAT,collegeGPA, 'highGPA' , 'collegeGPA')
plotRegression(crSAT,collegeGPA, 'highGPA' , 'collegeGPA')
plotRegression(ScoreT,collegeGPA, 'highGPA' , 'collegeGPA')
main()
It's giving the error in main, after plotRegression for each of the x variables. Please Help! Thanks!