0

I am having trouble getting my code to run. I keep getting the error that my x variable such as 'hsGPA' is not defined. Below is my code. Ive tried the solutions posted on the pother thread and none have helped so please don't mark this as a duplicate. THANKS!

def readData(fileName):


    hsGPA = []   #High School GPA
    mathSAT = []  #Math SAT scores
    crSAT = []  #Verbal SAT scores
    collegeGPA = []  #College GPA
    FullList=[] 
    inputFile = open(fileName, 'r', encoding = 'utf-8')

    for line in inputFile:
        FullList=line.split(',')
        hsGPA.append(float(FullList[0]))
        mathSAT.append(int(FullList[1]))
        crSAT.append(int(FullList[2]))
        collegeGPA.append(float(FullList[3]))
    return hsGPA, mathSAT, crSAT, collegeGPA 



def plotData(hsGPA, mathSAT, crSAT, collegeGPA):

    GPA1 = []   #High School GPA
    Score1 = []  #Math SAT scores
    Score2= []  #Verbal SAT scores
    GPA2 = []  #College GPA

    hsGPA, mathGPA, crSAT, collegeGPA = readData('SAT.txt')
    pyplot.figure(1)

    pyplot.subplot(4,1,1)
    for line in range(len(hsGPA)):
        GPA1.append(line)
    pyplot.plot(GPA1,hsGPA)

    pyplot.subplot(4,1,2)
    for line in range(len(mathSAT)):
        Score1.append(line)
    pyplot.plot(Score1,mathSAT)

    pyplot.subplot(4,1,3)
    for line in range(len(crSAT)):
        Score2.append(line)       
    pyplot.plot(Score2,crSAT)

    pyplot.subplot(4,1,4)
    for line in range(len(collegeGPA)):
        GPA2.append(line)
    pyplot.plot(GPA2,collegeGPA)

    pyplot.show()

def LinearRegression(xList, yList):
    '''
    This function finds the constants in the y = mx+b, or linear regression
    forumula

    xList - a list of the x values
    yList - a list of the y values
    m - the slope f the line
    b - where the line intercepts the y axis
    '''

    n = len(xList)
    sumX = 0
    sumXX = 0
    sumXY = 0
    sumY = 0

    for index in range(n):
        sumX += xList[index]
        sumXY += xList[index] * yList[index]
        sumXX += xList[index]**2
        sumY += yList[index]
        #the components needed to find m and b

    m = (n*(sumXY - (sumX*sumY)))/(n*(sumXX - (sumX**2)))
    b = (sumY - (m*sumX))/n
    #actually implements formula

    return m, b


def plotRegression(x,y, xLabel, yLabel):
    ScoreT = []

    pyplot.scatter(x,y)
    m,b = linearRegression(xList,yList)
    minX = min(x)
    maxX = max(x)
    pyplot.plot([minX, maxX], [m * minX + b, m * maxX + b], color ='red')
    pyplot.xlabel(xLabel)
    pyplot.ylabel(yLabel)
    pyplot.show()

    for index in range(len(mathSAT)):
        sumscore = mathSAT[index] + crSAT[index]
        ScoreT.append(sumscore)
    return ScoreT


def rSquared(x,y,m,b):

    n = len(x)
    R=0
    sumS=0
    sumT=0
    sumY=0

    for index in range(n):
        a=(y[index]-((m*x[index])+b))**2
        sumS = sumS+a


    for index in range(len(y)):
        sumY = sumY= y[index]
        MeanY= sumY/(len(y))
        e=(y[index]-MeanY)**2
        sumT = sumT+e




    m,b= LinearRegression(xList, yList)

    RG=1-(sumS/sumT)





def main():
    print(readData('SAT.txt'))
    plotData(*readData('SAT.txt'))
    plotRegression(hsGPA,collegeGPA, 'highGPA', 'collegeGPA')
    plotRegression(mathSAT,collegeGPA, 'highGPA' , 'collegeGPA')
    plotRegression(crSAT,collegeGPA, 'highGPA' , 'collegeGPA')
    plotRegression(ScoreT,collegeGPA, 'highGPA' , 'collegeGPA')



main()

It's giving the error in main, after plotRegression for each of the x variables. Please Help! Thanks!

Brad Larson
  • 170,088
  • 45
  • 397
  • 571
Sarah
  • 13
  • 4

2 Answers2

1

Try this:

def plotRegression(x,y, xLabel, yLabel):
    # I deleted ScoreT = [] here
    pyplot.scatter(x,y)
    m,b = linearRegression(x,y)
    minX = min(x)
    maxX = max(x)
    pyplot.plot([minX, maxX], [m * minX + b, m * maxX + b], color ='red')
    pyplot.xlabel(xLabel)
    pyplot.ylabel(yLabel)
    pyplot.show()
    # I deleted the loop and return statement here

# ....

def main():
    data = readData('SAT.txt')
    print(data)
    plotData(*data)
    hsGPA, mathSAT, crSAT, collegeGPA = data
    # added ScoreT calculation here
    ScoreT = [sum(x) for x in zip(mathSAT, crSAT)]
    plotRegression(hsGPA,collegeGPA, 'highGPA', 'collegeGPA')
    plotRegression(mathSAT,collegeGPA, 'highGPA' , 'collegeGPA')
    plotRegression(crSAT,collegeGPA, 'highGPA' , 'collegeGPA')
    plotRegression(ScoreT,collegeGPA, 'highGPA' , 'collegeGPA')
Tamas Hegedus
  • 28,755
  • 12
  • 63
  • 97
  • I'm not sure what `ScoreT` should be – Tamas Hegedus Dec 06 '15 at 00:01
  • ScoreT is a separate list because I want to combine two individual variables into one, so that won't help. – Sarah Dec 06 '15 at 00:17
  • The first three call to plotRegression creates three different lists named ScoreT. How do you wish them to be combined? adding them together by value? average them? – Tamas Hegedus Dec 06 '15 at 00:20
  • Ok so what Im trying to do is have four plots. The first three are hsGPA, mathSAT, crSAT all compared with college GPA, and the fourth is the mathSAT and crSAT added ( which i called scoredT), so i want scoreT to be the fourth comparisson and plotted with collegeGPA – Sarah Dec 06 '15 at 00:25
  • Updated my answer based on that, let's see if it works for you – Tamas Hegedus Dec 06 '15 at 00:29
  • Its giving me an error still saying error in main plotRegression(hsGPA,collegeGPA, 'highGPA', 'collegeGPA') and then – Sarah Dec 06 '15 at 00:35
  • in plotRegression m,b = LinearRegression(xList,yList) NameError: name 'xList' is not defined – Sarah Dec 06 '15 at 00:35
  • change it to `m,b = linearRegression(x,y)` – Tamas Hegedus Dec 06 '15 at 00:37
  • You're amazing!! it worked!! thank you soon much!!!! – Sarah Dec 06 '15 at 00:45
0

In your main(), hsGPA is never defined. It's defined inside other function and is not shared in the global context. So main cannot access it.

You need to it from readData()'s return

Cyrbil
  • 6,341
  • 1
  • 24
  • 40