-1

My program reads the MPG vs. weight relationship from a data file and draws a graph of the fitted polynomial regression, but as you can see, the graph does not look the way it is supposed to.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Read the whitespace-delimited data file. Column names are supplied
# explicitly through `names`, so every line of the file is treated as data.
dataframe= pd.read_table('auto_data71.txt',delim_whitespace=True,names=['MPG','Cylinder','Displacement','Horsepower','Weight','acceleration','Model year','Origin','Car Name'])
# Drop every row that has a missing value in any column.
dataframe.dropna(inplace=True)

# Keep only the two columns of interest. Slicing with 4:5 / 0:1 (instead of a
# single index) keeps the result 2-D, one column wide:
# column 4 is 'Weight' (feature), column 0 is 'MPG' (target).
X = dataframe.iloc[:,4:5].values
Y = dataframe.iloc[:,0:1].values


# Standardize feature and target independently, each with its own scaler.
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
sc_Y= StandardScaler()
X = sc_X.fit_transform(X)
Y = sc_Y.fit_transform(Y)

# Split the data: 20% held out as a test set, the rest used for training.
# No random_state is given, so the split differs from run to run.
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(X,Y,test_size=0.2)

# Build a degree-2 polynomial regression: expand x_train into polynomial
# features, then fit an ordinary linear regression on the expanded matrix.
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
poly_reg = PolynomialFeatures(degree=2)
poly_X = poly_reg.fit_transform(x_train)
# NOTE(review): this refits the transformer on the already-expanded matrix;
# poly_reg is not used again below, so the call has no effect on the plot.
poly_reg.fit(poly_X,y_train)
regressor2= LinearRegression()
regressor2.fit(poly_X,y_train)


# Plot the training points (red) and the model's predictions for them (blue).
result = regressor2.predict(poly_X)
plt.scatter(x_train,y_train,color='red')
# NOTE(review): x_train is in the order produced by train_test_split, not
# sorted by value, and plt.plot joins points in the order given -- so the
# blue line jumps back and forth across the plot instead of tracing a curve.
plt.plot(x_train, result,color='blue')
plt.show()

The output is this: as you can see, the regression line does not look right. Any help will be much appreciated.

as you can see the regression line does not look right. Any help will be much appreciated

#auto_data.txt(part of data...)

**NOTE:** I am only using the weight and mpg columns in this code. The columns in the file are: mpg, cylinder, displacement, horsepower, weight, acceleration, year, origin, name.

27.0   4.   97.00      88.00      2130.      14.5   71.  3. "datsun pl510"
28.0   4.   140.0      90.00      2264.      15.5   71.  1. "chevrolet vega 2300"
25.0   4.   113.0      95.00      2228.      14.0   71.  3. "toyota corona"
25.0   4.   98.00      NA         2046.      19.0   71.  1. "ford pinto"
NA     4.   97.00      48.00      1978.      20.0   71.  2. "volkswagen super beetle 117"
19.0   6.   232.0      100.0      2634.      13.0   71.  1. "amc gremlin"
16.0   6.   225.0      105.0      3439.      15.5   71.  1. "plymouth satellite custom"
17.0   6.   250.0      100.0      3329.      15.5   71.  1. "chevrolet chevelle malibu"
19.0   6.   250.0      88.00      3302.      15.5   71.  1. "ford torino 500"
18.0   6.   232.0      100.0      3288.      15.5   71.  1. "amc matador"
14.0   8.   350.0      165.0      4209.      12.0   71.  1. "chevrolet impala"
14.0   8.   400.0      175.0      4464.      11.5   71.  1. "pontiac catalina brougham"
14.0   8.   351.0      153.0      4154.      13.5   71.  1. "ford galaxie 500"
14.0   8.   318.0      150.0      4096.      13.0   71.  1. "plymouth fury iii"
12.0   8.   383.0      180.0      4955.      11.5   71.  1. "dodge monaco (sw)"
13.0   8.   400.0      170.0      4746.      12.0   71.  1. "ford country squire (sw)"
13.0   8.   400.0      175.0      5140.      12.0   71.  1. "pontiac safari (sw)"
18.0   6.   258.0      110.0      2962.      13.5   71.  1. "amc hornet sportabout (sw)"
desertnaut
  • 57,590
  • 26
  • 140
  • 166
Amit Dahal
  • 41
  • 1
  • 9

1 Answers1

0

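You need to sort the values by x (weight) before plotting: `plt.plot` connects the points in the order they are given, so unsorted x values make the line jump back and forth instead of tracing a smooth curve.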

DATA: https://files.fm/u/2g5dxyb4

Use this:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split


# Load the whitespace-delimited data set. The first row of the file supplies
# the column names ('weight', 'mpg', ...). sep=r'\s+' is the documented
# equivalent of the deprecated delim_whitespace=True.
data = pd.read_csv('data.txt', sep=r'\s+')
# Drop every row that has a missing value in any column.
data.dropna(inplace=True)

# scikit-learn estimators expect 2-D arrays of shape (n_samples, n_features),
# so turn each 1-D column into a single-column matrix.
X = data['weight'].values.reshape(-1, 1)
Y = data['mpg'].values.reshape(-1, 1)

# Standardize feature and target independently, each with its own scaler.
sc_X = StandardScaler()
sc_Y = StandardScaler()
X = sc_X.fit_transform(X)
Y = sc_Y.fit_transform(Y)

# Hold out 20% of the rows as a test set.
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)

# Degree-2 polynomial regression: expand x into [1, x, x^2] and fit a linear
# model on the expanded features. The transformer is fitted once, on the raw
# single-column x_train, so it can be reused on any other x array below.
poly_reg = PolynomialFeatures(degree=2)
poly_X = poly_reg.fit_transform(x_train)
regressor2 = LinearRegression()
regressor2.fit(poly_X, y_train)

# Plot. plt.plot joins points in the order given, so the curve must be drawn
# over x values in ascending order. Reorder whole rows with argsort and expand
# the sorted x afterwards: sorting the expanded matrix column by column would
# break the pairing between each x and its own x^2 (standardized x takes
# negative values, so x^2 is not monotonic in x).
order = np.argsort(x_train[:, 0])
x_sorted = x_train[order]
result = regressor2.predict(poly_reg.transform(x_sorted))
plt.scatter(x_train, y_train, color='red')
plt.plot(x_sorted, result, color='blue')
plt.show()
seralouk
  • 30,938
  • 9
  • 118
  • 133