0
import matplotlib.pyplot as plt
import pandas as pd
import pylab as pl
import numpy as np
pd.set_option("display.max_columns", None)
df = pd.read_csv(r"C:\Users\kiaab\Downloads\FuelConsumption.csv.csv")
#print(df.head(6))
cdf = df[['ENGINESIZE', 'CYLINDERS', 'FUELCONSUMPTION_CITY', 'FUELCONSUMPTION_COMB', 
'CO2EMISSIONS']]
#print(cdf.head(9))
#plt.scatter(cdf.ENGINESIZE, cdf.CO2EMISSIONS, color='black')
plt.xlabel("engine size")
plt.ylabel("Emission")
#plt.show()
msk = np.random.rand(len(df)) < 0.8
train = cdf[msk]
test = cdf[~msk]
#plt.scatter(train.ENGINESIZE, train.CO2EMISSIONS, color='blue')
plt.xlabel("engine size")
plt.ylabel("Emission")
#plt.show()
from sklearn import linear_model
regr = linear_model.LinearRegression()
x = np.asanyarray(train[['ENGINESIZE', 'CYLINDERS', 'FUELCONSUMPTION_COMB']])
y = np.asanyarray(train[['CO2EMISSIONS']])
regr.fit(x, y)
#print('Coefficients: ', regr.coef_)
#print('Interceept: ', regr.intercept_)
y_hat = regr.predict(test[['ENGINESIZE', 'CYLINDERS', 'FUELCONSUMPTION_COMB']])
y = np.asanyarray(test[['CO2EMISSIONS']])
print("Residual sum of squares: %.2f" % np.mean((y_hat - y) ** 2))
print('variance score: %.2f' % regr.score(x, y))

I am testing my model and in the last step, I got the below error and do not know how to solve it.

ValueError: Found input variables with inconsistent numbers of samples: [225, 842]
Rodalm
  • 5,169
  • 5
  • 21
kiazad
  • 1
  • 1
    Does this answer your question? [sklearn: Found arrays with inconsistent numbers of samples when calling LinearRegression.fit()](https://stackoverflow.com/questions/30813044/sklearn-found-arrays-with-inconsistent-numbers-of-samples-when-calling-linearre) – Chris Jun 12 '22 at 14:50
  • This question is missing a lot - where's the traceback (full error)? Several `print` statements, but no show. No information about the arguments in the problem line. How much of this code is your own, or is it all borrowed, without understanding, from some other site? – hpaulj Jun 12 '22 at 15:02
  • this is error 'ValueError: Found input variables with inconsistent numbers of samples: [197, 870]'. this code is for a tutorial course that I am trying but I take this error. – kiazad Jun 12 '22 at 18:19

0 Answers0