I'm trying to run a number of classification models, but all of them keep throwing the reshape error. I think it has to do with the call to model.score or model.predict, but I've tried running some reshape commands (on X_valid and Y_valid) with no success.
Code:
# Separate the feature columns from the "Survived" target column.
X = train.drop("Survived", axis=1) # features
Y = train["Survived"] # target
X_test = test # test set, containing no target
# Hold out 20% of the training data for validation (stratified on Y); predictions on X_test come later.
X_train, X_valid, Y_train, Y_valid = train_test_split(X, Y, random_state=42, test_size=0.20, stratify=Y)
# Random Forest
random_forest = RandomForestClassifier(n_estimators=100)
random_forest.fit(X_train, Y_train)
Y_pred = random_forest.predict(X_valid)
# NOTE(review): per the traceback, score(X, y) calls self.predict(X) on its FIRST argument,
# so passing the 1-D Y_valid labels there is what raises "Expected 2D array, got 1D array".
# The call presumably should be score(X_valid, Y_valid), or accuracy_score(Y_valid, Y_pred)
# if the existing predictions are to be reused -- confirm against the scikit-learn docs.
acc_random_forest = round(random_forest.score(Y_valid, Y_pred) * 100, 2)
Error and traceback:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<timed exec> in <module>
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\base.py in score(self, X, y, sample_weight)
498 """
499 from .metrics import accuracy_score
--> 500 return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
501
502 def _more_tags(self):
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\ensemble\_forest.py in predict(self, X)
628 The predicted classes.
629 """
--> 630 proba = self.predict_proba(X)
631
632 if self.n_outputs_ == 1:
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\ensemble\_forest.py in predict_proba(self, X)
672 check_is_fitted(self)
673 # Check data
--> 674 X = self._validate_X_predict(X)
675
676 # Assign chunk of trees to jobs
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\ensemble\_forest.py in _validate_X_predict(self, X)
420 check_is_fitted(self)
421
--> 422 return self.estimators_[0]._validate_X_predict(X, check_input=True)
423
424 @property
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\tree\_classes.py in _validate_X_predict(self, X, check_input)
400 """Validate the training data on predict (probabilities)."""
401 if check_input:
--> 402 X = self._validate_data(X, dtype=DTYPE, accept_sparse="csr",
403 reset=False)
404 if issparse(X) and (X.indices.dtype != np.intc or
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\base.py in _validate_data(self, X, y, reset, validate_separately, **check_params)
419 out = X
420 elif isinstance(y, str) and y == 'no_validation':
--> 421 X = check_array(X, **check_params)
422 out = X
423 else:
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
61 extra_args = len(args) - len(all_args)
62 if extra_args <= 0:
---> 63 return f(*args, **kwargs)
64
65 # extra_args > 0
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
635 # If input is 1D raise error
636 if array.ndim == 1:
--> 637 raise ValueError(
638 "Expected 2D array, got 1D array instead:\narray={}.\n"
639 "Reshape your data either using array.reshape(-1, 1) if "
ValueError: Expected 2D array, got 1D array instead:
array=[0. 0. 1. 0. 1. 1. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 1.
0. 1. 0. 1. 1. 0. 0. 1. 1. 0. 1. 1. 1. 0. 0. 1. 0. 0. 0. 1. 1. 0. 0. 0.
1. 1. 1. 0. 0. 1. 1. 0. 0. 1. 1. 1. 0. 1. 0. 0. 1. 0. 1. 0. 0. 0. 1. 0.
0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 1. 0. 1. 0. 0. 1.
1. 0. 0. 1. 0. 1. 1. 1. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0.
0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 1. 0. 1. 0. 1. 1. 0. 0. 1. 0.
0. 1. 0. 0. 1. 1. 1. 0. 1. 1. 0. 1. 0. 0. 0. 1. 1. 1. 1. 1. 1. 0. 0. 0.
0. 1. 0. 0. 0. 0. 1. 0. 0. 1. 0.].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.
Y_valid seems to be the one causing the problem. I tried reshaping as follows:
# Attempted workaround: turn the validation labels into a single-column 2-D array.
Y_valid2 = Y_valid.values.reshape(-1,1)
random_forest = RandomForestClassifier(n_estimators=100)
random_forest.fit(X_train, Y_train)
Y_pred = random_forest.predict(X_valid)
# NOTE(review): Y_valid2 is still passed as score()'s first argument (the feature matrix X),
# so the model is handed a one-column "feature" array -- consistent with the
# "X has 1 features ... expecting 10 features" error reported next. Reshaping the labels
# cannot fix this; the arguments themselves appear to be the wrong ones.
acc_random_forest = round(random_forest.score(Y_valid2, Y_pred) * 100, 2)
But now a different error occurs:
ValueError: X has 1 features, but DecisionTreeClassifier is expecting 10 features as input.
I've looked at some other similar questions, but I can't find a fix that works for my version of the issue. Help! The questions I looked at:
- Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample
- sci-kit learn: Reshape your data either using X.reshape(-1, 1)
- Getting a weird error that says 'Reshape your data either using array.reshape(-1, 1)'
- Reshaping array using array.reshape(-1, 1)
- ValueError: Expected 2D array, got 1D array instead: array=[0.33913043 0.36086956 0.4173913 ... 0.52608699 0.56956524 0.53913045]
- Got a ValueError: Expected 2D array, got 1D array instead while fiiting my image data into decisiontree classifier