I am trying oneHotEncoder on the categiorical values
However its failing with below error. What could be goind wrong ? Please help , any comments are alwaya welcome.
Below is the code snipet
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
labelencoder_X = LabelEncoder()
print(X.shape)
X[:, 0] = labelencoder_X.fit_transform(X[:, 0])
X[:, 1] = labelencoder_X.fit_transform(X[:, 1])
print(X)
print(X.shape)
print(y)
#X = X.reshape(len(X[:, 0]), 7)
print(X.shape)
onehotencoder = OneHotEncoder(categorical_features = [0])
X = onehotencoder.fit_transform(X).toarray()
print(X.shape)
print(X)
=================================================================== The output of the code is as below Looks like the issue is with array formatting
I am a getting following ouput
(17, 7)
[[2 0 0 'Offline' 'Low' 'Cold' 'No']
[0 0 0 'Offline' 'High' 'Cold' 'No']
[3 0 1 'Online' 'High' 'Cold' 'Yes']
[2 0 1 'Offline' 'Low' 'Hot' 'Yes']
[2 0 1 'Offline' 'High' 'Hot' 'Yes']
[2 0 0 'Online' 'High' 'Cold' 'Yes']
[2 1 1 'Offline' 'Low' 'Hot' 'No']
[2 1 0 'Offline' 'Low' 'Cold' 'No']
[0 1 0 'Online' 'Low' 'Cold' 'Yes']
[3 1 1 'Online' 'Low' 'Hot' 'Yes']
[1 1 0 'Offline' 'Low' 'Hot' 'No']
[2 1 1 'Offline' 'Low' 'Hot' 'Yes']
[3 1 1 'Online' 'High' 'Hot' 'Yes']
[2 1 0 'Online' 'High' 'Hot' 'No']
[2 2 2 'Offline' 'Low' 'Hot' 'Yes']
[2 2 1 'Offline' 'Low' 'Cold' 'No']
[1 2 0 'Offline' 'High' 'Cold' 'Yes']]
(17, 7)
['Low' 'Low' 'High' 'High' 'High' 'Low' 'Low' 'Low' 'Low' 'High' 'Low'
'High' 'High' 'High' 'High' 'Low' 'Low']
(17, 7)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-42-84bec98371d4> in <module>()
28 print(X.shape)
29 onehotencoder = OneHotEncoder(categorical_features = [0])
---> 30 X = onehotencoder.fit_transform(X).toarray()
31 print(X.shape)
32 print(X)
C:\Users\patilsi\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\sklearn\preprocessing\data.py in fit_transform(self, X, y)
2017 """
2018 return _transform_selected(X, self._fit_transform,
-> 2019 self.categorical_features, copy=True)
2020
2021 def _transform(self, X):
C:\Users\patilsi\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\sklearn\preprocessing\data.py in _transform_selected(X, transform, selected, copy)
1807 X : array or sparse matrix, shape=(n_samples, n_features_new)
1808 """
-> 1809 X = check_array(X, accept_sparse='csc', copy=copy, dtype=FLOAT_DTYPES)
1810
1811 if isinstance(selected, six.string_types) and selected == "all":
C:\Users\patilsi\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
431 force_all_finite)
432 else:
--> 433 array = np.array(array, dtype=dtype, order=order, copy=copy)
434
435 if ensure_2d:
(17, 7)
[[2 0 0 'Offline' 'Low' 'Cold' 'No']
[0 0 0 'Offline' 'High' 'Cold' 'No']
[3 0 1 'Online' 'High' 'Cold' 'Yes']
[2 0 1 'Offline' 'Low' 'Hot' 'Yes']
[2 0 1 'Offline' 'High' 'Hot' 'Yes']
[2 0 0 'Online' 'High' 'Cold' 'Yes']
[2 1 1 'Offline' 'Low' 'Hot' 'No']
[2 1 0 'Offline' 'Low' 'Cold' 'No']
[0 1 0 'Online' 'Low' 'Cold' 'Yes']
[3 1 1 'Online' 'Low' 'Hot' 'Yes']
[1 1 0 'Offline' 'Low' 'Hot' 'No']
[2 1 1 'Offline' 'Low' 'Hot' 'Yes']
[3 1 1 'Online' 'High' 'Hot' 'Yes']
[2 1 0 'Online' 'High' 'Hot' 'No']
[2 2 2 'Offline' 'Low' 'Hot' 'Yes']
[2 2 1 'Offline' 'Low' 'Cold' 'No']
[1 2 0 'Offline' 'High' 'Cold' 'Yes']]
(17, 7)
['Low' 'Low' 'High' 'High' 'High' 'Low' 'Low' 'Low' 'Low' 'High' 'Low'
'High' 'High' 'High' 'High' 'Low' 'Low']
(17, 7)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-42-84bec98371d4> in <module>()
28 print(X.shape)
29 onehotencoder = OneHotEncoder(categorical_features = [0])
---> 30 X = onehotencoder.fit_transform(X).toarray()
31 print(X.shape)
32 print(X)
C:\Users\patilsi\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\sklearn\preprocessing\data.py in fit_transform(self, X, y)
2017 """
2018 return _transform_selected(X, self._fit_transform,
-> 2019 self.categorical_features, copy=True)
2020
2021 def _transform(self, X):
C:\Users\patilsi\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\sklearn\preprocessing\data.py in _transform_selected(X, transform, selected, copy)
1807 X : array or sparse matrix, shape=(n_samples, n_features_new)
1808 """
-> 1809 X = check_array(X, accept_sparse='csc', copy=copy, dtype=FLOAT_DTYPES)
1810
1811 if isinstance(selected, six.string_types) and selected == "all":
C:\Users\patilsi\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
431 force_all_finite)
432 else:
--> 433 array = np.array(array, dtype=dtype, order=order, copy=copy)
434
435 if ensure_2d:
ValueError: could not convert string to float: 'Yes'