This is what my columns look like. I would like to change the data in each column to a one-hot encoding, where every encoded value is one of [1,0,0], [0,1,0] or [0,0,1].
How can I do this?
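To make the structure concrete, here is a minimal sketch of the kind of DataFrame I am working with; the words and syllable counts below are made-up examples, but the column names (Word and Syllables) are the ones my code uses:

import pandas as pd

# Made-up rows purely for illustration; the real dataset has the same
# two columns but different contents.
dataset = pd.DataFrame({
    'Word': ['CAB', 'HELLO', 'WINDOW'],
    'Syllables': [1, 2, 2],
})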
Here is my code
# imports used below (TensorFlow 2.x / Keras)
import os

import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Dense, Embedding, LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

def code_letter(letter):
    # Vowels (including Y) map to '2', consonants to '1'; '0' is reserved for padding.
    return {
        'A': '2',
        'B': '1',
        'C': '1',
        'D': '1',
        'E': '2',
        'F': '1',
        'G': '1',
        'H': '1',
        'I': '2',
        'J': '1',
        'K': '1',
        'L': '1',
        'M': '1',
        'N': '1',
        'O': '2',
        'P': '1',
        'Q': '1',
        'R': '1',
        'S': '1',
        'T': '1',
        'U': '2',
        'V': '1',
        'W': '1',
        'X': '1',
        'Y': '2',
        'Z': '1',
    }[letter]
# preprocessing
def codify(word):
    n = []
    for x in word:
        n.append(code_letter(x))
    n = ''.join(n)
    return n
def code_letter2(letter):
    # Map each digit to its one-hot triple.
    return {
        '0': [1, 0, 0],
        '1': [0, 1, 0],
        '2': [0, 0, 1],
    }[letter]
def codify2d(word):
    n = []
    for x in word:
        n.append(code_letter2(x))
    return n
def zerox(code):
    # Right-pad the digit string with '0' up to a fixed length of 24.
    code_len = len(code)
    if code_len < 24:
        diff = 24 - code_len
        filled = code.ljust(diff + code_len, '0')
        return filled
    else:
        return code
def one_hot(final_code):
    final_split = list(final_code)
    dummies = pd.get_dummies(final_split)
    return dummies
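# Sanity check on one assumed example word (not from my real data):
# consonants become '1', vowels '2', and '0' is the padding symbol.
example = codify2d(zerox(codify('CAB')))
print(example[:3])   # [[0, 1, 0], [0, 0, 1], [0, 1, 0]]
print(len(example))  # 24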
dataset['Coded'] = dataset['Word'].apply(codify)
dataset['Codedfill'] = dataset['Coded'].apply(zerox)
dataset['Onehot'] = dataset['Codedfill'].apply(codify2d)
print(dataset)

dataset_new = pd.DataFrame(dataset.Onehot.tolist(), index=dataset.index)
dataset_new["Syllables"] = dataset["Syllables"]
print(dataset_new)
from sklearn.model_selection import train_test_split

train_dataset, test_dataset = train_test_split(dataset_new, test_size=0.2)
xtrain_dataset = train_dataset.loc[:, train_dataset.columns != 'Syllables']
ytrain_dataset = train_dataset["Syllables"]
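# Quick check (with the made-up example rows from above): every cell of
# xtrain_dataset still holds a 3-element Python list, so all 24 feature
# columns end up with dtype object.
print(xtrain_dataset.shape)            # (n_rows, 24)
print(xtrain_dataset.dtypes.unique())  # [dtype('O')]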
# Define a simple sequential model
def create_model():
    model = Sequential()
    model.add(Embedding(len(xtrain_dataset), 3, input_length=24))
    model.add(LSTM(18, dropout=0.1))
    model.add(Dense(12, activation='relu'))
    model.add(Dense(9, activation='relu'))
    model.add(Dense(1, activation="sigmoid"))
    optimizer = Adam(learning_rate=0.01)
    model.compile(optimizer=optimizer,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model
#Create a basic model instance
model = create_model()
# Display the model's architecture
model.summary()
checkpoint_path = "training_1/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
# Create a callback that saves the model's weights
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1)
# Train the model with the new callback
# print(train_dataset)
model.fit(x=xtrain_dataset,
          y=ytrain_dataset,
          epochs=50,
          batch_size=100,
          callbacks=[cp_callback])  # Pass callback to training

os.listdir(checkpoint_dir)
When I try to feed this to the network, model.fit fails with:

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type list)
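As far as I can tell, the trouble is that every cell of the one-hot columns holds a Python list rather than a number, so the frame's dtype is object. This minimal sketch (made-up values, not my real pipeline) raises the same ValueError for me:

import pandas as pd
import tensorflow as tf

# Two columns whose cells are 3-element lists, i.e. dtype object.
df = pd.DataFrame({0: [[1, 0, 0], [0, 1, 0]], 1: [[0, 0, 1], [1, 0, 0]]})
tf.convert_to_tensor(df.to_numpy())  # same "Unsupported object type list" error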