I have a .csv file with data of which i want to transform some columns to one-hot. The problem occurs in the second last line, where the one-hot index (e.g. 1st feature) gets placed in all rows instead of just the one i am in currently. It seems to be some problem with how i access the 2D list... any suggestions? thank you
def one_hot_encode(data_list, column):
one_hot_list = [[]]
different_elements = []
for row in data_list[1:]: # count different elements
if row[column] not in different_elements:
different_elements.append(row[column])
for i in range(len(different_elements)): # set variable names
one_hot_list[0].append(different_elements[i])
vector = [] # create list shape with zeroes
for i in range(len(different_elements)):
vector.append(0)
for i in range(1460):
one_hot_list.append(vector)
ind_row = 1 # encode 1 for each sample
for row in data_list[1:]:
index = different_elements.index(row[column])
one_hot_list[ind_row][index] = 1 # mistake!! sets all rows to 1
ind_row += 1