I have a dataset which contains only `int` values, and I would like to rescale these `int` values to the range `[0, 1]`.
I have two methods here which already work. However, I would like to try the one with `pandas`.
Now my question: how can I convert each column to the value range `[0, 1]`
without violating the values, i.e. without changing values that must stay as they are?
For example, a 3 must not be changed, since the 3 stands for a class.
# My first try without pandas:
# Load a CSV file
def load_csv(filename):
    """Load a CSV file into a list of rows, skipping empty rows.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list with one entry per non-empty row. Each entry is the list of
        that row's fields as strings; no numeric conversion is done here.
    """
    import csv  # local import keeps the function self-contained

    # newline='' hands line-ending handling to the csv module, as its
    # documentation requires; otherwise line breaks embedded in quoted
    # fields are altered by the text layer.
    with open(filename, 'r', newline='') as file:
        # A blank line is parsed as an empty list, which is dropped.
        return [row for row in csv.reader(file) if row]
# Find the min and max values for each column
def dataset_minmax(dataset):
    """Find the minimum and maximum value of every column.

    Args:
        dataset: A sequence of rows, each row a sequence of mutually
            comparable values.

    Returns:
        A list with one ``[min, max]`` pair per column, in column order.
        An empty dataset yields an empty list. If rows differ in length,
        only the columns present in every row are considered.
    """
    # zip(*dataset) transposes the rows into columns.
    return [[min(column), max(column)] for column in zip(*dataset)]
# Rescale dataset columns to the range 0-1
def normalize_dataset(dataset, minmax):
    """Rescale the columns of ``dataset`` to the range 0-1, in place.

    Every column except the last one is rescaled with
    ``(value - min) / (max - min)``. The last column of each row is left
    untouched (presumably it holds the class label).

    Args:
        dataset: A list of mutable rows of numbers; modified in place.
        minmax: One ``[min, max]`` pair per column, indexed like the rows.

    Returns:
        None. The rows of ``dataset`` are updated directly.
    """
    for row in dataset:
        for i in range(len(row) - 1):
            low = minmax[i][0]
            span = minmax[i][1] - low
            # A constant column has span 0; map it to 0.0 rather than
            # raising ZeroDivisionError.
            row[i] = (row[i] - low) / span if span else 0.0
# Compute each column's [min, max] first, then rescale `dataset` in place.
# NOTE(review): `dataset` is not defined in this snippet; presumably it was
# loaded beforehand (e.g. with load_csv) and converted to numbers. Confirm.
minmax = dataset_minmax(dataset)
normalize_dataset(dataset, minmax)
My attempt with a pandas DataFrame:
import pandas as pd
# Small in-memory sample with two integer columns.
# (To use real data instead: df = pd.read_csv('test.csv'))
d = {
    'int1': [1, 2, 1],
    'int2': [3, 4, 5],
}
df = pd.DataFrame(d)

# Print the smallest and then the largest value of column 'int1',
# one per line.
print(df['int1'].min(), df['int1'].max(), sep='\n')