I currently have a script which I want to use to combine CSV data files. For example, I have files called process.csv and file.csv, but when I try to append one to the other in a new file called 'all_files.csv', it appends the data into the correct column but not starting from the top of the file.
What happens at the moment:
process/sec
08/03/16 11:19 0
08/03/16 11:34 0.1
08/03/16 11:49 0
08/03/16 12:03 0
08/03/16 12:13 0
08/03/16 12:23 0
file/sec
0
43.3
0
0
0
0
0
What I want:
process/sec file/sec
08/03/16 11:19 0 0
08/03/16 11:34 0.1 43.3
08/03/16 11:49 0 0
08/03/16 12:03 0 0
08/03/16 12:13 0 0
08/03/16 12:23 0 0
Here is my code (note: I removed all the excess code relating to the algorithm I use for the per_second
value, and use a static value in this example):
def all_data(data_name, input_file_name, idx):
    """Append one event's per-second column from *input_file_name* to all_data.csv.

    The first data set (``data_name == 'first_set_of_data'``) creates/truncates
    all_data.csv and also writes the date column taken from the input file;
    every later data set is appended, prefixed with *idx* commas so its values
    land in the next free column rather than overwriting earlier ones.

    Fixes vs. the original:
      * ``event_name`` was never defined (the parameter is ``data_name``) and
        raised NameError — all references now use ``data_name``.
      * Files are opened in text mode via ``with`` (binary ``'wb'``/``'rb'``
        breaks the csv module on Python 3, and the output handle leaked on
        exceptions).
      * Removed the dead pre-pass that computed unused ``first_item`` /
        ``num_columns`` values.
    """
    # Truncate on the first data set, append for all later ones.
    mode = 'w' if data_name == 'first_set_of_data' else 'a'
    # idx leading commas shift this data set's values to the right so they do
    # not land in columns already used by earlier data sets.
    row_position = ',' * idx
    # Static test value for example purposes (the real script computes this).
    per_second = 12
    with open("all_data.csv", mode) as out_file, \
            open(input_file_name, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        # enumerate replaces the hand-maintained ``count`` variable; the
        # original incremented count after handling each row, starting at 0,
        # which is exactly enumerate's numbering.
        for count, row in enumerate(reader):
            if count == 1:
                # Header row: column title for this data set.
                out_file.write(row_position + ',' + data_name + "\n")
            if count == 2:
                # First data row: the rate must start at 0; only the first
                # data set also writes the date column.
                if data_name == 'first_set_of_data':
                    out_file.write(row_position + row[0] + ",0\n")
                else:
                    out_file.write(row_position + ",0\n")
            # If the raw value is 0 the rate resets to 0 (never negative).
            if count > 2 and row[3] == '0':
                if data_name == 'first_set_of_data':
                    out_file.write(row_position + row[0] + ",0\n")
                else:
                    out_file.write(row_position + ",0\n")
            # Otherwise emit the computed rate, formatted to one decimal.
            elif count >= 3:
                if data_name == 'first_set_of_data':
                    out_file.write(row_position + row[0] + "," + ("%.1f" % per_second) + "\n")
                else:
                    out_file.write(row_position + "," + ("%.1f" % per_second) + "\n")
Update in code:
I have changed my script to the following which seems to work correctly:
def all_data(input_file_name):
    """Merge per-second CSV files column-wise into all_event_per_second.csv.

    Parameters
    ----------
    input_file_name : sequence of str
        CSV file names, resolved relative to the module-level
        ``per_second_address`` path. Each file must contain a 'Date' column;
        the first file drives the merge (inner join on 'Date').

    Generalized from exactly four hard-coded files to any number >= 1 while
    preserving the original behavior: the first frame keeps its columns as
    read, every later frame has its all-NaN/empty columns dropped before the
    merge, and the joins are performed left-to-right on 'Date'.
    """
    merged = pd.read_csv(per_second_address + input_file_name[0])
    for name in input_file_name[1:]:
        # Later files carry stray empty columns; drop them before merging so
        # the join only brings in the real data columns.
        frame = pd.read_csv(per_second_address + name).dropna(axis=1)
        merged = merged.merge(frame, on='Date')
    merged.to_csv(per_second_address + "all_event_per_second.csv", index=False)