I have two csv-Files with a common key which is for file1 in the second column and for file2 in the first column.
I want to write a new csv-file that features columns from both files (not all columns from both files just the ones I need).
This is what I tried:
import numpy as np
import csv
import os
output_dir = "<output_dir>"
file1 = "f1.csv"
file2 = "f2.csv"
path1 = os.path.join(output_dir, file1)
path2 = os.path.join(output_dir, file2)
file3 = "fd_labels_processed.csv"
output_file = os.path.join(output_dir, file3)
with open(path1, 'r') as f1, open(path2, 'r') as f2, \
open(output_file, 'w+', newline='') as f3:
f1_reader = csv.reader(f1, delimiter=',')
f2_reader = csv.reader(f2, delimiter=',')
header_f1 = []
header_f1 = next(f1_reader) # reading the next line after header of csv file.
header_f2 = []
header_f2 = next(f2_reader) # reading the next line after header of csv file.
count = 0
writer = csv.writer(f3, delimiter=',') #preparing the file f3 for writing the file.
writer.writerow(["ATTRIBUTE_1", "ATTRIBUTE_2", "ATTRIBUTE_3", "ATTRIBUTE_4", "ATTRIBUTE_5", "ATTRIBUTE_6", "ATTRIBUTE_7", "ATTRIBUTE_8", "ATTRIBUTE_9"])
for row_f1 in f1_reader: # looking each row from csv file f1
for row_f2 in f2_reader: # looking for each row from csv file f2
if row_f1[1] == row_f2[0]: #checking the condition; worse case Time complexity o(n2)
if (row_f1[3] == row_f2[2] and row_f1[4] == row_f2[3] and row_f1[5] == row_f2[4] and row_f1[6] == row_f2[5]):
print(count)
writer = csv.writer(f3)
row_f2.append(row_f1[9])
row_f2.append(row_f1[11])
row_f2.append(row_f1[12])
writer.writerows([row_f2])
count +=1
But for some reason it takes the first line (after the header) from f1_reader
, iterates through all lines from f2_reader
once and then just stops after that. So the outer for
-loop just stops after the first line.