1

I'm trying to do a full outer join in Python without using pandas. I have already written code for an inner join, but I can't work out how to adapt it into a full outer join.

Here is my code for the inner join:

import collections
import csv
import sys


def c_merge(f1, f2):
    """Join two CSV files on their first column and write 'newfile.txt'.

    Both files must start with a header row. The output header is the
    header of ``f1`` followed by the non-key columns of ``f2``.

    Every key of ``f1`` produces output: one row per matching row of
    ``f2``, or a single row holding only the ``f1`` columns when ``f2`` has
    no match (so unmatched rows are shorter than the header). Keys that
    appear only in ``f2`` are dropped. If a key is repeated in ``f1``, the
    last row with that key wins.

    Args:
        f1: Path of the first (left) CSV file.
        f2: Path of the second (right) CSV file.
    """
    # newline='' lets the csv module handle line endings itself, which keeps
    # embedded newlines in quoted fields intact and avoids blank lines in
    # the output on platforms that translate "\n".
    with open(f1, 'r', newline='') as infile:
        reader = csv.reader(infile)
        header_a = next(reader)
        # Blank lines come back as empty lists; skip them instead of
        # failing on row[0].
        dict_a = {row[0]: row[1:] for row in reader if row}

    with open(f2, 'r', newline='') as infile:
        reader = csv.reader(infile)
        header_b = next(reader)
        # A key may occur several times in the second file, so keep a list
        # of rows per key.
        dict_b = collections.defaultdict(list)
        for row in reader:
            if row:
                dict_b[row[0]].append(row[1:])

    with open('newfile.txt', 'w', newline='') as newfile:
        w = csv.writer(newfile)
        w.writerow(header_a + header_b[1:])

        for m, left in dict_a.items():
            # .get() does not insert into the defaultdict; the [[]] default
            # yields exactly one row with nothing appended from f2.
            for n in dict_b.get(m, [[]]):
                w.writerow([m] + left + n)


if __name__ == "__main__":
    # sys.argv[0] is the path of this script; the two input files are the
    # first and second command-line arguments.
    if len(sys.argv) != 3:
        sys.exit("usage: {} FILE1 FILE2".format(sys.argv[0]))
    c_merge(sys.argv[1], sys.argv[2])
    # Echo the joined rows; the context manager closes the file afterwards.
    with open('newfile.txt', 'r', newline='') as result:
        for x in csv.reader(result):
            print(x)



1 Answers1

0

Hi, Welcome to StackOverflow!

Both of these work:

# Full outer join: visit every key that occurs in either file.
# Placeholders keep each row as wide as the header when a key is missing
# from one side; without them file-2 values would slide under file-1 headers.
pad_a = [''] * (len(header_a) - 1)
pad_b = [''] * (len(header_b) - 1)

# newline='' lets the csv module control line endings itself.
with open('newfile.txt', 'w', newline='') as newfile:
    w = csv.writer(newfile)
    w.writerow(header_a + header_b[1:])

    # A set union visits each key once, in arbitrary order.
    for m in set(dict_a).union(dict_b):
        left = dict_a.get(m, pad_a)
        for n in dict_b.get(m, [pad_b]):
            w.writerow([m] + left + n)

OR

# Full outer join with a deterministic row order: all keys of the first
# file in their original order, then the keys found only in the second file.
# (On Python 3, dict views cannot be concatenated with "+", and a plain
# concatenation would visit keys present in both files twice.)
keys = list(dict_a) + [k for k in dict_b if k not in dict_a]

# Placeholders keep each row as wide as the header when a key is missing
# from one side.
pad_a = [''] * (len(header_a) - 1)
pad_b = [''] * (len(header_b) - 1)

# newline='' lets the csv module control line endings itself.
with open('newfile.txt', 'w', newline='') as newfile:
    w = csv.writer(newfile)
    w.writerow(header_a + header_b[1:])

    for m in keys:
        left = dict_a.get(m, pad_a)
        for n in dict_b.get(m, [pad_b]):
            w.writerow([m] + left + n)

Complete Code:

import collections
import csv
import sys

def c_merge(f1, f2):
    """Full outer join of two CSV files on their first column.

    Both files must start with a header row. The result is written to
    'newfile.txt' in the current directory; its header is the header of
    ``f1`` followed by the non-key columns of ``f2``.

    Every key found in either file appears in the output. A key missing
    from one file gets empty strings in that file's columns, so every row
    is as wide as the header. A key that occurs several times in ``f2``
    produces one output row per occurrence; if a key is repeated in ``f1``,
    the last row with that key wins.

    Rows are emitted in the key order of ``f1``, followed by the keys that
    only occur in ``f2`` in their order of first appearance.

    Args:
        f1: Path of the first (left) CSV file.
        f2: Path of the second (right) CSV file.
    """
    # newline='' lets the csv module handle line endings itself, which keeps
    # embedded newlines in quoted fields intact and avoids blank lines in
    # the output on platforms that translate "\n".
    with open(f1, 'r', newline='') as infile:
        reader = csv.reader(infile)
        header_a = next(reader)
        # Blank lines come back as empty lists; skip them instead of
        # failing on row[0].
        dict_a = {row[0]: row[1:] for row in reader if row}

    with open(f2, 'r', newline='') as infile:
        reader = csv.reader(infile)
        header_b = next(reader)
        # A key may occur several times in the second file, so keep a list
        # of rows per key.
        dict_b = collections.defaultdict(list)
        for row in reader:
            if row:
                dict_b[row[0]].append(row[1:])

    # Placeholders for the side that has no row for a given key.
    pad_a = [''] * (len(header_a) - 1)
    pad_b = [''] * (len(header_b) - 1)

    # Ordered union of the keys. A set union would also visit each key
    # once, but in arbitrary order.
    keys = list(dict_a) + [k for k in dict_b if k not in dict_a]

    with open('newfile.txt', 'w', newline='') as newfile:
        w = csv.writer(newfile)
        w.writerow(header_a + header_b[1:])

        for m in keys:
            left = dict_a.get(m, pad_a)
            # .get() does not insert into the defaultdict; the default is a
            # single padded row so unmatched left keys are still written.
            for n in dict_b.get(m, [pad_b]):
                w.writerow([m] + left + n)

if __name__ == "__main__":
    # sys.argv[0] is the path of this script; the two input files are the
    # first and second command-line arguments.
    if len(sys.argv) != 3:
        sys.exit("usage: {} FILE1 FILE2".format(sys.argv[0]))
    c_merge(sys.argv[1], sys.argv[2])
    # Echo the joined rows; the context manager closes the file afterwards.
    with open('newfile.txt', 'r', newline='') as result:
        for x in csv.reader(result):
            print(x)
Jiya
  • 745
  • 8
  • 19