I have an script to merge two csv using panda library merge function .I want to join on two column name so that i joined on multiple keys . please suggest me some changes so that i can join on multiple keys. When i run the code i got an error of
Traceback (most recent call last): File "mergercsv.py", line 23, in joinCSV merged = table1.merge(table2, how=join_type,on=['key1','key2']) File "C:\Anaconda3\envs\adi\lib\site-packages\pandas\core\frame.py", line 7284, in merge return merge( File "C:\Anaconda3\envs\adi\lib\site-packages\pandas\core\reshape\merge.py", line 73, in merge op = _MergeOperation( File "C:\Anaconda3\envs\adi\lib\site-packages\pandas\core\reshape\merge.py", line 627, in init ) = self._get_merge_keys() File "C:\Anaconda3\envs\adi\lib\site-packages\pandas\core\reshape\merge.py", line 983, in _get_merge_keys right_keys.append(right._get_label_or_level_values(rk)) File "C:\Anaconda3\envs\adi\lib\site-packages\pandas\core\generic.py", line 1692, in _get_label_or_level_values raise KeyError(key) KeyError: 'key1'
please give me some suggestion
import pandas as pd
def joinCSV(filename1, filename2, key1,key11,key2=None header1=0, header2=0, join_type="outer"):
"""Read in two CSV files and perform the specified type of join. """
table1 = pd.read_csv(filename1, header=header1)
table2 = pd.read_csv(filename2, header=header2)
# change keys to ints if no header is presen
if header1 == None: key1 = int(key1)
if header2 == None: key2 = int(key2)
try:
# join on one or two keys, as appropriate
if key2 == None:
merged = table1.merge(table2, how=join_type,on=['key1','key2'])
else:
merged = table1.merge(table2, how=join_type, on=key1)
return merged
except KeyError as error:
missing_column = error.args[0].split(' ')[-1]
print >> sys.stderr, 'Column "%s" not found.' % (missing_column,)
sys.exit(1)
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description='Join CSV files.')
parser.add_argument('file', nargs=2, help="Two filenames")
parser.add_argument('key1', nargs=2, help="Perform a join on this column. One key can be
used if the column has the same name in both files. Can be an index if the file(s) have no
headers.")
parser.add_argument('key2', nargs='?',default=None, help="(Optional) Perform a join on this column in
the
second file.")
parser.add_argument('-no1', '--noheader1', action="store_const", const=None, default=0,
help="First file has no header.")
parser.add_argument('-no2', '--noheader2', action="store_const", const=None, default=0,
help="Second file has no header.")
parser.add_argument('--type', default="outer", help="Type of join. Must be one of 'left',
'right', 'outer', or 'inner' (default).")
args = parser.parse_args()
merged = joinCSV(args.file[0], args.file[1], args.key1[0],args.key1[1],args.key2,
args.noheader1, args.noheader2, args.type)
merged.to_csv('merged.csv',index=False)