0

I have an script to merge two csv using panda library merge function .I want to join on two column name so that i joined on multiple keys . please suggest me some changes so that i can join on multiple keys. When i run the code i got an error of

Traceback (most recent call last): File "mergercsv.py", line 23, in joinCSV merged = table1.merge(table2, how=join_type,on=['key1','key2']) File "C:\Anaconda3\envs\adi\lib\site-packages\pandas\core\frame.py", line 7284, in merge return merge( File "C:\Anaconda3\envs\adi\lib\site-packages\pandas\core\reshape\merge.py", line 73, in merge op = _MergeOperation( File "C:\Anaconda3\envs\adi\lib\site-packages\pandas\core\reshape\merge.py", line 627, in init ) = self._get_merge_keys() File "C:\Anaconda3\envs\adi\lib\site-packages\pandas\core\reshape\merge.py", line 983, in _get_merge_keys right_keys.append(right._get_label_or_level_values(rk)) File "C:\Anaconda3\envs\adi\lib\site-packages\pandas\core\generic.py", line 1692, in _get_label_or_level_values raise KeyError(key) KeyError: 'key1'

please give me some suggestion

    import pandas as pd
      def joinCSV(filename1, filename2, key1,key11,key2=None header1=0, header2=0, join_type="outer"):
     """Read in two CSV files and perform the specified type of join. """

       table1 = pd.read_csv(filename1, header=header1)
       table2 = pd.read_csv(filename2, header=header2)

        # change keys to ints if no header is presen
       if header1 == None: key1 = int(key1)
       if header2 == None: key2 = int(key2)

        try:
              # join on one or two keys, as appropriate
           if key2 == None:
             merged = table1.merge(table2, how=join_type,on=['key1','key2'])
                   
       else: 
            merged = table1.merge(table2, how=join_type, on=key1)       
    return merged

except KeyError as error:
          missing_column = error.args[0].split(' ')[-1]
          print >> sys.stderr, 'Column "%s" not found.' % (missing_column,)
          sys.exit(1)



         if __name__ == "__main__":
              import argparse

         parser = argparse.ArgumentParser(description='Join CSV files.')
         parser.add_argument('file', nargs=2, help="Two filenames")
         parser.add_argument('key1', nargs=2, help="Perform a join on this column. One key can be 
         used if the column has the same name in both files. Can be an index if the file(s) have no 
         headers.")
             parser.add_argument('key2', nargs='?',default=None, help="(Optional) Perform a join on this column in 
              the 
             second file.")
            parser.add_argument('-no1', '--noheader1', action="store_const", const=None, default=0, 
            help="First file has no header.")
            parser.add_argument('-no2', '--noheader2', action="store_const", const=None, default=0, 
            help="Second file has no header.")
            parser.add_argument('--type', default="outer", help="Type of join. Must be one of 'left', 
            'right', 'outer', or 'inner' (default).")
    
                  args = parser.parse_args()
             merged = joinCSV(args.file[0], args.file[1], args.key1[0],args.key1[1],args.key2, 
           args.noheader1, args.noheader2, args.type)
           merged.to_csv('merged.csv',index=False)

0 Answers0