4

I need to make my script usable in UNIX command pipelines (reading from stdin and writing to stdout), so that I can, for example, process two input files at once with it. The script works perfectly well with command-line arguments:

newlist = []
def f1()
 .... 
  return places 
  return persons
  return unknown

def f2(input_file):
  """Rewrite class and id attributes on every line of *input_file*.

  Each line is classified by the name that precedes '</dfn>':
  a name from ``places`` turns '"person"' into '"place"', a name from
  ``unknown`` turns it into '"undefined"', and a line that carries
  'class="person"' without a '<dfn' tag loses that class attribute.
  Independently of the class rule, every 'id="' is prefixed with the
  volume id taken from ``sys.argv[3]``.

  ``places``, ``unknown`` and ``newlist`` are module-level names; the
  first two are presumably the name collections built by the extraction
  step -- confirm against the full script.

  Args:
    input_file: iterable of byte-string lines (each one is decoded as
      UTF-8 for the name comparisons).

  Returns:
    The module-level ``newlist``, with the rewritten lines appended.
  """
  volume_id = sys.argv[3]

  # Iterate over the parameter itself; the name used here before was
  # never defined inside this function.
  for line in input_file:
    # Decode once per line instead of once per condition.
    text = line.decode('utf-8')

    if any(place+'</dfn>' in text for place in places):
      line = line.replace('"person"', '"place"')
    elif any(unk+'</dfn>' in text for unk in unknown):
      line = line.replace('"person"', '"undefined"')
    elif 'class="person"' in text and '<dfn' not in line:
      line = line.replace('class="person"', '')

    # The id prefix applies whichever class rule matched; replace() is a
    # no-op on lines that have no id attribute.
    line = line.replace('id="', 'id="'+volume_id)

    newlist.append(line)

  return newlist

def main():
  if len(sys.argv) < 4:
    print 'usage: ./myscript.py [file_in... file_out... volume_id]'
    sys.exit(1)

  else:

   filename = sys.argv[1]
   filename_out = sys.argv[2]

   tree = etree.parse(filename)
   extract(tree)

   input_file = open(filename, 'rU')
   change_class(input_file)

   file_new = open(filename_out, 'w')
   for x in newlist:

   if '\n' in x:                   
      x = x.replace('\n', '')                
      print>>file_new, x

When I tried to add stdin/stdout support, I first had a problem with reading the same input file twice, and for this reason I made some changes so that it would actually be opened only once. I modified the following:

def f2(input_data) #instead of input_file

and I modified main():

        filename = sys.argv[1]
        filename_out = sys.argv[2]

        # A filename of '-' selects standard input instead of a named file.
        if filename == '-':
           input_file = sys.stdin
           # NOTE: sys.stdin is already an open file object, while io.open()
           # expects a path or a file descriptor; this call raises the
           # "TypeError: invalid file" reported below.
           input_file_open = io.open(input_file, encoding = 'utf-8', mode = 'rb')

           input_data = input_file_open
        else:
            input_file = open(filename, 'rU')
            input_data = input_file

        # A filename of '-' selects standard output instead of a named file.
        if filename_out == '-':
            filename_out = sys.stdout
            file_new = filename_out
        else:
            file_new = open(filename_out, 'w')

        # NOTE(review): the same stream is handed to etree.parse() here and
        # to change_class() below; presumably the second reader finds it
        # already exhausted, and stdin cannot be rewound -- confirm.
        tree = etree.parse(input_data)
        extract(tree)

        change_class(input_data)

        for x in newlist:

            # Strip embedded newlines; print adds one after each line.
            if '\n' in x:                   
               x = x.replace('\n', '')                
            print>>file_new, x

I run my script like this:

./myscript.py - - volumeid < inputfile > outputfile

Then I got this error message:

input_file_open = io.open(input_file, encoding = 'utf-8', mode = 'rb')
TypeError: invalid file: <open file '<stdin>', mode 'r' at 0x7f9a2d5ab150>
user3241376
  • 407
  • 7
  • 20
  • 2
    `sys.stdin` is already an open file handle, not a file name. – chepner Feb 21 '14 at 19:09
  • I first tried not opening it as a file a second time, but then I got encoding error messages; here is the topic about it: http://stackoverflow.com/questions/21911323/stdin-stdout-python-how-to-reuse-the-same-input-file-twice – user3241376 Feb 21 '14 at 19:37

0 Answers0