I need to make my script usable for UNIX commands, in order to make it possible for example to process 2 input files at once with my script. This script works perfectly well with command line arguments:
newlist = []
def f1()
....
return places
return persons
return unknown
def f2(input_file):
volume_id = sys.argv[3]
for line in input_data:
if any(place+'</dfn>' in line.decode('utf-8') for place in places):
line = line.replace('"person"', '"place"')
line = line.replace('id="', 'id="'+volume_id)
elif any(unk+'</dfn>' in line.decode('utf-8') for unk in unknown):
line = line.replace('"person"', '"undefined"')
line = line.replace('id="', 'id="'+volume_id)
elif 'class="person"' in line.decode('utf-8') and '<dfn' not in line:
line = line.replace('class="person"', '')
line = line.replace('id="', 'id="'+volume_id)
elif 'id="' in line:
line = line.replace('id="', 'id="'+volume_id)
newlist.append(line)
return newlist
def main():
if len(sys.argv) < 4:
print 'usage: ./myscript.py [file_in... file_out... volume_id]'
sys.exit(1)
else:
filename = sys.argv[1]
filename_out = sys.argv[2]
tree = etree.parse(filename)
extract(tree)
input_file = open(filename, 'rU')
change_class(input_file)
file_new = open(filename_out, 'w')
for x in newlist:
if '\n' in x:
x = x.replace('\n', '')
print>>file_new, x
When I tried to add stdin stdout to it, I first had a problem with reading the same input file first, and for this reason made some chages so that it would be actually open only once. I modified the following:
def f2(input_data) #instead of input_file
and I modified main():
filename = sys.argv[1]
filename_out = sys.argv[2]
if filename == '-':
input_file = sys.stdin
else:
input_file = open(filename, 'rU')
if filename_out == '-':
filename_out = sys.stdout
file_new = filename_out
else:
file_new = open(filename_out, 'w')
input_data = input_file.read()
tree = etree.fromstring(input_data)
extract(tree)
change_class(input_data)
for x in newlist:
if '\n' in x:
x = x.replace('\n', '')
print>>file_new, x
I run the program from the command line: ./myscript.py - - volumeid < inputfile > outputfile
And now I get an encoding problem:
Traceback (most recent call last):
File "./exportXMLstd.py", line 192, in <module>
main()
File "./exportXMLstd.py", line 182, in main
change_class(input_data)
File "./exportXMLstd.py", line 135, in change_class
if any(place+'</dfn>' in line.decode('utf-8') for place in places):
File "./exportXMLstd.py", line 135, in <genexpr>
if any(place+'</dfn>' in line.decode('utf-8') for place in places):
File "/usr/lib/python2.7/encodings/utf_8.py", line 16, in decode
return codecs.utf_8_decode(input, errors, True)
UnicodeDecodeError: 'utf8' codec can't decode byte 0xe2 in position 0: unexpected end of data
What I am doing wrong?