Problem Statement:
Ubuntu 18.04 with python 3.6.9
python 3.6 script 'generator.py' keeps overwriting previous output lines in 'combined_output_file.txt'
'Textgenerator.py' is the main script that imports 'generator' above which handles all open file reading (r) and writing (w+) functions.
python TextGenerator.py -t input_file_1.txt -c input_file_2.txt -o combined_output_file.txt
Contents of input_file_1.txt:
This is line 1 of the FIRST input file.
This is line 2 of the FIRST input file.
This is line 3 of the FIRST input file.
etc.Contents of input_file_2.txt:
This is line 1 of the SECOND input file.
This is line 2 of the SECOND input file.
This is line 3 of the SECOND input file.
etc.Correct output format for 'combined_output_file.txt'
This is line 1 of the FIRST input file.
This is line 1 of the SECOND input file.
1023 words generated via the gpt2 language model based on above lines ... etc.
This is line 2 of the FIRST input file.
This is line 2 of the SECOND input file.
1023 NEW words generated via the gpt2 language model based on above lines ... etc.
This is line 3 of the FIRST input file.
This is line 3 of the SECOND input file.
1023 MORE NEW words generated via the gpt2 language model based on above lines ... etc.
- Current overwritten output for 'combined_output_file.txt'
This is line 3 of the FIRST input file.
This is line 3 of the SECOND input file.
1023 words generated via the gpt2 language model based on above lines ... etc.
Attempted solutions:
Switched w+ to a+ and added '\n' Same overwritten output as above.
def write_sample_to_file(self, filename, sample):
"""Write a given sample to a file specified by the filename."""
with open(filename, 'a+', errors='surrogateescape', encoding='utf-8') as f:
f.write(sample + '\n') # added and changed w+ to a+ same output
how to write multiple lines in a file using python
Not sure if this is the correct approach. Can this be applied?
- Required open, read, write, and generation def code blocks follow:
import os
from gpt2handler import Gpt2Handler ''' for model only '''
def generate_from_files(self,
title_filename,
content_filename=None,
num_samples=1,
print_output=False,
output_file=None,
num_words=1023):
"""Read the title from a file and initial content from another file then use gpt2 to generate an article
and return it as a single string."""
with open(title_filename, 'r', errors='surrogateescape') as title_file:
for line in title_file: # added for 'input_file_1.txt'
title = line # reading to end of input_file_1 but copying over each output line 1
if content_filename:
with open(content_filename, 'r', errors='surrogateescape') as content_file:
for line in content_file: #added for 'input_file_1.txt'
initial_content = line # reading to end of input_file_1 but copying over each output line 1
else:
initial_content = ''
return self.generate(title, initial_content, num_samples, print_output, output_file, num_words)
def generate(self,
title,
initial_content=None,
num_samples=1,
print_output=False,
output_file=None,
num_words=1023):
"""Use gpt2 to generate an article based on a given title and initial content."""
if not initial_content:
initial_content = ''
samples = Gpt2Handler.get_instance().generate_as_tuple(title, initial_content, num_samples, num_words)
samples_str = [sample[0] + '\n' + sample[1] for sample in samples]
if print_output: # Print each article to the console is specified to
for sample in samples_str:
print(sample)
if output_file: # Write each of the samples to their own file if a base filename is specified
self.write_samples_to_file(output_file, samples_str)
return samples_str
def write_samples_to_file(self, filename, samples):
"""Write the given samples to a file. If there is more than one, write each to its own file."""
if len(samples) == 1:
self.write_sample_to_file(filename, samples[0])
else:
base, extension = os.path.splitext(filename)
for i in range(len(samples)):
new_filename = base + str(i) + extension
self.write_sample_to_file(new_filename, samples[i])
def write_sample_to_file(self, filename, sample):
"""Write a given sample to a file specified by the filename."""
with open(filename, 'w+', errors='surrogateescape', encoding='utf-8') as f:
f.write(sample + '\n') # added and changed w+ to a+ same output