Try the following
Code
import csv
def convert_csv(filenm, out_filenm='out.txt'):
    """Convert a frame-oriented text dump into a space-delimited table.

    The input is read line by line.  Lines starting with ``#`` are metadata
    of the form ``# name = value``; a ``Frame_Number`` entry starts a new
    frame and every other metadata line is ignored.  Each remaining
    non-blank line is stored as a sample of the most recent frame.  The
    output has one ``Frame_<number>`` column per frame and one row per
    sample index; frames shorter than the longest one are padded with
    ``'NaN'`` so that no samples are dropped.

    Args:
        filenm: Path of the text file to read.
        out_filenm: Path of the table to write.  Defaults to ``'out.txt'``.

    Raises:
        ValueError: If a sample line appears before the first
            ``Frame_Number`` line, because it cannot be assigned to a frame.
    """
    frames = []      # frame numbers, in order of appearance
    frame_vals = []  # one list of sample strings per frame

    # Parse the whole input first so that a malformed file does not leave a
    # truncated output file behind.
    with open(filenm, 'r') as fin:
        for line_no, raw in enumerate(fin, start=1):
            line = raw.strip()
            if not line:
                continue
            if line.startswith('#'):
                # partition() tolerates metadata with no '=' or several '='.
                field, _, value = line[1:].partition('=')
                if field.strip() == 'Frame_Number':
                    frames.append(value.strip())
                    frame_vals.append([])
            elif frame_vals:
                frame_vals[-1].append(line)
            else:
                raise ValueError(
                    f"{filenm}:{line_no}: sample {line!r} appears before "
                    "any Frame_Number line"
                )

    # Pad every frame to the same length; zip() would otherwise silently
    # truncate all columns to the shortest frame.
    depth = max((len(vals) for vals in frame_vals), default=0)
    columns = [vals + ['NaN'] * (depth - len(vals)) for vals in frame_vals]

    # newline='' is required by the csv module; without it the writer's
    # '\r\n' terminator becomes '\r\r\n' on Windows.
    with open(out_filenm, 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile, delimiter=' ',
                                quotechar='|', quoting=csv.QUOTE_MINIMAL)
        # Header row
        csv_writer.writerow(['Frame_' + str(v) for v in frames])
        # Transpose so that each frame becomes a column
        csv_writer.writerows(zip(*columns))
# Example run: convert the sample input testd.txt (listed below); the
# resulting table is written to out.txt.
convert_csv('testd.txt')
Test
Input: testd.txt
# Samples_per_Frame = 8192
# Chirp_Time_sec = 0.000133
# Pulse_Repetition_Time_sec = 0.00050355
# Frame_Period_sec = 0.2
# Frame_Number = 50
0.50061053
0.49938953
0.49426132
0.48962152
0.48791212
0.48937732
0.49523813
0.49914533
0.50158733
0.49914533
# Frame_Number = 51
0.50061053
0.49938953
0.49426132
0.48962152
0.48791212
0.48937732
0.49523813
0.49914533
0.50158733
0.49914533
# Frame_Number = 52
0.50793654
0.50647134
0.49841273
0.48937732
0.48644692
0.49035412
0.49768013
0.50647134
0.51282054
0.50940174
# Frame_Number = 53
0.49670333
0.49181932
0.4840049
0.48547012
0.48791212
0.49230772
0.49768013
0.49816853
0.49181932
0.48595852
# Frame_Number = 54
0.49352872
0.49597073
0.49987793
0.50354093
0.50402933
0.50036633
0.49841273
0.49743593
0.49865693
0.50012213
Output: out.txt
Frame_50 Frame_51 Frame_52 Frame_53 Frame_54
0.50061053 0.50061053 0.50793654 0.49670333 0.49352872
0.49938953 0.49938953 0.50647134 0.49181932 0.49597073
0.49426132 0.49426132 0.49841273 0.4840049 0.49987793
0.48962152 0.48962152 0.48937732 0.48547012 0.50354093
0.48791212 0.48791212 0.48644692 0.48791212 0.50402933
0.48937732 0.48937732 0.49035412 0.49230772 0.50036633
0.49523813 0.49523813 0.49768013 0.49768013 0.49841273
0.49914533 0.49914533 0.50647134 0.49816853 0.49743593
0.50158733 0.50158733 0.51282054 0.49181932 0.49865693
0.49914533 0.49914533 0.50940174 0.48595852 0.50012213
Regex Version
Changes
- Uses a regex to identify metadata lines
- Use dictionary to store field names and values
- field names are lines that begin with '#'
- field values are lines without '#'
Code
import re
import csv
def convert_csv(filenm, out_filenm='out.txt'):
    """Convert a frame-oriented text dump into a space-delimited table.

    Lines matching ``# <name> = <number>`` are treated as metadata and start
    a new field; every following non-blank line that is not metadata is
    appended to that field as a value.  Fields that never receive a value
    are dropped, the remaining ones are padded with ``'NaN'`` to a common
    length, and the result is written with one column per field.  A column
    is named after the part of the metadata name before its first ``_``
    plus the number, e.g. ``# Frame_Number = 50`` becomes ``Frame_50``.
    If the same metadata line occurs more than once, the values after each
    occurrence are collected in the same column.

    Requires Python 3.6+ (f-strings).

    Args:
        filenm: Path of the text file to read.
        out_filenm: Path of the table to write.  Defaults to ``'out.txt'``.

    Raises:
        ValueError: If a data line appears before the first metadata line,
            because it cannot be assigned to a field.
    """
    # Regex pattern: '# ' followed by non-digits, ' = ', then a number
    # (integer or float).
    meta_data_pattern = re.compile(r'# (\D+) = (\d+(?:\.\d+)?)')

    # Maps each metadata line to the list of data lines that follow it.
    all_data = {}
    last_key = None  # metadata line that currently receives the data

    # Parse the whole input first so that a malformed file does not leave a
    # truncated output file behind.
    with open(filenm, 'r') as fin:
        for line_no, raw in enumerate(fin, start=1):
            line = raw.rstrip()  # remove the end-of-line terminator
            if not line:
                continue
            if meta_data_pattern.match(line):
                all_data.setdefault(line, [])
                # Use the matched line itself: the last key in the dict is
                # a different field when this metadata line is a repeat.
                last_key = line
            elif last_key is None:
                raise ValueError(
                    f"{filenm}:{line_no}: data {line!r} appears before "
                    "any metadata line"
                )
            else:
                all_data[last_key].append(line)

    # Remove fields with no data
    all_data = {k: v for k, v in all_data.items() if v}

    # Ensure all fields have the same length (default=0 covers "no data").
    max_len = max((len(v) for v in all_data.values()), default=0)
    columns = [v + ['NaN'] * (max_len - len(v)) for v in all_data.values()]

    # Build the column names from the metadata lines.
    fnames = []
    for field in all_data:
        m = meta_data_pattern.match(field)
        fnames.append(f"{m.group(1).split('_')[0]}_{m.group(2)}")

    # newline='' avoids blank lines between rows on Windows, see
    # https://stackoverflow.com/questions/3348460/csv-file-written-with-python-has-blank-lines-between-each-row
    with open(out_filenm, 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile, delimiter=' ',
                                quotechar='"', quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerow(fnames)  # Write header
        # Transpose so that each field becomes a column
        csv_writer.writerows(zip(*columns))
# Example run: convert test.txt; the resulting table is written to out.txt.
convert_csv('test.txt')