Here's a stab at it using regular expressions.
import re
# Sample input: one free-energy value per line, in several numeric spellings.
# Every entry needs its own trailing comma; without one, Python silently joins
# two adjacent string literals into a single list element.
lines = [
    "BAR: dG = -23.98 kcal/mol",
    "BAR: dG = 23.98 kcal/mol",
    "BAR: dG = +3.98 kcal/mol",
    "BAR: dG = 10 kcal/mol",
    "BAR: dG = .1 kcal/mol",  # no digit before the point; see the second alternative below
]

# [-+]?       0 or 1 of - or +
# \d+\.?\d*   one or more digits, 0 or 1 decimal point, then any further digits
#             (matches 10, 10. and 23.98)
# \.\d+       or a decimal point that MUST be followed by digits (matches .1);
#             requiring the digits avoids picking up a stray "." elsewhere
NUMBER_RE = re.compile(r'[-+]?(?:\d+\.?\d*|\.\d+)')

numbers = []
for line in lines:
    finds = NUMBER_RE.findall(line)
    if finds:
        try:
            # Only the first number on each line is kept.
            numbers.append(float(finds[0]))
        except ValueError:
            # float() rejected the matched text; report it and keep going.
            print('Regex did not work as expected, it extracted')
            print(finds)
    else:
        print('No number found on line:')
        print(line)
EDIT
I think I misread. Is this what you want?
line = "BAR: dG = -23.98 kcal/mol"
key = "BAR: dG ="

# str.find gives the index where the key starts, or -1 when it is absent.
n_start = line.find(key)
if n_start != -1:
    # Keep only the text that follows the key.
    rest_of_line = line[n_start + len(key):]
    # split() with no argument cuts on runs of whitespace and ignores any
    # leading/trailing blanks, so the first piece is the number itself.
    first_token = rest_of_line.split()[0]
    number = float(first_token)
EDIT 2
Try this. You gotta do some work ;)
# ex:
line = "BAR: dG = -23.98 kcal/mol"


def find_dg(line):
    """Return the number that follows "BAR: dG =" in *line*, or None.

    The text after the first occurrence of the key is split on whitespace
    and its first piece is converted with float().

    Returns:
        The parsed float, or None when the key is not in the line or when
        nothing but whitespace follows the key.

    Raises:
        ValueError: if the first piece after the key is not a valid float.
    """
    key = "BAR: dG ="
    # partition() splits at the first occurrence of the key; the middle item
    # is empty when the key is absent.
    _, found, rest_of_line = line.partition(key)
    if not found:
        return None
    # split() with no argument ignores leading/trailing whitespace and cuts
    # at every run of whitespace.
    tokens = rest_of_line.split()
    if not tokens:
        # Key present but no value after it: treat it like "not found"
        # instead of failing with an IndexError.
        return None
    return float(tokens[0])
import glob
# Collect one entry per *.txt file in the current directory: the first value
# find_dg() extracts from that file, or None when no line yields one.
# The list is created once, before the loop, so the results of every file
# accumulate instead of being reset on each iteration.
numbers = []
for path in glob.iglob('*.txt'):
    number_to_add = None
    # reading a file line by line:
    # https://stackoverflow.com/questions/3277503/how-to-read-a-file-line-by-line-into-a-list
    with open(path) as file:
        # Iterating the file object reads lines lazily, so the break below
        # stops reading as soon as a number has been found.
        for line in file:
            number_to_add = find_dg(line)
            if number_to_add is not None:
                break  # exit this for loop if we find a number
    numbers.append(number_to_add)