This is going to be a long question, I hope you guys have patience.
I am writing a program that checks if the syntax for a molecular formula is correct.
I have a BNF-syntax:
<formel>::= <mol> \n
<mol> ::= <group> | <group><mol>
<group> ::= <atom> |<atom><num> | (<mol>) <num>
<atom> ::= <LETTER> | <LETTER><letter>
<LETTER>::= A | B | C | ... | Z
<letter>::= a | b | c | ... | z
<num> ::= 2 | 3 | 4 | ...
and this is my code:
from linkedQFile import LinkedQ
import string
import sys
ATOMER = ["H","He","Li","Be","B","C","N","O","F","Ne","Na","Mg","Al","Si","P","S","Cl","Ar"]
class FormelError(Exception):
pass
class Gruppfel(Exception):
pass
q = LinkedQ()
formel= "(Cl)2)3"
for symbol in formel:
q.put(symbol)
def readNum():
"""Reads digits larger than 1. Raises exception if condition is not fulfilled."""
try:
if int(q.peek()) >= 2:
print(q.peek())
q.get()
return
else:
q.get()
print("Too small digit at the end of row: "+getRest())
sys.exit()
except (ValueError,TypeError):
raise FormelError("Not a number.")
def readletter():
"""Reads lowercase letters and returns them."""
if q.peek() in string.ascii_lowercase:
print(q.peek())
return q.get()
else:
raise FormelError("Expected lowercase letter.")
def readLetter():
"""Reads capital letters and returns them."""
if q.peek() in string.ascii_uppercase:
print(q.peek())
return q.get()
else:
raise FormelError("Expected capital letter.")
def readAtom():
"""Reads atoms on the form X and Xx. Raises Exception if the format for an atom is not fulfilled or if the atom does not exist."""
X = ""
try:
X += readLetter()
except FormelError:
print("Missing capital letter at end of row: "+getRest())
sys.exit()
return
try:
x = readletter()
atom = X+x
except (FormelError, TypeError):
atom = X
if atom in ATOMER:
return
else:
raise FormelError("Unknown atom.")
def readGroup():
if q.peek() in string.ascii_uppercase or q.peek() in string.ascii_lowercase:
try:
readAtom()
except:
print("Unknown atom at end of row: "+getRest())
sys.exit()
try:
while True:
readNum()
except FormelError:
pass
return
if q.peek() == "(":
print(q.peek())
q.get()
try:
readMol()
except FormelError:
pass
if q.peek() == ")":
print(q.peek())
q.get()
else:
print("Missing right parenthesis at end of row: "+ getRest())
sys.exit()
return
digitfound = False
try:
while True:
readNum()
digitfound = True
except:
if digitfound:
return
print("Missing digit at end of row: "+getRest())
sys.exit()
return
raise FormelError("Incorrect start of group")
def readMol():
try:
readGroup()
except FormelError:
print("Incorrect start of group at end of row: "+getRest())
raise FormelError
if q.peek() == None:
return
if not q.peek() == ")":
try:
readMol()
except FormelError:
pass
def readFormel():
try:
readMol()
except:
return
print("Correct formula")
def getRest():
rest = ""
while not q.isEmpty():
rest += q.get()
return rest
readFormel()
Now the code is supposed to accept some given formulas and provide an error code for some given incorrect formulas. Let's look at these given formulas:
Correct: Si(C3(COOH)2)4(H2O)7
Incorrect: H2O)Fe
(Cl)2)3
The program accepts the correct formula, but unfortunately also the incorrect ones. The reason for this is that the if statement in:
if not q.peek() == ")":
try:
readMol()
except FormelError:
pass
makes it so that parentheses unbalanced to the right (with one or more parenthesis too much on the right side) slip through the code, instead of being detected as incorrect starts of a "group". How can I fix this, while still having Si(C3(COOH)2)4(H2O)7 being accepted as syntactically correct?
Thank you for your patience :)