I am using a simple regex to try and classify strings as either relating to entitlements (wages) or other.
I have tested the regex in regex101.com and it seems to work as expected but when I run the code I get some strange results
import re
wages_regx = re.compile(r" pay | payment | salary | salaries | wages? | rates? | hourly rates? | allowances? | penalties | penalty | overtime | under payments? | over payments? | entitlements? ", re.I)
def classify(string):
if wages_regx.match(string):
return 'Entitlements'
else:
return 'Other'
Tests
classify('I have an question about my wages rate.')
# Returns: Other (Should return Entitlements) should match on (wages? and rates?)
classify(' wages ')
# Returns Entitlements as expected
classify('ras wages c')
# Returns Other (Should return Entitlements)
classify(' wages c')
# Returns Entitlements as expected