Below is a general currency parser that doesn't rely on the babel library commonly used for this kind of problem.
import numpy as np
import re
def currency_parser(cur_str):
    """Parse a currency string into an amount rounded to two decimals.

    Both "1,234.56" and "1.234,56" conventions are accepted. After all
    characters other than digits, ',', '.' and '-' are dropped, a ',' or
    '.' found within the last three characters is taken as the decimal
    mark; any ',' or '.' before that is taken as a thousands separator and
    removed. Consequently a separator followed by exactly three digits
    ("43,324") is read as a thousands separator, not a decimal mark.

    Args:
        cur_str: Text containing a single amount, e.g. "EUR433,432.53".

    Returns:
        The parsed amount as produced by ``np.round(value, 2)``.

    Raises:
        ValueError: If no number can be parsed from ``cur_str``.
    """
    # Some locales write negatives with U+2212 (MINUS SIGN) instead of the
    # ASCII hyphen-minus; map it first so the filter below keeps the sign.
    cleaned = cur_str.replace('\u2212', '-')
    # Drop currency codes/symbols and whitespace (e.g. "EUR", "£", " ").
    cleaned = re.sub(r"[^-0-9.,]", '', cleaned)
    # Anything before the last three characters can only hold thousands
    # separators; the tail may hold the decimal mark and is kept as-is.
    head, tail = cleaned[:-3], cleaned[-3:]
    cleaned = re.sub(r"[.,]", '', head) + tail
    # float() only understands '.', so convert a decimal comma in the tail.
    if ',' in tail and '.' not in tail:
        cleaned = cleaned.replace(',', '.')
    try:
        num = float(cleaned)
    except ValueError:
        # Report the caller's original text rather than the stripped form.
        raise ValueError(
            "could not parse a currency amount from {!r}".format(cur_str)
        ) from None
    return np.round(num, 2)
Here is a pytest script that tests the function:
import numpy as np
import pytest
import re
def currency_parser(cur_str):
    """Parse a currency string into an amount rounded to two decimals.

    Both "1,234.56" and "1.234,56" conventions are accepted. After all
    characters other than digits, ',', '.' and '-' are dropped, a ',' or
    '.' found within the last three characters is taken as the decimal
    mark; any ',' or '.' before that is taken as a thousands separator and
    removed. Consequently a separator followed by exactly three digits
    ("43,324") is read as a thousands separator, not a decimal mark.

    Args:
        cur_str: Text containing a single amount, e.g. "EUR433,432.53".

    Returns:
        The parsed amount as produced by ``np.round(value, 2)``.

    Raises:
        ValueError: If no number can be parsed from ``cur_str``.
    """
    # Some locales write negatives with U+2212 (MINUS SIGN) instead of the
    # ASCII hyphen-minus; map it first so the filter below keeps the sign.
    cleaned = cur_str.replace('\u2212', '-')
    # Drop currency codes/symbols and whitespace (e.g. "EUR", "£", " ").
    cleaned = re.sub(r"[^-0-9.,]", '', cleaned)
    # Anything before the last three characters can only hold thousands
    # separators; the tail may hold the decimal mark and is kept as-is.
    head, tail = cleaned[:-3], cleaned[-3:]
    cleaned = re.sub(r"[.,]", '', head) + tail
    # float() only understands '.', so convert a decimal comma in the tail.
    if ',' in tail and '.' not in tail:
        cleaned = cleaned.replace(',', '.')
    try:
        num = float(cleaned)
    except ValueError:
        # Report the caller's original text rather than the stripped form.
        raise ValueError(
            "could not parse a currency amount from {!r}".format(cur_str)
        ) from None
    return np.round(num, 2)
# (input text, expected amount) pairs fed to test_currency_parse.
_CURRENCY_CASES = [
    ('.3', 0.30),
    ('1', 1.00),
    ('1.3', 1.30),
    ('43,324', 43324.00),
    ('3,424', 3424.00),
    ('-0.00', 0.00),
    ('EUR433,432.53', 433432.53),
    ('25.675,26 EUR', 25675.26),
    ('2.447,93 EUR', 2447.93),
    ('-540,89EUR', -540.89),
    ('67.6 EUR', 67.60),
    ('30.998,63 CHF', 30998.63),
    ('0,00 CHF', 0.00),
    ('159.750,00 DKK', 159750.00),
    ('£ 2.237,85', 2237.85),
    ('£ 2,237.85', 2237.85),
    ('-1.876,85 SEK', -1876.85),
    ('59294325.3', 59294325.30),
    ('8,53 NOK', 8.53),
    ('0,09 NOK', 0.09),
    ('-.9 CZK', -0.9),
    ('35.255,40 PLN', 35255.40),
    ('-PLN123.456,78', -123456.78),
    ('US$123.456,79', 123456.79),
    ('-PLN123.456,78', -123456.78),
    ('PLN123.456,79', 123456.79),
    ('IDR123.457', 123457),
    ('JP¥123.457', 123457),
    ('-JP\xc2\xa5123.457', -123457),
    ('CN\xc2\xa5123.456,79', 123456.79),
    ('-CN\xc2\xa5123.456,78', -123456.78),
]


@pytest.mark.parametrize('currency_str, expected', _CURRENCY_CASES)
def test_currency_parse(currency_str, expected):
    """Check that each sample string parses to its expected amount."""
    parsed = currency_parser(currency_str)
    assert parsed == expected