Here is some code that will both parse and generate Roman numerals: http://pyparsing.wikispaces.com/file/view/romanNumerals.py/30112817/romanNumerals.py
Convert your input Roman numerals to integers (using `romanNumeral.parseString`
from the linked code), compute the min and max values to get the range, then use `makeRomanNumeral`
from the linked code to convert the range endpoints back to Roman numerals.
Or, if you don't have that many different Roman values to deal with, just define a dict that maps the Roman numerals to their integer values, and you can skip the parser entirely. Something like this:
# Lookup table from Roman numeral string to its integer value, covering
# I (1) through XX (20).
romanLookup = {
    'I': 1, 'II': 2, 'III': 3, 'IV': 4, 'V': 5,
    'VI': 6, 'VII': 7, 'VIII': 8, 'IX': 9, 'X': 10,
    'XI': 11, 'XII': 12, 'XIII': 13, 'XIV': 14, 'XV': 15,
    'XVI': 16, 'XVII': 17, 'XVIII': 18, 'XIX': 19, 'XX': 20,
}
Here is your program:
from itertools import groupby
from collections import namedtuple
# Record for one input row: country, protein and mg.
DataRec = namedtuple("DataRec", "country protein mg")

# Fixed-width sample data.  The ruler below shows 0-based character offsets
# (the leading '#' stands for offset 0) and the X marks sit on the separator
# columns 2, 16, 24 and 28.  Every row must keep its fields aligned to those
# columns, because the rows are cut by position, not by whitespace.
#123456789012345678901234567890
# X             X       X   X
data = """\
 1 China         42.8    II
 2 China         42.3    III
 3 China         41.9    III
 4 United States 40.0    IV
 5 China         43.2    I
 6 China         42.5    IV
 7 China         42.9    III
 8 China         45.9    VI
 9 Japan         42.3    VI
10 United States 40.9    III""".splitlines()
# Sentinel converter: a field paired with `suppress` is left out of the result.
suppress = object()


def splitAt(cols, fns):
    """Build a function that cuts a fixed-width string into converted fields.

    cols -- offsets of the separator characters.  Field i runs from just
            after the previous separator (offset 0 for the first field) up
            to, but not including, cols[i]; the character at cols[i] itself
            is skipped.
    fns  -- one converter per field, applied to that field's substring.
            Use `suppress` to drop a field.  Fields and converters are
            paired with zip(), so any surplus on either side is ignored.

    Returns a callable taking a string and returning the list of converted
    values for the fields that were not suppressed.
    """
    last = 0
    slices = []
    for c in cols:
        slices.append(slice(last, c))
        last = c + 1  # step over the separator column

    def split(s):
        return [fn(s[sl]) for sl, fn in zip(slices, fns)
                if fn is not suppress]

    return split
# Cut each row at separator offsets 2, 16, 24 and 28.  The first field is
# suppressed, the second and fourth are whitespace-stripped strings, and the
# third is converted to float.
splitter = splitAt(
    [2, 16, 24, 28],
    [suppress, str.strip, float, str.strip],
)
recs = [DataRec._make(splitter(row)) for row in data]
# Map each Roman numeral from I through XX to its integer value (1..20).
# The numerals are listed in ascending order, so a numeral's 1-based position
# in the list is its value.
romanLookup = dict(
    (numeral, value)
    for value, numeral in enumerate(
        "I II III IV V VI VII VIII IX X "
        "XI XII XIII XIV XV XVI XVII XVIII XIX XX".split(), 1)
)
# sort and group data by country; groupby only merges adjacent records with
# equal keys, so the list has to be sorted on the same key first
recs.sort(key=lambda x: x.country)
grouped = groupby(recs, key=lambda x: x.country)

# for each country group, compute average protein and min/max mg
for country, countryRecs in grouped:
    # materialize the group: it is iterated more than once below
    datatuples = list(countryRecs)
    mg_vals = [r.mg for r in datatuples]
    ave = sum(r.protein for r in datatuples) / len(datatuples)
    # rank the Roman numerals by their integer value, not alphabetically
    min_mg = min(mg_vals, key=romanLookup.__getitem__)
    max_mg = max(mg_vals, key=romanLookup.__getitem__)
    # print() with one pre-formatted string works on Python 2 and Python 3
    if min_mg == max_mg:
        print("%s, %.2f, %s" % (country, ave, min_mg))
    else:
        print("%s, %.2f, %s-%s" % (country, ave, min_mg, max_mg))
Prints:
China, 43.07, I-VI
Japan, 42.30, VI
United States, 40.45, III-IV