3

I would like to group by year and city, accumulating the dates and pts of each group into lists, as shown in the output below. I am new to Python and have no idea how to go about it. Would you have any idea, please? I extracted the lists list_year and list_city, but I am not sure whether they are useful. Thank you.

# Input: a flat list of records, each carrying a city, a year, a date and a
# pts value.
list_pts = [
    {'city': 'Madrid', 'year': '2017', 'date': '05/07/2017', 'pts': 7},
    {'city': 'Madrid', 'year': '2017', 'date': '14/11/2017', 'pts': 5},
    {'city': 'Londres', 'year': '2018', 'date': '25/02/2018', 'pts': 5},
    {'city': 'Paris', 'year': '2019', 'date': '17/04/2019', 'pts': 4},
    {'city': 'Londres', 'year': '2019', 'date': '15/06/2019', 'pts': 8},
    {'city': 'Paris', 'year': '2019', 'date': '21/08/2019', 'pts': 8},
    {'city': 'Londres', 'year': '2019', 'date': '04/12/2019', 'pts': 2},
]

list_year = ['2017', '2018', '2019']
list_city = ['Paris', 'Madrid', 'Londres']

# Desired result: one record per (year, city) pair, ordered by year and then
# city, with the dates and pts of that pair collected into parallel lists in
# the order they appear in list_pts.
output = [
    {'year': '2017', 'city': 'Madrid', 'date': ['05/07/2017', '14/11/2017'], 'pts': [7, 5]},
    {'year': '2018', 'city': 'Londres', 'date': ['25/02/2018'], 'pts': [5]},
    {'year': '2019', 'city': 'Londres', 'date': ['15/06/2019', '04/12/2019'], 'pts': [8, 2]},
    {'year': '2019', 'city': 'Paris', 'date': ['17/04/2019', '21/08/2019'], 'pts': [4, 8]},
]
Maxime Lara
  • 83
  • 1
  • 7

4 Answers

3

You can use groupby() to group dicts in combination with itemgetter():

from itertools import groupby
from operator import itemgetter

list_pts = [
    {'city': 'Madrid', 'year': '2017', 'date': '05/07/2017', 'pts': 7},
    {'city': 'Madrid', 'year': '2017', 'date': '14/11/2017', 'pts': 5},
    {'city': 'Londres', 'year': '2018', 'date': '25/02/2018', 'pts': 5},
    {'city': 'Paris', 'year': '2019', 'date': '17/04/2019', 'pts': 4},
    {'city': 'Londres', 'year': '2019', 'date': '15/06/2019', 'pts': 8},
    {'city': 'Paris', 'year': '2019', 'date': '21/08/2019', 'pts': 8},
    {'city': 'Londres', 'year': '2019', 'date': '04/12/2019', 'pts': 2},
]

# Grouping key: year first, then city, so that the groups come out in the
# year/city order requested in the question.
year_city_getter = itemgetter('year', 'city')
# Payload extractor: the two values accumulated per group.
date_pts_getter = itemgetter('date', 'pts')

result = []
# groupby() only merges *adjacent* items with equal keys, so the records are
# sorted by the same key first. sorted() is stable, which keeps the dates and
# pts of each group in their original relative order.
for (year, city), objs in groupby(sorted(list_pts, key=year_city_getter),
                                  year_city_getter):
    # Transpose [(date, pts), ...] into (date, ...), (pts, ...).
    dates, ptss = zip(*map(date_pts_getter, objs))
    result.append({
        'year': year,
        'city': city,
        'date': list(dates),
        'pts': list(ptss),
    })
Olvin Roght
  • 7,677
  • 2
  • 16
  • 35
1

One approach, based on the use of collections.defaultdict for grouping the values, is to do the following:

from collections import defaultdict
from operator import itemgetter
import pprint

list_pts = [
    {'city': 'Madrid', 'year': '2017', 'date': '05/07/2017', 'pts': 7},
    {'city': 'Madrid', 'year': '2017', 'date': '14/11/2017', 'pts': 5},
    {'city': 'Londres', 'year': '2018', 'date': '25/02/2018', 'pts': 5},
    {'city': 'Paris', 'year': '2019', 'date': '17/04/2019', 'pts': 4},
    {'city': 'Londres', 'year': '2019', 'date': '15/06/2019', 'pts': 8},
    {'city': 'Paris', 'year': '2019', 'date': '21/08/2019', 'pts': 8},
    {'city': 'Londres', 'year': '2019', 'date': '04/12/2019', 'pts': 2},
]

# Bucket the (date, pts) pair of every record under its (city, year) key.
# A defaultdict creates the empty bucket the first time a key is seen.
res = defaultdict(list)
for row in list_pts:
    res[row["city"], row["year"]].append((row["date"], row["pts"]))

# Build one output record per bucket, splitting the stored pairs back into
# two parallel lists. Buckets are visited in the order their keys were first
# inserted.
result = [
    {
        "city": city,
        "year": year,
        "dates": [date for date, _ in pairs],
        "pts": [pts for _, pts in pairs],
    }
    for (city, year), pairs in res.items()
]

# pprint lays the nested structure out over several lines.
pprint.pprint(result)

Output

[{'city': 'Madrid',
  'dates': ['05/07/2017', '14/11/2017'],
  'pts': [7, 5],
  'year': '2017'},
 {'city': 'Londres', 'dates': ['25/02/2018'], 'pts': [5], 'year': '2018'},
 {'city': 'Paris',
  'dates': ['17/04/2019', '21/08/2019'],
  'pts': [4, 8],
  'year': '2019'},
 {'city': 'Londres',
  'dates': ['15/06/2019', '04/12/2019'],
  'pts': [8, 2],
  'year': '2019'}]
Dani Mesejo
  • 61,499
  • 6
  • 49
  • 76
1
list_pts = [
  {'city': 'Madrid', 'year': '2017', 'date': '05/07/2017', 'pts': 7},
  {'city': 'Madrid', 'year': '2017', 'date': '14/11/2017', 'pts': 5},
  {'city': 'Londres', 'year': '2018', 'date': '25/02/2018', 'pts': 5},
  {'city': 'Paris', 'year': '2019', 'date': '17/04/2019', 'pts': 4},
  {'city': 'Londres', 'year': '2019', 'date': '15/06/2019', 'pts': 8},
  {'city': 'Paris', 'year': '2019', 'date': '21/08/2019', 'pts': 8},
  {'city': 'Londres', 'year': '2019', 'date': '04/12/2019', 'pts': 2},
]

# Map each (year, city) tuple to a dict of lists, one list per remaining
# field of the records that share that key.
group_by = {}
for record in list_pts:
  bucket = group_by.setdefault((record['year'], record['city']), {})
  for field, value in record.items():
    if field in ('year', 'city'):
      # The key fields identify the group; they are not accumulated.
      continue
    bucket.setdefault(field, []).append(value)

# Flatten each (key, lists) entry back into a single record. The dict union
# operator `|` needs Python 3.9+.
output = [
  {'year': key[0], 'city': key[1]} | collected
  for key, collected in group_by.items()
]

For Python prior to 3.9, see this answer about dictionary mergers.

It’s a matter of using the (year, city) pair as a key (which can be done with tuples in Python) and collecting all other items into lists. The transformation from group_by into output is just a cosmetic step.

Andrej Podzimek
  • 2,409
  • 9
  • 12
0

You could try out the convtools library, which provides lots of data processing primitives and generates ad hoc Python code under the hood.

from convtools import conversion as c

# Input records: each dict carries a city, a year, a date and a pts value.
list_pts = [
    {"city": "Madrid", "year": "2017", "date": "05/07/2017", "pts": 7},
    {"city": "Madrid", "year": "2017", "date": "14/11/2017", "pts": 5},
    {"city": "Londres", "year": "2018", "date": "25/02/2018", "pts": 5},
    {"city": "Paris", "year": "2019", "date": "17/04/2019", "pts": 4},
    {"city": "Londres", "year": "2019", "date": "15/06/2019", "pts": 8},
    {"city": "Paris", "year": "2019", "date": "21/08/2019", "pts": 8},
    {"city": "Londres", "year": "2019", "date": "04/12/2019", "pts": 2},
]


# Build the converter once and keep it in a variable for reuse, because
# gen_converter triggers code generation.
# The conversion groups the input by the "year" and "city" items and emits
# one dict per group, shaped as described by the mapping passed to aggregate.
converter = (
    c.group_by(c.item("year"), c.item("city"))
    .aggregate(
        {
            # The grouping fields are copied through as-is.
            "year": c.item("year"),
            "city": c.item("city"),
            # The remaining fields are gathered into one list per group
            # (see the expected result asserted below).
            "date": c.ReduceFuncs.Array(c.item("date")),
            "pts": c.ReduceFuncs.Array(c.item("pts")),
            # to collect unique dates if needed:
            # "unique_dates": c.ReduceFuncs.ArrayDistinct(c.item("date")),
        }
    )
    .gen_converter()
)

# Expected result: one record per (year, city) pair. In this expected list the
# groups appear in the order their pair first occurs in list_pts, and each
# list keeps the order of the input records.
assert converter(list_pts) == [
    {
        "year": "2017",
        "city": "Madrid",
        "date": ["05/07/2017", "14/11/2017"],
        "pts": [7, 5],
    },
    {"year": "2018", "city": "Londres", "date": ["25/02/2018"], "pts": [5]},
    {
        "year": "2019",
        "city": "Paris",
        "date": ["17/04/2019", "21/08/2019"],
        "pts": [4, 8],
    },
    {
        "year": "2019",
        "city": "Londres",
        "date": ["15/06/2019", "04/12/2019"],
        "pts": [8, 2],
    },
]

westandskif
  • 972
  • 6
  • 9