2

Given:

dates = [
  datetime(2014, 10, 11), 

  datetime(2014, 10, 1), 
  datetime(2014, 10, 2),
  datetime(2014, 10, 3),

  datetime(2014, 10, 5), 
  datetime(2014, 10, 5), 
  datetime(2014, 10, 6), 

  datetime(2014, 10, 22), 
  datetime(2014, 10, 20),
  datetime(2014, 10, 21),

  datetime(2014, 10, 9), 

  datetime(2014, 10, 7),
  datetime(2014, 10, 6)
]

Expected output:

expect = [
  [datetime(2014, 10, 1), datetime(2014, 10, 3)],
  [datetime(2014, 10, 5), datetime(2014, 10, 7)],
   datetime(2014, 10, 9),
   datetime(2014, 10, 11),
  [datetime(2014, 10, 20), datetime(2014, 10, 22)]
]

Implementation in Python:

from datetime import datetime, timedelta

def parse_date_ranges(dates):
    """Collapse dates into runs of consecutive days.

    The input does not need to be sorted and may contain duplicates.
    Unlike ``list.sort()``, the input is never reordered: the function
    works on a sorted copy.

    Args:
        dates: An iterable of ``datetime`` objects (or any values that
            support ``-`` yielding a ``timedelta``).

    Returns:
        ``False`` when ``dates`` is empty or ``None``. Otherwise a list in
        ascending order in which every run of two or more consecutive days
        is a ``[first, last]`` list and every isolated day is the bare
        date itself.
    """
    if not dates:
        return False

    # sorted() builds a new list, so the caller's sequence keeps its order
    # and non-list inputs (tuples, sets, iterators) are accepted too.
    ordered = sorted(dates)
    if not ordered:
        # Truthy but empty input, e.g. an exhausted iterator.
        return False

    one_day = timedelta(days=1)
    result = []

    def close_run(first, last):
        # A run of a single day is reported as the bare date, not a pair.
        result.append([first, last] if first != last else first)

    run_start = run_end = ordered[0]
    for current in ordered[1:]:
        if current == run_end:
            # Duplicates are adjacent after sorting; skip them.
            continue
        if current - run_end == one_day:
            # Consecutive day: extend the current run.
            run_end = current
        else:
            # Gap of more than one day: close the run and start a new one.
            close_run(run_start, run_end)
            run_start = run_end = current

    # The last run is still open when the loop ends.
    close_run(run_start, run_end)
    return result

See https://gist.github.com/mattes/8987332 for some more tests, too.

Question

The implementation works, but I am new to Python. So I was wondering if there are better ways to solve this issue? Or is it just fine?

mattes
  • 8,936
  • 5
  • 48
  • 73
  • Why 9 and 11 are not together? – thefourtheye Feb 14 '14 at 01:05
  • 1
    This really isn't a good question for SO. We can help you fix code that doesn't work, but asking people to write better code than your working code or promise that no such thing exists doesn't really fit here. [Code Review](http://codereview.stackexchange.com) might be better, but I don't know whether it's appropriate there either. – abarnert Feb 14 '14 at 01:06
  • 1
    However, you might want to look at [Grouping into runs of adjacent values](http://stupidpythonideas.blogspot.com/2014/01/grouping-into-runs-of-adjacent-values.html), which I wrote after helping someone with a similar problem. (He was trying to use `itertools` and got stuck, so I showed the various ways to get unstuck. Whether that's actually the best solution in the first place or not is a more subjective and tougher question…) – abarnert Feb 14 '14 at 01:07
  • 9 .. (10) .. 11, so they don't follow each other. – mattes Feb 14 '14 at 01:08
  • didn't know about code review. thanks for the hint. – mattes Feb 14 '14 at 01:11
  • `tupl = [dates[0], dates[0]]` that's not a tuple, it's a list. Tuple is a Python builtin type. The main difference from a list is that tuples are immutable. – GVH Feb 14 '14 at 01:31
  • Also, don't bother with that C++ style iterator syntax. Just use `for cur_date in dates:`. And, you don't need parenthesis in conditional statements. There's a datetime.date class - if you're not using the time component, you probably want that instead. Also, for that same reason you shouldn't use "date" as a variable name. Be careful with statements like `if date:` because lots of things evaluate to False - [], "", 0, {}, () are some. Try `if date is False`. – GVH Feb 14 '14 at 01:44

4 Answers

7

I like itertools:

from itertools import tee, zip_longest
from datetime import datetime, timedelta

one_day = timedelta(days=1)

def pairwise(iterable):
    """Pair every item with its successor.

    Returns an iterator of ``(item, next_item)`` tuples. The final item is
    paired with ``None`` because it has no successor; an empty input
    produces no pairs.
    """
    leading, trailing = tee(iterable, 2)
    # Drop the first element of the second copy so it runs one step ahead.
    next(trailing, None)
    # zip_longest pads the shorter (trailing) side with None by default.
    return zip_longest(leading, trailing)

def collapse_ranges(sorted_iterable, inc):
    """Yield runs of adjacent values from an already sorted iterable.

    ``inc`` maps a value to its expected successor. A run of two or more
    adjacent values is yielded as a ``(first, last)`` tuple; a value with
    no adjacent successor is yielded on its own.
    """
    pair_stream = pairwise(sorted_iterable)
    for first, successor in pair_stream:
        if inc(first) != successor:
            # No adjacent successor: the value stands alone.
            yield first
            continue

        # Keep consuming from the *same* iterator until the run breaks, so
        # the outer loop resumes right after the end of this run.
        for last, successor in pair_stream:
            if inc(last) != successor:
                break
        yield first, last

# dates = [...]

numbers = [11, 1, 2, 3, 5, 5, 6, 22, 20, 21, 9, 7, 6]

if __name__ == '__main__':
    import pprint

    # Dates: the successor of a day is the day after it.
    date_runs = collapse_ranges(sorted(set(dates)), lambda day: day + one_day)
    for date_run in date_runs:
        pprint.pprint(date_run)

    # Integers: the successor of n is n + 1.
    number_runs = collapse_ranges(sorted(set(numbers)), lambda n: 1 + n)
    for number_run in number_runs:
        pprint.pprint(number_run)

Result:

(datetime.datetime(2014, 10, 1, 0, 0), datetime.datetime(2014, 10, 3, 0, 0))
(datetime.datetime(2014, 10, 5, 0, 0), datetime.datetime(2014, 10, 7, 0, 0))
datetime.datetime(2014, 10, 9, 0, 0)
datetime.datetime(2014, 10, 11, 0, 0)
(datetime.datetime(2014, 10, 20, 0, 0), datetime.datetime(2014, 10, 22, 0, 0))
(1, 3)
(5, 7)
9
11
(20, 22)
pillmuncher
  • 10,094
  • 2
  • 35
  • 33
3

You can avoid reinventing the wheel by adapting this answer to the similarly titled
"Grouping consecutive dates together" question so that it works with `datetime` objects:

def parse_date_ranges(dates):
    """Group dates into runs of consecutive calendar days.

    Duplicates are removed and the input is sorted before grouping. Two
    values are consecutive when their ``toordinal()`` values differ by
    exactly one, so any time-of-day component is ignored for adjacency.

    Args:
        dates: An iterable of hashable objects providing ``toordinal()``
            (e.g. ``date`` or ``datetime``).

    Returns:
        ``False`` when there are no dates (``None``, an empty sequence, or
        an exhausted iterator). Otherwise a list in which each run of two
        or more consecutive days is a ``[first, last]`` list and each
        isolated day is the bare object.
    """
    if not dates:
        return False

    # Materialise before grouping: an empty but truthy input such as
    # iter([]) would otherwise hit a bare next() inside the generator,
    # which surfaces as RuntimeError under PEP 479.
    unique_days = sorted(set(dates))  # de-dup and sort
    if not unique_days:
        return False

    def group_consecutive(days):
        run = [days[0]]
        for d in days[1:]:
            if d.toordinal() - run[-1].toordinal() == 1:  # consecutive?
                run.append(d)
            else:  # [start, end] of range else singleton
                yield [run[0], run[-1]] if len(run) > 1 else run[0]
                run = [d]

        # Emit the run that was still open when the input ended.
        yield [run[0], run[-1]] if len(run) > 1 else run[0]

    return list(group_consecutive(unique_days))
Community
  • 1
  • 1
martineau
  • 119,623
  • 25
  • 170
  • 301
1

I wrote another solution for you, and wrote a couple of comments to try to explain the code.

from datetime import datetime, timedelta

dates = [
    datetime(2014, 10, 11),
    datetime(2014, 10, 1),
    datetime(2014, 10, 2),
    datetime(2014, 10, 3),
    datetime(2014, 10, 5),
    datetime(2014, 10, 5),
    datetime(2014, 10, 6),
    datetime(2014, 10, 22),
    datetime(2014, 10, 20),
    datetime(2014, 10, 21),
    datetime(2014, 10, 9),
    datetime(2014, 10, 7),
    datetime(2014, 10, 6)
]
# Remove duplicates, and sort the dates ascending
sorted_dates = sorted(set(dates))
# Set initial first and last element as the current element
first, last = sorted_dates[0], sorted_dates[0]
date_ranges = []

# Loop over the sorted list from the second value
for d in sorted_dates[1:]:
    # Check if the current date is exactly one day later than the current
    # "last" date
    if d - last != timedelta(days=1):
        # Gap found: close the current range. A single-day range is stored
        # as a 1-tuple, the same shape the final element gets below.
        if first == last:
            date_ranges.append((first,))
        else:
            date_ranges.append((first, last))
        first, last = d, d
    else:
        last = d

# Handle last element
if first == last:
    date_ranges.append((first,))
else:
    date_ranges.append((first, last))

for dt_pair in date_ranges:
    # print() with a single argument behaves the same on Python 2 and 3;
    # the statement form is a SyntaxError on Python 3.
    print(dt_pair)

Output:

(datetime.datetime(2014, 10, 1, 0, 0), datetime.datetime(2014, 10, 3, 0, 0))
(datetime.datetime(2014, 10, 5, 0, 0), datetime.datetime(2014, 10, 7, 0, 0))
(datetime.datetime(2014, 10, 9, 0, 0),)
(datetime.datetime(2014, 10, 11, 0, 0),)
(datetime.datetime(2014, 10, 20, 0, 0), datetime.datetime(2014, 10, 22, 0, 0))
Steinar Lima
  • 7,644
  • 2
  • 39
  • 40
1

Adapting this answer to use `datetime` objects. It handles non-unique and unsorted input, and it is Python 3 compatible too:

import itertools
from datetime import datetime, timedelta

def datetimes_to_ranges(iterable):
    """Yield runs of datetimes that are exactly one day apart.

    The input is de-duplicated and sorted first. A run of two or more
    values is yielded as a ``(first, last)`` tuple; an isolated value is
    yielded on its own.
    """
    ordered = sorted(set(iterable))

    def run_anchor(indexed):
        # Subtracting the position (in days) maps every member of a run of
        # one-day steps to the same value, which groupby uses as the key.
        position, moment = indexed
        return moment - timedelta(days=position)

    for _, members in itertools.groupby(enumerate(ordered), run_anchor):
        moments = [moment for _, moment in members]
        if len(moments) > 1:
            yield moments[0], moments[-1]
        else:
            yield moments[0]

Example:

>>> for i in datetimes_to_ranges(dates): i
... 
(datetime.datetime(2014, 10, 1, 0, 0), datetime.datetime(2014, 10, 3, 0, 0))
(datetime.datetime(2014, 10, 5, 0, 0), datetime.datetime(2014, 10, 7, 0, 0))
datetime.datetime(2014, 10, 9, 0, 0)
datetime.datetime(2014, 10, 11, 0, 0)
(datetime.datetime(2014, 10, 20, 0, 0), datetime.datetime(2014, 10, 22, 0, 0))
luca
  • 7,178
  • 7
  • 41
  • 55