0

Let's say I have 3 lists:

# Three sample lists of date strings; each list's field order is not known in advance.
date_list = ['2020-02-28', '2020-03-11', '2020-03-12', '2020-04-01']
date_list2 = ['2020-02-02', '2020-12-11', '2020-13-11', '2020-29-12']
date_list3 = ['10-02-2002', '04-12-2011', '09-10-1911', '20-12-1912']

The situation is that I don't know the format of these lists. Here is what I can check:

  1. 4 digits = year
  2. 2 digits and greater than 12 = date (day of the month)

How do I know the format of the rest? I mean, how do I differentiate between '03' as date and '03' as month?

def define_dformat(dl):
    """Report which field positions hold the year and the day in *dl*.

    Each string is split on its first non-digit character. A 4-character
    field is taken as the year; a 2-character field whose value exceeds 12
    is taken as the day. Later elements overwrite earlier findings.

    Returns a ``(year, month, day)`` tuple of description strings. The
    month entry is always empty: a 2-digit value of 12 or less could be
    either a month or a day, and that case is not resolved here.
    """
    year_note = month_note = day_note = ""
    for position, text in enumerate(dl, start=1):
        # The first non-digit character is assumed to be the separator.
        delimiter = re.search(r'\D', text).group(0)
        print(f"element #{position}: {text}")
        for slot, piece in enumerate(text.split(delimiter)):
            width = len(piece)
            if width == 4:
                year_note = f"the year is word index #{slot}"
            elif width == 2 and int(piece) > 12:
                # No month has a number above 12, so this must be the day.
                day_note = f"the date is word index #{slot}"
            # Values <= 12 stay undecided: month or day?

    return year_note, month_note, day_note
    
# Run the detector over each sample list and print what it reports.
for sample in (date_list, date_list2, date_list3):
    print(define_dformat(sample))
Asocia
  • 5,935
  • 2
  • 21
  • 46

1 Answer

1

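You can create a list with all known formats and try to parse your dates with those. If no error occurs for a format, then it's your input list's format. Note that if every value in a list is 12 or less in both the day and month positions (e.g. only '2020-02-03'), more than one format will parse without error, and the first matching format in the list is returned.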

from datetime import datetime

# Sample inputs; each list uses a single field order throughout.
date_list = ['2020-02-28', '2020-03-11', '2020-03-12', '2020-04-01']  # year-month-day
date_list2 = ['2020-02-02', '2020-12-11', '2020-13-11', '2020-29-12']  # year-day-month
date_list3 = ['10-02-2002', '04-12-2011', '09-10-1911', '20-12-1912']  # day-month-year


def _matches(date, dformat):
    """Return True if *date* parses under the strptime format *dformat*."""
    try:
        datetime.strptime(date, dformat)
    except ValueError:
        return False
    return True


def define_dformat(dl, dformats=('%Y-%m-%d', '%Y-%d-%m', '%d-%m-%Y')):
    """Return the first format in *dformats* that parses every date in *dl*.

    Args:
        dl: iterable of date strings that all share one format.
        dformats: candidate ``strptime`` formats, tried in order. When a
            list is ambiguous (several candidates parse every entry), the
            earliest candidate wins.

    Returns:
        The matching format string.

    Raises:
        ValueError: if *dl* is empty, or if no candidate parses every entry.
    """
    # Materialise once: a one-shot iterator would be exhausted by the first
    # candidate and then "match" every later candidate vacuously.
    dates = list(dl)
    if not dates:
        # An empty list matches every format, so there is nothing to infer.
        raise ValueError("date list is empty; cannot infer a format")
    for dformat in dformats:
        if all(_matches(date, dformat) for date in dates):
            return dformat
    raise ValueError("date list didn't match with any known formats")


# Print the detected format for each sample list, one per line.
for dates in (date_list, date_list2, date_list3):
    print(define_dformat(dates))

%Y-%m-%d
%Y-%d-%m
%d-%m-%Y

Asocia
  • 5,935
  • 2
  • 21
  • 46