How to import all CSV files from one folder in chronological order with Python?

Question

I have around 2000 CSV files in my folder. I want to read them in chronological order. They are named with numbers, so I thought it would be easy.

I am reading them in with the following code. I imagine there is a very simple solution, since there must be an easy parameter for that, but I haven't found anything :(((

def csv_to_df():
    """Read every file found under ``output/csv_files`` into a DataFrame.

    The folder is traversed with ``os.walk`` and files are taken in whatever
    order it yields them, so the result is not sorted in any way.

    Returns:
        list: one DataFrame per file, in traversal order.
    """
    return [
        pd.read_csv(os.path.join(folder, filename))
        for folder, _subdirs, filenames in os.walk("output/csv_files")
        for filename in filenames
    ]

All the CSV files must have the same column names; if they do, you can use a for loop over the CSV file names. — Tornike Kharitonishvili, May 10 '23 at 12:56
Sadly all the csv files are somehow different and also look different — Rebecka, May 10 '23 at 12:59
Does this answer your question? [Sort filenames in directory in ascending order](https://stackoverflow.com/questions/33159106/sort-filenames-in-directory-in-ascending-order) — JonSG, May 10 '23 at 13:05

Timeless · Answer 1 · 2023-05-10T13:04:03.197

You can split the filename and use the stem/number as a sorting key :

def csv_to_df():
    """Read the files under ``output/csv_files`` in numeric filename order.

    Within each directory visited by ``os.walk`` the files are ordered by the
    integer value of the part of the name before the first dot, so ``2.csv``
    comes before ``10.csv``. Names whose stem is not an integer (hidden files,
    ``notes.csv``, ...) no longer abort the sort: they are placed after the
    numbered files, ordered by name.

    Returns:
        list: one DataFrame per file, in the order described above.
    """
    def numeric_stem_key(filename):
        # Part before the first dot, e.g. "12" for "12.csv".
        stem = filename.split(".")[0]
        try:
            return (0, int(stem), filename)
        except ValueError:
            # Non-numeric stem: sort after every numbered file, by name.
            return (1, 0, filename)

    dff_all_from_csv = []

    for root, dirs, files in os.walk("output/csv_files"):
        for file in sorted(files, key=numeric_stem_key):
            df = pd.read_csv(os.path.join(root, file))
            dff_all_from_csv.append(df)
    return dff_all_from_csv

Or use natsorted from natsort :

#pip install natsort
from natsort import natsorted

    ...
    for root, dirs, files in os.walk("output/csv_files"):
        for file in natsorted(files): # <- line updated
        ...

score 0 · Answer 2 · answered May 10 '23 at 13:08

0

you can try:

# Peek at the header of the first file (notebook-style display).
column_df = pd.read_csv(r'1.csv')
column_df.columns

# DataFrame.append was removed in pandas 2.0, and growing a frame inside a
# loop copies it on every iteration. Read 1.csv .. 4.csv into a list and
# concatenate once; the columns of 1.csv come first, as before.
frames = [pd.read_csv(f'{i}.csv') for i in range(1, 5)]
all_csv_df = pd.concat(frames)

all_csv_df

answered May 10 '23 at 13:08

Tornike Kharitonishvili

457
4
9

Corralien · Answer 3 · 2023-05-10T13:20:26.347

0

You can use pathlib and the lstat method to sort your files by modification time (st_mtime) or by st_ctime (creation time on Windows; on Unix it is the time of the last metadata change):

import pathlib

DATA_DIR = 'output/csv_files'

# A plain str has no .glob(), so wrap the folder in pathlib.Path first.
# The CSV paths are then ordered by last-modification time, oldest first.
dff_all_from_csv = [pd.read_csv(f) for f in sorted(pathlib.Path(DATA_DIR).glob('*.csv'),
                                                   key=lambda x: x.lstat().st_mtime)]

edited May 10 '23 at 13:20

answered May 10 '23 at 13:15

Corralien

109,409
8
28
52

score 0 · Answer 4 · answered May 10 '23 at 13:55

You can retrieve the last-modification date of a CSV file using os.path.getmtime(). You can add these dates to a list that you can sort. Then you can read the DataFrames in the order of the sorted list.

import os
import time
import pandas as pd

path_to_csv_files = "./csv_files/"

# list in which we'll store the path and the last modification time of each file
metadata = list()

for root, _, files in os.walk(path_to_csv_files):
    for name in files:
        # build the path from the directory os.walk is currently visiting, so
        # files located in sub-folders resolve correctly
        path_to_csv = os.path.join(root, name)
        # getmtime returns seconds since the epoch as a float, which already
        # sorts chronologically (no string formatting / int conversion needed)
        modification_time = os.path.getmtime(path_to_csv)
        metadata.append((path_to_csv, modification_time))

# sorting the metadata per date, oldest first
metadata = sorted(metadata, key=lambda x: x[1])

# list of dataframes placed in date order
list_of_df_from_csv = [pd.read_csv(path_to_csv) for path_to_csv, _ in metadata]

score 0 · Answer 5 · answered May 10 '23 at 14:03

I tried something like this and it works perfectly:

import os
import pandas as pd

def csv_to_df():
    """Load the ``.csv`` files of ``output/csv_files`` ordered by mtime.

    Only entries directly inside the folder whose name ends with ``.csv`` are
    considered; they are read oldest-modified first.

    Returns:
        list: one DataFrame per CSV file, in ascending modification time.
    """
    source_dir = "output/csv_files"

    csv_paths = []
    for entry in os.listdir(source_dir):
        if entry.endswith('.csv'):
            csv_paths.append(os.path.join(source_dir, entry))
    # Oldest file first.
    csv_paths.sort(key=os.path.getmtime)

    return [pd.read_csv(csv_path) for csv_path in csv_paths]

How to import all CSV files from one folder in chronological order with Python?

5 Answers