0

I need to build a list of URLs in which the date part varies up to the current date. For example, this is one URL from my list: https://www.coordinador.cl/wp-content/uploads/2023/02/RIO230211.xls (which corresponds to the file for 02/11). I need to vary the parts marked NN and XX, for example: https://www.coordinador.cl/wp-content/uploads/2023/NN/RIO23NNXX.xls (where NN is the month and XX is the day).

I have the following code, which lists the URLs manually for the dates 02/11 through 02/21:

import os
import requests
from time import time
from multiprocessing.pool import ThreadPool


def url_response(url):
    """Download one file into the ``RIO`` directory.

    Args:
        url: A ``(nombre, url)`` pair. The response body is saved as
            ``RIO/<nombre>.xls``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    nombre, enlace = url
    path = f"RIO/{nombre}.xls"

    # The target directory may not exist yet on a first run.
    os.makedirs("RIO", exist_ok=True)

    # The context manager releases the connection even if writing fails.
    with requests.get(enlace, stream=True, timeout=30) as r:
        # Fail before opening the file so an HTML error page is never
        # stored under an .xls name.
        r.raise_for_status()
        with open(path, 'wb') as f:
            for ch in r.iter_content(chunk_size=64 * 1024):
                f.write(ch)

# (file name without extension, download URL) pairs, one per day.
urls = [
    ("RIO230211", "https://www.coordinador.cl/wp-content/uploads/2023/02/RIO230211.xls"),
    ("RIO230212", "https://www.coordinador.cl/wp-content/uploads/2023/02/RIO230212.xls"),
    ("RIO230213", "https://www.coordinador.cl/wp-content/uploads/2023/02/RIO230213.xls"),
    ("RIO230214", "https://www.coordinador.cl/wp-content/uploads/2023/02/RIO230214.xls"),
    ("RIO230215", "https://www.coordinador.cl/wp-content/uploads/2023/02/RIO230215.xls"),
    ("RIO230216", "https://www.coordinador.cl/wp-content/uploads/2023/02/RIO230216.xls"),
    ("RIO230217", "https://www.coordinador.cl/wp-content/uploads/2023/02/RIO230217.xls"),
    ("RIO230218", "https://www.coordinador.cl/wp-content/uploads/2023/02/RIO230218.xls"),
    ("RIO230219", "https://www.coordinador.cl/wp-content/uploads/2023/02/RIO230219.xls"),
    ("RIO230220", "https://www.coordinador.cl/wp-content/uploads/2023/02/RIO230220.xls"),
    ("RIO230221", "https://www.coordinador.cl/wp-content/uploads/2023/02/RIO230221.xls"),
]

# imap_unordered only schedules the work; the iterator must be drained so the
# script waits for every download instead of exiting while the pool's daemon
# threads are still running.
with ThreadPool(12) as pool:
    pendientes = pool.imap_unordered(url_response, urls)
    while True:
        try:
            next(pendientes)
        except StopIteration:
            break
        except Exception as exc:
            # One failed download must not stop the others; report and go on.
            print(f"Download failed: {exc}")

  • 2
    You can use the chosen answer to this question (https://stackoverflow.com/questions/59882714/python-generating-a-list-of-dates-between-two-dates) to list all the dates you need, then construct the urls from the list – Swifty Feb 20 '23 at 20:02

2 Answers2

0

You can do:

from datetime import date, timedelta
import requests

# First and last day (both inclusive) of the range to download.
start = date(2023, 2, 11)
end = date(2023, 2, 20)
for d in range((end-start).days+1):
    dt = start + timedelta(days=d)

    # Take every date component from dt so the URL stays correct when the
    # range is moved to another year.
    url = f'https://www.coordinador.cl/wp-content/uploads/{dt:%Y}/{dt:%m}/RIO{dt:%y%m%d}.xls'
    path = f'RIO/{dt:%m%d}.xls'

    # The whole body is read at once, so streaming is unnecessary.
    r = requests.get(url, timeout=30)
    if not r.ok:
        # Skip missing days instead of saving an HTML error page as .xls.
        print(f'{url} -> HTTP {r.status_code}, skipped')
        continue

    with open(path, 'wb') as f:
        print(path)
        f.write(r.content)
Corralien
  • 109,409
  • 8
  • 28
  • 52
0

This is the answer that does what I need:

from datetime import datetime, timedelta

def generar_urls(fecha_inicio: datetime, dias: int, *, incluir_inicio: bool = False) -> list:
    """Build ``(codigo, url)`` pairs for ``dias`` consecutive days.

    Args:
        fecha_inicio: Reference date. By default the first generated entry is
            for the day *after* this date.
        dias: Number of consecutive days to generate. Zero or a negative value
            yields an empty list.
        incluir_inicio: When true, the first entry is for ``fecha_inicio``
            itself instead of the following day.

    Returns:
        A list of ``(codigo, url)`` tuples in chronological order, where
        ``codigo`` is ``RIO`` followed by the two-digit year, month and day.
    """
    formato_url = "https://www.coordinador.cl/wp-content/uploads/{}/{}/{}.xls"
    # The default offset of one day keeps the long-standing behaviour of
    # starting on the day after fecha_inicio.
    desplazamiento = 0 if incluir_inicio else 1

    urls = []
    for n in range(dias):
        fecha = fecha_inicio + timedelta(days=n + desplazamiento)
        year = fecha.strftime("%Y")
        month = fecha.strftime("%m")
        codigo = f"RIO{year[-2:]}{month}{fecha.strftime('%d')}"
        urls.append((codigo, formato_url.format(year, month, codigo)))
    return urls