43

I am working on a Selenium script that downloads an Excel file, and I would like to give the downloaded file a specific name.

Is there any way to give the file being downloaded a specific name?

Code:

#!/usr/bin/python
# Drive Firefox to a page and click the "Excel" link so the file is saved
# to disk without a download prompt.
from selenium import webdriver
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile

profile = FirefoxProfile()
# MIME types listed here are saved straight to disk instead of opening the
# "what should Firefox do with this file?" dialog.
profile.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain, application/vnd.ms-excel, text/csv, text/comma-separated-values, application/octet-stream")
# folderList=2 selects "use the custom directory from browser.download.dir";
# without it Firefox keeps saving to its default location.
profile.set_preference("browser.download.folderList", 2)
profile.set_preference("browser.download.dir", "C:\\Downloads" )
browser = webdriver.Firefox(firefox_profile=profile)

browser.get('https://test.com/')
browser.find_element_by_partial_link_text("Excel").click() # Download file
  • Why not rename it after the download Using the [Shutil](https://docs.python.org/2/library/shutil.html#module-shutil) module? – Ron D. Dec 31 '15 at 15:04
  • 4
    Since the filename is always random, I want to name the file while I am downloading it, so that it will be easier to locate later. –  Dec 31 '15 at 15:06

11 Answers11

43

Here is another simple solution, where you wait until the download has completed and then get the downloaded file name from the browser's downloads page.

Chrome:

# method to get the downloaded file name
def getDownLoadedFileName(waitTime):
    """Poll Chrome's downloads page and return the first entry's file name.

    A new tab is opened on ``chrome://downloads`` and the first
    ``downloads-item`` element is checked once per second.  The driver is
    left focused on that downloads tab when the function returns.

    Args:
        waitTime: maximum number of seconds to keep polling.

    Returns:
        The text of the first entry's file link once its progress value is
        100, or ``None`` if that does not happen within ``waitTime`` seconds.
    """
    # Local import keeps the snippet self-contained.
    from selenium.common.exceptions import WebDriverException

    progress_script = "return document.querySelector('downloads-manager').shadowRoot.querySelector('#downloadsList downloads-item').shadowRoot.querySelector('#progress').value"
    file_name_script = "return document.querySelector('downloads-manager').shadowRoot.querySelector('#downloadsList downloads-item').shadowRoot.querySelector('div#content  #file-link').text"

    driver.execute_script("window.open()")
    # switch to new tab
    driver.switch_to.window(driver.window_handles[-1])
    # navigate to chrome downloads
    driver.get('chrome://downloads')
    # define the endTime
    endTime = time.time() + waitTime
    while True:
        try:
            # check if the download percentage is 100 (otherwise keep waiting)
            if driver.execute_script(progress_script) == 100:
                # return the file name once the download is completed
                return driver.execute_script(file_name_script)
        except WebDriverException:
            # The download entry may not be rendered yet, in which case the
            # script fails on a null element; retry on the next poll.  Only
            # driver errors are tolerated so Ctrl-C and real bugs surface.
            pass
        time.sleep(1)
        if time.time() > endTime:
            # Timed out: make the "nothing found" result explicit.
            return None

Firefox:

def getDownLoadedFileName(waitTime):
    """Poll Firefox's ``about:downloads`` page and return the first file name.

    A new tab is opened on ``about:downloads`` and the first download entry
    is queried once per second.  The function returns as soon as that entry
    exposes a non-empty value; it does not inspect download progress.  The
    driver is left focused on the downloads tab.

    Args:
        waitTime: maximum number of seconds to keep polling.

    Returns:
        The file name of the first download entry, or ``None`` if none
        appears within ``waitTime`` seconds.
    """
    # Local import keeps the snippet self-contained.
    from selenium.common.exceptions import WebDriverException

    # new_window_is_opened() must be called with the handles that existed
    # *before* the new tab; passing the bare function to until() returns a
    # truthy object immediately and therefore never waits.
    handles_before = driver.window_handles
    driver.execute_script("window.open()")
    WebDriverWait(driver, 10).until(EC.new_window_is_opened(handles_before))
    driver.switch_to.window(driver.window_handles[-1])
    driver.get("about:downloads")

    endTime = time.time() + waitTime
    while True:
        try:
            fileName = driver.execute_script("return document.querySelector('#contentAreaDownloadsView .downloadMainArea .downloadContainer description:nth-of-type(1)').value")
            if fileName:
                return fileName
        except WebDriverException:
            # No download entry rendered yet; retry on the next poll.
            pass
        time.sleep(1)
        if time.time() > endTime:
            # Timed out: make the "nothing found" result explicit.
            return None

Once you click on the download link/button, just call the above method.

 # locate the download link, then click it to start the download
 excel_link = browser.find_element_by_partial_link_text("Excel")
 excel_link.click()
 # poll for up to 180 seconds (3 minutes) for the downloaded file name
 latestDownloadedFileName = getDownLoadedFileName(180)
 print(latestDownloadedFileName)
 

JAVA + Chrome:

Here is the method in java.

/**
 * Opens chrome://downloads in a new tab, waits until the first download entry
 * reports 100% progress, prints its details and returns its file name.
 *
 * The downloads tab is always closed and focus is returned to the window that
 * was active on entry, even if reading the download details fails.
 * There is no upper bound on the wait: the loop runs until progress is 100.
 *
 * @param driver the WebDriver controlling Chrome; must also be a JavascriptExecutor
 * @return the file name shown for the first entry on the downloads page
 * @throws InterruptedException if the thread is interrupted while sleeping between polls
 */
public String waitUntilDonwloadCompleted(WebDriver driver) throws InterruptedException {
    // Store the current window handle so we can come back to it
    String mainWindow = driver.getWindowHandle();

    // open a new tab
    JavascriptExecutor js = (JavascriptExecutor) driver;
    js.executeScript("window.open()");
    // Switch through the handles; the last one iterated ends up focused
    for (String winHandle : driver.getWindowHandles()) {
        driver.switchTo().window(winHandle);
    }
    try {
        // navigate to chrome downloads
        driver.get("chrome://downloads");

        // wait until the file is downloaded
        long percentage = 0;
        while (percentage != 100) {
            try {
                Object progress = js.executeScript("return document.querySelector('downloads-manager').shadowRoot.querySelector('#downloadsList downloads-item').shadowRoot.querySelector('#progress').value");
                // Accept any numeric type and ignore null: a blind (Long) cast
                // would NPE on unboxing when the script returns null.
                if (progress instanceof Number) {
                    percentage = ((Number) progress).longValue();
                }
            } catch (Exception e) {
                // Download entry not rendered yet - nothing to do, just wait
            }
            Thread.sleep(1000);
        }
        // get the latest downloaded file name
        String fileName = (String) js.executeScript("return document.querySelector('downloads-manager').shadowRoot.querySelector('#downloadsList downloads-item').shadowRoot.querySelector('div#content #file-link').text");
        // get the latest downloaded file url
        String sourceURL = (String) js.executeScript("return document.querySelector('downloads-manager').shadowRoot.querySelector('#downloadsList downloads-item').shadowRoot.querySelector('div#content #file-link').href");
        // src of the entry's file icon; it embeds the location of the file
        String fileIconSrc = (String) js.executeScript("return document.querySelector('downloads-manager').shadowRoot.querySelector('#downloadsList downloads-item').shadowRoot.querySelector('div.is-active.focus-row-active #file-icon-wrapper img').src");
        // print the details
        System.out.println("Download deatils");
        System.out.println("File Name :-" + fileName);
        System.out.println("Donwloaded path :- " + fileIconSrc);
        System.out.println("Downloaded from url :- " + sourceURL);
        System.out.println(fileName);
        System.out.println(sourceURL);
        return fileName;
    } finally {
        // close the downloads tab and switch back to the main window,
        // whether or not the steps above succeeded
        driver.close();
        driver.switchTo().window(mainWindow);
    }
}

This is how to call it from your Java code.

// download triggering step 
downloadExe.click();
// now waituntil download finish and then get file name
System.out.println(waitUntilDonwloadCompleted(driver));

Output:

Download deatils

File Name :-RubyMine-2019.1.2 (7).exe

Donwloaded path :- chrome://fileicon/C%3A%5CUsers%5Csupputuri%5CDownloads%5CRubyMine-2019.1.2%20(7).exe?scale=1.25x

Downloaded from url :- https://download-cf.jetbrains.com/ruby/RubyMine-2019.1.2.exe

RubyMine-2019.1.2 (7).exe

Community
  • 1
  • 1
supputuri
  • 13,644
  • 2
  • 21
  • 39
  • sorry, but I failed to understand what is webDriverWait is it some method in some package. If yest which package? I need to import it accordingly. – sangharsh Jul 12 '19 at 18:43
  • 2
    You need the below 3 imports. `from selenium.webdriver.support.ui import WebDriverWait`, `from selenium.webdriver.common.by import By` and `from selenium.webdriver.support import expected_conditions as EC` – supputuri Jul 12 '19 at 20:34
  • 1
    The chrome entry was helpful, but "except: pass" is a horrible idea. In incognito mode at least on the latest chrome, it spits out an error: selenium.common.exceptions.JavascriptException: Message: javascript error: Cannot read property 'shadowRoot' of null It can't find the percentage tag it seems, and fails with no proper error message due to your overly broad except case. – poleguy Apr 11 '20 at 19:38
  • Thanks for bringing up the behavior in the latest chrome version. There might be changes to the behavior of the browser, I will take a look into this. Please feel free to edit/leave comment here with your suggested changes, so that it will help the future readers. – supputuri Apr 12 '20 at 04:08
  • This is still the most efficient answer for me with a few tweaks though If you want the webdriver to always wait until the download is complete, you can remove the wait time and the if block – kcEmenike Aug 11 '20 at 03:13
  • Glad it's was helpful, Please make sure to upvote the answer for the benefit of future users. – supputuri Aug 11 '20 at 03:32
  • I Will check, can you please post the exception message here. – supputuri Feb 04 '21 at 01:49
  • @CristianAvendaño I don't see any issue with chrome 88 and it's working successfully. Can you please post the stack trace here, if any? – supputuri Feb 11 '21 at 02:52
  • This doesn't work for me. Chrome 89.0.4389.90 http://127.0.0.1:53513 "POST /session/88eb6adfb63900824576c0fd13038370/execute/sync HTTP/1.1" 500 1195 – alexsmail Apr 01 '21 at 19:46
  • @supputuri, see https://stackoverflow.com/a/61544031/1137529. You can take javascript section from their. – alexsmail Apr 01 '21 at 20:04
  • @supputuri This works fine, opens the new tab and gets downloads page, but returns `None`. Did the syntax change with newer versions of chrome? – double_wizz Dec 08 '21 at 20:05
  • The snippet does not work for my Chrome 97. Try getting the download items with `document.querySelector('downloads-manager').shadowRoot.querySelector('#downloadsList').items` – nobody-74185 Jan 27 '22 at 03:22
  • 1
    Just a heads up that the download page for Chrome at `chrome://downloads` isn't available in `headless` mode. – CIRCLE Nov 25 '22 at 01:13
  • I had no idea you could access the downloads like this!! – John R Perry Dec 08 '22 at 19:45
36

You cannot specify the name of the downloaded file through Selenium. However, you can download the file, find the latest file in the download folder, and rename it as you want.

Note: methods borrowed from Google searches may contain errors, but you get the idea.

import os
import shutil

# Pick the most recently created entry in Initial_path.  os.path.join keeps
# the paths valid on every platform (a hard-coded "\\" only works on Windows).
# NOTE: run this only after the download has finished; max() raises
# ValueError if the folder is empty.
filename = max((os.path.join(Initial_path, f) for f in os.listdir(Initial_path)), key=os.path.getctime)
# Rename it in place inside the same folder.
shutil.move(filename, os.path.join(Initial_path, r"newfilename.ext"))
parishodak
  • 4,506
  • 4
  • 34
  • 48
  • 5
    This gives me `File "/usr/local/Cellar/python/2.7.10_2/Frameworks/Python.framework/Versions/2.7/lib/python2.7/genericpath.py", line 72, in getctime return os.stat(filename).st_ctime OSError: [Errno 2] No such file or directory: '.localized'` – altabq Mar 09 '16 at 22:07
  • The problem with above code is that `filename` var gets its value before the download is finished. This will create two type of errors. If the download directory was empty, then `filename` will be empty and hence `shutil.move()` gives error. If the directory was not empty, `filename` will get the name of last file downloaded before current file (which is still downloading) and `shutil.move()` will rename that older last file to newfilename (not the file currently downladed file). You need to implement a method to wait for download to finish. – ePandit May 01 '20 at 13:38
  • Correct version: `filename = max([os.path.join(Initial_path, f) for f in os.listdir(Initial_path)], key=os.path.getctime)` – user898678 Dec 08 '20 at 10:24
  • It could be useful to also compare the file with current timestamp ( datetime.now().timestamp() ) to avoid the rename of and old file in case there is an error. – Cristian Avendaño Feb 04 '21 at 14:22
  • What's Initial_Path in this? – ShridharK Jun 01 '21 at 09:29
  • @ShridharK think you want to find fines under "C:\samples\" folder. If initial_path is empty, it will look for current directory of the program – parishodak Jun 02 '21 at 16:39
  • you rock man .. ! – arun Sep 30 '21 at 08:23
12

I hope this snippet is not too confusing. It took me a while to write and it is really useful, because there has not been a clear answer to this problem that uses only this library.

import os
import time
def tiny_file_rename(newname, folder_of_download):
    """Rename the most recently created entry of a folder to ``newname``.

    If the newest entry is a partial download (``.part`` suffix), wait one
    second and look again, so the rename targets whatever is newest after
    the wait instead of a ``.part`` name that may no longer exist.

    Args:
        newname: new file name (not a path) to give the entry.
        folder_of_download: directory that receives the downloads.

    Raises:
        ValueError: if ``folder_of_download`` is empty.
    """
    def newest_entry():
        # Newest by ctime of the full path; the bare name is returned.
        return max(
            os.listdir(folder_of_download),
            key=lambda name: os.path.getctime(os.path.join(folder_of_download, name)),
        )

    filename = newest_entry()
    if filename.endswith('.part'):
        time.sleep(1)
        # Re-resolve: the .part file is renamed by the browser on completion.
        filename = newest_entry()
    os.rename(os.path.join(folder_of_download, filename), os.path.join(folder_of_download, newname))

Hope this saves someone's day, cheers.

EDIT: Thanks to @Om Prakash for editing my code; it reminded me that I didn't explain the code thoroughly.

Using the max([]) function on its own can lead to a race condition, leaving you with an empty or corrupted file (I know this from experience). You want to check first whether the file has been completely downloaded. This is because Selenium does not wait for the file download to complete, so when you look for the last created file, an incomplete file will show up in your generated list and the code will try to move that file. And even then, you are better off waiting a little for Firefox to release the file.

EDIT 2: More Code

I was asked if 1 second was enough time and mostly it is, but in case you need to wait more than that you could change the above code to this:

import os
import time
def tiny_file_rename(newname, folder_of_download, time_to_wait=60):
    """Wait for the newest download to complete, then rename it.

    The newest entry of ``folder_of_download`` is re-checked once per second
    for as long as it is a partial download (``.part`` suffix).

    Args:
        newname: new file name (not a path) to give the entry.
        folder_of_download: directory that receives the downloads.
        time_to_wait: number of one-second polls tolerated before giving up.

    Raises:
        Exception: if the newest entry is still partial after the wait.
        ValueError: if ``folder_of_download`` is empty.
    """
    def newest_entry():
        # Newest by ctime of the full path; the bare name is returned.
        return max(
            os.listdir(folder_of_download),
            key=lambda name: os.path.getctime(os.path.join(folder_of_download, name)),
        )

    time_counter = 0
    filename = newest_entry()
    while filename.endswith('.part'):
        time.sleep(1)
        time_counter += 1
        if time_counter > time_to_wait:
            raise Exception('Waited too long for file to download')
        # Refresh inside the loop; otherwise the same stale name is tested
        # forever and the loop can only end by timing out.
        filename = newest_entry()
    os.rename(os.path.join(folder_of_download, filename), os.path.join(folder_of_download, newname))
dmb
  • 288
  • 2
  • 9
  • Hi, dmb and Kreuni, I would like to add this snippet into my code with python3.6 on macOS 13.14. Can you please tell me which modification I would need to do to make it work on macOS? For example, would I need to add `filepath = os.path.expanduser("~")+"/Downloads/"` and change `folder_of_download` to `file path`? I ask because I did just that and it does not function, however it does not give me an error either to go with. – Til Hund Oct 12 '18 at 11:52
  • 1
    @TilHund Do, `os.path.abspath('~/Downloads')` and yes change `folder_of_download` to `filepath`. `os.path.abspath()` works by resolving the path string to the absolute string in your os, in your case Mac OS X. – dmb Oct 12 '18 at 11:55
  • Thank you, dmb. I added what you have said with the same outcome. No error message or complaint was given, but the two sample jpg files were not modified into `1.jpg` and `2.jpg`. – Til Hund Oct 12 '18 at 12:05
  • I just opened a question [here](https://stackoverflow.com/questions/52780120/rename-files-download-via-python-selenium-on-macos). You are welcome to answer. :) – Til Hund Oct 12 '18 at 13:04
  • How can you be sure that 1 second is sufficient a wait time? – Joel G Mathew Jan 23 '19 at 10:57
  • @JoelGMathew In most cases by experience, but you could always use a `while` to wait. Check the new edit for this code. – dmb Jan 23 '19 at 13:41
  • @dmb you need to update filename in the loop, you check the same value every time – hellpanderr Jan 13 '20 at 21:18
  • Partial file downloads in Chrome have `.crdownload` extension, so for Chrome use `.crdownload'` instead of `.part`. Also it is better to use `while filename.endswith('.crdownload'):` – ePandit May 01 '20 at 09:42
  • Above code won't work. `filename` var gets its value before the download is finished (or even started). `filename` will be either empty (if dir was empty) or get name of last downloaded file, not the current. In both cases it do not have `.crdownload` (or .part) in its filename. So the while loop will be totally avoided. So `shutil.move()` will either give error or rename that older last file to newfilename (not the file currently downladed file). – ePandit May 01 '20 at 14:04
  • I haven't tried this code but If I am not mistaken you are checking the same `filename` over and over again without updating it. – Burak Kaymakci Sep 09 '20 at 20:54
  • @dmb This saved the day for me! – ldias Jun 05 '22 at 19:59
8

There is something I would correct in @parishodak's answer:

the filename here will only return the relative path (here the name of the file) not the absolute path.

That is why @FreshRamen got the following error after:

File "/usr/local/Cellar/python/2.7.10_2/Frameworks/Python.framework/Versions/2.7/lib/‌​python2.7/genericpath.py", 
line 72, in getctime return os.stat(filename).st_ctime OSError: 
[Errno 2] No such file or directory: '.localized'

Here is the corrected code:

import os
import shutil

# Raw string so the backslash is not read as an escape sequence.
filepath = r'c:\downloads'
# Build full paths with os.path.join so getctime can stat each entry
# regardless of the current working directory.
filename = max([os.path.join(filepath, f) for f in os.listdir(filepath)], key=os.path.getctime)
# filename already contains the directory, so it is moved as-is.
# newfilename is the destination path chosen by the caller.
shutil.move(filename, newfilename)
toshiro92
  • 1,287
  • 5
  • 28
  • 42
6

I've come up with a different solution. Since you only care about the last downloaded file, why not download it into a dummy_dir, so that it is the only file in that directory? Once it's downloaded, you can move it to your destination_dir and change its name at the same time.

Here is an example that works with Firefox:

def rename_last_downloaded_file(dummy_dir, destination_dir, new_file_name):
    """Move the newest file of ``dummy_dir`` to ``destination_dir``.

    Blocks while ``dummy_dir`` is empty and while its newest entry is a
    partial download (file name ending in ``.part``).  There is no timeout.

    Args:
        dummy_dir: scratch directory the browser downloads into.
        destination_dir: directory the finished file is moved to.
        new_file_name: file name to give the file in ``destination_dir``.
    """
    def get_last_downloaded_file_path(dummy_dir):
        """ Return the last modified -in this case last downloaded- file path.

            This function is going to loop as long as the directory is empty.
        """
        while not os.listdir(dummy_dir):
            time.sleep(1)
        return max([os.path.join(dummy_dir, f) for f in os.listdir(dummy_dir)], key=os.path.getctime)

    last_path = get_last_downloaded_file_path(dummy_dir)
    # Test only the file name: checking the whole path would spin forever
    # when a directory component itself happens to contain ".part".
    while os.path.basename(last_path).endswith('.part'):
        time.sleep(1)
        last_path = get_last_downloaded_file_path(dummy_dir)
    # Move exactly the path that was just verified as complete.
    shutil.move(last_path, os.path.join(destination_dir, new_file_name))

You can fiddle with the sleep time and add a TimeoutException as well, as you see fit.

Burak Kaymakci
  • 662
  • 2
  • 16
  • 36
4

Here is the code sample I used to download pdf with a specific file name. First you need to configure chrome webdriver with required options. Then after clicking the button (to open pdf popup window), call a function to wait for download to finish and rename the downloaded file.

import os
import time
import shutil

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait

# function to wait for download to finish and then rename the latest downloaded file
def wait_for_download_and_rename(newFilename):
    """Wait for a Chrome download to complete, then rename it.

    Uses the module-level ``driver`` and ``download_dir``.  A
    ``chrome://downloads`` tab is opened, polled until at least one entry is
    in state ``COMPLETE``, then closed again and focus returns to the tab
    that was active on entry.  The first completed path is moved to
    ``download_dir/newFilename``.

    Args:
        newFilename: file name to give the download inside ``download_dir``.
    """
    # Remember the caller's tab; it is not necessarily window_handles[0].
    original_handle = driver.current_window_handle

    # function to wait for all chrome downloads to finish
    def chrome_downloads(drv):
        if "chrome://downloads" not in drv.current_url: # if 'chrome downloads' is not current tab
            drv.execute_script("window.open('');") # open a new tab
            # the tab just opened is the last handle, however many tabs exist
            drv.switch_to.window(drv.window_handles[-1])
            drv.get("chrome://downloads/") # navigate to chrome downloads
        return drv.execute_script("""
            return document.querySelector('downloads-manager')
            .shadowRoot.querySelector('#downloadsList')
            .items.filter(e => e.state === 'COMPLETE')
            .map(e => e.filePath || e.file_path || e.fileUrl || e.file_url);
            """)
    # An empty list is falsy, so until() keeps polling (120 s timeout, 1 s
    # interval) until at least one completed download is listed.
    dld_file_paths = WebDriverWait(driver, 120, 1).until(chrome_downloads) # returns list of downloaded file paths
    # Close the downloads tab, but never the tab the caller started on
    if driver.current_window_handle != original_handle and "chrome://downloads" in driver.current_url:
        driver.close()
    # Switch back to original tab
    driver.switch_to.window(original_handle)
    # first completed entry; presumably the most recent download - verify
    # against the ordering of chrome://downloads
    dlFilename = dld_file_paths[0]
    # wait till downloaded file appears in download directory
    time_to_wait = 20 # adjust timeout as per your needs
    time_counter = 0
    while not os.path.isfile(dlFilename):
        time.sleep(1)
        time_counter += 1
        if time_counter > time_to_wait:
            # give up waiting; shutil.move below will raise if still missing
            break
    # rename the downloaded file
    shutil.move(dlFilename, os.path.join(download_dir, newFilename))

# directory Chrome is told to download into
download_dir = r'c:\Downloads\pdf_reports'

# preferences handed to Chrome so PDF popups are downloaded rather than shown
chrome_prefs = {
    "download.default_directory": download_dir,         # own download path
    "download.prompt_for_download": False,              # no prompt at runtime
    "download.directory_upgrade": True,                 # also needed to suppress the prompt
    "plugins.plugins_disabled": ["Chrome PDF Viewer"],  # disable the built-in viewer
    "plugins.always_open_pdf_externally": True,         # hand PDFs off instead of viewing them
}
chrome_options = webdriver.ChromeOptions()
chrome_options.add_experimental_option('prefs', chrome_prefs)

# start Chrome with the preferences above
driver = webdriver.Chrome(options=chrome_options)

# load the page that hosts the PDF button
driver.get('https://mywebsite.com/mywebpage')

# click the button that opens the pdf popup
pdf_button = driver.find_element_by_id('someid')
pdf_button.click()

# wait for the download to finish and give the file its final name
wait_for_download_and_rename('My file.pdf')

# shut the browser down
driver.quit()

Set timeout (120) to the wait time as per your needs.

ePandit
  • 2,905
  • 2
  • 24
  • 15
1

I am using the following function. It checks the download location that you specify for Chrome/Selenium, and only if there is a file created at most 10 seconds ago (max_old_time) does it rename it. Otherwise, it waits a maximum of 60 seconds (max_waiting_time).

I am not sure whether it is the best way, but it worked for me.

import os, shutil, time
from datetime import datetime

def rename_last_file(download_folder, destination_folder, newfilename,
                     max_waiting_time=60, max_old_time=10, poll_interval=5):
    """Move the newest recent file of ``download_folder`` under a new name.

    The folder is scanned repeatedly.  As soon as its newest regular file
    was created no more than ``max_old_time`` seconds before this call
    started, that file is moved to ``destination_folder/newfilename``.

    Args:
        download_folder: directory the browser downloads into.
        destination_folder: directory the file is moved to.
        newfilename: file name to give the file in ``destination_folder``.
        max_waiting_time: seconds to keep scanning before giving up.
        max_old_time: maximum age in seconds, relative to the start of the
            call, for a file to count as the new download.
        poll_interval: seconds to sleep between scans.

    Returns:
        0 if a file was moved, 1 if no suitable file appeared in time.
    """
    start_time = datetime.now().timestamp()
    while True:
        # Track the newest regular file and *its own* ctime together, so the
        # age test below never uses another file's timestamp.
        last_file = None
        last_file_time = 0
        for current_file in os.listdir(download_folder):
            current_file_fullpath = os.path.join(download_folder, current_file)
            if not os.path.isfile(current_file_fullpath):
                continue
            try:
                current_file_time = os.path.getctime(current_file_fullpath)
            except OSError:
                # File vanished between listing and stat (e.g. a temporary
                # download file being renamed); skip it for this scan.
                continue
            if last_file is None or current_file_time > last_file_time:
                last_file = current_file
                last_file_time = current_file_time
        # last_file stays None for an empty folder: keep waiting, don't crash.
        if last_file is not None and start_time - last_file_time < max_old_time:
            last_file_fullpath = os.path.join(download_folder, last_file)
            shutil.move(last_file_fullpath, os.path.join(destination_folder, newfilename))
            print(last_file_fullpath)
            return 0
        elif (datetime.now().timestamp() - start_time) > max_waiting_time:
            print("exit")
            return 1
        else:
            print("waiting file...")
            time.sleep(poll_interval)
0

Using @dmb's trick, I've made just one correction: after the .part check, below time.sleep(1), we must look up the filename again. Otherwise, the line below will try to rename a .part file that no longer exists.

0

Here is a browser-agnostic solution that waits for the download to finish then returns the file name.

from datetime import datetime, timedelta

def wait_for_download_and_get_file_name():
    """Block until DOWNLOAD_DIR holds a recently created file; return its path.

    Every 0.5 s the entry of ``DOWNLOAD_DIR`` with the latest creation time
    is examined; the function returns once that entry was created less than
    five seconds ago.  Note that this detects a *new* file rather than a
    finished transfer, and that there is no timeout.

    Returns:
        Full path of the newest entry in ``DOWNLOAD_DIR``.
    """
    # Local imports: the snippet's own import line only brings in datetime.
    import os
    import time

    print('Waiting for download to finish', end='')

    while True:
        # Get the name of the file with the latest creation time; an empty
        # directory yields None instead of making max() raise.
        newest_file_name = max(
            (os.path.join(DOWNLOAD_DIR, f) for f in os.listdir(DOWNLOAD_DIR)),
            key=os.path.getctime,
            default=None,
        )
        if newest_file_name is not None:
            # Get the creation time of the file
            file_creation_time = datetime.fromtimestamp(os.path.getctime(newest_file_name))
            five_seconds_ago = datetime.now() - timedelta(seconds=5)
            if file_creation_time >= five_seconds_ago:
                print(f'\nFinished downloading "{newest_file_name}"')
                return newest_file_name
        # Nothing there yet, or the newest file is too old to be the file
        # that we're waiting for.
        print('.', end='')
        time.sleep(0.5)

Caveat: this will not work if you have more than one thread or process downloading files to the same directory at the same time.

BrunoF
  • 3,239
  • 26
  • 39
0

In my case I download and rename .csv files. I identify the downloaded file by the '_' in its name, but you can change '_' to whatever marker suits your use case.

Add this block after download on your selenium script.

# Imported before first use; os is needed to build the file list below.
import os

# Base name (without extension) to give the downloaded file.
string = 'SOMETHING_OR_VARIABLE'


path = r'PATH_WHERE_FILE_ARE_BEING_DOWNLOAD'


# Regular files in the download folder whose name contains the '_' marker.
files = [i for i in os.listdir(path) if os.path.isfile(os.path.join(path, i)) and \
            '_' in i]
if files:
    print(files[0])
    # os.path.join keeps the paths valid on every platform.
    os.rename(os.path.join(path, files[0]), os.path.join(path, f'{string}.csv'))
else:
    print('error')

Caio Euzébio
  • 182
  • 1
  • 1
  • 10
-3

You can download the file and name it at the same time using urlretrieve:

import urllib

# urlretrieve lives in urllib.request on Python 3 and in urllib on Python 2.
try:
    from urllib.request import urlretrieve
except ImportError:
    from urllib import urlretrieve

# Read the link target instead of clicking it, then fetch it directly to
# the chosen path.
# NOTE(review): the request is made outside the browser, so it presumably
# does not carry the browser session's cookies - confirm for pages that
# require a login.
url = browser.find_element_by_partial_link_text("Excel").get_attribute('href')
urlretrieve(url, "/choose/your/file_name.xlsx")
James Lemieux
  • 720
  • 1
  • 9
  • 26