Saving a file that automatically downloads from a URL using Python

I’m trying to log in to a website and download a CSV. I’ve managed to log in to the website and find the button that lets me download the CSV. Once I click it, I get a new URL which should automatically download the CSV file. However, I have no idea how to save the file that this URL serves. Here is what I have so far:

import csv
import json
import os
import time
from datetime import datetime, timedelta
from io import StringIO

import pandas as pd
import requests
from dotenv import load_dotenv, find_dotenv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
# import re

# Log into pro.meteofrance.com with a headless Firefox, locate the CSV link,
# and save the CSV to disk by replaying the browser's authenticated session
# through `requests`.

# Make sure the working directory is correct, then load the .env with the
# secure credentials in it.
project_dir = '/Users/alexfischer/Projects/Tetiaroa_Weather'
os.chdir(project_dir)
load_dotenv(find_dotenv())

# Firefox's "browser.download.dir" preference must point at a DIRECTORY.
# Passing the path of the target file made Firefox ignore the preference.
download_dir = project_dir
csv_path = os.path.join(download_dir, "Tetiaroa.csv")

# Prefer credentials from the environment / .env file.
# NOTE(review): the literal fallbacks keep the script working as before, but
# they should be moved into the .env file and deleted from the source.
username = os.getenv("METEOFRANCE_USERNAME", "TETIAROA")
password = os.getenv("METEOFRANCE_PASSWORD", 'TBSA_Tetiaroa987!')

# Set the timeout duration in seconds
# NOTE(review): nothing in this section enforces the timeout yet; it is only
# defined here together with start_time below.
timeout_duration = 3600  # 1 hour

# Create a FirefoxOptions object to set options for the Firefox WebDriver
firefox_options = Options()

# Set the download directory in the Firefox profile
firefox_options.set_preference("browser.download.folderList", 2)  # 2 = use custom dir
firefox_options.set_preference("browser.download.manager.showWhenStarting", False)
firefox_options.set_preference("browser.download.dir", download_dir)
firefox_options.set_preference("browser.download.useDownloadDir", True)
# Cover the MIME types a CSV is commonly served with, so Firefox never prompts.
firefox_options.set_preference(
    "browser.helperApps.neverAsk.saveToDisk",
    "text/csv,application/csv,text/plain,application/octet-stream",
)

# Run in the background; comment this out to watch the browser window.
firefox_options.add_argument('--headless')

# Defined before the try so the finally clause can tell whether the browser
# was ever started.
driver = None

try:

    # Print current date and time (for the cronjob log, if using crontab)
    print('Current date and time: ' + str(datetime.now()))

    # Start the timer
    start_time = time.time()

    # Open a browser
    print('Opening Firefox...')
    driver = webdriver.Firefox(options=firefox_options)

    # Launch the web app
    print('Logging into meteo-france...')
    driver.get("https://pro.meteofrance.com")
    time.sleep(5)

    # Find the input fields and enter the credentials
    username_element = driver.find_element(By.NAME, 'login')
    username_element.send_keys(username)

    password_element = driver.find_element(By.NAME, 'pass')
    password_element.send_keys(password)

    # Click the Login Button (single quotes outside so the XPath can use
    # double quotes inside)
    driver.find_element(By.XPATH, '//button[@type="submit"]').click()
    time.sleep(10)

    # Read the link BEFORE interacting with the element: clicking may navigate
    # away and leave the element reference stale.
    csv_button = driver.find_element(By.CLASS_NAME, "csv")
    url = csv_button.get_attribute('href')
    print(url)  # just to check it
    if not url:
        raise RuntimeError('The csv link has no href attribute; cannot download.')

    # Reuse the logged-in browser session in requests so the download does
    # not require logging in again.
    session = requests.Session()
    session.headers['User-Agent'] = driver.execute_script('return navigator.userAgent;')
    for cookie in driver.get_cookies():
        session.cookies.set(
            cookie['name'],
            cookie['value'],
            domain=cookie.get('domain', ''),
            path=cookie.get('path', '/'),
        )

    response = session.get(url, timeout=60)
    response.raise_for_status()

    # Write the raw bytes so the server's encoding is preserved untouched.
    with open(csv_path, 'wb') as csv_file:
        csv_file.write(response.content)
    print('Saved csv to ' + csv_path)

finally:
    # Always close the browser, even when a step above raised.
    if driver is not None:
        driver.quit()

If I try opening the URL I get using requests, I have to log in again and repeat the same process over and over. I’m stuck.

  • Hello, and welcome to StackOverflow! You might need the cookies from the Selenium webdriver that authenticates you. Try passing these cookies to requests.get; check this answer for how to get the cookies from Selenium.

    – 

Leave a Comment