I’m trying to login to a website and download a csv. I’ve managed to login to the website and find the button that lets me download the csv. Once I click it, I get a new url which should automatically download the csv file. However, I have no idea how to save the url. Here is what I have so far:
import pandas as pd
from dotenv import load_dotenv, find_dotenv
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.by import By
# import re
from datetime import datetime, timedelta
import requests
from io import StringIO
import os
import csv
import json
# Load the .env with the secure credentials in it
os.chdir('/Users/alexfischer/Projects/Tetiaroa_Weather')
download_dir="/Users/alexfischer/Projects/Tetiaroa_Weather/Tetiaroa.csv"
load_dotenv(find_dotenv())
## Make sure path to the working directory is correct
os.chdir('/Users/alexfischer/Projects/Tetiaroa_Weather')
username="TETIAROA"
password = 'TBSA_Tetiaroa987!'
# Set the timeout duration in seconds
# If the webscraper gets stuck for some reason, it will close the browser in an hour
timeout_duration = 3600 #1 hour
# Create a FirefoxOptions object to set options for the Firefox WebDriver
firefox_options = Options()
# TODO: For some reason it is not using my specified default download directory
# Set the download directory in the Firefox profile
firefox_options.set_preference("browser.download.folderList", 2)
firefox_options.set_preference("browser.download.manager.showWhenStarting", False)
firefox_options.set_preference("browser.download.dir", download_dir)
firefox_options.set_preference("browser.download.useDownloadDir", True)
firefox_options.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/csv") # may be text/plain accoring to content-type in network tab
# Add the headless option
# Uncomment this to have it run in the background
firefox_options.add_argument('--headless')
try:
# Print current date and time (for the cronjob log, if using crontab)
print('Current date and time: ' + str(datetime.now()))
# Start the timer
start_time = time.time()
# Open a browser
print('Opening Firefox...')
driver = webdriver.Firefox(options=firefox_options)
# Launch the web app
print('Logging into meteo-france...')
driver.get("https://pro.meteofrance.com")
time.sleep(5)
# Find the input fields and enter the credentials
username_element = driver.find_element(By.NAME, 'login')
username_element.send_keys(username)
password_element = driver.find_element(By.NAME, 'pass')
password_element.send_keys(password)
# Click the Login Button
driver.find_element(By.XPATH,"//button[@type="submit"]").click()
time.sleep(10)
# Get the link that the csv button contains
csv_button = driver.find_element(By.CLASS_NAME,"csv")
csv_button.click()
url = csv_button.get_attribute('href') # the url it takes you to when you click submit. Opening this automatically downloads the csv
print(url) # just to check it
If I try opening the url I get using requests then I have to login again and repeat the same process over and over again. I’m stuck
Hello, and welcome to StackOverflow! You might need the cookies from the Selenium webdriver that authenticates you. Try passing these cookies to
requests.get
; check this answer for how to get the cookies from Selenium.