I want to do web scraping and make it on all the pages of websites (I don’t know the number of pages) I tried to work on it but it only loop on the first page of the website and here is my code
try:
page_number = 1
while True:
url = f"{base_url}?page={page_number}"
driver.get(url)
# Wait for the car links to be present on the page
WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '.versions-item > a')))
# Extract car links
html_content = driver.page_source
soup = BeautifulSoup(html_content, 'html.parser')
car_links = soup.select('.versions-item > a')
if not car_links:
print(f"No car links found on page {page_number}. Exiting.")
break
for car_link in car_links:
car_url_relative = car_link['href']
car_url = urljoin(base_url, car_url_relative)
print(f"Processing car link: {car_url}")
driver.get(car_url)
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, 'page-title')))
car_brand_name = driver.find_element(By.CSS_SELECTOR, '.bloc-title .page-title').text.strip()
print(car_brand_name)
table = driver.find_element(By.TAG_NAME, 'table')
rows = table.find_elements(By.TAG_NAME, 'tr')
data = []
for row in rows:
if row.find_elements(By.TAG_NAME, 'th'):
continue
version = row.find_element(By.CLASS_NAME, 'version').find_element(By.TAG_NAME, 'a').text
price = row.find_element(By.CLASS_NAME, 'price').text
data.append({'Car Brand': car_brand_name, 'Version': version, 'Price': price})
all_data.extend(data)
print(f"Processed car: {car_brand_name}")
# Check if the next button is present by class name
next_button_class="page-item-next"
try:
# Explicitly wait for the presence of the next button before clicking
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, next_button_class)))
next_button = driver.find_element(By.CLASS_NAME, next_button_class)
next_button.click()
except NoSuchElementException:
print("No next button found. Exiting.")
break
page_number += 1
however I tried to scrap the first 2 pages with a for loop in range(1,3) and it works
is not working doesn’t tell us anything. Please make sure you have a minimal reproducible example and a clearly described issue. You might also benefit from reading How To Debug Small Programs.
@baduker this code is scraping onlt the first page it is not looping on all the website pages