Web scraping loop over pages is not working [closed]

Question

I want to scrape every page of a website (I don't know the total number of pages). I tried to implement this, but my code only loops over the first page of the website. Here is my code:

try:
    page_number = 1
    while True:
        url = f"{base_url}?page={page_number}"
        driver.get(url)

        # Wait for the car links to be present on the page
        WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '.versions-item > a')))
        
        # Extract car links
        html_content = driver.page_source
        soup = BeautifulSoup(html_content, 'html.parser')
        car_links = soup.select('.versions-item > a')

        if not car_links:
            print(f"No car links found on page {page_number}. Exiting.")
            break

        for car_link in car_links:
            car_url_relative = car_link['href']
            car_url = urljoin(base_url, car_url_relative)
            print(f"Processing car link: {car_url}")

            driver.get(car_url)
            WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, 'page-title')))
            car_brand_name = driver.find_element(By.CSS_SELECTOR, '.bloc-title .page-title').text.strip()
            print(car_brand_name)
            table = driver.find_element(By.TAG_NAME, 'table')
            rows = table.find_elements(By.TAG_NAME, 'tr')

            data = []
            for row in rows:
                if row.find_elements(By.TAG_NAME, 'th'):
                    continue
                version = row.find_element(By.CLASS_NAME, 'version').find_element(By.TAG_NAME, 'a').text
                price = row.find_element(By.CLASS_NAME, 'price').text
                data.append({'Car Brand': car_brand_name, 'Version': version, 'Price': price})

            all_data.extend(data)

            print(f"Processed car: {car_brand_name}")

        # Check if the next button is present by class name
        next_button_class="page-item-next"
        try:
            # Explicitly wait for the presence of the next button before clicking
            WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, next_button_class)))
            next_button = driver.find_element(By.CLASS_NAME, next_button_class)
            next_button.click()
        except NoSuchElementException:
            print("No next button found. Exiting.")
            break

        page_number += 1

However, when I scrape only the first 2 pages with a for loop over range(1, 3), it works.

here is the html

Leave a Comment Cancel reply