Selenium web scraping is giving me the wrong rows, where did I go wrong?

Issue

Hello, I am web scraping a site.

Here is my code:

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import time


driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

search_url = "https://dealt.ae/collections/laptops"
driver.get(search_url)
time.sleep(3)

ALL_ITEMS = []
item = ["", "", "", ""]
all_laptops = driver.find_elements(By.XPATH, '//div[@class="product-collection products-grid row"]')
grades = all_laptops[0].find_elements(By.XPATH, '//div[@class="product-top"]/span[@class="product-metafild"]')
for i in range(len(grades)):
    item[0] = grades[i].text
    description = all_laptops[0].find_elements(By.CLASS_NAME, 'product-title')
    item[1] = description[i].text
    old_price = all_laptops[0].find_elements(By.CLASS_NAME, 'old-price')
    item[2] = old_price[i].text
    special_price = all_laptops[0].find_elements(By.CLASS_NAME, 'special-price')
    item[3] = special_price[i].text
    print(item)
    ALL_ITEMS.append(item.copy())    
                
for item in ALL_ITEMS:
    print(item)

my_df = pd.DataFrame(ALL_ITEMS)
my_df.to_csv('laptop.csv', index=False, header=False)

It is giving me wrong output rows: the grade from one product ends up paired with the price of a different product. Please tell me where I went wrong.

Thanks

Solution

It is because you are not selecting the one class that is common to every laptop card and holds all the details for that particular laptop. Your code builds separate page-wide lists of grades, titles, old prices and special prices and pairs them up by index; as soon as those lists differ in length or order (for example when a product has no old/special price), index i no longer points at the same laptop in each list, so the rows get mixed up. Locate each laptop's grid-item container first and read the fields from inside it.
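One Selenium detail that also contributes to this: an XPath that starts with // searches the whole document even when find_elements is called on a WebElement, so the grades query in your code is not actually limited to the product grid. A relative search has to be anchored with .//, roughly like this:

# An XPath starting with '//' ignores the context element and searches the whole page:
grades = all_laptops[0].find_elements(By.XPATH, '//div[@class="product-top"]/span[@class="product-metafild"]')

# Prefixing it with './/' keeps the search inside all_laptops[0]:
grades = all_laptops[0].find_elements(By.XPATH, './/div[@class="product-top"]/span[@class="product-metafild"]')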

Here is the full working code

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from selenium.common.exceptions import NoSuchElementException
import pandas as pd
import time


driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

# set the page parameter; the collection has up to 14 pages with 24 items per page
search_url = "https://dealt.ae/collections/laptops?page=1"
driver.get(search_url)
time.sleep(3)

ALL_ITEMS = []
item = ["", "", "", ""]
all_laptops = driver.find_elements(By.XPATH, '//div[@class="product-collection products-grid row"]')
all_grids = all_laptops[0].find_elements(By.CLASS_NAME, 'grid-item')
print(f"Getting {len(all_grids)} laptops")
for i in range(len(all_grids)):
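    # re-locate the product grid and its items on every iteration so the element references stay fresh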
    all_laptops = driver.find_elements(By.XPATH, '//div[@class="product-collection products-grid row"]')
    all_grids = all_laptops[0].find_elements(By.CLASS_NAME, 'grid-item')
    grades = all_grids[i].find_element(By.CLASS_NAME, 'product-metafild')
    item[0] = grades.text
    description = all_grids[i].find_element(By.CLASS_NAME, 'product-title')
    item[1] = description.text
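    # discounted laptops have both 'old-price' and 'special-price'; otherwise only 'price-regular' exists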
    try:
        old_price = all_grids[i].find_element(By.CLASS_NAME, 'old-price')
        item[2] = old_price.text
        special_price = all_grids[i].find_element(By.CLASS_NAME, 'special-price')
        item[3] = special_price.text
    except NoSuchElementException:
        regular_price = all_grids[i].find_element(By.CLASS_NAME, 'price-regular')
        item[2]= regular_price.text
        item[3]= ""
    print(item)
    ALL_ITEMS.append(item.copy())    
                
for item in ALL_ITEMS:
    print(item)

my_df = pd.DataFrame(ALL_ITEMS)
my_df.to_csv('laptop_13.csv', index=False, header=False)
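
The comment at the top of this code mentions that the collection runs to about 14 pages of 24 items each. If you want every laptop rather than a single page, the same per-card logic can be wrapped in a helper and looped over the page parameter. This is only a rough sketch that reuses the imports and driver from the code above; the scrape_page name, the output filename and the hard-coded page range are placeholders you may need to adjust.

# Rough sketch: run the same per-card scraping over every page of the collection.
# Reuses driver, By, time, pd and NoSuchElementException from the code above.
def scrape_page(driver, page_number):
    driver.get(f"https://dealt.ae/collections/laptops?page={page_number}")
    time.sleep(3)  # simple fixed wait, same approach as above
    rows = []
    grids = driver.find_elements(By.XPATH, '//div[@class="product-collection products-grid row"]')
    if not grids:  # no product grid on this page, probably past the last page
        return rows
    for card in grids[0].find_elements(By.CLASS_NAME, 'grid-item'):
        row = ["", "", "", ""]
        row[0] = card.find_element(By.CLASS_NAME, 'product-metafild').text
        row[1] = card.find_element(By.CLASS_NAME, 'product-title').text
        try:
            row[2] = card.find_element(By.CLASS_NAME, 'old-price').text
            row[3] = card.find_element(By.CLASS_NAME, 'special-price').text
        except NoSuchElementException:
            row[2] = card.find_element(By.CLASS_NAME, 'price-regular').text
        rows.append(row)
    return rows

ALL_ITEMS = []
for page in range(1, 15):  # pages 1 to 14, as the comment above suggests
    ALL_ITEMS.extend(scrape_page(driver, page))

pd.DataFrame(ALL_ITEMS, columns=["grade", "title", "old_price", "special_price"]).to_csv('laptops_all_pages.csv', index=False)

An explicit WebDriverWait on the product grid would be more robust than the fixed time.sleep, but the sleep keeps the sketch close to the code above.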

Answered By – Djangodev

This answer, collected from Stack Overflow, is licensed under CC BY-SA 2.5, CC BY-SA 3.0 and CC BY-SA 4.0.
