Issue
Hello, I am web scraping a site.
Here is my code:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import time
# Question script: opens the laptops collection page in Chrome, collects four
# text fields per product (grade, title, old price, special price) and writes
# them to laptop.csv.
# NOTE(review): indentation was lost in this paste; the statements that use
# `i` (through ALL_ITEMS.append) presumably form the body of the `for i` loop,
# and print(item) the body of the `for item` loop.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
search_url = "https://dealt.ae/collections/laptops"
driver.get(search_url)
# Fixed pause after navigation before any elements are queried.
time.sleep(3)
ALL_ITEMS = []
# One shared 4-slot row buffer; a copy of it is appended for each product.
item = ["", "", "", ""]
# The container div that holds the whole product grid.
all_laptops = driver.find_elements(By.XPATH, '//div[@class="product-collection products-grid row"]')
# NOTE: an XPath beginning with `//` is evaluated from the document root even
# when called on an element, so this is not limited to all_laptops[0].
grades = all_laptops[0].find_elements(By.XPATH, '//div[@class="product-top"]/span[@class="product-metafild"]')
for i in range(len(grades)):
item[0] = grades[i].text
# Each field below is fetched as an independent flat list over the whole grid
# and then indexed with the same `i`. If any product lacks one of these
# elements, the lists have different lengths and index `i` no longer refers
# to the same product in every list.
description = all_laptops[0].find_elements(By.CLASS_NAME, 'product-title')
item[1] = description[i].text
old_price= all_laptops[0].find_elements(By.CLASS_NAME, 'old-price')
item[2] = old_price[i].text
special_price= all_laptops[0].find_elements(By.CLASS_NAME, 'special-price')
item[3] = special_price[i].text
print(item)
# Copy, because `item` is reused and overwritten for the next product.
ALL_ITEMS.append(item.copy())
# Dump every collected row again once scraping is done.
for item in ALL_ITEMS:
print(item)
# Write the rows to CSV with no index column and no header row.
my_df = pd.DataFrame(ALL_ITEMS)
my_df.to_csv('laptop.csv', index=False, header=False)
It is giving me the wrong output: the rows mix up the grade of one product with the price of another. Please tell me what I did wrong.
Thanks
Solution
This happens because you are not selecting the right element: the class that is common to every laptop card and contains all the details of that particular laptop. Select each card first, then read its fields from inside that card.
Here is the full working code:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from selenium.common.exceptions import NoSuchElementException
import pandas as pd
import time
def _scrape_card(card):
    """Return ``[grade, title, price, special_price]`` for one product card.

    Every lookup is made on ``card`` itself, so all four values come from
    the same product. A card that has no ``old-price``/``special-price``
    pair falls back to its ``price-regular`` element, and the special-price
    slot is left empty.

    Raises:
        NoSuchElementException: if the card has no grade, no title, or
            none of the expected price elements.
    """
    grade = card.find_element(By.CLASS_NAME, 'product-metafild').text
    title = card.find_element(By.CLASS_NAME, 'product-title').text
    try:
        price = card.find_element(By.CLASS_NAME, 'old-price').text
        special_price = card.find_element(By.CLASS_NAME, 'special-price').text
    except NoSuchElementException:
        # Product is not discounted: only a regular price is shown.
        price = card.find_element(By.CLASS_NAME, 'price-regular').text
        special_price = ""
    return [grade, title, price, special_price]


driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
ALL_ITEMS = []
try:
    # Change the `page` parameter to scrape another page (per the original
    # author: up to 14 pages at 24 items per page).
    search_url = "https://dealt.ae/collections/laptops?page=1"
    driver.get(search_url)
    time.sleep(3)  # crude wait for the product grid to render

    all_laptops = driver.find_elements(By.XPATH, '//div[@class="product-collection products-grid row"]')
    # Guard against a missing container instead of failing on all_laptops[0].
    all_grids = all_laptops[0].find_elements(By.CLASS_NAME, 'grid-item') if all_laptops else []
    print(f"Getting {len(all_grids)} laptops")

    # The page is not navigated inside the loop, so the card elements found
    # above stay valid and do not need to be looked up again per iteration.
    for grid in all_grids:
        item = _scrape_card(grid)
        print(item)
        ALL_ITEMS.append(item)
finally:
    # Always shut the browser down, even if scraping fails part-way.
    driver.quit()

for row in ALL_ITEMS:
    print(row)

# Write the rows to CSV with no index column and no header row.
my_df = pd.DataFrame(ALL_ITEMS)
my_df.to_csv('laptop_13.csv', index=False, header=False)
Answered By – Djangodev
This Answer collected from stackoverflow, is licensed under cc by-sa 2.5 , cc by-sa 3.0 and cc by-sa 4.0