Remove duplicated code by extracting repeated code into methods and add feedback for invalid urls
This commit is contained in:
60
bot/product_parser.py
Normal file
60
bot/product_parser.py
Normal file
@@ -0,0 +1,60 @@
|
||||
import helpers
|
||||
import logging
|
||||
from lxml import etree
|
||||
|
||||
# Configure the root logger once at import time: INFO level and above,
# each record prefixed with timestamp, logger name and level.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
|
||||
|
||||
def get_title(soup):
    """Return the product title found in a parsed product page.

    Looks up the ``<span id="productTitle">`` element through ``soup.find``
    and returns its ``.string`` with surrounding whitespace stripped.

    Args:
        soup: Parsed page object exposing ``find(name, attrs=...)``.

    Returns:
        The stripped title text, or an empty string when the element is
        missing or its ``.string`` is ``None``.
    """
    try:
        title_tag = soup.find("span", attrs={"id": "productTitle"})
        return title_tag.string.strip()
    except AttributeError as err:
        # Raised when find() returned None or when .string is None.
        # Log through this module's own logging import instead of reaching
        # into helpers.logging, matching the other parsers in this file.
        logging.info(f"Couldn't get title: {err}")
        return ""
|
||||
|
||||
def get_price(soup):
    """Return the product price text found in a parsed product page.

    Reads the first ``<span class="a-offscreen">`` element through
    ``soup.find`` and returns its stripped ``.string``. Text that does not
    contain the euro sign is rejected.

    Args:
        soup: Parsed page object exposing ``find(name, attrs=...)``.

    Returns:
        The price text (containing "€"), or "N/A" when the element is
        missing, holds something other than a price, or any error occurs.
    """
    try:
        price = soup.find("span", attrs={'class': 'a-offscreen'}).string.strip()
        # Sometimes the matched span holds the title instead of the price,
        # so only accept text that contains the euro sign.
        if "€" not in price:
            price = "N/A"
    except AttributeError:
        # find() returned None or .string was None: no price on the page.
        price = "N/A"
    except Exception as e:
        price = "N/A"
        # The original called the non-existent logging.price(), which raised
        # AttributeError from inside this handler instead of returning "N/A".
        logging.warning(f"Error getting price, using N/A: {e}")
    logging.info(f"Price found: {price}")

    return price
|
||||
|
||||
def get_image(soup, etree_soup):
    """Return the URL of the main product image, or "N/A" if none is found.

    Strategy, in order:
      1. The ``src`` of ``<img id="landingImage">`` looked up via ``soup.find``.
      2. The ``src`` of the first ``<img>`` matched by each fallback xpath
         under ``#main-image-container`` (4th list item, then 5th), evaluated
         on ``etree.HTML(str(etree_soup))``.
      3. The placeholder string "N/A".

    Args:
        soup: Parsed page object exposing ``find(name, attrs=...)``.
        etree_soup: Object whose ``str()`` is the page HTML used for the
            xpath fallbacks.

    Returns:
        The image URL, or "N/A" when every lookup fails. A ``landingImage``
        tag without a ``src`` is treated as missing, so this never returns
        ``None``.
    """
    # Primary lookup. Any failure here only triggers the fallbacks; the
    # original left ``image`` unbound when a non-AttributeError was raised.
    try:
        tag = soup.find("img", attrs={'id': 'landingImage'})
        src = tag.get('src') if tag is not None else None
    except Exception as e:
        logging.info(f"Error retrieving image: {e}")
        src = None
    if src:
        logging.info("Image found")
        return src

    fallbacks = (
        (
            '//*[@id="main-image-container"]/ul/li[4]/span/span/div/img',
            "Couldn't retrieve image, trying with xpath method",
        ),
        (
            '//*[@id="main-image-container"]/ul/li[5]/span/span/div/img',
            "Couldn't retrieve image, trying with xpath method 2",
        ),
    )
    try:
        # Parse once and reuse the tree for every fallback xpath.
        dom = etree.HTML(str(etree_soup))
        for xpath, message in fallbacks:
            logging.info(message)
            matches = dom.xpath(xpath)
            if matches:
                src = matches[0].get('src')
                if src:
                    return src
    except Exception as e:
        # Parsing or xpath errors must not escape: fall back to placeholder.
        logging.info(f"Error retrieving image: {e}")

    logging.info("Couldn't retrieve image with xpath method, falling back to placeholder")
    return "N/A"
|
||||
Reference in New Issue
Block a user