60 lines
2.0 KiB
Python
60 lines
2.0 KiB
Python
import helpers
|
|
import logging
|
|
from lxml import etree
|
|
|
|
# Configure the root logger once at import time: every record is rendered as
# "timestamp - logger name - level - message", and INFO is the minimum level.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
|
|
|
|
def get_title(soup):
    """Return the product title text, or an empty string if it is unavailable.

    Args:
        soup: Parsed page object exposing ``find(name, attrs=...)``
            (presumably a BeautifulSoup document -- confirm against caller).

    Returns:
        The whitespace-stripped ``string`` of the ``<span id="productTitle">``
        element, or ``""`` when the span is missing or has no string.
    """
    try:
        title = soup.find("span", attrs={"id": 'productTitle'})
        # A missing span (find() returned None) and a span whose ``.string``
        # is None both surface here as AttributeError.
        title_string = title.string.strip()
    except AttributeError as err:
        # Log through this module's own ``logging`` import, like the other
        # scrapers in this file, instead of reaching through ``helpers``.
        logging.info(f"Couldn't get title: {err}")
        title_string = ""

    return title_string
|
|
|
|
def get_price(soup):
    """Return the product price text, or ``"N/A"`` when no price is found.

    Args:
        soup: Parsed page object exposing ``find(name, attrs=...)``
            (presumably a BeautifulSoup document -- confirm against caller).

    Returns:
        The stripped ``string`` of the first ``<span class="a-offscreen">``
        element when it contains a euro sign; otherwise ``"N/A"``. This
        function never raises: any failure degrades to ``"N/A"``.
    """
    try:
        price = soup.find("span", attrs={'class': 'a-offscreen'}).string.strip()
        # Sometimes the matched span holds the title instead of the price,
        # so anything without a euro sign is rejected.
        if "€" not in price:
            price = "N/A"
    except AttributeError:
        # No matching span, or the span has no string.
        price = "N/A"
    except Exception as e:
        price = "N/A"
        # The original called the non-existent ``logging.price``, which raised
        # AttributeError from inside this handler instead of falling back.
        logging.warning(f"Error getting price, using N/A: {e}")
    logging.info(f"Price found: {price}")

    return price
|
|
|
|
def get_image(soup, etree_soup):
    """Return the product image URL, or ``"N/A"`` when none can be found.

    The ``<img id="landingImage">`` element is tried first. If it is missing,
    ``etree_soup`` is parsed with lxml and two fixed XPath locations inside
    ``#main-image-container`` are tried in order.

    Args:
        soup: Parsed page object exposing ``find(name, attrs=...)``
            (presumably a BeautifulSoup document -- confirm against caller).
        etree_soup: Object whose ``str()`` is the page HTML handed to
            ``etree.HTML`` for the XPath fallback.

    Returns:
        The ``src`` attribute of the first image located, or ``"N/A"`` when
        every lookup fails. This function never raises: unexpected errors are
        logged and also yield ``"N/A"``.
    """
    placeholder = "N/A"

    try:
        image = soup.find("img", attrs={'id': 'landingImage'}).get('src')
    except AttributeError:
        # find() returned None: the landing image is absent, use the fallback.
        pass
    except Exception as e:
        # Previously this branch only logged, so the final ``return image``
        # raised UnboundLocalError.
        logging.info(f"Error retrieving image: {e}")
        return placeholder
    else:
        logging.info("Image found")
        return image

    # Candidate positions of the image inside the main image container. The
    # original tried li[5] only from within the ``except IndexError`` handler
    # of li[4], so a second miss escaped as an uncaught IndexError.
    fallback_xpaths = (
        '//*[@id="main-image-container"]/ul/li[4]/span/span/div/img',
        '//*[@id="main-image-container"]/ul/li[5]/span/span/div/img',
    )
    try:
        # Parse once; the original re-parsed the document for each attempt.
        dom = etree.HTML(str(etree_soup))
        for xpath in fallback_xpaths:
            logging.info("Couldn't retrieve image, trying with xpath method")
            matches = dom.xpath(xpath)
            if matches:
                return matches[0].get('src')
    except AttributeError:
        # e.g. the parser produced no document; fall through to placeholder.
        pass
    except Exception as e:
        logging.info(f"Error retrieving image: {e}")
        return placeholder

    logging.info("Couldn't retrieve image with xpath method, falling back to placeholder")
    return placeholder