Files
amazon-telegram/bot/product_parser.py

61 lines
2.0 KiB
Python

import helpers
import logging
from lxml import etree
# Configure the root logger when this module is imported: INFO level and
# above, each record rendered as "<time> - <logger name> - <level> - <message>".
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
def get_title(soup):
    """Return the product title text from a parsed product page.

    Looks up the ``<span id="productTitle">`` element and returns its text
    with surrounding whitespace removed.

    Args:
        soup: Parsed page object exposing ``find(name, attrs=...)``
            (presumably a BeautifulSoup document -- confirm against caller).

    Returns:
        The stripped title, or an empty string when the element is missing
        or its ``.string`` is ``None``.
    """
    try:
        title_tag = soup.find("span", attrs={"id": "productTitle"})
        title_string = title_tag.string.strip()
    except AttributeError as err:
        # Raised when ``find`` returned None or ``.string`` is None.
        # Log through this module's own ``logging`` import, like the other
        # parsers here, instead of reaching into ``helpers.logging``.
        logging.info(f"Couldn't get title: {err}")
        title_string = ""
    return title_string
def get_price(soup):
    """Return the product price text from a parsed product page.

    Looks up the first ``<span class="a-offscreen">`` element and returns its
    text with surrounding whitespace removed.

    Args:
        soup: Parsed page object exposing ``find(name, attrs=...)``
            (presumably a BeautifulSoup document -- confirm against caller).

    Returns:
        The price string, or ``"N/A"`` when the element is missing, its text
        contains no digit, or the lookup fails for any other reason.
    """
    price = "N/A"
    try:
        candidate = soup.find("span", attrs={"class": "a-offscreen"}).string.strip()
    except AttributeError:
        # ``find`` returned None or ``.string`` is None: no price on the page.
        price = "N/A"
    except Exception as e:
        # The original called the non-existent ``logging.price`` here, which
        # raised inside the handler instead of falling back to "N/A".
        logging.info(f"Error getting price, using N/A: {e}")
    else:
        # Sometimes the title ends up in this element instead of the price.
        # The original guard (`"" not in price`) could never be true, so
        # require at least one digit before accepting the text as a price.
        if any(ch.isdigit() for ch in candidate):
            price = candidate
    logging.info(f"Price found: {price}")
    return price
def get_image(soup, etree_soup):
    """Return the product image URL from a parsed product page.

    Strategies are tried in order and the first non-empty ``src`` wins:

    1. The ``<img id="landingImage">`` element found through ``soup``.
    2. The image under the 4th ``<li>`` of ``#main-image-container``,
       located with XPath on ``etree_soup``.
    3. The same XPath under the 5th ``<li>``.

    Args:
        soup: Parsed page object exposing ``find(name, attrs=...)``
            (presumably a BeautifulSoup document -- confirm against caller).
        etree_soup: Object whose ``str()`` is the page markup; it is parsed
            with ``lxml.etree.HTML`` for the XPath fallbacks.

    Returns:
        The image URL, or ``"N/A"`` when no strategy yields one. The result
        is always a string: the original could return ``None``, return a
        non-string object, or hit an unbound local on some failure paths.
    """
    # Strategy 1: the main landing image.
    try:
        src = soup.find("img", attrs={"id": "landingImage"}).get("src")
    except AttributeError:
        # ``find`` returned None: the element is not on the page.
        src = None
    except Exception as e:
        logging.info(f"Error retrieving image: {e}")
        src = None
    if src:
        logging.info("Image found")
        return src

    # Strategies 2 and 3: positional XPath lookups in the image carousel.
    xpath_attempts = (
        (
            "Couldn't retrieve image, trying with xpath method",
            '//*[@id="main-image-container"]/ul/li[4]/span/span/div/img',
        ),
        (
            "Couldn't retrieve image, trying with xpath method 2",
            '//*[@id="main-image-container"]/ul/li[5]/span/span/div/img',
        ),
    )
    try:
        # Parse once and reuse the tree for every XPath attempt.
        dom = etree.HTML(str(etree_soup))
        for message, xpath in xpath_attempts:
            logging.info(message)
            matches = dom.xpath(xpath)
            if matches:
                src = matches[0].get("src")
                if src:
                    return src
    except Exception as e:
        # Best-effort scraping: a parse or XPath failure must not crash the
        # caller, so log it and fall through to the placeholder.
        logging.info(f"Error retrieving image: {e}")

    logging.info("Couldn't retrieve image with xpath method, falling back to placeholder")
    return "N/A"