Added an XPath fallback for obtaining the product image when the normal lookup fails

2023-07-11 12:56:02 +02:00
parent e3d7db2916
commit d99399f6ac
3 changed files with 23 additions and 6 deletions
--- a/bot/bot.py
+++ b/bot/bot.py
@@ -60,9 +60,10 @@ def find_amazon_link(update: Update, context: CallbackContext) -> None:
        driver.get(msg)
        logging.info("Scraping information and closing browser")
        soup = BeautifulSoup(driver.page_source, "lxml")
        etree_soup = BeautifulSoup(driver.page_source, "html.parser")
        logging.info("Getting title...")
-        title, price, image = parser.get_title(soup), parser.get_price(soup), parser.get_image(soup)
+        title, price, image = parser.get_title(soup), parser.get_price(soup), parser.get_image(soup, etree_soup)
        if title == "":
            logging.info(f"Title not found, not a valid product or captcha")
            captcha = AmazonCaptcha.fromdriver(driver)
@@ -80,8 +81,9 @@ def find_amazon_link(update: Update, context: CallbackContext) -> None:
                driver.get(msg)
                logging.info("Scraping information")
                soup = BeautifulSoup(driver.page_source, "lxml")
                etree_soup = BeautifulSoup(driver.page_source, "html.parser")
                logging.info("Getting title...")
-                title, price, image = parser.get_title(soup), parser.get_price(soup), parser.get_image(soup)
+                title, price, image = parser.get_title(soup), parser.get_price(soup), parser.get_image(soup, etree_soup)
                if title == "":
                    logging.info(f"Title not found, not a valid product or failed captcha")
                    return
--- a/bot/helpers.py
+++ b/bot/helpers.py
@@ -43,7 +43,7 @@ def create_image(product_id, price):
    try:
        product_image = Image.open(f"/app/data/images/products/{product_id}.jpg")
    except:
-        product_image = Image.open(f"/app/data/images/placeholder.jpg")
+        product_image = Image.open(f"/app/data/images/placeholder.png")
    hpercent = (baseheight / float(product_image.size[1]))
    wsize = int((float(product_image.size[0]) * float(hpercent)))
    if wsize < wlimit:
--- a/bot/parser.py
+++ b/bot/parser.py
@@ -1,4 +1,11 @@
 import helpers
 import logging
 from lxml import etree
 # Enable logging
 logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO
 )
 def get_title(soup):
    try:
@@ -22,11 +29,19 @@ def get_price(soup):
    return price
def get_image(soup, etree_soup):
    """Return the product image URL, or "N/A" when none can be found.

    The ``<img id="landingImage">`` element of ``soup`` is tried first. If
    that lookup fails, the markup of ``etree_soup`` is re-parsed with lxml
    and queried with a fixed XPath below ``main-image-container``.

    Args:
        soup: Parsed page exposing ``find`` (a BeautifulSoup tree in the
            caller).
        etree_soup: Object whose ``str()`` is the page HTML; only used by
            the XPath fallback.

    Returns:
        The ``src`` attribute of the matched image element, or the string
        "N/A" when neither lookup finds an image.
    """
    try:
        image = soup.find("img", attrs={'id': 'landingImage'})
        # find() yields None when the element is absent, so .get raises
        # AttributeError and we drop into the fallback below.
        image = image.get('src')
        logging.info("Image found")
    except AttributeError:
        try:
            logging.info("Couldn't retrieve image, trying with xpath method")
            dom = etree.HTML(str(etree_soup))
            matches = dom.xpath('//*[@id="main-image-container"]/ul/li[4]/span/span/div/img')
            # xpath() returns a list; it is empty when nothing matches, so
            # indexing raises IndexError rather than AttributeError.
            image = matches[0].get('src')
        except (AttributeError, IndexError):
            logging.info("Couldn't retrieve image with xpath method, falling back to placeholder")
            image = "N/A"
    return image