From d99399f6ac3fa6adc72c574c78062b5adca89f96 Mon Sep 17 00:00:00 2001
From: Joan <lgray.foxl@gmail.com>
Date: Tue, 11 Jul 2023 12:56:02 +0200
Subject: [PATCH] Added xpath method for obtaining image when normal fails

---
 bot/bot.py     |  6 ++++--
 bot/helpers.py |  2 +-
 bot/parser.py  | 21 ++++++++++++++++++---
 3 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/bot/bot.py b/bot/bot.py
index 71015cd..46aa5d0 100644
--- a/bot/bot.py
+++ b/bot/bot.py
@@ -60,9 +60,10 @@ def find_amazon_link(update: Update, context: CallbackContext) -> None:
         driver.get(msg)
         logging.info("Scraping information and closing browser")
         soup = BeautifulSoup(driver.page_source, "lxml")
+        etree_soup = BeautifulSoup(driver.page_source, "html.parser")
 
         logging.info("Getting title...")
-        title, price, image = parser.get_title(soup), parser.get_price(soup), parser.get_image(soup)
+        title, price, image = parser.get_title(soup), parser.get_price(soup), parser.get_image(soup, etree_soup)
         if title == "":
             logging.info(f"Title not found, not a valid product or captcha")
             captcha = AmazonCaptcha.fromdriver(driver)
@@ -80,8 +81,9 @@ def find_amazon_link(update: Update, context: CallbackContext) -> None:
                 driver.get(msg)
                 logging.info("Scraping information")
                 soup = BeautifulSoup(driver.page_source, "lxml")
+                etree_soup = BeautifulSoup(driver.page_source, "html.parser")
                 logging.info("Getting title...")
-                title, price, image = parser.get_title(soup), parser.get_price(soup), parser.get_image(soup)
+                title, price, image = parser.get_title(soup), parser.get_price(soup), parser.get_image(soup, etree_soup)
                 if title == "":
                     logging.info(f"Title not found, not a valid product or failed captcha")
                     return
diff --git a/bot/helpers.py b/bot/helpers.py
index d58e98e..def2498 100644
--- a/bot/helpers.py
+++ b/bot/helpers.py
@@ -43,7 +43,7 @@ def create_image(product_id, price):
     try:
         product_image = Image.open(f"/app/data/images/products/{product_id}.jpg")
     except:
-        product_image = Image.open(f"/app/data/images/placeholder.jpg")
+        product_image = Image.open(f"/app/data/images/placeholder.png")
     hpercent = (baseheight / float(product_image.size[1]))
     wsize = int((float(product_image.size[0]) * float(hpercent)))
     if wsize < wlimit:
diff --git a/bot/parser.py b/bot/parser.py
index c5729b4..65e99cd 100644
--- a/bot/parser.py
+++ b/bot/parser.py
@@ -1,4 +1,11 @@
 import helpers
+import logging
+from lxml import etree
+
+# Enable logging
+logging.basicConfig(
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO
+)
 
 def get_title(soup):
     try:
@@ -22,11 +29,19 @@ def get_price(soup):
 
     return price
 
-def get_image(soup):
+def get_image(soup, etree_soup):
     try:
         image = soup.find("img", attrs={'id':'landingImage'})
         image = image.get('src')
+        logging.info("Image found")
     except AttributeError:
-        image = "N/A"	
-
+        try:
+            logging.info("Couldn't retrieve image, trying with xpath method")
+            dom = etree.HTML(str(etree_soup))
+            image = dom.xpath('//*[@id="main-image-container"]/ul/li[4]/span/span/div/img')
+            image = image[0].get('src')
+        except (AttributeError, IndexError):  # xpath() returns [] on no match, so image[0] raises IndexError
+            logging.info("Couldn't retrieve image with xpath method, falling back to placeholder")
+            image = "N/A"
+
     return image
\ No newline at end of file