diff --git a/bot/bot.py b/bot/bot.py index 46aa5d0..08ee06b 100644 --- a/bot/bot.py +++ b/bot/bot.py @@ -53,14 +53,17 @@ def find_amazon_link(update: Update, context: CallbackContext) -> None: user, chat, message_id = update.message.from_user, update.message.chat, update.message.message_id logging.info(f"Link sent by {user} - {msg}") + referurl = helpers.new_refer_url(productCode, extraparams) logging.info("Setting up new Chrome Browser") driver = webdriver.Chrome(options=get_chrome_options()) driver.set_window_size(random.randint(1200, 1800), random.randint(600, 1000)) logging.info("Loading Amazon webpage") - driver.get(msg) + driver.get(referurl) logging.info("Scraping information and closing browser") soup = BeautifulSoup(driver.page_source, "lxml") etree_soup = BeautifulSoup(driver.page_source, "html.parser") + # DEBUG + #logging.info(soup) logging.info("Getting title...") title, price, image = parser.get_title(soup), parser.get_price(soup), parser.get_image(soup, etree_soup) @@ -93,11 +96,10 @@ def find_amazon_link(update: Update, context: CallbackContext) -> None: driver.close() context.bot.deleteMessage(chat_id=chat['id'], message_id=message_id) - referurl = helpers.new_refer_url(productCode, extraparams) product_id = dbhelper.check_product(referurl, price) if not product_id: product_id = dbhelper.add_product(user['username'], chat['title'], user['id'], chat['id'], msg, referurl, title, price, image) - helpers.create_image(product_id, price) + helpers.create_image(product_id, price) keyboard = [[InlineKeyboardButton("Ir a Amazon", url=f"{referurl}")]] markup = InlineKeyboardMarkup(keyboard) context.bot.send_photo(chat_id=update.message.chat_id, photo=open(f"/app/data/images/products/{product_id}_composed.png", 'rb'), caption=f"URL enviada por @{user['username']}: \n\n{title}", reply_markup=markup) diff --git a/bot/parser.py b/bot/parser.py index 65e99cd..c08f94b 100644 --- a/bot/parser.py +++ b/bot/parser.py @@ -40,8 +40,17 @@ def get_image(soup, etree_soup): dom = etree.HTML(str(etree_soup)) image = dom.xpath('//*[@id="main-image-container"]/ul/li[4]/span/span/div/img') image = image[0].get('src') + except IndexError: + logging.info("Couldn't retrieve image, trying with xpath method") + dom = etree.HTML(str(etree_soup)) + image = dom.xpath('//*[@id="main-image-container"]/ul/li[5]/span/span/div/img') + image = image[0].get('src') except AttributeError: logging.info("Couldn't retrieve image with xpath method, falling back to placeholder") - image = "N/A" + image = "N/A" + except Exception as e: + logging.info(f"Error retrieving image: {e}") + except Exception as e: + logging.info(f"Error retrieving image: {e}") return image \ No newline at end of file