From 9979672b46eb36b5ab66505e243df23d3bb53c58 Mon Sep 17 00:00:00 2001 From: Joan Date: Mon, 6 Mar 2023 22:34:43 +0100 Subject: [PATCH] Added captcha solving method --- bot/Dockerfile | 5 +++-- bot/bot.py | 31 ++++++++++++++++++++++++++++--- bot/requirements.txt | 5 +++-- 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/bot/Dockerfile b/bot/Dockerfile index f55301f..7a6e618 100644 --- a/bot/Dockerfile +++ b/bot/Dockerfile @@ -1,7 +1,5 @@ FROM python:3.7 -ENV TELEGRAM_API_KEY=5707028834:AAFlX07ObRGWmm15KtHgwqcPZ4OHy-MkMks - # Adding trusting keys to apt for repositories RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - @@ -12,12 +10,15 @@ RUN sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable RUN apt-get -y update # Magic happens +#RUN wget -O /tmp/google-chrome-stable.deb http://mirror.cs.uchicago.edu/google-chrome/pool/main/g/google-chrome-stable/google-chrome-stable_108.0.5359.71-1_amd64.deb +#RUN apt install -y /tmp/google-chrome-stable.deb RUN apt-get install -y google-chrome-stable # Installing Unzip RUN apt-get install -yqq unzip # Download the Chrome Driver +#RUN wget -O /tmp/chromedriver.zip http://chromedriver.storage.googleapis.com/108.0.5359.71/chromedriver_linux64.zip RUN wget -O /tmp/chromedriver.zip http://chromedriver.storage.googleapis.com/`curl -sS chromedriver.storage.googleapis.com/LATEST_RELEASE`/chromedriver_linux64.zip # Unzip the Chrome Driver into /usr/local/bin directory diff --git a/bot/bot.py b/bot/bot.py index 94b61c6..c683b66 100644 --- a/bot/bot.py +++ b/bot/bot.py @@ -5,8 +5,12 @@ import dbhelper import constants import helpers import parser +import time +from amazoncaptcha import AmazonCaptcha from bs4 import BeautifulSoup +from selenium.webdriver.common.keys import Keys +from selenium.webdriver.common.by import By from selenium.webdriver.chrome.options import Options from selenium import webdriver from telegram import Update, ForceReply @@ -56,15 +60,36 @@ def find_amazon_link(update: Update, context: CallbackContext) -> None: driver.get(msg) logging.info("Scraping information and closing browser") soup = BeautifulSoup(driver.page_source, "lxml") - driver.close() logging.info("Getting title...") title, price, image = parser.get_title(soup), parser.get_price(soup), parser.get_image(soup) if title == "": - logging.info(f"Title not found, not a valid product") - return + logging.info(f"Title not found, not a valid product or captcha") + captcha = AmazonCaptcha.fromdriver(driver) + solution = captcha.solve() + if solution == 'Not solved': + logging.info("Couldn't solve the captcha, if there was any") + else: + logging.info(f"Captcha solution is {solution}, redirecting") + logging.info("Waiting for 5 seconds, humans are not that fast :)") + time.sleep(5) + fill_captcha_element = driver.find_element(By.ID, 'captchacharacters') + fill_captcha_element.send_keys(solution) + fill_captcha_element.send_keys(Keys.RETURN) + logging.info("Re-loading Amazon webpage") + driver.get(msg) + logging.info("Scraping information") + soup = BeautifulSoup(driver.page_source, "lxml") + logging.info("Getting title...") + title, price, image = parser.get_title(soup), parser.get_price(soup), parser.get_image(soup) + if title == "": + logging.info(f"Title not found, not a valid product or failed captcha") + return logging.info(f"Title found: {title}") + logging.info("Closing browser") + driver.close() + context.bot.deleteMessage(chat_id=chat['id'], message_id=message_id) referurl = helpers.new_refer_url(productCode, extraparams) product_id = dbhelper.check_product(referurl, price) diff --git a/bot/requirements.txt b/bot/requirements.txt index f8395ca..4d49785 100644 --- a/bot/requirements.txt +++ b/bot/requirements.txt @@ -2,5 +2,6 @@ python-telegram-bot==13.13 requests==2.28.1 beautifulsoup4==4.11.1 lxml==4.9.1 -selenium==4.4.0 -Pillow==9.2.0 \ No newline at end of file +selenium==4.2.0 +Pillow==9.4.0 +amazoncaptcha==0.5.9 \ No newline at end of file