Added captcha solving method

This commit is contained in:
Joan
2023-03-06 22:34:43 +01:00
parent e40ca2f165
commit 9979672b46
3 changed files with 34 additions and 7 deletions

View File

@@ -1,7 +1,5 @@
FROM python:3.7
ENV TELEGRAM_API_KEY=5707028834:AAFlX07ObRGWmm15KtHgwqcPZ4OHy-MkMks
# Add Google's package signing key to apt's trusted keys
RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add -
@@ -12,12 +10,15 @@ RUN sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable
RUN apt-get -y update
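# Install Google Chrome (stable) from the Google apt repository; the commented-out lines below pin a specific version instead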
#RUN wget -O /tmp/google-chrome-stable.deb http://mirror.cs.uchicago.edu/google-chrome/pool/main/g/google-chrome-stable/google-chrome-stable_108.0.5359.71-1_amd64.deb
#RUN apt install -y /tmp/google-chrome-stable.deb
RUN apt-get install -y google-chrome-stable
# Installing Unzip
RUN apt-get install -yqq unzip
# Download the Chrome Driver
#RUN wget -O /tmp/chromedriver.zip http://chromedriver.storage.googleapis.com/108.0.5359.71/chromedriver_linux64.zip
RUN wget -O /tmp/chromedriver.zip http://chromedriver.storage.googleapis.com/`curl -sS chromedriver.storage.googleapis.com/LATEST_RELEASE`/chromedriver_linux64.zip
# Unzip the Chrome Driver into /usr/local/bin directory

View File

@@ -5,8 +5,12 @@ import dbhelper
import constants
import helpers
import parser
import time
from amazoncaptcha import AmazonCaptcha
from bs4 import BeautifulSoup
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from telegram import Update, ForceReply
@@ -56,15 +60,36 @@ def find_amazon_link(update: Update, context: CallbackContext) -> None:
driver.get(msg)
logging.info("Scraping information and closing browser")
soup = BeautifulSoup(driver.page_source, "lxml")
driver.close()
logging.info("Getting title...")
title, price, image = parser.get_title(soup), parser.get_price(soup), parser.get_image(soup)
if title == "":
logging.info(f"Title not found, not a valid product")
return
logging.info(f"Title not found, not a valid product or captcha")
captcha = AmazonCaptcha.fromdriver(driver)
solution = captcha.solve()
if solution == 'Not solved':
logging.info("Couldn't solve the captcha, if there was any")
else:
logging.info(f"Captcha solution is {solution}, redirecting")
logging.info("Waiting for 5 seconds, humans are not that fast :)")
time.sleep(5)
fill_captcha_element = driver.find_element(By.ID, 'captchacharacters')
fill_captcha_element.send_keys(solution)
fill_captcha_element.send_keys(Keys.RETURN)
logging.info("Re-loading Amazon webpage")
driver.get(msg)
logging.info("Scraping information")
soup = BeautifulSoup(driver.page_source, "lxml")
logging.info("Getting title...")
title, price, image = parser.get_title(soup), parser.get_price(soup), parser.get_image(soup)
if title == "":
logging.info(f"Title not found, not a valid product or failed captcha")
return
logging.info(f"Title found: {title}")
logging.info("Closing browser")
driver.close()
context.bot.deleteMessage(chat_id=chat['id'], message_id=message_id)
referurl = helpers.new_refer_url(productCode, extraparams)
product_id = dbhelper.check_product(referurl, price)

View File

@@ -2,5 +2,6 @@ python-telegram-bot==13.13
requests==2.28.1
beautifulsoup4==4.11.1
lxml==4.9.1
selenium==4.4.0
Pillow==9.2.0
selenium==4.2.0
Pillow==9.4.0
amazoncaptcha==0.5.9