Added captcha solving method
This commit is contained in:
@@ -1,7 +1,5 @@
|
|||||||
FROM python:3.7
|
FROM python:3.7
|
||||||
|
|
||||||
ENV TELEGRAM_API_KEY=5707028834:AAFlX07ObRGWmm15KtHgwqcPZ4OHy-MkMks
|
|
||||||
|
|
||||||
# Adding trusting keys to apt for repositories
|
# Adding trusting keys to apt for repositories
|
||||||
RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add -
|
RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add -
|
||||||
|
|
||||||
@@ -12,12 +10,15 @@ RUN sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable
|
|||||||
RUN apt-get -y update
|
RUN apt-get -y update
|
||||||
|
|
||||||
# Magic happens
|
# Magic happens
|
||||||
|
#RUN wget -O /tmp/google-chrome-stable.deb http://mirror.cs.uchicago.edu/google-chrome/pool/main/g/google-chrome-stable/google-chrome-stable_108.0.5359.71-1_amd64.deb
|
||||||
|
#RUN apt install -y /tmp/google-chrome-stable.deb
|
||||||
RUN apt-get install -y google-chrome-stable
|
RUN apt-get install -y google-chrome-stable
|
||||||
|
|
||||||
# Installing Unzip
|
# Installing Unzip
|
||||||
RUN apt-get install -yqq unzip
|
RUN apt-get install -yqq unzip
|
||||||
|
|
||||||
# Download the Chrome Driver
|
# Download the Chrome Driver
|
||||||
|
#RUN wget -O /tmp/chromedriver.zip http://chromedriver.storage.googleapis.com/108.0.5359.71/chromedriver_linux64.zip
|
||||||
RUN wget -O /tmp/chromedriver.zip http://chromedriver.storage.googleapis.com/`curl -sS chromedriver.storage.googleapis.com/LATEST_RELEASE`/chromedriver_linux64.zip
|
RUN wget -O /tmp/chromedriver.zip http://chromedriver.storage.googleapis.com/`curl -sS chromedriver.storage.googleapis.com/LATEST_RELEASE`/chromedriver_linux64.zip
|
||||||
|
|
||||||
# Unzip the Chrome Driver into /usr/local/bin directory
|
# Unzip the Chrome Driver into /usr/local/bin directory
|
||||||
|
|||||||
29
bot/bot.py
29
bot/bot.py
@@ -5,8 +5,12 @@ import dbhelper
|
|||||||
import constants
|
import constants
|
||||||
import helpers
|
import helpers
|
||||||
import parser
|
import parser
|
||||||
|
import time
|
||||||
|
|
||||||
|
from amazoncaptcha import AmazonCaptcha
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
from selenium.webdriver.common.keys import Keys
|
||||||
|
from selenium.webdriver.common.by import By
|
||||||
from selenium.webdriver.chrome.options import Options
|
from selenium.webdriver.chrome.options import Options
|
||||||
from selenium import webdriver
|
from selenium import webdriver
|
||||||
from telegram import Update, ForceReply
|
from telegram import Update, ForceReply
|
||||||
@@ -56,15 +60,36 @@ def find_amazon_link(update: Update, context: CallbackContext) -> None:
|
|||||||
driver.get(msg)
|
driver.get(msg)
|
||||||
logging.info("Scraping information and closing browser")
|
logging.info("Scraping information and closing browser")
|
||||||
soup = BeautifulSoup(driver.page_source, "lxml")
|
soup = BeautifulSoup(driver.page_source, "lxml")
|
||||||
driver.close()
|
|
||||||
|
|
||||||
logging.info("Getting title...")
|
logging.info("Getting title...")
|
||||||
title, price, image = parser.get_title(soup), parser.get_price(soup), parser.get_image(soup)
|
title, price, image = parser.get_title(soup), parser.get_price(soup), parser.get_image(soup)
|
||||||
if title == "":
|
if title == "":
|
||||||
logging.info(f"Title not found, not a valid product")
|
logging.info(f"Title not found, not a valid product or captcha")
|
||||||
|
captcha = AmazonCaptcha.fromdriver(driver)
|
||||||
|
solution = captcha.solve()
|
||||||
|
if solution == 'Not solved':
|
||||||
|
logging.info("Couldn't solve the captcha, if there was any")
|
||||||
|
else:
|
||||||
|
logging.info(f"Captcha solution is {solution}, redirecting")
|
||||||
|
logging.info("Waiting for 5 seconds, humans are not that fast :)")
|
||||||
|
time.sleep(5)
|
||||||
|
fill_captcha_element = driver.find_element(By.ID, 'captchacharacters')
|
||||||
|
fill_captcha_element.send_keys(solution)
|
||||||
|
fill_captcha_element.send_keys(Keys.RETURN)
|
||||||
|
logging.info("Re-loading Amazon webpage")
|
||||||
|
driver.get(msg)
|
||||||
|
logging.info("Scraping information")
|
||||||
|
soup = BeautifulSoup(driver.page_source, "lxml")
|
||||||
|
logging.info("Getting title...")
|
||||||
|
title, price, image = parser.get_title(soup), parser.get_price(soup), parser.get_image(soup)
|
||||||
|
if title == "":
|
||||||
|
logging.info(f"Title not found, not a valid product or failed captcha")
|
||||||
return
|
return
|
||||||
logging.info(f"Title found: {title}")
|
logging.info(f"Title found: {title}")
|
||||||
|
|
||||||
|
logging.info("Closing browser")
|
||||||
|
driver.close()
|
||||||
|
|
||||||
context.bot.deleteMessage(chat_id=chat['id'], message_id=message_id)
|
context.bot.deleteMessage(chat_id=chat['id'], message_id=message_id)
|
||||||
referurl = helpers.new_refer_url(productCode, extraparams)
|
referurl = helpers.new_refer_url(productCode, extraparams)
|
||||||
product_id = dbhelper.check_product(referurl, price)
|
product_id = dbhelper.check_product(referurl, price)
|
||||||
|
|||||||
@@ -2,5 +2,6 @@ python-telegram-bot==13.13
|
|||||||
requests==2.28.1
|
requests==2.28.1
|
||||||
beautifulsoup4==4.11.1
|
beautifulsoup4==4.11.1
|
||||||
lxml==4.9.1
|
lxml==4.9.1
|
||||||
selenium==4.4.0
|
selenium==4.2.0
|
||||||
Pillow==9.2.0
|
Pillow==9.4.0
|
||||||
|
amazoncaptcha==0.5.9
|
||||||
Reference in New Issue
Block a user