Add support for groups with threads by including the thread_id in the bot's requests
This commit is contained in:
75
bot/bot.py
75
bot/bot.py
@@ -1,20 +1,21 @@
|
||||
import logging
|
||||
import re
|
||||
import random
|
||||
import dbhelper
|
||||
import constants
|
||||
import helpers
|
||||
import product_parser
|
||||
import re
|
||||
import time
|
||||
|
||||
from amazoncaptcha import AmazonCaptcha
|
||||
from bs4 import BeautifulSoup
|
||||
from selenium.webdriver.common.keys import Keys
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.common.keys import Keys
|
||||
from telegram import Update, InlineKeyboardButton, InlineKeyboardMarkup
|
||||
from telegram.ext import ApplicationBuilder, MessageHandler, filters, CallbackContext
|
||||
|
||||
import constants
|
||||
import dbhelper
|
||||
import helpers
|
||||
import product_parser
|
||||
from amazon_product import AmazonProduct
|
||||
|
||||
|
||||
@@ -57,14 +58,16 @@ def extract_amazon_url_with_referer(text: str):
|
||||
start = text.find(constants.searchURL)
|
||||
if start != -1:
|
||||
text = f"https://{text[start:].split(' ')[0]}"
|
||||
product_code_regex_result = re.search(r'(?:dp\/[\w]*)|(?:gp\/product\/[\w]*)',text) # Gets product code in amazon
|
||||
vendor_and_smid_result = re.findall(r'(?:[&|?]m=[\w]*)|(?:[&|?]smid=[\w]*)', text) # Gets vendor and smid parameter that it seems to be like a variant and keeps it
|
||||
product_code_regex_result = re.search(r'(?:dp\/[\w]*)|(?:gp\/product\/[\w]*)',
|
||||
text) # Gets product code in amazon
|
||||
vendor_and_smid_result = re.findall(r'(?:[&|?]m=[\w]*)|(?:[&|?]smid=[\w]*)',
|
||||
text) # Gets vendor and smid parameter that it seems to be like a variant and keeps it
|
||||
if product_code_regex_result:
|
||||
product_code = product_code_regex_result.group(0)
|
||||
if vendor_and_smid_result:
|
||||
extra_params = (''.join(str(w) for w in vendor_and_smid_result if w is not None))
|
||||
extra_params = extra_params.replace('?','&')
|
||||
|
||||
extra_params = extra_params.replace('?', '&')
|
||||
|
||||
return helpers.new_refer_url(product_code, extra_params), original_message
|
||||
|
||||
# Return the original message unmodified, with None as the URL to mark that the URL is not valid
|
||||
@@ -74,7 +77,7 @@ def extract_amazon_url_with_referer(text: str):
|
||||
def resolve_captcha(driver: webdriver):
|
||||
logging.info("Trying to resolve captcha")
|
||||
captcha = AmazonCaptcha.fromdriver(driver)
|
||||
solution = captcha.solve()
|
||||
solution = captcha.solve()
|
||||
return solution if solution != 'Not solved' else None
|
||||
|
||||
|
||||
@@ -92,7 +95,8 @@ def scrape_data(page_html: str) -> AmazonProduct:
|
||||
soup = BeautifulSoup(page_html, "lxml")
|
||||
etree_soup = BeautifulSoup(page_html, "html.parser")
|
||||
|
||||
title, price, image = product_parser.get_title(soup), product_parser.get_price(soup), product_parser.get_image(soup, etree_soup)
|
||||
title, price, image = product_parser.get_title(soup), product_parser.get_price(soup), product_parser.get_image(soup,
|
||||
etree_soup)
|
||||
return AmazonProduct(title=title, price=price, image=image) if title != "" else None
|
||||
|
||||
|
||||
@@ -102,9 +106,13 @@ async def find_amazon_link(update: Update, context: CallbackContext) -> None:
|
||||
except AttributeError:
|
||||
logging.info(f"Received message has no text")
|
||||
return
|
||||
|
||||
user, chat, chat_id, message_id = update.message.from_user, update.message.chat, update.message.chat_id, update.message.message_id
|
||||
|
||||
|
||||
user, chat, chat_id, message_id, thread_id = (update.message.from_user,
|
||||
update.message.chat,
|
||||
update.message.chat_id,
|
||||
update.message.message_id,
|
||||
update.message.message_thread_id)
|
||||
|
||||
amazon_url_with_referer, original_message = extract_amazon_url_with_referer(msg)
|
||||
original_message = f"\n\nMensaje original: {original_message}" if original_message != '<enlace>' else ''
|
||||
|
||||
@@ -113,8 +121,8 @@ async def find_amazon_link(update: Update, context: CallbackContext) -> None:
|
||||
with create_selenium_driver(get_chrome_options()) as chrome_driver:
|
||||
logging.info("Loading page for scraping information")
|
||||
page_html = load_page(url=amazon_url_with_referer, driver=chrome_driver)
|
||||
product_data = scrape_data(page_html)
|
||||
|
||||
product_data = scrape_data(page_html)
|
||||
|
||||
if not product_data:
|
||||
logging.info(f"Title not found, not a valid product or captcha")
|
||||
|
||||
@@ -124,26 +132,36 @@ async def find_amazon_link(update: Update, context: CallbackContext) -> None:
|
||||
apply_captcha(captcha_solution, chrome_driver)
|
||||
|
||||
logging.info("Re-loading Amazon webpage")
|
||||
page_html = load_page(url=amazon_url_with_referer, driver=chrome_driver, screenshot_type="_aftercaptcha")
|
||||
page_html = load_page(url=amazon_url_with_referer, driver=chrome_driver,
|
||||
screenshot_type="_aftercaptcha")
|
||||
product_data = scrape_data(page_html)
|
||||
else:
|
||||
logging.info("Couldn't solve the captcha, if there was any")
|
||||
|
||||
if not product_data: #if after applying the captcha we don't have any data yet, stop the execution and reply to the user
|
||||
if not product_data: # if after applying the captcha we don't have any data yet, stop the execution and reply to the user
|
||||
logging.info("Unable to get the product information")
|
||||
await context.bot.send_message(chat_id=chat_id, text="Unable to get product attributes from the provided url", reply_to_message_id=message_id)
|
||||
await context.bot.send_message(chat_id=chat_id,
|
||||
text="Unable to get product attributes from the provided url",
|
||||
reply_to_message_id=message_id,
|
||||
message_thread_id=thread_id)
|
||||
return
|
||||
|
||||
|
||||
logging.info(f"Product information found: {product_data}")
|
||||
|
||||
await context.bot.deleteMessage(chat_id=chat_id, message_id=message_id)
|
||||
product_id = dbhelper.check_product(amazon_url_with_referer, product_data.price)
|
||||
if not product_id:
|
||||
product_id = dbhelper.add_product(user['username'], chat['title'], user['id'], chat_id, msg, amazon_url_with_referer, product_data.title, product_data.price, product_data.image)
|
||||
product_id = dbhelper.add_product(user['username'], chat['title'], user['id'], chat_id, msg,
|
||||
amazon_url_with_referer, product_data.title, product_data.price,
|
||||
product_data.image)
|
||||
helpers.create_image(product_id, product_data.price)
|
||||
keyboard = [[InlineKeyboardButton("Ir a Amazon", url=f"{amazon_url_with_referer}")]]
|
||||
markup = InlineKeyboardMarkup(keyboard)
|
||||
await context.bot.send_photo(chat_id=chat_id, photo=open(f"/app/data/images/products/{product_id}_composed.png", 'rb'), caption=f"URL enviada por @{user['username']}: \n\n{product_data.title}{original_message}", reply_markup=markup)
|
||||
await context.bot.send_photo(chat_id=chat_id,
|
||||
photo=open(f"/app/data/images/products/{product_id}_composed.png", 'rb'),
|
||||
caption=f"URL enviada por @{user['username']}: \n\n{product_data.title}{original_message}",
|
||||
reply_markup=markup,
|
||||
message_thread_id=thread_id)
|
||||
|
||||
|
||||
def main() -> None:
|
||||
@@ -151,13 +169,16 @@ def main() -> None:
|
||||
|
||||
if constants.telegram_proxy:
|
||||
logging.info("Creating application with socks5 proxy")
|
||||
application = ApplicationBuilder().get_updates_http_version('1.1').http_version('1.1').token(constants.TELEGRAM_API_KEY).proxy(constants.proxy_url).get_updates_proxy(constants.proxy_url).build()
|
||||
application = ApplicationBuilder().get_updates_http_version('1.1').http_version('1.1').token(
|
||||
constants.TELEGRAM_API_KEY).proxy(constants.proxy_url).get_updates_proxy(constants.proxy_url).build()
|
||||
else:
|
||||
logging.info("Creating application without socks5 proxy")
|
||||
application = ApplicationBuilder().get_updates_http_version('1.1').http_version('1.1').token(constants.TELEGRAM_API_KEY).build()
|
||||
application = ApplicationBuilder().get_updates_http_version('1.1').http_version('1.1').token(
|
||||
constants.TELEGRAM_API_KEY).build()
|
||||
|
||||
application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, find_amazon_link))
|
||||
application.run_polling()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user