Add support for groups with threads by including the thread_id in the bot requests

This commit is contained in:
Alejandro Perez Lopez
2024-04-19 12:44:34 +02:00
parent 23e5cfd878
commit 7751924f21

View File

@@ -1,20 +1,21 @@
import logging import logging
import re
import random import random
import dbhelper import re
import constants
import helpers
import product_parser
import time import time
from amazoncaptcha import AmazonCaptcha from amazoncaptcha import AmazonCaptcha
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium import webdriver from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from telegram import Update, InlineKeyboardButton, InlineKeyboardMarkup from telegram import Update, InlineKeyboardButton, InlineKeyboardMarkup
from telegram.ext import ApplicationBuilder, MessageHandler, filters, CallbackContext from telegram.ext import ApplicationBuilder, MessageHandler, filters, CallbackContext
import constants
import dbhelper
import helpers
import product_parser
from amazon_product import AmazonProduct from amazon_product import AmazonProduct
@@ -57,13 +58,15 @@ def extract_amazon_url_with_referer(text: str):
start = text.find(constants.searchURL) start = text.find(constants.searchURL)
if start != -1: if start != -1:
text = f"https://{text[start:].split(' ')[0]}" text = f"https://{text[start:].split(' ')[0]}"
product_code_regex_result = re.search(r'(?:dp\/[\w]*)|(?:gp\/product\/[\w]*)',text) # Gets product code in amazon product_code_regex_result = re.search(r'(?:dp\/[\w]*)|(?:gp\/product\/[\w]*)',
vendor_and_smid_result = re.findall(r'(?:[&|?]m=[\w]*)|(?:[&|?]smid=[\w]*)', text) # Gets vendor and smid parameter that it seems to be like a variant and keeps it text) # Gets product code in amazon
vendor_and_smid_result = re.findall(r'(?:[&|?]m=[\w]*)|(?:[&|?]smid=[\w]*)',
text) # Gets vendor and smid parameter that it seems to be like a variant and keeps it
if product_code_regex_result: if product_code_regex_result:
product_code = product_code_regex_result.group(0) product_code = product_code_regex_result.group(0)
if vendor_and_smid_result: if vendor_and_smid_result:
extra_params = (''.join(str(w) for w in vendor_and_smid_result if w is not None)) extra_params = (''.join(str(w) for w in vendor_and_smid_result if w is not None))
extra_params = extra_params.replace('?','&') extra_params = extra_params.replace('?', '&')
return helpers.new_refer_url(product_code, extra_params), original_message return helpers.new_refer_url(product_code, extra_params), original_message
@@ -92,7 +95,8 @@ def scrape_data(page_html: str) -> AmazonProduct:
soup = BeautifulSoup(page_html, "lxml") soup = BeautifulSoup(page_html, "lxml")
etree_soup = BeautifulSoup(page_html, "html.parser") etree_soup = BeautifulSoup(page_html, "html.parser")
title, price, image = product_parser.get_title(soup), product_parser.get_price(soup), product_parser.get_image(soup, etree_soup) title, price, image = product_parser.get_title(soup), product_parser.get_price(soup), product_parser.get_image(soup,
etree_soup)
return AmazonProduct(title=title, price=price, image=image) if title != "" else None return AmazonProduct(title=title, price=price, image=image) if title != "" else None
@@ -103,7 +107,11 @@ async def find_amazon_link(update: Update, context: CallbackContext) -> None:
logging.info(f"Received message has no text") logging.info(f"Received message has no text")
return return
user, chat, chat_id, message_id = update.message.from_user, update.message.chat, update.message.chat_id, update.message.message_id user, chat, chat_id, message_id, thread_id = (update.message.from_user,
update.message.chat,
update.message.chat_id,
update.message.message_id,
update.message.message_thread_id)
amazon_url_with_referer, original_message = extract_amazon_url_with_referer(msg) amazon_url_with_referer, original_message = extract_amazon_url_with_referer(msg)
original_message = f"\n\nMensaje original: {original_message}" if original_message != '<enlace>' else '' original_message = f"\n\nMensaje original: {original_message}" if original_message != '<enlace>' else ''
@@ -124,14 +132,18 @@ async def find_amazon_link(update: Update, context: CallbackContext) -> None:
apply_captcha(captcha_solution, chrome_driver) apply_captcha(captcha_solution, chrome_driver)
logging.info("Re-loading Amazon webpage") logging.info("Re-loading Amazon webpage")
page_html = load_page(url=amazon_url_with_referer, driver=chrome_driver, screenshot_type="_aftercaptcha") page_html = load_page(url=amazon_url_with_referer, driver=chrome_driver,
screenshot_type="_aftercaptcha")
product_data = scrape_data(page_html) product_data = scrape_data(page_html)
else: else:
logging.info("Couldn't solve the captcha, if there was any") logging.info("Couldn't solve the captcha, if there was any")
if not product_data: #if after applying the captcha we don't have any data yet, stop the execution and reply to the user if not product_data: # if after applying the captcha we don't have any data yet, stop the execution and reply to the user
logging.info("Unable to get the product information") logging.info("Unable to get the product information")
await context.bot.send_message(chat_id=chat_id, text="Unable to get product attributes from the provided url", reply_to_message_id=message_id) await context.bot.send_message(chat_id=chat_id,
text="Unable to get product attributes from the provided url",
reply_to_message_id=message_id,
message_thread_id=thread_id)
return return
logging.info(f"Product information found: {product_data}") logging.info(f"Product information found: {product_data}")
@@ -139,11 +151,17 @@ async def find_amazon_link(update: Update, context: CallbackContext) -> None:
await context.bot.deleteMessage(chat_id=chat_id, message_id=message_id) await context.bot.deleteMessage(chat_id=chat_id, message_id=message_id)
product_id = dbhelper.check_product(amazon_url_with_referer, product_data.price) product_id = dbhelper.check_product(amazon_url_with_referer, product_data.price)
if not product_id: if not product_id:
product_id = dbhelper.add_product(user['username'], chat['title'], user['id'], chat_id, msg, amazon_url_with_referer, product_data.title, product_data.price, product_data.image) product_id = dbhelper.add_product(user['username'], chat['title'], user['id'], chat_id, msg,
amazon_url_with_referer, product_data.title, product_data.price,
product_data.image)
helpers.create_image(product_id, product_data.price) helpers.create_image(product_id, product_data.price)
keyboard = [[InlineKeyboardButton("Ir a Amazon", url=f"{amazon_url_with_referer}")]] keyboard = [[InlineKeyboardButton("Ir a Amazon", url=f"{amazon_url_with_referer}")]]
markup = InlineKeyboardMarkup(keyboard) markup = InlineKeyboardMarkup(keyboard)
await context.bot.send_photo(chat_id=chat_id, photo=open(f"/app/data/images/products/{product_id}_composed.png", 'rb'), caption=f"URL enviada por @{user['username']}: \n\n{product_data.title}{original_message}", reply_markup=markup) await context.bot.send_photo(chat_id=chat_id,
photo=open(f"/app/data/images/products/{product_id}_composed.png", 'rb'),
caption=f"URL enviada por @{user['username']}: \n\n{product_data.title}{original_message}",
reply_markup=markup,
message_thread_id=thread_id)
def main() -> None: def main() -> None:
@@ -151,13 +169,16 @@ def main() -> None:
if constants.telegram_proxy: if constants.telegram_proxy:
logging.info("Creating application with socks5 proxy") logging.info("Creating application with socks5 proxy")
application = ApplicationBuilder().get_updates_http_version('1.1').http_version('1.1').token(constants.TELEGRAM_API_KEY).proxy(constants.proxy_url).get_updates_proxy(constants.proxy_url).build() application = ApplicationBuilder().get_updates_http_version('1.1').http_version('1.1').token(
constants.TELEGRAM_API_KEY).proxy(constants.proxy_url).get_updates_proxy(constants.proxy_url).build()
else: else:
logging.info("Creating application without socks5 proxy") logging.info("Creating application without socks5 proxy")
application = ApplicationBuilder().get_updates_http_version('1.1').http_version('1.1').token(constants.TELEGRAM_API_KEY).build() application = ApplicationBuilder().get_updates_http_version('1.1').http_version('1.1').token(
constants.TELEGRAM_API_KEY).build()
application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, find_amazon_link)) application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, find_amazon_link))
application.run_polling() application.run_polling()
if __name__ == '__main__': if __name__ == '__main__':
main() main()