Add support for groups with threads by including the thread_id in the bot requests

This commit is contained in:
Alejandro Perez Lopez
2024-04-19 12:44:34 +02:00
parent 23e5cfd878
commit 7751924f21

View File

@@ -1,20 +1,21 @@
import logging
import re
import random
import dbhelper
import constants
import helpers
import product_parser
import re
import time
from amazoncaptcha import AmazonCaptcha
from bs4 import BeautifulSoup
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from telegram import Update, InlineKeyboardButton, InlineKeyboardMarkup
from telegram.ext import ApplicationBuilder, MessageHandler, filters, CallbackContext
import constants
import dbhelper
import helpers
import product_parser
from amazon_product import AmazonProduct
@@ -57,14 +58,16 @@ def extract_amazon_url_with_referer(text: str):
start = text.find(constants.searchURL)
if start != -1:
text = f"https://{text[start:].split(' ')[0]}"
product_code_regex_result = re.search(r'(?:dp\/[\w]*)|(?:gp\/product\/[\w]*)',text) # Gets product code in amazon
vendor_and_smid_result = re.findall(r'(?:[&|?]m=[\w]*)|(?:[&|?]smid=[\w]*)', text) # Gets vendor and smid parameter that it seems to be like a variant and keeps it
product_code_regex_result = re.search(r'(?:dp\/[\w]*)|(?:gp\/product\/[\w]*)',
text) # Gets product code in amazon
vendor_and_smid_result = re.findall(r'(?:[&|?]m=[\w]*)|(?:[&|?]smid=[\w]*)',
text) # Gets vendor and smid parameter that it seems to be like a variant and keeps it
if product_code_regex_result:
product_code = product_code_regex_result.group(0)
if vendor_and_smid_result:
extra_params = (''.join(str(w) for w in vendor_and_smid_result if w is not None))
extra_params = extra_params.replace('?','&')
extra_params = extra_params.replace('?', '&')
return helpers.new_refer_url(product_code, extra_params), original_message
# we return here the original message without modification and None for the URL marking that the url is not valid
@@ -74,7 +77,7 @@ def extract_amazon_url_with_referer(text: str):
def resolve_captcha(driver: webdriver):
logging.info("Trying to resolve captcha")
captcha = AmazonCaptcha.fromdriver(driver)
solution = captcha.solve()
solution = captcha.solve()
return solution if solution != 'Not solved' else None
@@ -92,7 +95,8 @@ def scrape_data(page_html: str) -> AmazonProduct:
soup = BeautifulSoup(page_html, "lxml")
etree_soup = BeautifulSoup(page_html, "html.parser")
title, price, image = product_parser.get_title(soup), product_parser.get_price(soup), product_parser.get_image(soup, etree_soup)
title, price, image = product_parser.get_title(soup), product_parser.get_price(soup), product_parser.get_image(soup,
etree_soup)
return AmazonProduct(title=title, price=price, image=image) if title != "" else None
@@ -102,9 +106,13 @@ async def find_amazon_link(update: Update, context: CallbackContext) -> None:
except AttributeError:
logging.info(f"Received message has no text")
return
user, chat, chat_id, message_id = update.message.from_user, update.message.chat, update.message.chat_id, update.message.message_id
user, chat, chat_id, message_id, thread_id = (update.message.from_user,
update.message.chat,
update.message.chat_id,
update.message.message_id,
update.message.message_thread_id)
amazon_url_with_referer, original_message = extract_amazon_url_with_referer(msg)
original_message = f"\n\nMensaje original: {original_message}" if original_message != '<enlace>' else ''
@@ -113,8 +121,8 @@ async def find_amazon_link(update: Update, context: CallbackContext) -> None:
with create_selenium_driver(get_chrome_options()) as chrome_driver:
logging.info("Loading page for scraping information")
page_html = load_page(url=amazon_url_with_referer, driver=chrome_driver)
product_data = scrape_data(page_html)
product_data = scrape_data(page_html)
if not product_data:
logging.info(f"Title not found, not a valid product or captcha")
@@ -124,26 +132,36 @@ async def find_amazon_link(update: Update, context: CallbackContext) -> None:
apply_captcha(captcha_solution, chrome_driver)
logging.info("Re-loading Amazon webpage")
page_html = load_page(url=amazon_url_with_referer, driver=chrome_driver, screenshot_type="_aftercaptcha")
page_html = load_page(url=amazon_url_with_referer, driver=chrome_driver,
screenshot_type="_aftercaptcha")
product_data = scrape_data(page_html)
else:
logging.info("Couldn't solve the captcha, if there was any")
if not product_data: #if after applying the captcha we don't have any data yet, stop the execution and reply to the user
if not product_data: # if after applying the captcha we don't have any data yet, stop the execution and reply to the user
logging.info("Unable to get the product information")
await context.bot.send_message(chat_id=chat_id, text="Unable to get product attributes from the provided url", reply_to_message_id=message_id)
await context.bot.send_message(chat_id=chat_id,
text="Unable to get product attributes from the provided url",
reply_to_message_id=message_id,
message_thread_id=thread_id)
return
logging.info(f"Product information found: {product_data}")
await context.bot.deleteMessage(chat_id=chat_id, message_id=message_id)
product_id = dbhelper.check_product(amazon_url_with_referer, product_data.price)
if not product_id:
product_id = dbhelper.add_product(user['username'], chat['title'], user['id'], chat_id, msg, amazon_url_with_referer, product_data.title, product_data.price, product_data.image)
product_id = dbhelper.add_product(user['username'], chat['title'], user['id'], chat_id, msg,
amazon_url_with_referer, product_data.title, product_data.price,
product_data.image)
helpers.create_image(product_id, product_data.price)
keyboard = [[InlineKeyboardButton("Ir a Amazon", url=f"{amazon_url_with_referer}")]]
markup = InlineKeyboardMarkup(keyboard)
await context.bot.send_photo(chat_id=chat_id, photo=open(f"/app/data/images/products/{product_id}_composed.png", 'rb'), caption=f"URL enviada por @{user['username']}: \n\n{product_data.title}{original_message}", reply_markup=markup)
await context.bot.send_photo(chat_id=chat_id,
photo=open(f"/app/data/images/products/{product_id}_composed.png", 'rb'),
caption=f"URL enviada por @{user['username']}: \n\n{product_data.title}{original_message}",
reply_markup=markup,
message_thread_id=thread_id)
def main() -> None:
@@ -151,13 +169,16 @@ def main() -> None:
if constants.telegram_proxy:
logging.info("Creating application with socks5 proxy")
application = ApplicationBuilder().get_updates_http_version('1.1').http_version('1.1').token(constants.TELEGRAM_API_KEY).proxy(constants.proxy_url).get_updates_proxy(constants.proxy_url).build()
application = ApplicationBuilder().get_updates_http_version('1.1').http_version('1.1').token(
constants.TELEGRAM_API_KEY).proxy(constants.proxy_url).get_updates_proxy(constants.proxy_url).build()
else:
logging.info("Creating application without socks5 proxy")
application = ApplicationBuilder().get_updates_http_version('1.1').http_version('1.1').token(constants.TELEGRAM_API_KEY).build()
application = ApplicationBuilder().get_updates_http_version('1.1').http_version('1.1').token(
constants.TELEGRAM_API_KEY).build()
application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, find_amazon_link))
application.run_polling()
if __name__ == '__main__':
main()