diff --git a/bot/bot.py b/bot/bot.py
index fc368a1..be99ed3 100644
--- a/bot/bot.py
+++ b/bot/bot.py
@@ -22,8 +22,8 @@ def get_chrome_options():
     chrome_options.add_argument("--no-sandbox")
     chrome_options.add_argument("--disable-dev-shm-usage")
     chrome_prefs = {}
-    chrome_options.experimental_options["prefs"] = chrome_prefs
     chrome_prefs["profile.default_content_settings"] = {"images": 2}
+    chrome_options.experimental_options["prefs"] = chrome_prefs
     return chrome_options
 
 def find_amazon_link(update: Update, context: CallbackContext) -> None:
@@ -34,33 +34,22 @@ def find_amazon_link(update: Update, context: CallbackContext) -> None:
     except AttributeError:
         logging.info(f"Received message has no text")
         return
-    start = msg.find("amzn.to")
-    if start != -1:
-        msg = helpers.unshort_url(msg[start:].split()[0])
-    start = msg.find("amzn.eu")
-    if start != -1:
-        msg = helpers.unshort_url(msg[start:].split()[0])
-    start = msg.find(constants.searchURL)
-    if start != -1:
-        msg = f"https://{msg[start:].split(' ')[0]}"
-    m = re.search(r'(?:dp\/[\w]*)|(?:gp\/product\/[\w]*)',msg) # Gets product code in amazon
-    m_e = re.search(r'(?:&m=[\w]*)',msg) # Gets vendor and keeps it
-    if m != None:
-        productCode = m.group(0)
-        if m_e != None:
-            extraparams = m_e.group(0)
+
+    user, chat, message_id = update.message.from_user, update.message.chat, update.message.message_id
+    logging.info(f"Link sent by {user} - {msg}")
 
-        user, chat, message_id = update.message.from_user, update.message.chat, update.message.message_id
-        logging.info(f"Link sent by {user} - {msg}")
+    amazon_url_with_referer = extract_amazon_url_with_referer(msg)
 
-        referurl = helpers.new_refer_url(productCode, extraparams)
+    if amazon_url_with_referer:
         logging.info("Setting up new Chrome Browser")
         driver = webdriver.Chrome(options=get_chrome_options())
         driver.set_window_size(random.randint(1200, 1800), random.randint(600, 1000))
         logging.info("Loading Amazon webpage")
-        driver.get(referurl)
+
+        driver.get(amazon_url_with_referer)
         driver.save_screenshot('/app/data/last_screenshot.png')
         logging.info("Scraping information and closing browser")
+
         soup = BeautifulSoup(driver.page_source, "lxml")
         etree_soup = BeautifulSoup(driver.page_source, "html.parser")
         # DEBUG
@@ -82,7 +71,7 @@ def find_amazon_link(update: Update, context: CallbackContext) -> None:
             fill_captcha_element.send_keys(solution)
             fill_captcha_element.send_keys(Keys.RETURN)
             logging.info("Re-loading Amazon webpage")
-            driver.get(msg)
+            driver.get(amazon_url_with_referer)
             logging.info("Scraping information")
             soup = BeautifulSoup(driver.page_source, "lxml")
             etree_soup = BeautifulSoup(driver.page_source, "html.parser")
@@ -97,14 +86,45 @@ def find_amazon_link(update: Update, context: CallbackContext) -> None:
         driver.close()
 
         context.bot.deleteMessage(chat_id=chat['id'], message_id=message_id)
-        product_id = dbhelper.check_product(referurl, price)
+        product_id = dbhelper.check_product(amazon_url_with_referer, price)
         if not product_id:
-            product_id = dbhelper.add_product(user['username'], chat['title'], user['id'], chat['id'], msg, referurl, title, price, image)
+            product_id = dbhelper.add_product(user['username'], chat['title'], user['id'], chat['id'], msg, amazon_url_with_referer, title, price, image)
         helpers.create_image(product_id, price)
-        keyboard = [[InlineKeyboardButton("Ir a Amazon", url=f"{referurl}")]]
+        keyboard = [[InlineKeyboardButton("Ir a Amazon", url=f"{amazon_url_with_referer}")]]
         markup = InlineKeyboardMarkup(keyboard)
         context.bot.send_photo(chat_id=update.message.chat_id, photo=open(f"/app/data/images/products/{product_id}_composed.png", 'rb'), caption=f"URL enviada por @{user['username']}: \n\n{title}", reply_markup=markup)
 
 
+def extract_amazon_url_with_referer(text: str):
+    """Build the referral URL for the first Amazon product code found in *text*.
+
+    Text containing amzn.to / amzn.eu is first passed through
+    helpers.unshort_url; text containing constants.searchURL is cut at the
+    first space after it and given an https:// prefix.
+
+    Returns the value of helpers.new_refer_url for the product code, or None
+    when the text contains no dp/... or gp/product/... code.
+    """
+    start = text.find("amzn.to")
+    if start != -1:
+        text = helpers.unshort_url(text[start:].split()[0])
+    start = text.find("amzn.eu")
+    if start != -1:
+        text = helpers.unshort_url(text[start:].split()[0])
+    start = text.find(constants.searchURL)
+    if start != -1:
+        text = f"https://{text[start:].split(' ')[0]}"
+    m = re.search(r'(?:dp\/[\w]*)|(?:gp\/product\/[\w]*)',text) # Gets product code in amazon
+    if m is None:
+        return None
+
+    m_e = re.search(r'(?:&m=[\w]*)',text) # Gets vendor and keeps it
+    # The vendor parameter is optional: fall back to an empty suffix so a link
+    # without "&m=" no longer raises UnboundLocalError on extraparams.
+    # NOTE(review): assumes helpers.new_refer_url accepts "" here - confirm.
+    extraparams = m_e.group(0) if m_e is not None else ""
+    return helpers.new_refer_url(m.group(0), extraparams)
+
+
 def main() -> None:
     dbhelper.setup_db()