Extract the logic for building the referer URL into a separate method
This commit is contained in:
56
bot/bot.py
56
bot/bot.py
@@ -22,8 +22,8 @@ def get_chrome_options():
|
|||||||
chrome_options.add_argument("--no-sandbox")
|
chrome_options.add_argument("--no-sandbox")
|
||||||
chrome_options.add_argument("--disable-dev-shm-usage")
|
chrome_options.add_argument("--disable-dev-shm-usage")
|
||||||
chrome_prefs = {}
|
chrome_prefs = {}
|
||||||
chrome_options.experimental_options["prefs"] = chrome_prefs
|
|
||||||
chrome_prefs["profile.default_content_settings"] = {"images": 2}
|
chrome_prefs["profile.default_content_settings"] = {"images": 2}
|
||||||
|
chrome_options.experimental_options["prefs"] = chrome_prefs
|
||||||
return chrome_options
|
return chrome_options
|
||||||
|
|
||||||
def find_amazon_link(update: Update, context: CallbackContext) -> None:
|
def find_amazon_link(update: Update, context: CallbackContext) -> None:
|
||||||
@@ -34,33 +34,22 @@ def find_amazon_link(update: Update, context: CallbackContext) -> None:
|
|||||||
except AttributeError:
|
except AttributeError:
|
||||||
logging.info(f"Received message has no text")
|
logging.info(f"Received message has no text")
|
||||||
return
|
return
|
||||||
start = msg.find("amzn.to")
|
|
||||||
if start != -1:
|
|
||||||
msg = helpers.unshort_url(msg[start:].split()[0])
|
|
||||||
start = msg.find("amzn.eu")
|
|
||||||
if start != -1:
|
|
||||||
msg = helpers.unshort_url(msg[start:].split()[0])
|
|
||||||
start = msg.find(constants.searchURL)
|
|
||||||
if start != -1:
|
|
||||||
msg = f"https://{msg[start:].split(' ')[0]}"
|
|
||||||
m = re.search(r'(?:dp\/[\w]*)|(?:gp\/product\/[\w]*)',msg) # Gets product code in amazon
|
|
||||||
m_e = re.search(r'(?:&m=[\w]*)',msg) # Gets vendor and keeps it
|
|
||||||
if m != None:
|
|
||||||
productCode = m.group(0)
|
|
||||||
if m_e != None:
|
|
||||||
extraparams = m_e.group(0)
|
|
||||||
|
|
||||||
user, chat, message_id = update.message.from_user, update.message.chat, update.message.message_id
|
user, chat, message_id = update.message.from_user, update.message.chat, update.message.message_id
|
||||||
logging.info(f"Link sent by {user} - {msg}")
|
logging.info(f"Link sent by {user} - {msg}")
|
||||||
|
|
||||||
referurl = helpers.new_refer_url(productCode, extraparams)
|
amazon_url_with_referer = extract_amazon_url_with_referer(msg)
|
||||||
|
|
||||||
|
if amazon_url_with_referer:
|
||||||
logging.info("Setting up new Chrome Browser")
|
logging.info("Setting up new Chrome Browser")
|
||||||
driver = webdriver.Chrome(options=get_chrome_options())
|
driver = webdriver.Chrome(options=get_chrome_options())
|
||||||
driver.set_window_size(random.randint(1200, 1800), random.randint(600, 1000))
|
driver.set_window_size(random.randint(1200, 1800), random.randint(600, 1000))
|
||||||
logging.info("Loading Amazon webpage")
|
logging.info("Loading Amazon webpage")
|
||||||
driver.get(referurl)
|
|
||||||
|
driver.get(amazon_url_with_referer)
|
||||||
driver.save_screenshot('/app/data/last_screenshot.png')
|
driver.save_screenshot('/app/data/last_screenshot.png')
|
||||||
logging.info("Scraping information and closing browser")
|
logging.info("Scraping information and closing browser")
|
||||||
|
|
||||||
soup = BeautifulSoup(driver.page_source, "lxml")
|
soup = BeautifulSoup(driver.page_source, "lxml")
|
||||||
etree_soup = BeautifulSoup(driver.page_source, "html.parser")
|
etree_soup = BeautifulSoup(driver.page_source, "html.parser")
|
||||||
# DEBUG
|
# DEBUG
|
||||||
@@ -82,7 +71,7 @@ def find_amazon_link(update: Update, context: CallbackContext) -> None:
|
|||||||
fill_captcha_element.send_keys(solution)
|
fill_captcha_element.send_keys(solution)
|
||||||
fill_captcha_element.send_keys(Keys.RETURN)
|
fill_captcha_element.send_keys(Keys.RETURN)
|
||||||
logging.info("Re-loading Amazon webpage")
|
logging.info("Re-loading Amazon webpage")
|
||||||
driver.get(msg)
|
driver.get(amazon_url_with_referer)
|
||||||
logging.info("Scraping information")
|
logging.info("Scraping information")
|
||||||
soup = BeautifulSoup(driver.page_source, "lxml")
|
soup = BeautifulSoup(driver.page_source, "lxml")
|
||||||
etree_soup = BeautifulSoup(driver.page_source, "html.parser")
|
etree_soup = BeautifulSoup(driver.page_source, "html.parser")
|
||||||
@@ -97,14 +86,37 @@ def find_amazon_link(update: Update, context: CallbackContext) -> None:
|
|||||||
driver.close()
|
driver.close()
|
||||||
|
|
||||||
context.bot.deleteMessage(chat_id=chat['id'], message_id=message_id)
|
context.bot.deleteMessage(chat_id=chat['id'], message_id=message_id)
|
||||||
product_id = dbhelper.check_product(referurl, price)
|
product_id = dbhelper.check_product(amazon_url_with_referer, price)
|
||||||
if not product_id:
|
if not product_id:
|
||||||
product_id = dbhelper.add_product(user['username'], chat['title'], user['id'], chat['id'], msg, referurl, title, price, image)
|
product_id = dbhelper.add_product(user['username'], chat['title'], user['id'], chat['id'], msg, amazon_url_with_referer, title, price, image)
|
||||||
helpers.create_image(product_id, price)
|
helpers.create_image(product_id, price)
|
||||||
keyboard = [[InlineKeyboardButton("Ir a Amazon", url=f"{referurl}")]]
|
keyboard = [[InlineKeyboardButton("Ir a Amazon", url=f"{amazon_url_with_referer}")]]
|
||||||
markup = InlineKeyboardMarkup(keyboard)
|
markup = InlineKeyboardMarkup(keyboard)
|
||||||
context.bot.send_photo(chat_id=update.message.chat_id, photo=open(f"/app/data/images/products/{product_id}_composed.png", 'rb'), caption=f"URL enviada por @{user['username']}: \n\n{title}", reply_markup=markup)
|
context.bot.send_photo(chat_id=update.message.chat_id, photo=open(f"/app/data/images/products/{product_id}_composed.png", 'rb'), caption=f"URL enviada por @{user['username']}: \n\n{title}", reply_markup=markup)
|
||||||
|
|
||||||
|
def extract_amazon_url_with_referer(text: str):
|
||||||
|
|
||||||
|
start = text.find("amzn.to")
|
||||||
|
if start != -1:
|
||||||
|
text = helpers.unshort_url(text[start:].split()[0])
|
||||||
|
start = text.find("amzn.eu")
|
||||||
|
if start != -1:
|
||||||
|
text = helpers.unshort_url(text[start:].split()[0])
|
||||||
|
start = text.find(constants.searchURL)
|
||||||
|
if start != -1:
|
||||||
|
text = f"https://{text[start:].split(' ')[0]}"
|
||||||
|
m = re.search(r'(?:dp\/[\w]*)|(?:gp\/product\/[\w]*)',text) # Gets product code in amazon
|
||||||
|
m_e = re.search(r'(?:&m=[\w]*)',text) # Gets vendor and keeps it
|
||||||
|
if m != None:
|
||||||
|
productCode = m.group(0)
|
||||||
|
if m_e != None:
|
||||||
|
extraparams = m_e.group(0)
|
||||||
|
|
||||||
|
return helpers.new_refer_url(productCode, extraparams)
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
|
def main() -> None:
|
||||||
dbhelper.setup_db()
|
dbhelper.setup_db()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user