Files
wallamanta/wallamanta/worker.py
Joan Cano 53004cb3d1 -
2023-03-01 22:19:10 +01:00

136 lines
6.5 KiB
Python

import time
import requests
import telegram
import os
import logging
import json
# Telegram destination chat and bot token, read from the environment.
# Either may be None when the variable is not set.
TELEGRAM_CHANNEL_ID = os.getenv("TELEGRAM_CHANNEL_ID")
TELEGRAM_TOKEN = os.getenv("TELEGRAM_TOKEN")

# Default search origin used by Worker.request when the caller gives none.
LATITUDE = os.getenv("LATITUDE")
LONGITUDE = os.getenv("LONGITUDE")

# Seconds a worker sleeps between two polls. An unset or empty variable used
# to abort the import with an opaque error from int(); fall back instead.
DEFAULT_SLEEP_TIME = 30
_raw_sleep_time = os.getenv("SLEEP_TIME")
# A non-numeric value still raises ValueError: that is a configuration error.
SLEEP_TIME = int(_raw_sleep_time) if _raw_sleep_time else DEFAULT_SLEEP_TIME

# Enable logging
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO
)
logger = logging.getLogger(__name__)

if not _raw_sleep_time:
    logger.warning(
        "SLEEP_TIME is not set; polling every %s seconds by default",
        DEFAULT_SLEEP_TIME,
    )
class Worker:
    """Poll a Wallapop search and announce newly listed articles on Telegram.

    The worker keeps no state of its own: the ids of articles already seen
    live in a list owned by the caller, so they survive a restart of `work`.
    """

    # Seconds after which a hanging HTTP request is abandoned.
    HTTP_TIMEOUT = 30

    # Characters that Telegram's MarkdownV2 treats as markup (plus the
    # backslash itself), each mapped to its backslash-escaped form.
    _MARKDOWN_V2_ESCAPES = str.maketrans(
        {char: "\\" + char for char in "_*[]()~`>#+-=|{}.!\\"}
    )

    def request(self, product_name, n_articles, latitude=LATITUDE, longitude=LONGITUDE, distance='0', condition='all', min_price=0, max_price=10000000):
        """Return the 'search_objects' list of a Wallapop search, newest first.

        Retries every 3 seconds, forever, until Wallapop answers with HTTP 200.

        Args:
            product_name: Search keywords.
            n_articles: Unused; kept so existing callers keep working.
            latitude, longitude: Search origin.
            distance: Search radius, passed through to the API unchanged.
            condition: 'all' for no filter, otherwise one of new,
                as_good_as_new, good, fair, has_given_it_all.
            min_price, max_price: Sale price bounds.

        Raises:
            ValueError: If a 200 response does not carry valid JSON.
            KeyError: If the JSON has no 'search_objects' entry.
        """
        # Passing a dict lets requests URL-encode every value; keywords that
        # contain '&', '#' or '+' used to corrupt the hand-built query string.
        # requests leaves out parameters whose value is None.
        params = {
            "keywords": product_name,
            "order_by": "newest",
            "latitude": latitude,
            "longitude": longitude,
            "distance": distance,
            "min_sale_price": min_price,
            "max_sale_price": max_price,
            "filters_source": "quick_filters",
            "language": "es_ES",
        }
        if condition != "all":
            params["condition"] = condition

        while True:
            try:
                response = requests.get(
                    "http://api.wallapop.com/api/v3/general/search",
                    params=params,
                    timeout=self.HTTP_TIMEOUT,
                )
            except requests.RequestException as e:
                # Network failures are retried just like bad status codes.
                logger.warning(f"'{product_name}' -> Exception: {e}. Retrying...")
            else:
                if response.status_code == 200:
                    break
                logger.info(f"'{product_name}' -> Wallapop returned status {response.status_code}. Illegal parameters or Wallapop service is down. Retrying...")
            time.sleep(3)

        return response.json()['search_objects']

    def first_run(self, product):
        """Return the ids of the articles currently listed for `product`.

        The ids come back in reverse search order, which is the order that
        results from prepending each one as `work` does.
        """
        return [article['id'] for article in reversed(self._search(product))]

    def work(self, product, list):
        """Poll for `product` until it disappears from products.json.

        Every article whose id is not yet in `list` is announced on Telegram,
        unless an exclusion rule matches, and its id is then prepended to
        `list`. An article whose handling raises is logged and left out of
        `list`, so it is tried again on the next poll.

        Args:
            product: Product configuration dict.
            list: Ids already seen; mutated in place. The name shadows the
                builtin but is part of the public signature.
        """
        seen_ids = list
        exec_times = []
        while True:
            if not self._is_still_configured(product):
                break  # Exits worker if product not in list anymore
            start_time = time.time()
            for article in self._search(product):
                if article['id'] in seen_ids:
                    continue
                logger.info("Found article {}".format(article['title']))
                try:
                    if self._is_wanted(article, product):
                        self._send_telegram_message(self._build_message(article))
                        time.sleep(1)  # Avoid Telegram flood restriction
                    seen_ids.insert(0, article['id'])
                except Exception as e:
                    logger.info("---------- EXCEPTION -----------")
                    logger.exception(f"{product['product_name']} worker crashed. {e}")
                    logger.info(f"{product['product_name']}: Trying to parse {article['id']}: {article['title']} .\n")
            time.sleep(SLEEP_TIME)
            # NOTE(review): the measured time includes the sleep above; kept
            # as in the original, confirm whether that is intended.
            exec_times.append(time.time() - start_time)
            logger.info(f"'{product['product_name']}' node-> last: {exec_times[-1]} max: {self.get_max_time(exec_times)} avg: {self.get_average_time(exec_times)}")

    def has_excluded_words(self, title, description, excluded_words):
        """Return True if any excluded word occurs in title or description.

        Matching is case-insensitive on both sides; previously a configured
        word containing upper-case letters could never match the lower-cased
        text handed in by `work`.
        """
        title = title.lower()
        description = description.lower()
        for word in excluded_words:
            logger.info("EXCLUDER: Checking '%s' for title: '%s", word, title)
            needle = word.lower()
            if needle in title or needle in description:
                logger.info("EXCLUDE!")
                return True
        return False

    def is_title_key_word_excluded(self, title, excluded_words):
        """Return True if any excluded word occurs in the title (case-insensitive)."""
        title = title.lower()
        for word in excluded_words:
            logger.info("Checking '%s' for title: '%s", word, title)
            if word.lower() in title:
                return True
        return False

    def get_average_time(self, exec_times):
        """Return the arithmetic mean of `exec_times`, or 0 when it is empty."""
        if not exec_times:
            return 0
        return sum(exec_times) / len(exec_times)

    def get_max_time(self, exec_times):
        """Return the largest value in `exec_times`, never less than 0."""
        return max(0, max(exec_times, default=0))

    def _search(self, product):
        """Run the Wallapop search described by the `product` dict."""
        return self.request(
            product['product_name'],
            0,
            product['latitude'],
            product['longitude'],
            product['distance'],
            product['condition'],
            product['min_price'],
            product['max_price'],
        )

    def _is_still_configured(self, product):
        """Return True while products.json still lists this product's name."""
        # The context manager closes the file; it is reopened on every poll.
        with open("products.json") as f:
            products = json.load(f)
        return any(
            entry['product_name'] == product['product_name'] for entry in products
        )

    def _is_wanted(self, article, product):
        """Return True when no exclusion rule of `product` matches `article`."""
        title = article['title'].lower()
        description = article['description'].lower()
        return not (
            self.has_excluded_words(title, description, product['exclude'])
            or self.is_title_key_word_excluded(title, product['title_keyword_exclude'])
        )

    def _escape_markdown_v2(self, value):
        """Return str(value) with every MarkdownV2 special character escaped."""
        return str(value).translate(self._MARKDOWN_V2_ESCAPES)

    def _build_message(self, article):
        """Return the MarkdownV2 announcement text for `article`."""
        escape = self._escape_markdown_v2
        # Inside the (...) part of a link only ')' and '\' need escaping.
        slug = str(article['web_slug']).replace("\\", "\\\\").replace(")", "\\)")
        return (
            f"*Artículo*: {escape(article['title'])}\n"
            f"*Descripción*: {escape(article['description'])}\n"
            f"*Precio*: {escape(article['price'])} {escape(article['currency'])}\n"
            f"[Ir al anuncio](https://es.wallapop.com/item/{slug})"
        )

    def _send_telegram_message(self, text):
        """Send `text` to the configured chat and log Telegram's answer.

        A request that fails at the network level is retried once; a second
        failure propagates to the caller.
        """
        url = f"https://api.telegram.org/bot{TELEGRAM_TOKEN}/sendMessage"
        # Sent as encoded parameters so newlines, '&' and '#' in the text
        # cannot break the query string.
        params = {
            "chat_id": TELEGRAM_CHANNEL_ID,
            "text": text,
            "parse_mode": "MarkdownV2",
        }
        try:
            response = requests.get(url, params=params, timeout=self.HTTP_TIMEOUT)
        except requests.RequestException:
            response = requests.get(url, params=params, timeout=self.HTTP_TIMEOUT)
        try:
            logger.info(response.json())
        except ValueError:
            # The message was already sent; a non-JSON answer must not make
            # the caller send it again.
            logger.info(response.text)
def run(product):
    """Monitor `product` until its worker stops, restarting it after crashes.

    The ids of the articles already listed are collected once up front, so
    only articles that appear afterwards are announced. The same id list is
    handed to every restart, so a crash does not cause repeat announcements.

    Args:
        product: Product configuration dict; 'product_name' is read here and
            the whole dict is passed on to the worker.
    """
    worker = Worker()
    seen_ids = worker.first_run(product)
    while True:
        try:
            logger.info(f"Wallapop monitor worker started. Checking for new items containing: '{product['product_name']}' with given parameters periodically")
            worker.work(product, seen_ids)
            break  # work() returned normally: the product is no longer configured
        except Exception as e:
            # logger.exception records the traceback, which the previous
            # INFO-level message dropped.
            logger.exception(f"Exception: {e}")
            logger.info(f"{product['product_name']} worker crashed. Restarting worker...")
            time.sleep(10)
    logger.info(f"Wallapop monitor worker stopped for: '{product['product_name']}'")