Added XPath fallback method for obtaining the image when the normal lookup fails

This commit is contained in:
Joan
2023-07-11 12:56:02 +02:00
parent e3d7db2916
commit d99399f6ac
3 changed files with 23 additions and 6 deletions

View File

@@ -1,4 +1,11 @@
import helpers
import logging
from lxml import etree
# Configure root logging once at import: INFO and above, timestamped records.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
def get_title(soup):
try:
@@ -22,11 +29,19 @@ def get_price(soup):
return price
def get_image(soup, etree_soup=None):
    """Return the product image URL, or ``"N/A"`` when none can be found.

    The ``<img id="landingImage">`` tag is looked up in ``soup`` first. When
    that lookup fails, the markup in ``etree_soup`` is parsed with lxml and a
    fixed XPath is tried as a second attempt.

    Args:
        soup: Parsed page exposing ``find(name, attrs=...)``.
        etree_soup: Object whose ``str()`` is the HTML used by the XPath
            fallback. Optional so that single-argument callers keep working;
            when it is ``None`` the XPath attempt is skipped.

    Returns:
        The ``src`` attribute of the matched image element, or the
        placeholder string ``"N/A"``.
    """
    try:
        image = soup.find("img", attrs={'id': 'landingImage'}).get('src')
    except AttributeError:
        # find() returned None (or soup itself is unusable): try the fallback.
        pass
    else:
        logging.info("Image found")
        return image

    if etree_soup is not None:
        logging.info("Couldn't retrieve image, trying with xpath method")
        try:
            dom = etree.HTML(str(etree_soup))
            matches = dom.xpath('//*[@id="main-image-container"]/ul/li[4]/span/span/div/img')
            return matches[0].get('src')
        except (AttributeError, IndexError):
            # xpath() returns a list, so a non-matching expression surfaces as
            # IndexError on [0]; AttributeError covers an unparsable document.
            pass

    logging.info("Couldn't retrieve image with xpath method, falling back to placeholder")
    return "N/A"