Selenium logs, on delete cascade, tasks timeout, parser url host requirement

This commit is contained in:
Luciano Gervasoni
2025-04-07 09:25:43 +02:00
parent 64d2efd314
commit af3d7e030c
6 changed files with 27 additions and 13 deletions

View File

@@ -10,6 +10,16 @@ class FetchParser():
def __init__(self) -> None:
    """Create the parser; the only work done here is emitting a debug log line."""
    logger.debug("Initializing Fetcher Parser")
def _post_process_urls(self, raw_urls, obj_search):
    """Filter fetched URLs so that URL-host searches only keep their own site.

    For searches whose ``type`` is ``Search.TYPE_ENUM.URL_HOST`` every URL is
    parsed and kept only when its hostname equals the searched host or is a
    subdomain of it (a leading ``www.`` is ignored on both sides). Any other
    search type returns ``raw_urls`` untouched.

    Args:
        raw_urls: Iterable of URL strings produced by the fetch step.
        obj_search: Search object; ``type`` and ``search`` are read.

    Returns:
        The URLs that belong to the searched host, or ``raw_urls`` unchanged
        when no host restriction applies or no host could be determined.
    """
    # Only URL-host based searches restrict where results may come from
    if obj_search.type != Search.TYPE_ENUM.URL_HOST:
        return raw_urls

    # Local import keeps this method self-contained
    from urllib.parse import urlsplit

    def _hostname(url):
        # Lower-cased hostname of `url` without a leading "www."; "" if unknown
        url = url.strip()
        if "://" not in url and not url.startswith("//"):
            # Scheme-less input ("example.com/page"): urlsplit only recognises
            # the host part when it is introduced by "//"
            url = "//" + url
        try:
            host = urlsplit(url).hostname or ""
        except ValueError:
            # Malformed URL (e.g. unbalanced IPv6 brackets)
            return ""
        host = host.lower()
        if host.startswith("www."):
            host = host[len("www."):]
        return host

    # Get clean URL host
    url_host_clean = _hostname(obj_search.search)
    if not url_host_clean:
        # No usable host to compare against: keep everything rather than
        # silently dropping all results
        return raw_urls

    # Compare real hostnames instead of substrings, so that look-alike hosts
    # ("notexample.com") and URLs that merely mention the host in their path
    # or query string are rejected
    subdomain_suffix = "." + url_host_clean
    filtered = []
    for u in raw_urls:
        host = _hostname(u)
        if host == url_host_clean or host.endswith(subdomain_suffix):
            filtered.append(u)
    return filtered
def run(self):
try:
logger.debug("Starting FetchParser.run() for {}")
@@ -39,6 +49,9 @@ class FetchParser():
except Exception as e:
logger.warning("Exception while parsing input URL {}\n{}".format(url_host_protocol, str(e)))
urls_fetched = []
# Post process URLs
urls_fetched = self._post_process_urls(urls_fetched, obj_search)
# Write to DB
DB_Handler().insert_raw_urls(urls_fetched, obj_source, obj_search)