Selenium logs, on-delete cascade, task timeouts, parser URL-host requirement
This commit is contained in:
@@ -10,6 +10,16 @@ class FetchParser():
|
||||
def __init__(self) -> None:
    """Create a new FetchParser instance.

    No state is set up here; the constructor only emits a debug-level
    log line so initialization shows up in the application trace.
    """
    logger.debug("Initializing Fetcher Parser")
|
||||
def _post_process_urls(self, raw_urls, obj_search):
    """Filter fetched URLs according to the search configuration.

    For host-based searches (``Search.TYPE_ENUM.URL_HOST``) only URLs that
    contain the searched host are kept; for every other search type the
    list is returned unchanged.

    :param raw_urls: iterable of URL strings produced by the fetcher
    :param obj_search: search object exposing ``type`` and ``search``
                       (the searched host, possibly with scheme/www prefix)
    :return: list of URLs, filtered when the search is host-based
    """
    if obj_search.type == Search.TYPE_ENUM.URL_HOST:
        # Normalize the configured host by stripping scheme and "www."
        # PREFIXES only. The previous implementation used str.replace(),
        # which removed these substrings anywhere in the string and could
        # mangle hosts such as "sub.www.example.com".
        url_host_clean = obj_search.search
        for prefix in ("https://", "http://"):
            if url_host_clean.startswith(prefix):
                url_host_clean = url_host_clean[len(prefix):]
                break
        if url_host_clean.startswith("www."):
            url_host_clean = url_host_clean[len("www."):]

        # Keep only results that actually belong to the searched site
        # (substring match, same semantics as before for normal hosts).
        raw_urls = [u for u in raw_urls if url_host_clean in u]

    return raw_urls
||||
|
||||
def run(self):
|
||||
try:
|
||||
logger.debug("Starting FetchParser.run() for {}")
|
||||
@@ -39,6 +49,9 @@ class FetchParser():
|
||||
except Exception as e:
|
||||
logger.warning("Exception while parsing input URL {}\n{}".format(url_host_protocol, str(e)))
|
||||
urls_fetched = []
|
||||
|
||||
# Post process URLs
|
||||
urls_fetched = self._post_process_urls(urls_fetched, obj_search)
|
||||
|
||||
# Write to DB
|
||||
DB_Handler().insert_raw_urls(urls_fetched, obj_source, obj_search)
|
||||
|
||||
Reference in New Issue
Block a user