Workers light,default,heavy

This commit is contained in:
Luciano Gervasoni
2025-09-08 12:34:47 +02:00
parent 892fb984d1
commit 2ed86e31ec
4 changed files with 48 additions and 31 deletions

View File

@@ -100,7 +100,7 @@ class DB_Handler():
# URLs duplicate association
UrlsDuplicate.objects.get_or_create(id_url_canonical=obj_url_canonical, id_url_duplicated=obj_url)
def _process_single_url(self, obj_url, status_pattern_match, raise_exception_on_error, paywall_bypass=False):
def _process_single_url(self, obj_url, status_pattern_match, raise_exception_on_error, paywall_bypass=False, request_timeout=15):
##########################################################################
# URL pattern: missingkids.org/poster OR missingkids.org/new-poster
if ("missingkids.org" in obj_url.url) and ("poster" in obj_url.url):
@@ -147,8 +147,7 @@ class DB_Handler():
try:
# Extract URL content
dict_url_data = process_url(obj_url.url, paywall_bypass)
logger.debug("Processing raw URL EXTRACT URL CONTENT OK: {}".format(obj_url.url))
dict_url_data = process_url(obj_url.url, paywall_bypass, request_timeout)
except Exception as e:
if (raise_exception_on_error):
# Simply raise exception, handled in a different way
@@ -238,12 +237,10 @@ class DB_Handler():
# Per URL
for obj_url in raw_urls:
logger.debug("Processing raw URL: {}".format(obj_url.url))
# Override status if pattern matching?
status_pattern_match = _get_status_pattern_matching(obj_url.url, list_pattern_status_tuple)
# Process URL
self._process_single_url(obj_url, status_pattern_match, raise_exception_on_error=False)
logger.debug("Processing raw URL OK: {}".format(obj_url.url))
logger.info("Updated #{} raw URLs".format(len(raw_urls)))
except Exception as e: