diff --git a/app_urls/fetcher/src/db_utils.py b/app_urls/fetcher/src/db_utils.py index 2c8d7fd..18a4618 100644 --- a/app_urls/fetcher/src/db_utils.py +++ b/app_urls/fetcher/src/db_utils.py @@ -148,6 +148,7 @@ class DB_Handler(): try: # Extract URL content dict_url_data = process_url(obj_url.url, paywall_bypass) + logger.debug("Processing raw URL EXTRACT URL CONTENT OK: {}".format(obj_url.url)) except Exception as e: if (raise_exception_on_error): # Simply raise exception, handled in a different way @@ -242,6 +243,7 @@ class DB_Handler(): status_pattern_match = _get_status_pattern_matching(obj_url.url, list_pattern_status_tuple) # Process URL self._process_single_url(obj_url, status_pattern_match, raise_exception_on_error=False) + logger.debug("Processing raw URL OK: {}".format(obj_url.url)) logger.info("Updated #{} raw URLs".format(len(raw_urls))) except Exception as e: diff --git a/app_urls/fetcher/src/fetch_utils_url_processor.py b/app_urls/fetcher/src/fetch_utils_url_processor.py index 8d04b09..086e817 100644 --- a/app_urls/fetcher/src/fetch_utils_url_processor.py +++ b/app_urls/fetcher/src/fetch_utils_url_processor.py @@ -40,6 +40,7 @@ def url_host_slowdown(url, url_host_slowdown_seconds): cache.set("process_{}".format(url_host).encode("utf-8"), time.time(), timeout=60*5) # Expire after 5 minutes def process_url(url, paywall_bypass=False): + logger.debug("Processing raw URL 1: {}".format(url)) if (paywall_bypass): # TODO: Implement self-hosted instance