diff --git a/app_urls/api/src/db_utils.py b/app_urls/api/src/db_utils.py index f38391e..d34c075 100644 --- a/app_urls/api/src/db_utils.py +++ b/app_urls/api/src/db_utils.py @@ -101,20 +101,22 @@ class DB_Handler(): try: # Get data dict_url_data = process_url(obj_url.url) - # Not none or handle as exception - assert(dict_url_data is not None) except Exception as e: if (raise_exception_on_error): - # Simply raise exception - raise Exception("Error processing URL") + # Simply raise exception, handled in a different way + raise Exception("Error processing URL, raising exception as expected") else: + logger.debug("Error processing URL: {}\n{}\n{}".format(obj_url.url, str(e), traceback.format_exc())) # Set status to error - logger.debug("Error processing URL: {}\n{}\n".format(obj_url.url, str(e), traceback.format_exc())) - # Update status - set_status(obj_url, Urls.STATUS_ENUM.ERROR) - # Next URL - return - + dict_url_data = None + + # (dict_url_data is None) or (Exception while processing URL) ? -> Error status + if (dict_url_data is None): + # Update status + set_status(obj_url, Urls.STATUS_ENUM.ERROR) + # Next URL + return + # Invalid? e.g. binary data if (dict_url_data.get("override_status") == "invalid"): # Update status diff --git a/app_urls/api/src/url_processor.py b/app_urls/api/src/url_processor.py index 90a575d..56b5a33 100644 --- a/app_urls/api/src/url_processor.py +++ b/app_urls/api/src/url_processor.py @@ -52,17 +52,17 @@ def process_url(url): # Too many requests? Cool down... if ("Status code 429" in str(e.args)): # TODO: cool down and retry once?, proxy/VPN, ... - logger.debug("TODO: Implement code 429") + logger.debug("TODO: process_url Implement code 429") # Unavailable for legal reasons if ("Status code 451" in str(e.args)): # TODO: Bypass with VPN - logger.debug("TODO: Implement code 451") + logger.debug("TODO: process_url Implement code 451") # CloudFlare protection? if ("Website protected with Cloudflare" in str(e.args)): - logger.debug("TODO: Implement bypass CloudFlare") + logger.debug("TODO: process_url Implement bypass CloudFlare") # PerimeterX protection? if ("Website protected with PerimeterX" in str(e.args)): - logger.debug("TODO: Implement bypass PerimeterX") + logger.debug("TODO: process_url Implement bypass PerimeterX") logger.warning("ArticleException for input URL {}\n{}".format(url, str(e.args))) return None diff --git a/app_urls/api/templates/filtered_urls.html b/app_urls/api/templates/filtered_urls.html index 9b50d2b..72263ac 100644 --- a/app_urls/api/templates/filtered_urls.html +++ b/app_urls/api/templates/filtered_urls.html @@ -152,11 +152,13 @@ input[type="checkbox"] {