Days handling URLs visualization, exception handling url_processor
This commit is contained in:
@@ -101,20 +101,22 @@ class DB_Handler():
|
||||
try:
|
||||
# Get data
|
||||
dict_url_data = process_url(obj_url.url)
|
||||
# Not none or handle as exception
|
||||
assert(dict_url_data is not None)
|
||||
except Exception as e:
|
||||
if (raise_exception_on_error):
|
||||
# Simply raise exception
|
||||
raise Exception("Error processing URL")
|
||||
# Simply raise exception, handled in a different way
|
||||
raise Exception("Error processing URL, raising exception as expected")
|
||||
else:
|
||||
logger.debug("Error processing URL: {}\n{}\n{}".format(obj_url.url, str(e), traceback.format_exc()))
|
||||
# Set status to error
|
||||
logger.debug("Error processing URL: {}\n{}\n".format(obj_url.url, str(e), traceback.format_exc()))
|
||||
# Update status
|
||||
set_status(obj_url, Urls.STATUS_ENUM.ERROR)
|
||||
# Next URL
|
||||
return
|
||||
|
||||
dict_url_data = None
|
||||
|
||||
# (dict_url_data is None) or (Exception while processing URL) ? -> Error status
|
||||
if (dict_url_data is None):
|
||||
# Update status
|
||||
set_status(obj_url, Urls.STATUS_ENUM.ERROR)
|
||||
# Next URL
|
||||
return
|
||||
|
||||
# Invalid? e.g. binary data
|
||||
if (dict_url_data.get("override_status") == "invalid"):
|
||||
# Update status
|
||||
|
||||
@@ -52,17 +52,17 @@ def process_url(url):
|
||||
# Too many requests? Cool down...
|
||||
if ("Status code 429" in str(e.args)):
|
||||
# TODO: cool down and retry once?, proxy/VPN, ...
|
||||
logger.debug("TODO: Implement code 429")
|
||||
logger.debug("TODO: process_url Implement code 429")
|
||||
# Unavailable for legal reasons
|
||||
if ("Status code 451" in str(e.args)):
|
||||
# TODO: Bypass with VPN
|
||||
logger.debug("TODO: Implement code 451")
|
||||
logger.debug("TODO: process_url Implement code 451")
|
||||
# CloudFlare protection?
|
||||
if ("Website protected with Cloudflare" in str(e.args)):
|
||||
logger.debug("TODO: Implement bypass CloudFlare")
|
||||
logger.debug("TODO: process_url Implement bypass CloudFlare")
|
||||
# PerimeterX protection?
|
||||
if ("Website protected with PerimeterX" in str(e.args)):
|
||||
logger.debug("TODO: Implement bypass PerimeterX")
|
||||
logger.debug("TODO: process_url Implement bypass PerimeterX")
|
||||
|
||||
logger.warning("ArticleException for input URL {}\n{}".format(url, str(e.args)))
|
||||
return None
|
||||
|
||||
Reference in New Issue
Block a user