URL bypass: handle error case
This commit is contained in:
@@ -42,9 +42,9 @@ def process_url(url, paywall_bypass=False):
|
||||
|
||||
if (paywall_bypass):
|
||||
# TODO: Implement self-hosted instance
|
||||
url_paywall_bypass_base = "https://marreta.pcdomanual.com/p/"
|
||||
url_paywall_bypass_base = "https://marreta.pcdomanual.com"
|
||||
# Override URL for request
|
||||
url_of_interest = os.path.join(url_paywall_bypass_base, url)
|
||||
url_of_interest = os.path.join(url_paywall_bypass_base, "p", url)
|
||||
else:
|
||||
url_of_interest = url
|
||||
|
||||
@@ -84,10 +84,11 @@ def process_url(url, paywall_bypass=False):
|
||||
return {"override_status": "invalid"}
|
||||
|
||||
if (paywall_bypass):
|
||||
# Canonical link is paywall bypass URL? -> Invalid
|
||||
# Canonical link contains URL of paywall bypass? Unsuccessful bypassing -> Error / Unknown
|
||||
if (url_paywall_bypass_base in article.canonical_link):
|
||||
logger.debug("Invalid URL found: {}".format(url))
|
||||
return {"override_status": "invalid"}
|
||||
logger.debug("Paywall bypass not successful for URL: {}".format(url))
|
||||
# return {"override_status": "unknown"}
|
||||
return None
|
||||
|
||||
# Valid URL? -> Update source URL
|
||||
scheme = newspaper.urls.get_scheme(url)
|
||||
|
||||
Reference in New Issue
Block a user