URL bypass: handle error case
This commit is contained in:
@@ -42,9 +42,9 @@ def process_url(url, paywall_bypass=False):
|
|||||||
|
|
||||||
if (paywall_bypass):
|
if (paywall_bypass):
|
||||||
# TODO: Implement self-hosted instance
|
# TODO: Implement self-hosted instance
|
||||||
url_paywall_bypass_base = "https://marreta.pcdomanual.com/p/"
|
url_paywall_bypass_base = "https://marreta.pcdomanual.com"
|
||||||
# Override URL for request
|
# Override URL for request
|
||||||
url_of_interest = os.path.join(url_paywall_bypass_base, url)
|
url_of_interest = os.path.join(url_paywall_bypass_base, "p", url)
|
||||||
else:
|
else:
|
||||||
url_of_interest = url
|
url_of_interest = url
|
||||||
|
|
||||||
@@ -84,10 +84,11 @@ def process_url(url, paywall_bypass=False):
|
|||||||
return {"override_status": "invalid"}
|
return {"override_status": "invalid"}
|
||||||
|
|
||||||
if (paywall_bypass):
|
if (paywall_bypass):
|
||||||
# Canonical link is paywall bypass URL? -> Invalid
|
# Canonical link contains URL of paywall bypass? Unsuccessful bypassing -> Error / Unknown
|
||||||
if (url_paywall_bypass_base in article.canonical_link):
|
if (url_paywall_bypass_base in article.canonical_link):
|
||||||
logger.debug("Invalid URL found: {}".format(url))
|
logger.debug("Paywall bypass not successful for URL: {}".format(url))
|
||||||
return {"override_status": "invalid"}
|
# return {"override_status": "unknown"}
|
||||||
|
return None
|
||||||
|
|
||||||
# Valid URL? -> Update source URL
|
# Valid URL? -> Update source URL
|
||||||
scheme = newspaper.urls.get_scheme(url)
|
scheme = newspaper.urls.get_scheme(url)
|
||||||
|
|||||||
Reference in New Issue
Block a user