Bypass paywall feature
This commit is contained in:
@@ -82,7 +82,7 @@ class DB_Handler():
|
||||
logger.warning("Exception inserting raw URLs: {}\n{}".format(e, traceback.format_exc()))
|
||||
|
||||
|
||||
def _process_single_url(self, obj_url, status_pattern_match, raise_exception_on_error, override_url=None):
|
||||
def _process_single_url(self, obj_url, status_pattern_match, raise_exception_on_error, paywall_bypass=False):
|
||||
|
||||
def set_status(obj_url, status):
|
||||
# Update status if setting a new value
|
||||
@@ -100,13 +100,8 @@ class DB_Handler():
|
||||
return
|
||||
|
||||
try:
|
||||
# Override URL for request?
|
||||
if (override_url is not None):
|
||||
url_of_interest = override_url
|
||||
else:
|
||||
url_of_interest = obj_url.url
|
||||
# Extract URL content
|
||||
dict_url_data = process_url(url_of_interest)
|
||||
dict_url_data = process_url(obj_url.url, paywall_bypass)
|
||||
except Exception as e:
|
||||
if (raise_exception_on_error):
|
||||
# Simply raise the exception; it is handled in a different way
|
||||
@@ -234,9 +229,8 @@ class DB_Handler():
|
||||
continue
|
||||
|
||||
try:
|
||||
# Process URL
|
||||
override_url = "https://marreta.pcdomanual.com/p/{}".format(obj_url.url)
|
||||
self._process_single_url(obj_url, status_pattern_match=None, raise_exception_on_error=True, override_url=override_url)
|
||||
# Process URL, try bypassing paywall
|
||||
self._process_single_url(obj_url, status_pattern_match=None, raise_exception_on_error=True, paywall_bypass=True)
|
||||
num_urls_processed += 1
|
||||
except Exception as e:
|
||||
# On error, cache to avoid re-processing for some time
|
||||
|
||||
Reference in New Issue
Block a user