Resolve URL redirects with a GET request before newspaper processing
This commit is contained in:
@@ -56,13 +56,13 @@ def process_url(url, paywall_bypass=False):
|
||||
user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1"
|
||||
|
||||
# Process
|
||||
if ("foxnews.com" in url_of_interest):
|
||||
if ("foxnews.com" in url_of_interest) or ("zerohedge" in url_of_interest):
|
||||
# Request
|
||||
r = requests.get(url, headers={"User-Agent": user_agent})
|
||||
# Raise for error code
|
||||
r.raise_for_status()
|
||||
# Parse
|
||||
article = newspaper.Article(url=url).download(input_html=r.text).parse()
|
||||
article = newspaper.Article(url=r.url).download(input_html=r.text).parse()
|
||||
else:
|
||||
# Config: Fake user agent
|
||||
config = newspaper.configuration.Configuration()
|
||||
|
||||
Reference in New Issue
Block a user