URLs view refactor, article exception handling, visualize logs, charts
This commit is contained in:
@@ -62,7 +62,7 @@ def search_gnews(keyword_search, period="1d", language="en", country="US", max_r
|
||||
|
||||
def search_ddg(keyword_search, category="news", timelimit="d", max_results=None, region="wt-wt"):
|
||||
# [source] [category] [period] [language-country] [max_results]
|
||||
source = "ddg {} {} {} max_results={}".format(category, timelimit, region, max_results).replace("None", "").strip()
|
||||
source = "ddg {} {} {} max_results={}".format(category, timelimit, region, max_results).replace("max_results=None", "").strip()
|
||||
logger.debug("Searching: {} --- Source:{}".format(keyword_search, source))
|
||||
|
||||
# region="{}-{}".format(language, country.lower())
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
import logging
|
||||
|
||||
import os
|
||||
|
||||
''' TODO: PATH LOGS
|
||||
PATH_LOGS_ERROR=logs/log_app_fetcher_error.log
|
||||
PATH_LOGS=logs/log_app_fetcher.log
|
||||
'''
|
||||
os.makedirs("logs", exist_ok=True)
|
||||
|
||||
logging.basicConfig(format='%(filename)s | %(levelname)s | %(asctime)s | %(message)s')
|
||||
|
||||
@@ -50,21 +50,21 @@ def process_url(url):
|
||||
except newspaper.ArticleException as e:
|
||||
|
||||
# Too many requests? Cool down...
|
||||
if ("Status code 429" in str(e)):
|
||||
if ("Status code 429" in str(e.args)):
|
||||
# TODO: cool down and retry once?, proxy/VPN, ...
|
||||
logger.debug("TODO: Implement code 429")
|
||||
# Unavailable for legal reasons
|
||||
if ("Status code 451" in str(e)):
|
||||
if ("Status code 451" in str(e.args)):
|
||||
# TODO: Bypass with VPN
|
||||
logger.debug("TODO: Implement code 451")
|
||||
# CloudFlare protection?
|
||||
if ("Website protected with Cloudflare" in str(e)):
|
||||
if ("Website protected with Cloudflare" in str(e.args)):
|
||||
logger.debug("TODO: Implement bypass CloudFlare")
|
||||
# PerimeterX protection?
|
||||
if ("Website protected with PerimeterX" in str(e)):
|
||||
if ("Website protected with PerimeterX" in str(e.args)):
|
||||
logger.debug("TODO: Implement bypass PerimeterX")
|
||||
|
||||
logger.warning("ArticleException for input URL {}\n{}".format(url, str(e)))
|
||||
logger.warning("ArticleException for input URL {}\n{}".format(url, str(e.args)))
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.warning("Exception for input URL {}\n{}".format(url, str(e)))
|
||||
|
||||
Reference in New Issue
Block a user