URLs view refactor, article exception handling, visualize logs, charts

This commit is contained in:
Luciano Gervasoni
2025-03-26 14:28:57 +01:00
parent 9d2550b374
commit e1f4787119
8 changed files with 739 additions and 9 deletions

View File

@@ -62,7 +62,7 @@ def search_gnews(keyword_search, period="1d", language="en", country="US", max_r
def search_ddg(keyword_search, category="news", timelimit="d", max_results=None, region="wt-wt"):
# [source] [category] [period] [language-country] [max_results]
source = "ddg {} {} {} max_results={}".format(category, timelimit, region, max_results).replace("None", "").strip()
source = "ddg {} {} {} max_results={}".format(category, timelimit, region, max_results).replace("max_results=None", "").strip()
logger.debug("Searching: {} --- Source:{}".format(keyword_search, source))
# region="{}-{}".format(langauge, country.lower())

View File

@@ -1,6 +1,10 @@
import logging
import os
''' TODO: PATH LOGS
PATH_LOGS_ERROR=logs/log_app_fetcher_error.log
PATH_LOGS=logs/log_app_fetcher.log
'''
os.makedirs("logs", exist_ok=True)
logging.basicConfig(format='%(filename)s | %(levelname)s | %(asctime)s | %(message)s')

View File

@@ -50,21 +50,21 @@ def process_url(url):
except newspaper.ArticleException as e:
# Too many requests? Cool down...
if ("Status code 429" in str(e)):
if ("Status code 429" in str(e.args)):
# TODO: cool down and retry once?, proxy/VPN, ...
logger.debug("TODO: Implement code 429")
# Unavailable for legal reasons
if ("Status code 451" in str(e)):
if ("Status code 451" in str(e.args)):
# TODO: Bypass with VPN
logger.debug("TODO: Implement code 451")
# CloudFlare protection?
if ("Website protected with Cloudflare" in str(e)):
if ("Website protected with Cloudflare" in str(e.args)):
logger.debug("TODO: Implement bypass CloudFlare")
# PerimeterX protection?
if ("Website protected with PerimeterX" in str(e)):
if ("Website protected with PerimeterX" in str(e.args)):
logger.debug("TODO: Implement bypass PerimeterX")
logger.warning("ArticleException for input URL {}\n{}".format(url, str(e)))
logger.warning("ArticleException for input URL {}\n{}".format(url, str(e.args)))
return None
except Exception as e:
logger.warning("Exception for input URL {}\n{}".format(url, str(e)))