Files
matitos_news/app_urls/api/obsolete_src/fetch_search.py
2025-03-12 17:56:40 +01:00

73 lines
3.6 KiB
Python

from .db_utils import DB_Handler
from .utils import get_searxng_instances
from .fetch_search_sources import FetcherDuckDuckGo, FetcherGNews, FetcherGoogleNews, FetcherSearxNews, FetcherPreSearch
from .logger import get_logger
logger = get_logger()
class FetchSearcher():
    """Fetches news articles from several search back-ends for every search
    term and URL host of interest stored in the database.

    Each back-end fetcher persists its results through the shared
    ``DB_Handler`` passed at construction time.
    """

    def __init__(self, db_handler: DB_Handler, full=True) -> None:
        """
        :param db_handler: database handler used both to read the lists of
            searches / URL hosts of interest and to store fetched articles.
        :param full: when True, also query the slower Google-based back-ends
            (GNews, GoogleNews) in addition to PreSearch and DuckDuckGo.
        """
        logger.debug("Initializing News feed")
        self.db_handler = db_handler
        self.full_search = full

    def _run_fetching(self, search_text):
        """Fetch articles for a single search string from each enabled back-end."""
        logger.debug("Starting _run_fetching() for {}".format(search_text))
        # Common parameters; "wt"/"wt" below means worldwide (no region bias).
        lang, region = "en", "US"
        period = "d"  # restrict results to the last day

        ### PreSearch
        FetcherPreSearch(search=search_text).fetch_articles(self.db_handler)

        ### DuckDuckGo: query both the "news" and "general" verticals.
        for search_category in ("news", "general"):
            dict_params = {"search": search_text, "lang": "wt", "region": "wt",
                           "search_category": search_category, "period": period}
            FetcherDuckDuckGo(**dict_params).fetch_articles(self.db_handler)

        if self.full_search:
            # Avoid site:{} search due to G-Bypass required time
            if "site:" not in search_text:
                ### GNews
                dict_params = {"search": search_text, "lang": "wt", "region": "wt",
                               "period": period}
                FetcherGNews(**dict_params).fetch_articles(self.db_handler)
                ### GoogleNews (news vertical only; "general" was never enabled)
                dict_params = {"search": search_text, "lang": lang, "region": region,
                               "search_category": "news", "period": period}
                FetcherGoogleNews(**dict_params).fetch_articles(self.db_handler)

        # NOTE(review): a SearxNG section (FetcherSearxNews over
        # get_searxng_instances(), period="day") used to live here behind an
        # `if False:` guard — it was unreachable dead code and has been removed.
        logger.debug("Finished _run_fetching()")

    def run(self):
        """Fetch news for every stored search term and URL host of interest.

        Any exception is caught and logged so that a failing back-end does not
        propagate to the caller (best-effort semantics preserved).
        """
        try:
            logger.info("Fetching text searches & URL hosts of interest")
            # Plain-text searches of interest
            list_search_text_of_interest = self.db_handler._get_search_list()
            # URL hosts of interest, turned into "site:<host>" searches
            list_url_host = self.db_handler._get_url_host_list()
            list_search_text_url_host = ["site:{}".format(host) for host in list_url_host]
            for search_text in list_search_text_of_interest + list_search_text_url_host:
                logger.debug("Fetching news for search: {}".format(search_text))
                self._run_fetching(search_text)
            logger.info("Finished fetching text searches & URL hosts of interest")
        except Exception as e:
            # Fixed: message previously referenced a stale class name "NewsSearch".
            logger.warning("Exception in FetchSearcher.run(): {}".format(str(e)))