from .db_utils import DB_Handler
from .utils import get_searxng_instances
from .fetch_search_sources import (
    FetcherDuckDuckGo,
    FetcherGNews,
    FetcherGoogleNews,
    FetcherSearxNews,
    FetcherPreSearch,
)
from .logger import get_logger

logger = get_logger()


class FetchSearcher:
    """Runs every configured search fetcher for the searches and URL hosts
    of interest stored in the database, persisting results via ``db_handler``.

    Parameters
    ----------
    db_handler : DB_Handler
        Storage backend passed to each fetcher's ``fetch_articles``.
    full : bool
        When True, also query the slower Google-based sources (GNews,
        GoogleNews) in addition to PreSearch and DuckDuckGo.
    use_searx : bool
        When True, additionally query every SearxNG instance returned by
        ``get_searxng_instances()``. Defaults to False (this source was
        previously disabled with a dead ``if False:`` guard).
    """

    def __init__(self, db_handler: DB_Handler, full: bool = True,
                 use_searx: bool = False) -> None:
        logger.debug("Initializing News feed")
        self.db_handler = db_handler
        self.full_search = full
        self.use_searx = use_searx

    def _run_fetching(self, search_text: str) -> None:
        """Fetch articles for a single search string from each enabled source."""
        logger.debug("Starting _run_fetching() for {}".format(search_text))

        # Common parameters
        lang, region = "en", "US"

        ### PreSearch
        dict_params_news = {"search": search_text}
        FetcherPreSearch(**dict_params_news).fetch_articles(self.db_handler)

        ### DuckDuckGo — "wt"/"wt" means worldwide (no language/region filter)
        period = "d"
        dict_params_news = {"search": search_text, "lang": "wt", "region": "wt",
                            "search_category": "news", "period": period}
        FetcherDuckDuckGo(**dict_params_news).fetch_articles(self.db_handler)
        dict_params_general = {"search": search_text, "lang": "wt", "region": "wt",
                               "search_category": "general", "period": period}
        FetcherDuckDuckGo(**dict_params_general).fetch_articles(self.db_handler)

        if self.full_search:
            # Avoid site:{} search due to G-Bypass required time
            if "site:" not in search_text:
                ### GNews
                dict_params = {"search": search_text, "lang": "wt", "region": "wt",
                               "period": period}
                FetcherGNews(**dict_params).fetch_articles(self.db_handler)

                ### GoogleNews
                dict_params_news = {"search": search_text, "lang": lang,
                                    "region": region, "search_category": "news",
                                    "period": period}
                FetcherGoogleNews(**dict_params_news).fetch_articles(self.db_handler)

                ### SearxNG — off by default; enable via use_searx=True.
                # NOTE(review): this branch was previously dead code behind
                # ``if False:`` — original nesting level is uncertain, but the
                # default flag value keeps it unreachable either way.
                if self.use_searx:
                    period = "day"
                    for searx_instance in get_searxng_instances():
                        dict_params_news = {"search": search_text,
                                            "searx_instance": searx_instance,
                                            "lang": lang, "region": region,
                                            "search_category": "news",
                                            "period": period}
                        dict_params_general = {"search": search_text,
                                               "searx_instance": searx_instance,
                                               "lang": lang, "region": region,
                                               "search_category": "general",
                                               "period": period}
                        FetcherSearxNews(**dict_params_news).fetch_articles(self.db_handler)
                        FetcherSearxNews(**dict_params_general).fetch_articles(self.db_handler)

        logger.debug("Finished _run_fetching()")

    def run(self) -> None:
        """Fetch news for every stored search text and URL host of interest.

        Exceptions are caught at this boundary and logged as warnings so a
        single failing fetch run does not crash the caller.
        """
        try:
            logger.info("Fetching text searches & URL hosts of interest")
            # Get text searches of interest
            list_search_text_of_interest = self.db_handler._get_search_list()
            # Get URL host of interest
            list_url_host = self.db_handler._get_url_host_list()
            # Get text searches for URL hosts
            list_search_text_url_host = ["site:{}".format(h) for h in list_url_host]
            for search_text in list_search_text_of_interest + list_search_text_url_host:
                logger.debug("Fetching news for search: {}".format(search_text))
                self._run_fetching(search_text)
            logger.info("Finished fetching text searches & URL hosts of interest")
        except Exception as e:
            # Fixed: message previously named a non-existent "NewsSearch" class.
            logger.warning("Exception in FetchSearcher.run(): {}".format(str(e)))