Selenium based fetch of different sources

This commit is contained in:
Luciano Gervasoni
2025-07-08 18:18:26 +02:00
parent f729bd1cb2
commit 0cf61026e8
10 changed files with 235 additions and 31 deletions

View File

@@ -0,0 +1,42 @@
from .db_utils import DB_Handler
from ..models import Search, Source
import traceback
import requests
import os
from .logger import get_logger
# Module-level logger used by FetchSeleniumSourceSearch below.
logger = get_logger()
class FetchSeleniumSourceSearch():
    """Fetch URLs for every keyword search through the Selenium fetching service.

    For each ``Search`` of type ``KEYWORD_SEARCH`` a POST request carrying the
    search string is sent to the ``fetch_search/`` endpoint of the service
    configured through the ``SELENIUM_ENDPOINT`` environment variable
    (default ``http://localhost:80``). The JSON response is iterated as a
    mapping of source name to fetched URLs, and each entry is handed to
    ``DB_Handler.insert_raw_urls`` together with its ``Source`` and ``Search``.
    """

    def __init__(self) -> None:
        logger.debug("Initializing Selenium Source Search")

    def run(self):
        """Run the Selenium based fetch for all keyword searches.

        Exceptions are not propagated to the caller: a failure while handling
        one search is logged as a warning and the loop moves on to the next
        search; any other failure is logged as a warning with its traceback.
        """
        try:
            logger.debug("Starting FetchSeleniumSourceSearch.run()")
            # Get keyword searches
            list_keyword_search = Search.objects.filter(type=Search.TYPE_ENUM.KEYWORD_SEARCH)
            logger.debug("Fetching news Selenium based for keyword searches: {}".format([e.search for e in list_keyword_search]))

            # Selenium fetching endpoint. Built once (it does not depend on the
            # search) and with plain string handling: os.path.join is meant for
            # filesystem paths and would insert a backslash on Windows.
            base_url = os.getenv("SELENIUM_ENDPOINT", "http://localhost:80")
            selenium_fetch_endpoint = "{}/fetch_search/".format(base_url.rstrip("/"))

            # Run selenium search for each keyword search
            for obj_search in list_keyword_search:
                try:
                    data = {"search": obj_search.search}
                    # POST (long timeout kept from the original: 900 seconds)
                    r = requests.post(selenium_fetch_endpoint, json=data, timeout=900)
                    # Fail fast on HTTP errors so that a JSON error body is never
                    # mistaken for a {source: urls} mapping and written to the DB
                    r.raise_for_status()
                    # Jsonify
                    results = r.json()
                    logger.debug("Selenium results for URL {}: {}".format(obj_search.search, str(results)))

                    for source, urls_fetched in results.items():
                        # Get source object (the "created" flag is not needed)
                        obj_source, _ = Source.objects.get_or_create(source="selenium {}".format(source))
                        # Write to DB
                        DB_Handler().insert_raw_urls(urls_fetched, obj_source, obj_search)
                except Exception as e:
                    # Best effort: one failing search must not stop the others
                    logger.warning("Exception while fetching selenium search: {}\n{}".format(obj_search.search, str(e)))
        except Exception as e:
            logger.warning("Exception in FetchSeleniumSourceSearch.run(): {}\n{}".format(e, traceback.format_exc()))