Selenium-based fetch of different sources
This commit is contained in:
42
app_urls/fetcher/src/fetch_selenium.py
Normal file
42
app_urls/fetcher/src/fetch_selenium.py
Normal file
@@ -0,0 +1,42 @@
|
||||
from .db_utils import DB_Handler
|
||||
from ..models import Search, Source
|
||||
import traceback
|
||||
import requests
|
||||
import os
|
||||
from .logger import get_logger
|
||||
logger = get_logger()
|
||||
|
||||
class FetchSeleniumSourceSearch():
    """Run the stored keyword searches through the Selenium fetching service.

    For every ``Search`` of type ``KEYWORD_SEARCH`` the search string is
    POSTed to the service's ``fetch_search/`` endpoint. The JSON reply is
    expected to be an object mapping a source name to the URLs fetched for
    that source; each entry is handed to ``DB_Handler.insert_raw_urls``
    under a ``Source`` named ``"selenium <source name>"``.
    """

    # Seconds to wait for the Selenium service to answer a single search.
    REQUEST_TIMEOUT = 900

    def __init__(self) -> None:
        logger.debug("Initializing Selenium Source Search")

    @staticmethod
    def _build_endpoint(base_url=None):
        """Return the URL of the Selenium service's ``fetch_search/`` endpoint.

        Args:
            base_url: Base URL of the service. When ``None`` it is read from
                the ``SELENIUM_ENDPOINT`` environment variable, falling back
                to ``http://localhost:80``.

        Returns:
            The base URL joined to ``fetch_search/`` with exactly one slash.
        """
        if base_url is None:
            base_url = os.getenv("SELENIUM_ENDPOINT", "http://localhost:80")
        # URLs always use "/"; os.path.join would emit "\\" on Windows.
        return "{}/fetch_search/".format(base_url.rstrip("/"))

    def _process_search(self, obj_search, endpoint):
        """Fetch one search from the Selenium service and store its URLs.

        Args:
            obj_search: ``Search`` object whose ``search`` string is sent.
            endpoint: Full URL of the ``fetch_search/`` endpoint.

        Raises:
            requests.RequestException: On connection problems, timeouts or
                an HTTP error status.
            ValueError: If the reply is not valid JSON or not a JSON object.
        """
        response = requests.post(
            endpoint,
            json={"search": obj_search.search},
            timeout=self.REQUEST_TIMEOUT,
        )
        # Do not interpret the JSON body of an error reply as search results.
        response.raise_for_status()
        results = response.json()
        logger.debug("Selenium results for URL {}: {}".format(obj_search.search, str(results)))

        if not isinstance(results, dict):
            raise ValueError(
                "Unexpected Selenium response type: {}".format(type(results).__name__)
            )

        for source, urls_fetched in results.items():
            # Get (or lazily create) the source object for this result group
            obj_source, _created = Source.objects.get_or_create(source="selenium {}".format(source))
            # Write to DB
            DB_Handler().insert_raw_urls(urls_fetched, obj_source, obj_search)

    def run(self):
        """Run the Selenium fetch for every keyword search.

        Failures are logged as warnings and never propagated: an error in
        one search does not prevent the remaining searches from running.
        """
        try:
            logger.debug("Starting FetchSeleniumSourceSearch.run()")

            # Get keyword searches
            list_keyword_search = Search.objects.filter(type=Search.TYPE_ENUM.KEYWORD_SEARCH)
            logger.debug("Fetching news Selenium based for keyword searches: {}".format([e.search for e in list_keyword_search]))

            # The endpoint does not depend on the search; resolve it once.
            selenium_fetch_endpoint = self._build_endpoint()

            # Run selenium search for each keyword search
            for obj_search in list_keyword_search:
                try:
                    self._process_search(obj_search, selenium_fetch_endpoint)
                except Exception as e:
                    logger.warning("Exception while fetching selenium search: {}\n{}".format(obj_search.search, str(e)))
        except Exception as e:
            logger.warning("Exception in FetchSeleniumSourceSearch.run(): {}\n{}".format(e, traceback.format_exc()))
|
||||
Reference in New Issue
Block a user