import json
import os
import traceback

import requests

from ..models import Search, Source
from .db_utils import DB_Handler
from .logger import get_logger

logger = get_logger()


class FetchMissingKids():
    """Fetch poster URLs from the MissingKids endpoint and store them as raw URLs.

    The endpoint base URL is read from the ``SELENIUM_ENDPOINT`` environment
    variable (default ``http://localhost:80``).
    """

    def __init__(self) -> None:
        logger.debug("Initializing Fetcher MissingKids")

    def run(self, number_pages: int = -1) -> None:
        """Fetch the URL list and hand it to ``DB_Handler.insert_raw_urls``.

        Best-effort: no exception propagates to the caller. A failed fetch
        is logged and results in an empty list being passed to the DB
        handler; any other failure is logged with its traceback.

        Args:
            number_pages: Value forwarded as the ``pages`` query parameter.
                ``-1`` gets the long timeout (presumably "all pages" --
                confirm against the endpoint implementation).
        """
        try:
            logger.debug("Starting MissingKids.run(), processing #{} pages".format(number_pages))

            # Source and search rows the fetched URLs are associated with
            obj_source, _ = Source.objects.get_or_create(source="missingkids.org")
            obj_search, _ = Search.objects.get_or_create(
                search="missingkids.org/poster", type=Search.TYPE_ENUM.URL_HOST
            )

            urls_fetched = self._fetch_urls(number_pages)

            # Write to DB
            DB_Handler().insert_raw_urls(urls_fetched, obj_source, obj_search)
        except Exception as e:
            logger.warning("Exception in MissingKids.run(): {}\n{}".format(e, traceback.format_exc()))

    def _fetch_urls(self, number_pages):
        """Return the ``list_urls`` entry of the endpoint's JSON reply, or ``[]`` on failure."""
        base_url = os.getenv("SELENIUM_ENDPOINT", "http://localhost:80")
        # Join with "/" explicitly: os.path.join is a filesystem helper and
        # would insert a backslash on Windows.
        endpoint = "{}/get_missing_kids/?pages={}".format(base_url.rstrip("/"), number_pages)

        try:
            # Larger jobs get a longer timeout
            if (number_pages > 15) or (number_pages == -1):
                timeout = 60 * 90  # 1.5h
            else:
                timeout = 60 * 10  # 10 min

            logger.debug("Running request on MissingKids endpoint: {}".format(endpoint))
            response = requests.get(endpoint, timeout=timeout)
            # Do not try to decode the body of an HTTP error response
            response.raise_for_status()
            return json.loads(response.text).get("list_urls", [])
        except requests.exceptions.Timeout as e:
            logger.warning("Timeout on request: {}. {}".format(endpoint, str(e)))
        except Exception as e:
            # Connection errors, HTTP errors, invalid JSON, unexpected payload shape, ...
            logger.warning("Error on request: {}. {}".format(endpoint, str(e)))
        return []