Dockerization, whitenoise serving static, refactor
This commit is contained in:
42
app_urls/fetcher/src/fetch_missing_kids.py
Normal file
42
app_urls/fetcher/src/fetch_missing_kids.py
Normal file
@@ -0,0 +1,42 @@
|
||||
from .db_utils import DB_Handler
|
||||
from ..models import Search, Source
|
||||
import os
|
||||
import requests
|
||||
import json
|
||||
import traceback
|
||||
from .logger import get_logger
|
||||
logger = get_logger()
|
||||
|
||||
class FetchMissingKids():
    """Fetch poster URLs from the missing-kids endpoint and store them as raw URLs.

    The URL list is requested over HTTP from the service whose base address is
    read from the ``SELENIUM_ENDPOINT`` environment variable, and the result is
    handed to ``DB_Handler.insert_raw_urls``.
    """

    # Fallback base URL used when SELENIUM_ENDPOINT is not set.
    _DEFAULT_ENDPOINT = "http://localhost:80"
    # Requests for more pages than this (or for -1 pages) get the long timeout.
    _LONG_TIMEOUT_PAGE_THRESHOLD = 15
    _LONG_TIMEOUT_SECONDS = 60 * 90   # 1.5h
    _SHORT_TIMEOUT_SECONDS = 60 * 10  # 10 min

    def __init__(self) -> None:
        logger.debug("Initializing Fetcher MissingKids")

    @staticmethod
    def _build_endpoint(base_url, number_pages):
        """Return the fetch URL for *number_pages* under *base_url*.

        URLs always use forward slashes, so the parts are joined explicitly
        instead of with ``os.path.join`` (which is OS-dependent).
        """
        return "{}/get_missing_kids/?pages={}".format(base_url.rstrip("/"), number_pages)

    @staticmethod
    def _request_timeout(number_pages):
        """Return the HTTP timeout in seconds for a request of *number_pages* pages."""
        if (number_pages > FetchMissingKids._LONG_TIMEOUT_PAGE_THRESHOLD) or (number_pages == -1):
            return FetchMissingKids._LONG_TIMEOUT_SECONDS
        return FetchMissingKids._SHORT_TIMEOUT_SECONDS

    def run(self, number_pages=-1):
        """Request the list of URLs and insert it into the database.

        Args:
            number_pages: Value sent as the ``pages`` query parameter of the
                fetch endpoint. ``-1`` (the default) and values above 15
                select the long request timeout.

        Returns:
            None. Request or decoding failures are logged and an empty URL
            list is inserted instead; any other exception is logged with its
            traceback and not re-raised.
        """
        try:
            logger.debug("Starting MissingKids.run(), processing #{} pages".format(number_pages))

            # Get source object
            obj_source, _ = Source.objects.get_or_create(source="missingkids.org")
            # Get search object
            obj_search, _ = Search.objects.get_or_create(search="missingkids.org/poster", type=Search.TYPE_ENUM.URL_HOST)

            # Missing kids fetching endpoint, parameter number of pages to fetch.
            # Bound before the try block so the handlers below can always log it.
            missingkids_fetch_endpoint = self._build_endpoint(
                os.getenv("SELENIUM_ENDPOINT", self._DEFAULT_ENDPOINT), number_pages
            )

            try:
                # Timeout
                timeout = self._request_timeout(number_pages)
                # Request
                r = requests.get(missingkids_fetch_endpoint, timeout=timeout)
                # Decode
                urls_fetched = json.loads(r.text).get("list_urls", [])
            except requests.exceptions.Timeout as e:
                logger.warning("Timeout on request: {}. {}".format(missingkids_fetch_endpoint, str(e)))
                urls_fetched = []
            except Exception as e:
                # Connection errors, invalid JSON, unexpected payload shape, etc.
                # Best-effort: fall back to an empty list rather than aborting.
                logger.warning("Error on request: {}. {}".format(missingkids_fetch_endpoint, str(e)))
                urls_fetched = []

            # Write to DB
            DB_Handler().insert_raw_urls(urls_fetched, obj_source, obj_search)
        except Exception as e:
            logger.warning("Exception in MissingKids.run(): {}\n{}".format(e, traceback.format_exc()))
|
||||
Reference in New Issue
Block a user