Refactoring fetcher, working feeds and raw url writer
This commit is contained in:
36
app_urls/api/obsolete_src/missing_kids_fetch.py
Normal file
36
app_urls/api/obsolete_src/missing_kids_fetch.py
Normal file
@@ -0,0 +1,36 @@
|
||||
from .db_utils import DB_Handler
|
||||
import requests
|
||||
import json
|
||||
from .logger import get_logger
|
||||
logger = get_logger()
|
||||
|
||||
class MissingKidsFetch():

    def __init__(self, db_handler: DB_Handler, num_pages: int) -> None:
        """Fetch missing-kids URLs from the internal selenium service and persist them.

        Args:
            db_handler: DB handler used to persist the fetched URLs.
            num_pages: number of result pages the scraping service should visit.
        """
        logger.debug("Initializing News MissingKids")
        self.db_handler = db_handler
        self.num_pages = num_pages
        # Internal scraping-service endpoint; the {} placeholder takes the page count.
        self.missingkids_fetch_endpoint = "http://selenium_app:80/get_missing_kids/?pages={}"

    def run(self):
        """Fetch the URL list from the scraping service and write it to the DB.

        Best-effort: a request/decoding failure is logged and yields an empty
        batch; any failure in the DB write is logged by the outer handler.
        This method never raises.
        """
        try:
            logger.debug("Starting NewsMissingKids.run()")
            try:
                # Scraping many pages is slow, so scale the request timeout.
                if self.num_pages > 15:
                    timeout = 60 * 90  # 1.5h
                else:
                    timeout = 60 * 5  # 5 min

                # Request the URL list from the selenium scraping service.
                r = requests.get(
                    self.missingkids_fetch_endpoint.format(self.num_pages),
                    timeout=timeout,
                )

                # Decode the JSON payload; fall back to [] if the key is absent.
                urls_fetched = json.loads(r.text).get("list_urls", [])
            except Exception as e:
                # BUG FIX: the original logged the bare name
                # `missingkids_fetch_endpoint`, which is undefined here and
                # raised a NameError inside the handler, hiding the real
                # error. Use the instance attribute instead.
                logger.warning("Timeout on request: {}. {}".format(
                    self.missingkids_fetch_endpoint, str(e)))
                urls_fetched = []

            # Tag the batch with its fetching source before persisting.
            source = "missingkids fetcher"
            self.db_handler.write_batch(urls_fetched, source)
        except Exception as e:
            logger.warning("Exception in NewsMissingKids.run(): {}".format(str(e)))
|
||||
Reference in New Issue
Block a user