Refactoring fetcher, working feeds and raw url writer

This commit is contained in:
Luciano Gervasoni
2025-03-12 17:56:40 +01:00
parent e124dbc21a
commit 61c31ee9aa
24 changed files with 2085 additions and 194 deletions

View File

@@ -1,13 +1,65 @@
# Job decorator imported from django_rq; applied to task functions below.
from django_rq import job
import time
# Project-local feed fetcher and database helper.
from .src.fetch_feed import FetchFeeds
from .src.db_utils import DB_Handler
# The bare string literal below holds disabled imports and a disabled
# module-level DB handler setup; because it is only a string, none of it runs.
'''
from src.fetch_parser import FetchParser
from src.fetch_search import FetchSearcher
from src.missing_kids_fetch import MissingKidsFetch
from src.missing_kids_status import MissingKidsStatus
from src.url_status import UpdateErrorURLs
from src.db_utils import DB_Handler
from src.credentials import db_connect_info, redis_connect_info
# DB Handler
db_handler = DB_Handler(db_connect_info, redis_connect_info)
'''
import logging
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
@job
def task_1(message):
    """Write the given message to the module log at INFO level."""
    formatted = "Message: {}".format(message)
    logger.info(formatted)
def background_task(process_type: str):
    """Run the feed fetcher as a background task and log its progress.

    Args:
        process_type: Label identifying the requested task. It is currently
            only written to the log; every call runs ``FetchFeeds().run()``
            regardless of its value (the per-type dispatch is disabled, see
            the commented block in the body).

    Returns:
        None. Any exception raised while the task runs is logged together
        with its traceback and is not propagated to the caller.
    """
    logger.info("Task triggered: {}".format(process_type))
    try:
        FetchFeeds().run()
        # DB_Handler().process_raw_urls()

        # Disabled dispatch on process_type, kept for reference. It relies on
        # names (db_handler, cred, FetchParser, ...) that are not in scope.
        #
        # if (process_type == "fetch_feeds"):
        #     FetchFeeds(db_handler).run()
        # elif (process_type == "fetch_parser"):
        #     FetchParser(db_handler).run()
        # elif (process_type == "search") or (process_type == "search_full"):
        #     FetchSearcher(cred.db_connect_info, cred.redis_connect_info, full=True).run()
        # elif (process_type == "search_reduced"):
        #     FetchSearcher(cred.db_connect_info, cred.redis_connect_info, full=False).run()
        # # Selenium based
        # elif (process_type == "fetch_missing_kids_reduced"):
        #     MissingKidsFetch(db_handler, num_pages=4).run()
        # elif (process_type == "fetch_missing_kids_full"):
        #     MissingKidsFetch(db_handler, num_pages=100000).run()
        # elif (process_type == "update_missing_kids_status_reduced"):
        #     MissingKidsStatus(cred.db_connect_info, cred.redis_connect_info, num_urls=50).update_missing_kids_status()
        # elif (process_type == "update_missing_kids_status_full"):
        #     MissingKidsStatus(cred.db_connect_info, cred.redis_connect_info, num_urls=None).update_missing_kids_status()
        # elif (process_type == "update_error_urls"):
        #     UpdateErrorURLs(cred.db_connect_info, cred.redis_connect_info, num_urls=100).update_error_urls_status()
        # else:
        #     logger.error("Task error, unknown type: {}".format(process_type))
        #     return

        logger.info("Task completed: {}".format(process_type))
    except Exception as e:
        # Task boundary: record the failure with its traceback instead of
        # letting it escape, so a failing run is diagnosable from the log.
        logger.exception(e)