Working fetch search, refactoring DB towards source search

This commit is contained in:
Luciano Gervasoni
2025-03-20 11:42:33 +01:00
parent 83f76232b2
commit 05e17266f1
14 changed files with 558 additions and 120 deletions

View File

@@ -2,6 +2,7 @@ from django_rq import job
from .src.fetch_feed import FetchFeeds
from .src.fetch_parser import FetchParser
from .src.fetch_search import FetchSearcher
from .src.db_utils import DB_Handler
'''
from src.fetch_search import FetchSearcher
@@ -21,16 +22,20 @@ def background_task(process_type: str):
FetchFeeds().run()
elif (process_type == "fetch_parser"):
FetchParser().run()
# TODO: Encode batch_size in process_type.
elif (process_type == "process_raw_urls"):
DB_Handler().process_raw_urls(batch_size=50)
elif (process_type == "process_error_urls"):
DB_Handler().process_error_urls(batch_size=50)
elif (process_type == "process_missing_kids_urls"):
DB_Handler().process_missing_kids_urls(batch_size=50)
elif ("process_missing_kids_urls" in process_type):
elif (process_type == "fetch_search"):
FetchSearcher().run()
#elif (process_type == "fetch_missingkids"):
# FetchMissingKids().run()
elif ("process_" in process_type):
# Batch size is the trailing "_"-separated token of process_type (e.g. "process_raw_urls_50")
batch_size = int(process_type.split("_")[-1])
DB_Handler().process_missing_kids_urls(batch_size=batch_size)
# Task type
if ("process_raw_urls" in process_type):
DB_Handler().process_raw_urls(batch_size=batch_size)
elif ("process_error_urls" in process_type):
DB_Handler().process_error_urls(batch_size=batch_size)
elif ("process_missing_kids_urls" in process_type):
DB_Handler().process_missing_kids_urls(batch_size=batch_size)
else:
logger.info("Task unknown!: {}".format(process_type))
@@ -47,15 +52,7 @@ def background_task(process_type: str):
MissingKidsFetch(db_handler, num_pages=4).run()
elif (process_type == "fetch_missing_kids_full"):
MissingKidsFetch(db_handler, num_pages=100000).run()
elif (process_type == "update_missing_kids_status_reduced"):
MissingKidsStatus(cred.db_connect_info, cred.redis_connect_info, num_urls=50).update_missing_kids_status()
elif (process_type == "update_missing_kids_status_full"):
MissingKidsStatus(cred.db_connect_info, cred.redis_connect_info, num_urls=None).update_missing_kids_status()
elif (process_type == "update_error_urls"):
UpdateErrorURLs(cred.db_connect_info, cred.redis_connect_info, num_urls=100).update_error_urls_status()
else:
logger.error("Task error, unknown type: {}".format(process_type))
return