Dockerization, whitenoise serving static, refactor

This commit is contained in:
Luciano Gervasoni
2025-04-04 10:53:16 +02:00
parent 5addfa5ba9
commit 4dbe2e55ef
39 changed files with 708 additions and 1238 deletions

View File

@@ -0,0 +1,57 @@
from .db_utils import DB_Handler
from ..models import Search
from django.db.models import Q
import traceback
import time
import os
from .fetch_search_instances import ListSearchInstances
from .logger import get_logger
# Module-level logger, obtained from the package's own logger helper.
logger = get_logger()
class FetchSearcher():
    """Run every entry of ``ListSearchInstances`` against the stored searches.

    Only ``Search`` rows whose type is ``URL_HOST`` or ``KEYWORD_SEARCH`` are
    processed.
    """

    def __init__(self) -> None:
        logger.debug("Initializing Fetcher Searcher")

    def run(self):
        """Fetch articles for each URL-host / keyword search.

        Exceptions are logged as warnings instead of being propagated. A
        failure inside one search instance is logged and the remaining
        instances and searches still run; any other failure (query, env
        parsing, DB handler creation) ends the run via the outer handler.
        """
        try:
            logger.debug("Starting FetchSearcher.run()")
            # Get search objects of interest
            list_search_obj = Search.objects.filter(
                Q(type=Search.TYPE_ENUM.URL_HOST) | Q(type=Search.TYPE_ENUM.KEYWORD_SEARCH)
            )
            logger.debug("Fetching from search: {}".format(["{} ({})".format(e.search, e.type) for e in list_search_obj]))
            # Parse the delay once per run rather than before every request
            sleep_seconds = int(os.getenv("FETCHER_BETWEEN_SEARCHES_SLEEP", 5))
            # Search
            for obj_search in list_search_obj:
                self._fetch_search(obj_search, sleep_seconds)
        except Exception as e:
            logger.warning("Exception in FetchSearcher.run(): {}\n{}".format(e, traceback.format_exc()))

    def _fetch_search(self, obj_search, sleep_seconds):
        """Run all search instances for one Search row, isolating failures per instance."""
        # TODO: language & country customization
        # language, country = obj_search.language_country.split("-")
        # TODO (KEYWORD_SEARCH only): add search with intitle keyword
        # TODO: allintitle: "child abuse"
        # TODO: intitle: "child abuse"

        # The query string is only logged here; fetch_articles() receives the
        # Search row itself.
        site_prefix = "site:" if obj_search.type == Search.TYPE_ENUM.URL_HOST else ""
        keyword_search = "{}{}".format(site_prefix, obj_search.search)
        logger.debug("Starting keyword search: {}".format(keyword_search))
        logger.debug("Search type: {}".format(obj_search.type))

        # DB writer
        db_writer = DB_Handler()
        # Keyword arguments
        args = {
            "language": "en",
            "country": "US",
            # "period": ["7d", "1d"], # TODO: List of periods to iterate
        }

        for SearchInstance in ListSearchInstances:
            # Sleep between requests, avoid too many requests...
            time.sleep(sleep_seconds)
            try:
                SearchInstance(args).fetch_articles(db_writer, obj_search)
            except Exception as e:
                # One failing search instance must not cancel the others.
                logger.warning("Exception in FetchSearcher.run() for {} on '{}': {}\n{}".format(
                    getattr(SearchInstance, "__name__", SearchInstance), keyword_search, e, traceback.format_exc()))
        # TODO: https://github.com/tasos-py/Search-Engines-Scraper/tree/master