Dockerization, whitenoise serving static, refactor
This commit is contained in:
57
app_urls/fetcher/src/fetch_search.py
Normal file
57
app_urls/fetcher/src/fetch_search.py
Normal file
@@ -0,0 +1,57 @@
|
||||
from .db_utils import DB_Handler
|
||||
from ..models import Search
|
||||
from django.db.models import Q
|
||||
import traceback
|
||||
import time
|
||||
import os
|
||||
from .fetch_search_instances import ListSearchInstances
|
||||
from .logger import get_logger
|
||||
logger = get_logger()
|
||||
|
||||
class FetchSearcher():
    """Run every registered search backend for each stored ``Search`` entry.

    Only entries whose type is ``URL_HOST`` or ``KEYWORD_SEARCH`` are
    processed. Each entry is handed to every class in
    ``ListSearchInstances`` in turn.
    """

    def __init__(self) -> None:
        """Log that the searcher is being set up; no state is stored."""
        logger.debug("Initializing Fetcher Searcher")

    def run(self):
        """Fetch articles for all URL-host and keyword searches.

        For each matching ``Search`` row a fresh ``DB_Handler`` and a fresh
        argument dict (language ``en``, country ``US``) are created, then each
        search backend is instantiated with those arguments and asked to
        fetch articles. Before every backend call the method sleeps for
        ``FETCHER_BETWEEN_SEARCHES_SLEEP`` seconds (default 5).

        Any exception is logged as a warning together with its traceback and
        is not re-raised; because the handler wraps the whole loop, the
        remaining searches are skipped once an exception occurs.
        """
        try:
            logger.debug("Starting FetchSearcher.run()")

            # Select the search entries this fetcher knows how to handle
            wanted_types = Q(type=Search.TYPE_ENUM.URL_HOST) | Q(type=Search.TYPE_ENUM.KEYWORD_SEARCH)
            searches = Search.objects.filter(wanted_types)
            described = [f"{entry.search} ({entry.type})" for entry in searches]
            logger.debug(f"Fetching from search: {described}")

            for search_entry in searches:
                # TODO: language & country customization
                # language, country = obj_search.language_country.split("-")

                # Host searches are restricted to the site with a "site:" prefix
                if search_entry.type == Search.TYPE_ENUM.URL_HOST:
                    prefix = "site:"
                else:
                    prefix = ""
                query_text = f"{prefix}{search_entry.search}"

                # TODO (keyword searches): add search with intitle keyword
                # TODO: allintitle: "child abuse"
                # TODO: intitle: "child abuse"

                logger.debug(f"Starting keyword search: {query_text}")
                logger.debug(f"Search type: {search_entry.type}")

                # One DB writer and one argument dict per search entry
                writer = DB_Handler()
                backend_args = {
                    "language": "en",
                    "country": "US",
                    # "period": ["7d", "1d"], # TODO: List of periods to iterate
                }

                for backend_cls in ListSearchInstances:
                    # Throttle: pause before each request to avoid too many requests
                    pause_seconds = int(os.getenv("FETCHER_BETWEEN_SEARCHES_SLEEP", 5))
                    time.sleep(pause_seconds)

                    backend = backend_cls(backend_args)
                    backend.fetch_articles(writer, search_entry)

                # TODO: https://github.com/tasos-py/Search-Engines-Scraper/tree/master
        except Exception as err:
            logger.warning(f"Exception in FetchSearcher.run(): {err}\n{traceback.format_exc()}")
|
||||
Reference in New Issue
Block a user