Docker and deployment to fetcher server

This commit is contained in:
Luciano Gervasoni
2025-06-27 09:14:44 +02:00
parent f659d4adb3
commit 8b689729bf
12 changed files with 148 additions and 222 deletions

View File

@@ -54,6 +54,7 @@ class FetchSearcher():
for SearchInstance in ListSearchInstances:
# Sleep between requests, avoid too many requests...
time.sleep(float(os.getenv("FETCHER_BETWEEN_SEARCHES_SLEEP", 5)))
# TODO: Random proxy / VPN
SearchInstance(args).fetch_articles(db_writer, obj_search)
# TODO: https://github.com/tasos-py/Search-Engines-Scraper/tree/master

View File

@@ -1,8 +1,6 @@
import time
import feedparser
import os
from django.utils import timezone
from datetime import timedelta
from ..models import Search, Source
from .fetch_utils_gnews import decode_gnews_urls
from .logger import get_logger

View File

@@ -11,7 +11,7 @@ logging.basicConfig(format='%(filename)s | %(levelname)s | %(asctime)s | %(messa
logger = logging.getLogger("fetcher")
logger.setLevel(logging.DEBUG)
# To file log: INFO / WARNING / ERROR / CRITICAL
# To file log: DEBUG / INFO / WARNING / ERROR / CRITICAL
fh = logging.handlers.RotatingFileHandler(filename=os.path.join(logs_directory, "debug.log"), mode="a", maxBytes=10000000, backupCount=1)
fh.setFormatter(logging.Formatter('%(levelname)s | %(asctime)s | %(message)s'))
fh.setLevel(logging.DEBUG)

View File

@@ -74,7 +74,7 @@ def process_missing_kids_urls_all(batch_size=None):
logger.info("Task completed: {}".format(task))
@job('default')
def clean_old_url_content(older_than_days=60):
def clean_old_url_content(older_than_days=14):
task = "Clean old URL content"
logger.info("Task triggered: {}".format(task))
DB_Handler().clean_old_url_content(older_than_days=older_than_days)