From cb621c9d6b73e481c6896db4f299ef71d647cc8d Mon Sep 17 00:00:00 2001 From: Luciano Gervasoni Date: Thu, 17 Jul 2025 22:29:06 +0200 Subject: [PATCH] Switching to django celery for workers --- app_urls/README.md | 11 + app_urls/core/__init__.py | 3 + app_urls/core/celery.py | 14 + app_urls/core/settings.py | 42 +- app_urls/core/urls.py | 1 - app_urls/fetcher/src/logger.py | 29 +- app_urls/fetcher/tasks.py | 23 +- app_urls/fetcher/urls.py | 2 - app_urls/fetcher/views.py | 2 +- app_urls/fetcher/views_base.py | 11 +- app_urls/initialize.sh | 3 +- app_urls/requirements.txt | 2 +- app_urls/run.sh | 9 +- app_urls/scheduled_tasks.json | 730 ++++++++++++++++++++------------- docker-compose-dev.yml | 6 +- 15 files changed, 540 insertions(+), 348 deletions(-) create mode 100644 app_urls/core/celery.py diff --git a/app_urls/README.md b/app_urls/README.md index 5c63486..9475fd3 100644 --- a/app_urls/README.md +++ b/app_urls/README.md @@ -73,6 +73,17 @@ class Meta: * Environment variables * In docker-compose.yml +* Tasks +``` +python manage.py dumpdata \ + django_celery_beat.PeriodicTask \ + django_celery_beat.IntervalSchedule \ + django_celery_beat.CrontabSchedule \ + django_celery_beat.SolarSchedule \ + django_celery_beat.ClockedSchedule \ + --indent 2 > scheduled_tasks.json +``` + * Deploy ``` # Check environments variables on .env file diff --git a/app_urls/core/__init__.py b/app_urls/core/__init__.py index e69de29..fb989c4 100644 --- a/app_urls/core/__init__.py +++ b/app_urls/core/__init__.py @@ -0,0 +1,3 @@ +from .celery import app as celery_app + +__all__ = ('celery_app',) diff --git a/app_urls/core/celery.py b/app_urls/core/celery.py new file mode 100644 index 0000000..8f95a57 --- /dev/null +++ b/app_urls/core/celery.py @@ -0,0 +1,14 @@ +# core/celery.py +import os +from celery import Celery + +# Set default Django settings module +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings') + +app = Celery('core') + +# Load config from Django settings, namespace CELERY +app.config_from_object('django.conf:settings', namespace='CELERY') + +# Auto-discover tasks from all registered Django app configs +app.autodiscover_tasks() diff --git a/app_urls/core/settings.py b/app_urls/core/settings.py index a7eaacb..3240048 100644 --- a/app_urls/core/settings.py +++ b/app_urls/core/settings.py @@ -12,14 +12,12 @@ https://docs.djangoproject.com/en/5.1/ref/settings/ from pathlib import Path import os -from typing import Dict -from scheduler.types import SchedulerConfiguration, Broker, QueueConfiguration - +# Queues and routing +from kombu import Queue # Build paths inside the project like this: BASE_DIR / 'subdir'. BASE_DIR = Path(__file__).resolve().parent.parent - # Quick-start development settings - unsuitable for production # SECURITY WARNING: keep the secret key used in production secret! @@ -40,7 +38,7 @@ INSTALLED_APPS = [ 'django.contrib.sessions', 'django.contrib.messages', 'django.contrib.staticfiles', - 'scheduler', + 'django_celery_beat', 'fetcher', ] @@ -110,27 +108,21 @@ CACHES = { } } -SCHEDULER_CONFIG = SchedulerConfiguration( - EXECUTIONS_IN_PAGE=20, - SCHEDULER_INTERVAL=10, - BROKER=Broker.REDIS, - CALLBACK_TIMEOUT=60, # Callback timeout in seconds (success/failure/stopped) - # Default values, can be overriden per task/job - DEFAULT_SUCCESS_TTL=10 * 60, # Time To Live (TTL) in seconds to keep successful job results - DEFAULT_FAILURE_TTL=365 * 24 * 60 * 60, # Time To Live (TTL) in seconds to keep job failure information - DEFAULT_JOB_TTL=10 * 60, # Time To Live (TTL) in seconds to keep job information - DEFAULT_JOB_TIMEOUT=os.environ.get("JOB_DEFAULT_TIMEOUT", 60*30), # timeout (seconds) for a job - # General configuration values - DEFAULT_WORKER_TTL=10 * 60, # Time To Live (TTL) in seconds to keep worker information after last heartbeat - DEFAULT_MAINTENANCE_TASK_INTERVAL=10 * 60, # The interval to run maintenance tasks in seconds. 10 minutes. - DEFAULT_JOB_MONITORING_INTERVAL=30, # The interval to monitor jobs in seconds. - SCHEDULER_FALLBACK_PERIOD_SECS=120, # Period (secs) to wait before requiring to reacquire locks + + +# Celery configuration +CELERY_BROKER_URL = 'redis://{}:{}/{}'.format(os.environ.get("REDIS_HOST", "localhost"), os.environ.get("REDIS_PORT", 6379), os.environ.get("REDIS_DB", 0)) +CELERY_RESULT_BACKEND = 'redis://{}:{}/{}'.format(os.environ.get("REDIS_HOST", "localhost"), os.environ.get("REDIS_PORT", 6379), os.environ.get("REDIS_DB_RESULTS", 1)) +CELERY_ACCEPT_CONTENT = ['json'] +CELERY_TASK_SERIALIZER = 'json' + +# Celery Beat scheduler (required for django-celery-beat to work) +CELERY_BEAT_SCHEDULER = 'django_celery_beat.schedulers.DatabaseScheduler' + +CELERY_TASK_QUEUES = ( + Queue('default'), + Queue('low'), ) -SCHEDULER_QUEUES: Dict[str, QueueConfiguration] = { - 'default': QueueConfiguration(URL='redis://{}:{}/{}'.format(os.environ.get("REDIS_HOST", "localhost"), os.environ.get("REDIS_PORT", 6379), os.environ.get("REDIS_DB", 0))), - 'high': QueueConfiguration(URL='redis://{}:{}/{}'.format(os.environ.get("REDIS_HOST", "localhost"), os.environ.get("REDIS_PORT", 6379), os.environ.get("REDIS_DB", 0))), - 'low': QueueConfiguration(URL='redis://{}:{}/{}'.format(os.environ.get("REDIS_HOST", "localhost"), os.environ.get("REDIS_PORT", 6379), os.environ.get("REDIS_DB", 0))), -} # Password validation diff --git a/app_urls/core/urls.py b/app_urls/core/urls.py index 62d899a..5a909df 100644 --- a/app_urls/core/urls.py +++ b/app_urls/core/urls.py @@ -19,6 +19,5 @@ from django.urls import path, include urlpatterns = [ path('admin/', admin.site.urls), - path('scheduler/', include('scheduler.urls')), path('', include('fetcher.urls')), ] diff --git a/app_urls/fetcher/src/logger.py b/app_urls/fetcher/src/logger.py index cc9e800..2fae297 100644 --- a/app_urls/fetcher/src/logger.py +++ b/app_urls/fetcher/src/logger.py @@ -1,48 +1,37 @@ import logging import os +# Set to warning +logging.getLogger("urllib3").setLevel(logging.WARNING) +logging.getLogger("newspaper").setLevel(logging.WARNING) + # Get env var logs_directory = os.getenv("PATH_LOGS_DIRECTORY", "logs") # Directory of logs os.makedirs(logs_directory, exist_ok=True) -class PPIDFilter(logging.Filter): - def filter(self, record): - # record.ppid = str(os.getppid()) + " " + multiprocessing.current_process().name # os.environ.get("PPID", "*" + os.environ.get("PID")) - record.ppid = os.getppid() - return True - -logging.basicConfig(format='%(filename)s | PPID=%(ppid)s | %(levelname)s | %(asctime)s | %(message)s') +logging.basicConfig(format='%(filename)s | %(levelname)s | %(asctime)s | %(message)s') logger = logging.getLogger("fetcher") -# logger.setFormatter(logging.Formatter('%(levelname)s | PPID=%(ppid)s | %(asctime)s | %(message)s')) -logger.addFilter(PPIDFilter()) -logger.setLevel(logging.INFO) +logger.setLevel(logging.DEBUG) # To file log: DEBUG / INFO / WARNING / ERROR / CRITICAL fh = logging.handlers.RotatingFileHandler(filename=os.path.join(logs_directory, "debug.log"), mode="a", maxBytes=10000000, backupCount=1) -fh.setFormatter(logging.Formatter('%(levelname)s | PPID=%(ppid)s | %(asctime)s | %(message)s')) -fh.addFilter(PPIDFilter()) +fh.setFormatter(logging.Formatter('%(levelname)s | %(asctime)s | %(message)s')) fh.setLevel(logging.DEBUG) logger.addHandler(fh) # To file log: INFO / WARNING / ERROR fh = logging.handlers.RotatingFileHandler(filename=os.path.join(logs_directory, "info.log"), mode="a", maxBytes=10000000, backupCount=1) -fh.setFormatter(logging.Formatter('%(levelname)s | PPID=%(ppid)s | %(asctime)s | %(message)s')) -fh.addFilter(PPIDFilter()) +fh.setFormatter(logging.Formatter('%(levelname)s | %(asctime)s | %(message)s')) fh.setLevel(logging.INFO) logger.addHandler(fh) # To file log: WARNING / ERROR / CRITICAL fh = logging.handlers.RotatingFileHandler(filename=os.path.join(logs_directory, "warning.log"), mode="a", maxBytes=10000000, backupCount=1) -fh.setFormatter(logging.Formatter('%(levelname)s | PPID=%(ppid)s | %(asctime)s | %(message)s')) -fh.addFilter(PPIDFilter()) +fh.setFormatter(logging.Formatter('%(levelname)s | %(asctime)s | %(message)s')) fh.setLevel(logging.WARNING) logger.addHandler(fh) -# Set to warning -logging.getLogger("urllib3").setLevel(logging.WARNING) -logging.getLogger("newspaper").setLevel(logging.WARNING) - def get_logger(): return logger diff --git a/app_urls/fetcher/tasks.py b/app_urls/fetcher/tasks.py index 50ca9f0..05f994f 100644 --- a/app_urls/fetcher/tasks.py +++ b/app_urls/fetcher/tasks.py @@ -1,4 +1,4 @@ -from scheduler import job +from celery import shared_task from .src.fetch_feed import FetchFeeds from .src.fetch_parser import FetchParser @@ -11,63 +11,64 @@ from .src.publisher import Publisher from .src.logger import get_logger logger = get_logger() -@job('default') + +@shared_task(queue='default') def fetch_feeds(): task = "Fetch Feeds" logger.info("Task triggered: {}".format(task)) FetchFeeds().run() logger.info("Task completed: {}".format(task)) -@job('default') +@shared_task(queue='default') def fetch_parser(): task = "Fetch Parser" logger.info("Task triggered: {}".format(task)) FetchParser().run() logger.info("Task completed: {}".format(task)) -@job('default') +@shared_task(queue='default') def fetch_search(): task = "Fetch Search" logger.info("Task triggered: {}".format(task)) FetchSearcher().run() logger.info("Task completed: {}".format(task)) -@job('default') +@shared_task(queue='low') def fetch_selenium_search(): task = "Fetch Selenium search" logger.info("Task triggered: {}".format(task)) FetchSeleniumSourceSearch().run() logger.info("Task completed: {}".format(task)) -@job('default') +@shared_task(queue='low') def fetch_missing_kids(number_pages=5): task = "Fetch MissingKids" logger.info("Task triggered: {}".format(task)) FetchMissingKids().run(number_pages) logger.info("Task completed: {}".format(task)) -@job('default') +@shared_task(queue='default') def process_raw_urls(batch_size=100): task = "Process raw URLs" logger.info("Task triggered: {}".format(task)) DB_Handler().process_raw_urls(batch_size=batch_size) logger.info("Task completed: {}".format(task)) -@job('default') +@shared_task(queue='default') def process_error_urls(batch_size=50): task = "Process error URLs" logger.info("Task triggered: {}".format(task)) DB_Handler().process_error_urls(batch_size=batch_size) logger.info("Task completed: {}".format(task)) -@job('default') +@shared_task(queue='low') def process_missing_kids_urls(batch_size=None, process_status_only=None): task = "Process Missing Kids URLs - batch_size={} process_status_only={}".format(batch_size, process_status_only) logger.info("Task triggered: {}".format(task)) DB_Handler().process_missing_kids_urls(batch_size=batch_size, process_status_only=process_status_only) logger.info("Task completed: {}".format(task)) -@job('default') +@shared_task(queue='default') def clean_old_url_content(older_than_days=14): task = "Clean old URL content" logger.info("Task triggered: {}".format(task)) @@ -75,6 +76,7 @@ def clean_old_url_content(older_than_days=14): logger.info("Task completed: {}".format(task)) +''' @job('default') def background_task(process_type: str): logger.info("Task triggered: {}".format(process_type)) @@ -143,3 +145,4 @@ def background_task(process_type: str): logger.info("Task completed: {}".format(process_type)) except Exception as e: logger.error(e) +''' \ No newline at end of file diff --git a/app_urls/fetcher/urls.py b/app_urls/fetcher/urls.py index 9f1903c..69605fb 100644 --- a/app_urls/fetcher/urls.py +++ b/app_urls/fetcher/urls.py @@ -7,8 +7,6 @@ urlpatterns = [ path('logs/database', views.log_db, name='log_db'), path('logs/', views.logs, name='logs'), # - path('task/', views.trigger_task, name='trigger_task'), - # path('urls/charts/', views.charts, name='charts'), path('urls-by-fetch-date/', views.urls_by_fetch_date, name='urls_by_fetch_date'), path('urls-per-status/', views.urls_per_status, name='urls_per_status'), diff --git a/app_urls/fetcher/views.py b/app_urls/fetcher/views.py index 3f773eb..8cbd23d 100644 --- a/app_urls/fetcher/views.py +++ b/app_urls/fetcher/views.py @@ -1,4 +1,4 @@ -from .views_base import link_list, logs, log_db, trigger_task +from .views_base import link_list, logs, log_db #, trigger_task, from django.core.paginator import Paginator from django.shortcuts import render, get_object_or_404 diff --git a/app_urls/fetcher/views_base.py b/app_urls/fetcher/views_base.py index 52598db..68e0ad8 100644 --- a/app_urls/fetcher/views_base.py +++ b/app_urls/fetcher/views_base.py @@ -1,15 +1,17 @@ import os -from .tasks import background_task from django.http import JsonResponse, HttpResponse from django.db import connection #################################################################################################### +""" +### from .tasks import background_task + def trigger_task(request, task): # Enqueue function in "default" queue background_task.delay(task) return JsonResponse({"message": "Task has been enqueued!", "task": task}) +""" -#################################################################################################### def link_list(request): # Base URL path app_url = request.build_absolute_uri() @@ -19,8 +21,8 @@ def link_list(request): # List of links list_links = \ [ os.path.join(app_url, "admin"), os.path.join(app_url, "urls") ] + \ - [ os.path.join(app_url, "logs", log_type) for log_type in ["database", "debug", "info", "warning"] ] + \ - [ os.path.join(app_url, "task", l) for l in links_fetch + links_process ] + [ os.path.join(app_url, "logs", log_type) for log_type in ["database", "debug", "info", "warning"] ] #+ \ + #[ os.path.join(app_url, "task", l) for l in links_fetch + links_process ] # Links tuple links = [(l, l) for l in list_links] @@ -32,6 +34,7 @@ def link_list(request): return HttpResponse(html) + #################################################################################################### def logs(request, log_type): # Capture output: python manage.py rqstats diff --git a/app_urls/initialize.sh b/app_urls/initialize.sh index 80d0651..ec8e3ed 100755 --- a/app_urls/initialize.sh +++ b/app_urls/initialize.sh @@ -6,9 +6,10 @@ else echo "Initializating database" python init_db.py --initialize_tables --initialize_data python manage.py makemigrations fetcher; python manage.py migrate --fake-initial + python manage.py migrate django_celery_beat python manage.py createsuperuser --noinput python manage.py collectstatic --no-input - python manage.py import --filename scheduled_tasks.json + python manage.py loaddata scheduled_tasks.json # # python manage.py inspectdb # Debugging model fi diff --git a/app_urls/requirements.txt b/app_urls/requirements.txt index de35441..427b318 100644 --- a/app_urls/requirements.txt +++ b/app_urls/requirements.txt @@ -1,5 +1,5 @@ django==5.1 -django-tasks-scheduler==4.0.5 +django-celery-beat django-redis psycopg[binary] gunicorn diff --git a/app_urls/run.sh b/app_urls/run.sh index 980798e..6d4ab06 100755 --- a/app_urls/run.sh +++ b/app_urls/run.sh @@ -7,7 +7,12 @@ else echo "Running in PROD mode" # Multi-worker # gunicorn core.wsgi:application --bind 0.0.0.0:8000 --timeout 86400 & while true; do echo "Initializing worker default" >> /opt/logs/warning.log; python manage.py scheduler_worker -v 2 --traceback default high 2>> /opt/logs/warning.log; done & while true; do echo "Initializing worker low" >> /opt/logs/warning.log; python manage.py scheduler_worker -v 2 --without-scheduler --traceback low 2>> /opt/logs/warning.log; done - (sleep 10; while true; do echo "Initializing worker default" >> /opt/logs/info.log; python manage.py scheduler_worker -v 1 --worker-ttl 172800 --traceback --name default default high; sleep 120; done) & - (sleep 10; while true; do echo "Initializing worker low" >> /opt/logs/info.log; python manage.py scheduler_worker -v 1 --worker-ttl 172800 --traceback --name low low; sleep 120; done) & + # + #(sleep 10; while true; do echo "Initializing worker default" >> /opt/logs/info.log; python manage.py scheduler_worker -v 1 --worker-ttl 172800 --traceback --name default default high; sleep 120; done) & + #(sleep 10; while true; do echo "Initializing worker low" >> /opt/logs/info.log; python manage.py scheduler_worker -v 1 --worker-ttl 172800 --traceback --name low low; sleep 120; done) & + # + celery -A core beat -l info & + celery -A core worker -l info --concurrency=1 -Q default & + celery -A core worker -l info --concurrency=1 -Q low & gunicorn core.wsgi:application --bind 0.0.0.0:8000 --timeout 172800 fi diff --git a/app_urls/scheduled_tasks.json b/app_urls/scheduled_tasks.json index 4e071f6..ff31f07 100644 --- a/app_urls/scheduled_tasks.json +++ b/app_urls/scheduled_tasks.json @@ -1,305 +1,479 @@ [ { - "model": "RepeatableTaskType", - "name": "Process error URLs", - "callable": "fetcher.tasks.process_error_urls", - "callable_args": [], - "callable_kwargs": [], - "enabled": false, - "queue": "default", - "repeat": null, - "at_front": false, - "timeout": 1800, - "result_ttl": 86400, - "cron_string": null, - "scheduled_time": "2025-01-01T00:00:00+00:00", - "interval": 8, - "interval_unit": "hours", - "successful_runs": 0, - "failed_runs": 0, - "last_successful_run": null, - "last_failed_run": null + "model": "django_celery_beat.periodictask", + "pk": 1, + "fields": { + "name": "celery.backend_cleanup", + "task": "celery.backend_cleanup", + "interval": null, + "crontab": 1, + "solar": null, + "clocked": null, + "args": "[]", + "kwargs": "{}", + "queue": null, + "exchange": null, + "routing_key": null, + "headers": "{}", + "priority": null, + "expires": null, + "expire_seconds": 43200, + "one_off": false, + "start_time": null, + "enabled": true, + "last_run_at": null, + "total_run_count": 0, + "date_changed": "2025-07-17T16:07:34.609Z", + "description": "" + } }, { - "model": "RepeatableTaskType", - "name": "Process raw URLs", - "callable": "fetcher.tasks.process_raw_urls", - "callable_args": [], - "callable_kwargs": [], - "enabled": false, - "queue": "default", - "repeat": null, - "at_front": false, - "timeout": 1800, - "result_ttl": 86400, - "cron_string": null, - "scheduled_time": "2025-01-01T00:00:00+00:00", - "interval": 10, - "interval_unit": "minutes", - "successful_runs": 0, - "failed_runs": 0, - "last_successful_run": null, - "last_failed_run": null + "model": "django_celery_beat.periodictask", + "pk": 2, + "fields": { + "name": "Process error URLs", + "task": "fetcher.tasks.process_error_urls", + "interval": 1, + "crontab": null, + "solar": null, + "clocked": null, + "args": "[]", + "kwargs": "{}", + "queue": null, + "exchange": null, + "routing_key": null, + "headers": "{}", + "priority": null, + "expires": null, + "expire_seconds": null, + "one_off": false, + "start_time": null, + "enabled": true, + "last_run_at": null, + "total_run_count": 0, + "date_changed": "2025-07-17T16:10:08.861Z", + "description": "" + } }, { - "model": "RepeatableTaskType", - "name": "Process MissingKids URLs", - "callable": "fetcher.tasks.process_missing_kids_urls", - "callable_args": [], - "callable_kwargs": [ - { - "arg_type": "int", - "key": "batch_size", - "val": 50 - } - ], - "enabled": false, - "queue": "low", - "repeat": null, - "at_front": false, - "timeout": 10800, - "result_ttl": 86400, - "cron_string": null, - "scheduled_time": "2025-01-01T00:00:00+00:00", - "interval": 1, - "interval_unit": "days", - "successful_runs": 0, - "failed_runs": 0, - "last_successful_run": null, - "last_failed_run": null + "model": "django_celery_beat.periodictask", + "pk": 3, + "fields": { + "name": "Process raw URLs", + "task": "fetcher.tasks.process_raw_urls", + "interval": 2, + "crontab": null, + "solar": null, + "clocked": null, + "args": "[]", + "kwargs": "{}", + "queue": null, + "exchange": null, + "routing_key": null, + "headers": "{}", + "priority": null, + "expires": null, + "expire_seconds": null, + "one_off": false, + "start_time": null, + "enabled": true, + "last_run_at": "2025-07-17T16:20:36.751Z", + "total_run_count": 1, + "date_changed": "2025-07-17T16:21:17.099Z", + "description": "" + } }, { - "model": "RepeatableTaskType", - "name": "Process MissingKids URLs ALL - unknown", - "callable": "fetcher.tasks.process_missing_kids_urls", - "callable_args": [], - "callable_kwargs": [ - { - "arg_type": "str", - "key": "process_status_only", - "val": "unknown" - } - ], - "enabled": false, - "queue": "low", - "repeat": null, - "at_front": false, - "timeout": 86400, - "result_ttl": 86400, - "cron_string": null, - "scheduled_time": "2025-01-01T00:00:00+00:00", - "interval": 12, - "interval_unit": "hours", - "successful_runs": 0, - "failed_runs": 0, - "last_successful_run": null, - "last_failed_run": null + "model": "django_celery_beat.periodictask", + "pk": 4, + "fields": { + "name": "Process MissingKids URLs - batch=50", + "task": "fetcher.tasks.process_missing_kids_urls", + "interval": 3, + "crontab": null, + "solar": null, + "clocked": null, + "args": "[]", + "kwargs": "{\"batch_size\": 50}", + "queue": null, + "exchange": null, + "routing_key": null, + "headers": "{}", + "priority": null, + "expires": null, + "expire_seconds": null, + "one_off": false, + "start_time": null, + "enabled": true, + "last_run_at": null, + "total_run_count": 0, + "date_changed": "2025-07-17T16:12:44.533Z", + "description": "" + } }, { - "model": "RepeatableTaskType", - "name": "Process MissingKids URLs ALL - valid", - "callable": "fetcher.tasks.process_missing_kids_urls", - "callable_args": [], - "callable_kwargs": [ - { - "arg_type": "str", - "key": "process_status_only", - "val": "valid" - } - ], - "enabled": false, - "queue": "low", - "repeat": null, - "at_front": false, - "timeout": 86400, - "result_ttl": 86400, - "cron_string": null, - "scheduled_time": "2025-01-01T00:00:00+00:00", - "interval": 2, - "interval_unit": "days", - "successful_runs": 0, - "failed_runs": 0, - "last_successful_run": null, - "last_failed_run": null + "model": "django_celery_beat.periodictask", + "pk": 5, + "fields": { + "name": "Process MissingKids URLs ALL - unknown", + "task": "fetcher.tasks.process_missing_kids_urls", + "interval": 4, + "crontab": null, + "solar": null, + "clocked": null, + "args": "[]", + "kwargs": "{\"process_status_only\": \"unknown\"}", + "queue": null, + "exchange": null, + "routing_key": null, + "headers": "{}", + "priority": null, + "expires": null, + "expire_seconds": null, + "one_off": false, + "start_time": null, + "enabled": true, + "last_run_at": null, + "total_run_count": 0, + "date_changed": "2025-07-17T16:16:38.258Z", + "description": "" + } }, { - "model": "RepeatableTaskType", - "name": "Process MissingKids URLs ALL - invalid", - "callable": "fetcher.tasks.process_missing_kids_urls", - "callable_args": [], - "callable_kwargs": [ - { - "arg_type": "str", - "key": "process_status_only", - "val": "invalid" - } - ], - "enabled": false, - "queue": "low", - "repeat": null, - "at_front": false, - "timeout": 86400, - "result_ttl": 86400, - "cron_string": null, - "scheduled_time": "2025-01-01T00:00:00+00:00", - "interval": 8, - "interval_unit": "weeks", - "successful_runs": 0, - "failed_runs": 0, - "last_successful_run": null, - "last_failed_run": null + "model": "django_celery_beat.periodictask", + "pk": 6, + "fields": { + "name": "Process MissingKids URLs ALL - valid", + "task": "fetcher.tasks.process_missing_kids_urls", + "interval": 5, + "crontab": null, + "solar": null, + "clocked": null, + "args": "[]", + "kwargs": "{\"process_status_only\": \"valid\"}", + "queue": null, + "exchange": null, + "routing_key": null, + "headers": "{}", + "priority": null, + "expires": null, + "expire_seconds": null, + "one_off": false, + "start_time": null, + "enabled": true, + "last_run_at": null, + "total_run_count": 0, + "date_changed": "2025-07-17T16:20:19.969Z", + "description": "" + } }, { - "model": "RepeatableTaskType", - "name": "Fetch Feeds", - "callable": "fetcher.tasks.fetch_feeds", - "callable_args": [], - "callable_kwargs": [], - "enabled": false, - "queue": "default", - "repeat": null, - "at_front": false, - "timeout": 1800, - "result_ttl": 86400, - "cron_string": null, - "scheduled_time": "2025-01-01T00:00:00+00:00", - "interval": 10, - "interval_unit": "minutes", - "successful_runs": 0, - "failed_runs": 0, - "last_successful_run": null, - "last_failed_run": null + "model": "django_celery_beat.periodictask", + "pk": 7, + "fields": { + "name": "Process MissingKids URLs ALL - invalid", + "task": "fetcher.tasks.process_missing_kids_urls", + "interval": 6, + "crontab": null, + "solar": null, + "clocked": null, + "args": "[]", + "kwargs": "{\"process_status_only\": \"invalid\"}", + "queue": null, + "exchange": null, + "routing_key": null, + "headers": "{}", + "priority": null, + "expires": null, + "expire_seconds": null, + "one_off": false, + "start_time": null, + "enabled": true, + "last_run_at": null, + "total_run_count": 0, + "date_changed": "2025-07-17T16:21:30.809Z", + "description": "" + } }, { - "model": "RepeatableTaskType", - "name": "Fetch Parser", - "callable": "fetcher.tasks.fetch_parser", - "callable_args": [], - "callable_kwargs": [], - "enabled": false, - "queue": "default", - "repeat": null, - "at_front": false, - "timeout": 3600, - "result_ttl": 86400, - "cron_string": null, - "scheduled_time": "2025-01-01T00:00:00+00:00", - "interval": 8, - "interval_unit": "hours", - "successful_runs": 0, - "failed_runs": 0, - "last_successful_run": null, - "last_failed_run": null + "model": "django_celery_beat.periodictask", + "pk": 8, + "fields": { + "name": "Fetch Feeds", + "task": "fetcher.tasks.fetch_feeds", + "interval": 2, + "crontab": null, + "solar": null, + "clocked": null, + "args": "[]", + "kwargs": "{}", + "queue": null, + "exchange": null, + "routing_key": null, + "headers": "{}", + "priority": null, + "expires": null, + "expire_seconds": null, + "one_off": false, + "start_time": null, + "enabled": true, + "last_run_at": null, + "total_run_count": 0, + "date_changed": "2025-07-17T16:22:15.615Z", + "description": "" + } }, { - "model": "RepeatableTaskType", - "name": "Fetch Search", - "callable": "fetcher.tasks.fetch_search", - "callable_args": [], - "callable_kwargs": [], - "enabled": false, - "queue": "default", - "repeat": null, - "at_front": false, - "timeout": 3600, - "result_ttl": 86400, - "cron_string": null, - "scheduled_time": "2025-01-01T00:00:00+00:00", - "interval": 4, - "interval_unit": "hours", - "successful_runs": 0, - "failed_runs": 0, - "last_successful_run": null, - "last_failed_run": null + "model": "django_celery_beat.periodictask", + "pk": 9, + "fields": { + "name": "Fetch Parser", + "task": "fetcher.tasks.fetch_parser", + "interval": 7, + "crontab": null, + "solar": null, + "clocked": null, + "args": "[]", + "kwargs": "{}", + "queue": null, + "exchange": null, + "routing_key": null, + "headers": "{}", + "priority": null, + "expires": null, + "expire_seconds": null, + "one_off": false, + "start_time": null, + "enabled": true, + "last_run_at": null, + "total_run_count": 0, + "date_changed": "2025-07-17T16:22:40.215Z", + "description": "" + } }, { - "model": "RepeatableTaskType", - "name": "Fetch Selenium Search", - "callable": "fetcher.tasks.fetch_selenium_search", - "callable_args": [], - "callable_kwargs": [], - "enabled": false, - "queue": "low", - "repeat": null, - "at_front": false, - "timeout": 3600, - "result_ttl": 86400, - "cron_string": null, - "scheduled_time": "2025-01-01T00:00:00+00:00", - "interval": 1, - "interval_unit": "days", - "successful_runs": 0, - "failed_runs": 0, - "last_successful_run": null, - "last_failed_run": null + "model": "django_celery_beat.periodictask", + "pk": 10, + "fields": { + "name": "Fetch Search", + "task": "fetcher.tasks.fetch_search", + "interval": 8, + "crontab": null, + "solar": null, + "clocked": null, + "args": "[]", + "kwargs": "{}", + "queue": null, + "exchange": null, + "routing_key": null, + "headers": "{}", + "priority": null, + "expires": null, + "expire_seconds": null, + "one_off": false, + "start_time": null, + "enabled": true, + "last_run_at": null, + "total_run_count": 0, + "date_changed": "2025-07-17T16:23:00.329Z", + "description": "" + } }, { - "model": "RepeatableTaskType", - "name": "Fetch MissingKids", - "callable": "fetcher.tasks.fetch_missing_kids", - "callable_args": [], - "callable_kwargs": [], - "enabled": false, - "queue": "low", - "repeat": null, - "at_front": false, - "timeout": 1800, - "result_ttl": 86400, - "cron_string": null, - "scheduled_time": "2025-01-01T00:00:00+00:00", - "interval": 12, - "interval_unit": "hours", - "successful_runs": 0, - "failed_runs": 0, - "last_successful_run": null, - "last_failed_run": null + "model": "django_celery_beat.periodictask", + "pk": 11, + "fields": { + "name": "Fetch Selenium Search", + "task": "fetcher.tasks.fetch_selenium_search", + "interval": 3, + "crontab": null, + "solar": null, + "clocked": null, + "args": "[]", + "kwargs": "{}", + "queue": null, + "exchange": null, + "routing_key": null, + "headers": "{}", + "priority": null, + "expires": null, + "expire_seconds": null, + "one_off": false, + "start_time": null, + "enabled": true, + "last_run_at": null, + "total_run_count": 0, + "date_changed": "2025-07-17T16:24:08.315Z", + "description": "" + } }, { - "model": "RepeatableTaskType", - "name": "Fetch MissingKids ALL", - "callable": "fetcher.tasks.fetch_missing_kids", - "callable_args": [], - "callable_kwargs": [ - { - "arg_type": "int", - "key": "number_pages", - "val": "-1" - } - ], - "enabled": false, - "queue": "low", - "repeat": null, - "at_front": false, - "timeout": 43200, - "result_ttl": 86400, - "cron_string": null, - "scheduled_time": "2025-01-01T00:00:00+00:00", - "interval": 1, - "interval_unit": "weeks", - "successful_runs": 0, - "failed_runs": 0, - "last_successful_run": null, - "last_failed_run": null + "model": "django_celery_beat.periodictask", + "pk": 12, + "fields": { + "name": "Fetch MissingKids - pages=5", + "task": "fetcher.tasks.fetch_missing_kids", + "interval": 4, + "crontab": null, + "solar": null, + "clocked": null, + "args": "[]", + "kwargs": "{\"number_pages\": 5}", + "queue": null, + "exchange": null, + "routing_key": null, + "headers": "{}", + "priority": null, + "expires": null, + "expire_seconds": null, + "one_off": false, + "start_time": null, + "enabled": true, + "last_run_at": null, + "total_run_count": 0, + "date_changed": "2025-07-17T16:25:02.494Z", + "description": "" + } }, { - "model": "RepeatableTaskType", - "name": "Clean old URL content", - "callable": "fetcher.tasks.clean_old_url_content", - "callable_args": [], - "callable_kwargs": [], - "enabled": false, - "queue": "default", - "repeat": null, - "at_front": false, - "timeout": null, - "result_ttl": 86400, - "cron_string": null, - "scheduled_time": "2025-01-01T00:00:00+00:00", - "interval": 1, - "interval_unit": "weeks", - "successful_runs": 0, - "failed_runs": 0, - "last_successful_run": null, - "last_failed_run": null + "model": "django_celery_beat.periodictask", + "pk": 13, + "fields": { + "name": "Fetch MissingKids - ALL", + "task": "fetcher.tasks.fetch_missing_kids", + "interval": 9, + "crontab": null, + "solar": null, + "clocked": null, + "args": "[]", + "kwargs": "{\"number_pages\": -1}", + "queue": null, + "exchange": null, + "routing_key": null, + "headers": "{}", + "priority": null, + "expires": null, + "expire_seconds": null, + "one_off": false, + "start_time": null, + "enabled": true, + "last_run_at": null, + "total_run_count": 0, + "date_changed": "2025-07-17T16:25:50.597Z", + "description": "" + } + }, + { + "model": "django_celery_beat.periodictask", + "pk": 14, + "fields": { + "name": "Clean old URL content", + "task": "fetcher.tasks.clean_old_url_content", + "interval": 9, + "crontab": null, + "solar": null, + "clocked": null, + "args": "[]", + "kwargs": "{}", + "queue": null, + "exchange": null, + "routing_key": null, + "headers": "{}", + "priority": null, + "expires": null, + "expire_seconds": null, + "one_off": false, + "start_time": null, + "enabled": true, + "last_run_at": null, + "total_run_count": 0, + "date_changed": "2025-07-17T16:26:16.272Z", + "description": "" + } + }, + { + "model": "django_celery_beat.intervalschedule", + "pk": 1, + "fields": { + "every": 6, + "period": "hours" + } + }, + { + "model": "django_celery_beat.intervalschedule", + "pk": 2, + "fields": { + "every": 10, + "period": "minutes" + } + }, + { + "model": "django_celery_beat.intervalschedule", + "pk": 3, + "fields": { + "every": 1, + "period": "days" + } + }, + { + "model": "django_celery_beat.intervalschedule", + "pk": 4, + "fields": { + "every": 12, + "period": "hours" + } + }, + { + "model": "django_celery_beat.intervalschedule", + "pk": 5, + "fields": { + "every": 2, + "period": "days" + } + }, + { + "model": "django_celery_beat.intervalschedule", + "pk": 6, + "fields": { + "every": 28, + "period": "days" + } + }, + { + "model": "django_celery_beat.intervalschedule", + "pk": 7, + "fields": { + "every": 8, + "period": "hours" + } + }, + { + "model": "django_celery_beat.intervalschedule", + "pk": 8, + "fields": { + "every": 4, + "period": "hours" + } + }, + { + "model": "django_celery_beat.intervalschedule", + "pk": 9, + "fields": { + "every": 7, + "period": "days" + } + }, + { + "model": "django_celery_beat.crontabschedule", + "pk": 1, + "fields": { + "minute": "0", + "hour": "4", + "day_of_month": "*", + "month_of_year": "*", + "day_of_week": "*", + "timezone": "UTC" + } } -] + ] + \ No newline at end of file diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 8acb44f..563c784 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -26,9 +26,9 @@ services: # - default # This network # - docker_default # Reverse proxy network ports: - - 8000:8000 - volumes: # Development mode - - ./app_urls:/opt/app + - 8005:8000 + ## volumes: # Development mode + ## - ./app_urls:/opt/app deploy: resources: limits: