Switching to django celery for workers

This commit is contained in:
Luciano Gervasoni
2025-07-17 22:29:06 +02:00
parent 50e8666162
commit cb621c9d6b
15 changed files with 540 additions and 348 deletions

View File

@@ -73,6 +73,17 @@ class Meta:
* Environment variables * Environment variables
* In docker-compose.yml * In docker-compose.yml
* Tasks
```
python manage.py dumpdata \
django_celery_beat.PeriodicTask \
django_celery_beat.IntervalSchedule \
django_celery_beat.CrontabSchedule \
django_celery_beat.SolarSchedule \
django_celery_beat.ClockedSchedule \
--indent 2 > scheduled_tasks.json
```
* Deploy * Deploy
``` ```
# Check environment variables in the .env file # Check environment variables in the .env file

View File

@@ -0,0 +1,3 @@
# Re-export the Celery application defined in the sibling `celery` module
# under the name `celery_app`, so it can be imported from the package itself.
# NOTE(review): presumably this follows Celery's Django integration guide,
# where importing the app in the package __init__ lets @shared_task bind to
# it when Django starts — confirm.
from .celery import app as celery_app
__all__ = ('celery_app',)

14
app_urls/core/celery.py Normal file
View File

@@ -0,0 +1,14 @@
# core/celery.py
# Celery application bootstrap: creates the module-level `app` instance and
# configures it from the Django settings module.
import os
from celery import Celery
# Set the default Django settings module. setdefault() only applies when
# DJANGO_SETTINGS_MODULE is not already present in the environment.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
# Celery application object, named 'core'.
app = Celery('core')
# Load config from Django settings. With namespace='CELERY', Celery reads the
# settings whose names carry the CELERY_ prefix (e.g. CELERY_BROKER_URL).
app.config_from_object('django.conf:settings', namespace='CELERY')
# Auto-discover tasks from all registered Django app configs
# (by default Celery looks for a `tasks` module in each installed app).
app.autodiscover_tasks()

View File

@@ -12,14 +12,12 @@ https://docs.djangoproject.com/en/5.1/ref/settings/
from pathlib import Path from pathlib import Path
import os import os
from typing import Dict # Queues and routing
from scheduler.types import SchedulerConfiguration, Broker, QueueConfiguration from kombu import Queue
# Build paths inside the project like this: BASE_DIR / 'subdir'. # Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent BASE_DIR = Path(__file__).resolve().parent.parent
# Quick-start development settings - unsuitable for production # Quick-start development settings - unsuitable for production
# SECURITY WARNING: keep the secret key used in production secret! # SECURITY WARNING: keep the secret key used in production secret!
@@ -40,7 +38,7 @@ INSTALLED_APPS = [
'django.contrib.sessions', 'django.contrib.sessions',
'django.contrib.messages', 'django.contrib.messages',
'django.contrib.staticfiles', 'django.contrib.staticfiles',
'scheduler', 'django_celery_beat',
'fetcher', 'fetcher',
] ]
@@ -110,27 +108,21 @@ CACHES = {
} }
} }
SCHEDULER_CONFIG = SchedulerConfiguration(
EXECUTIONS_IN_PAGE=20,
SCHEDULER_INTERVAL=10, # Celery configuration
BROKER=Broker.REDIS, CELERY_BROKER_URL = 'redis://{}:{}/{}'.format(os.environ.get("REDIS_HOST", "localhost"), os.environ.get("REDIS_PORT", 6379), os.environ.get("REDIS_DB", 0))
CALLBACK_TIMEOUT=60, # Callback timeout in seconds (success/failure/stopped) CELERY_RESULT_BACKEND = 'redis://{}:{}/{}'.format(os.environ.get("REDIS_HOST", "localhost"), os.environ.get("REDIS_PORT", 6379), os.environ.get("REDIS_DB_RESULTS", 1))
# Default values, can be overriden per task/job CELERY_ACCEPT_CONTENT = ['json']
DEFAULT_SUCCESS_TTL=10 * 60, # Time To Live (TTL) in seconds to keep successful job results CELERY_TASK_SERIALIZER = 'json'
DEFAULT_FAILURE_TTL=365 * 24 * 60 * 60, # Time To Live (TTL) in seconds to keep job failure information
DEFAULT_JOB_TTL=10 * 60, # Time To Live (TTL) in seconds to keep job information # Celery Beat scheduler (required for django-celery-beat to work)
DEFAULT_JOB_TIMEOUT=os.environ.get("JOB_DEFAULT_TIMEOUT", 60*30), # timeout (seconds) for a job CELERY_BEAT_SCHEDULER = 'django_celery_beat.schedulers.DatabaseScheduler'
# General configuration values
DEFAULT_WORKER_TTL=10 * 60, # Time To Live (TTL) in seconds to keep worker information after last heartbeat CELERY_TASK_QUEUES = (
DEFAULT_MAINTENANCE_TASK_INTERVAL=10 * 60, # The interval to run maintenance tasks in seconds. 10 minutes. Queue('default'),
DEFAULT_JOB_MONITORING_INTERVAL=30, # The interval to monitor jobs in seconds. Queue('low'),
SCHEDULER_FALLBACK_PERIOD_SECS=120, # Period (secs) to wait before requiring to reacquire locks
) )
SCHEDULER_QUEUES: Dict[str, QueueConfiguration] = {
'default': QueueConfiguration(URL='redis://{}:{}/{}'.format(os.environ.get("REDIS_HOST", "localhost"), os.environ.get("REDIS_PORT", 6379), os.environ.get("REDIS_DB", 0))),
'high': QueueConfiguration(URL='redis://{}:{}/{}'.format(os.environ.get("REDIS_HOST", "localhost"), os.environ.get("REDIS_PORT", 6379), os.environ.get("REDIS_DB", 0))),
'low': QueueConfiguration(URL='redis://{}:{}/{}'.format(os.environ.get("REDIS_HOST", "localhost"), os.environ.get("REDIS_PORT", 6379), os.environ.get("REDIS_DB", 0))),
}
# Password validation # Password validation

View File

@@ -19,6 +19,5 @@ from django.urls import path, include
urlpatterns = [ urlpatterns = [
path('admin/', admin.site.urls), path('admin/', admin.site.urls),
path('scheduler/', include('scheduler.urls')),
path('', include('fetcher.urls')), path('', include('fetcher.urls')),
] ]

View File

@@ -1,48 +1,37 @@
import logging import logging
import os import os
# Set to warning
logging.getLogger("urllib3").setLevel(logging.WARNING)
logging.getLogger("newspaper").setLevel(logging.WARNING)
# Get env var # Get env var
logs_directory = os.getenv("PATH_LOGS_DIRECTORY", "logs") logs_directory = os.getenv("PATH_LOGS_DIRECTORY", "logs")
# Directory of logs # Directory of logs
os.makedirs(logs_directory, exist_ok=True) os.makedirs(logs_directory, exist_ok=True)
class PPIDFilter(logging.Filter): logging.basicConfig(format='%(filename)s | %(levelname)s | %(asctime)s | %(message)s')
def filter(self, record):
# record.ppid = str(os.getppid()) + " " + multiprocessing.current_process().name # os.environ.get("PPID", "*" + os.environ.get("PID"))
record.ppid = os.getppid()
return True
logging.basicConfig(format='%(filename)s | PPID=%(ppid)s | %(levelname)s | %(asctime)s | %(message)s')
logger = logging.getLogger("fetcher") logger = logging.getLogger("fetcher")
# logger.setFormatter(logging.Formatter('%(levelname)s | PPID=%(ppid)s | %(asctime)s | %(message)s')) logger.setLevel(logging.DEBUG)
logger.addFilter(PPIDFilter())
logger.setLevel(logging.INFO)
# To file log: DEBUG / INFO / WARNING / ERROR / CRITICAL # To file log: DEBUG / INFO / WARNING / ERROR / CRITICAL
fh = logging.handlers.RotatingFileHandler(filename=os.path.join(logs_directory, "debug.log"), mode="a", maxBytes=10000000, backupCount=1) fh = logging.handlers.RotatingFileHandler(filename=os.path.join(logs_directory, "debug.log"), mode="a", maxBytes=10000000, backupCount=1)
fh.setFormatter(logging.Formatter('%(levelname)s | PPID=%(ppid)s | %(asctime)s | %(message)s')) fh.setFormatter(logging.Formatter('%(levelname)s | %(asctime)s | %(message)s'))
fh.addFilter(PPIDFilter())
fh.setLevel(logging.DEBUG) fh.setLevel(logging.DEBUG)
logger.addHandler(fh) logger.addHandler(fh)
# To file log: INFO / WARNING / ERROR # To file log: INFO / WARNING / ERROR
fh = logging.handlers.RotatingFileHandler(filename=os.path.join(logs_directory, "info.log"), mode="a", maxBytes=10000000, backupCount=1) fh = logging.handlers.RotatingFileHandler(filename=os.path.join(logs_directory, "info.log"), mode="a", maxBytes=10000000, backupCount=1)
fh.setFormatter(logging.Formatter('%(levelname)s | PPID=%(ppid)s | %(asctime)s | %(message)s')) fh.setFormatter(logging.Formatter('%(levelname)s | %(asctime)s | %(message)s'))
fh.addFilter(PPIDFilter())
fh.setLevel(logging.INFO) fh.setLevel(logging.INFO)
logger.addHandler(fh) logger.addHandler(fh)
# To file log: WARNING / ERROR / CRITICAL # To file log: WARNING / ERROR / CRITICAL
fh = logging.handlers.RotatingFileHandler(filename=os.path.join(logs_directory, "warning.log"), mode="a", maxBytes=10000000, backupCount=1) fh = logging.handlers.RotatingFileHandler(filename=os.path.join(logs_directory, "warning.log"), mode="a", maxBytes=10000000, backupCount=1)
fh.setFormatter(logging.Formatter('%(levelname)s | PPID=%(ppid)s | %(asctime)s | %(message)s')) fh.setFormatter(logging.Formatter('%(levelname)s | %(asctime)s | %(message)s'))
fh.addFilter(PPIDFilter())
fh.setLevel(logging.WARNING) fh.setLevel(logging.WARNING)
logger.addHandler(fh) logger.addHandler(fh)
# Set to warning
logging.getLogger("urllib3").setLevel(logging.WARNING)
logging.getLogger("newspaper").setLevel(logging.WARNING)
def get_logger(): def get_logger():
return logger return logger

View File

@@ -1,4 +1,4 @@
from scheduler import job from celery import shared_task
from .src.fetch_feed import FetchFeeds from .src.fetch_feed import FetchFeeds
from .src.fetch_parser import FetchParser from .src.fetch_parser import FetchParser
@@ -11,63 +11,64 @@ from .src.publisher import Publisher
from .src.logger import get_logger from .src.logger import get_logger
logger = get_logger() logger = get_logger()
@job('default')
@shared_task(queue='default')
def fetch_feeds(): def fetch_feeds():
task = "Fetch Feeds" task = "Fetch Feeds"
logger.info("Task triggered: {}".format(task)) logger.info("Task triggered: {}".format(task))
FetchFeeds().run() FetchFeeds().run()
logger.info("Task completed: {}".format(task)) logger.info("Task completed: {}".format(task))
@job('default') @shared_task(queue='default')
def fetch_parser(): def fetch_parser():
task = "Fetch Parser" task = "Fetch Parser"
logger.info("Task triggered: {}".format(task)) logger.info("Task triggered: {}".format(task))
FetchParser().run() FetchParser().run()
logger.info("Task completed: {}".format(task)) logger.info("Task completed: {}".format(task))
@job('default') @shared_task(queue='default')
def fetch_search(): def fetch_search():
task = "Fetch Search" task = "Fetch Search"
logger.info("Task triggered: {}".format(task)) logger.info("Task triggered: {}".format(task))
FetchSearcher().run() FetchSearcher().run()
logger.info("Task completed: {}".format(task)) logger.info("Task completed: {}".format(task))
@job('default') @shared_task(queue='low')
def fetch_selenium_search(): def fetch_selenium_search():
task = "Fetch Selenium search" task = "Fetch Selenium search"
logger.info("Task triggered: {}".format(task)) logger.info("Task triggered: {}".format(task))
FetchSeleniumSourceSearch().run() FetchSeleniumSourceSearch().run()
logger.info("Task completed: {}".format(task)) logger.info("Task completed: {}".format(task))
@job('default') @shared_task(queue='low')
def fetch_missing_kids(number_pages=5): def fetch_missing_kids(number_pages=5):
task = "Fetch MissingKids" task = "Fetch MissingKids"
logger.info("Task triggered: {}".format(task)) logger.info("Task triggered: {}".format(task))
FetchMissingKids().run(number_pages) FetchMissingKids().run(number_pages)
logger.info("Task completed: {}".format(task)) logger.info("Task completed: {}".format(task))
@job('default') @shared_task(queue='default')
def process_raw_urls(batch_size=100): def process_raw_urls(batch_size=100):
task = "Process raw URLs" task = "Process raw URLs"
logger.info("Task triggered: {}".format(task)) logger.info("Task triggered: {}".format(task))
DB_Handler().process_raw_urls(batch_size=batch_size) DB_Handler().process_raw_urls(batch_size=batch_size)
logger.info("Task completed: {}".format(task)) logger.info("Task completed: {}".format(task))
@job('default') @shared_task(queue='default')
def process_error_urls(batch_size=50): def process_error_urls(batch_size=50):
task = "Process error URLs" task = "Process error URLs"
logger.info("Task triggered: {}".format(task)) logger.info("Task triggered: {}".format(task))
DB_Handler().process_error_urls(batch_size=batch_size) DB_Handler().process_error_urls(batch_size=batch_size)
logger.info("Task completed: {}".format(task)) logger.info("Task completed: {}".format(task))
@job('default') @shared_task(queue='low')
def process_missing_kids_urls(batch_size=None, process_status_only=None): def process_missing_kids_urls(batch_size=None, process_status_only=None):
task = "Process Missing Kids URLs - batch_size={} process_status_only={}".format(batch_size, process_status_only) task = "Process Missing Kids URLs - batch_size={} process_status_only={}".format(batch_size, process_status_only)
logger.info("Task triggered: {}".format(task)) logger.info("Task triggered: {}".format(task))
DB_Handler().process_missing_kids_urls(batch_size=batch_size, process_status_only=process_status_only) DB_Handler().process_missing_kids_urls(batch_size=batch_size, process_status_only=process_status_only)
logger.info("Task completed: {}".format(task)) logger.info("Task completed: {}".format(task))
@job('default') @shared_task(queue='default')
def clean_old_url_content(older_than_days=14): def clean_old_url_content(older_than_days=14):
task = "Clean old URL content" task = "Clean old URL content"
logger.info("Task triggered: {}".format(task)) logger.info("Task triggered: {}".format(task))
@@ -75,6 +76,7 @@ def clean_old_url_content(older_than_days=14):
logger.info("Task completed: {}".format(task)) logger.info("Task completed: {}".format(task))
'''
@job('default') @job('default')
def background_task(process_type: str): def background_task(process_type: str):
logger.info("Task triggered: {}".format(process_type)) logger.info("Task triggered: {}".format(process_type))
@@ -143,3 +145,4 @@ def background_task(process_type: str):
logger.info("Task completed: {}".format(process_type)) logger.info("Task completed: {}".format(process_type))
except Exception as e: except Exception as e:
logger.error(e) logger.error(e)
'''

View File

@@ -7,8 +7,6 @@ urlpatterns = [
path('logs/database', views.log_db, name='log_db'), path('logs/database', views.log_db, name='log_db'),
path('logs/<str:log_type>', views.logs, name='logs'), path('logs/<str:log_type>', views.logs, name='logs'),
# #
path('task/<str:task>', views.trigger_task, name='trigger_task'),
#
path('urls/charts/', views.charts, name='charts'), path('urls/charts/', views.charts, name='charts'),
path('urls-by-fetch-date/', views.urls_by_fetch_date, name='urls_by_fetch_date'), path('urls-by-fetch-date/', views.urls_by_fetch_date, name='urls_by_fetch_date'),
path('urls-per-status/', views.urls_per_status, name='urls_per_status'), path('urls-per-status/', views.urls_per_status, name='urls_per_status'),

View File

@@ -1,4 +1,4 @@
from .views_base import link_list, logs, log_db, trigger_task from .views_base import link_list, logs, log_db #, trigger_task,
from django.core.paginator import Paginator from django.core.paginator import Paginator
from django.shortcuts import render, get_object_or_404 from django.shortcuts import render, get_object_or_404

View File

@@ -1,15 +1,17 @@
import os import os
from .tasks import background_task
from django.http import JsonResponse, HttpResponse from django.http import JsonResponse, HttpResponse
from django.db import connection from django.db import connection
#################################################################################################### ####################################################################################################
"""
### from .tasks import background_task
def trigger_task(request, task): def trigger_task(request, task):
# Enqueue function in "default" queue # Enqueue function in "default" queue
background_task.delay(task) background_task.delay(task)
return JsonResponse({"message": "Task has been enqueued!", "task": task}) return JsonResponse({"message": "Task has been enqueued!", "task": task})
"""
####################################################################################################
def link_list(request): def link_list(request):
# Base URL path # Base URL path
app_url = request.build_absolute_uri() app_url = request.build_absolute_uri()
@@ -19,8 +21,8 @@ def link_list(request):
# List of links # List of links
list_links = \ list_links = \
[ os.path.join(app_url, "admin"), os.path.join(app_url, "urls") ] + \ [ os.path.join(app_url, "admin"), os.path.join(app_url, "urls") ] + \
[ os.path.join(app_url, "logs", log_type) for log_type in ["database", "debug", "info", "warning"] ] + \ [ os.path.join(app_url, "logs", log_type) for log_type in ["database", "debug", "info", "warning"] ] #+ \
[ os.path.join(app_url, "task", l) for l in links_fetch + links_process ] #[ os.path.join(app_url, "task", l) for l in links_fetch + links_process ]
# Links tuple # Links tuple
links = [(l, l) for l in list_links] links = [(l, l) for l in list_links]
@@ -32,6 +34,7 @@ def link_list(request):
return HttpResponse(html) return HttpResponse(html)
#################################################################################################### ####################################################################################################
def logs(request, log_type): def logs(request, log_type):
# Capture output: python manage.py rqstats # Capture output: python manage.py rqstats

View File

@@ -6,9 +6,10 @@ else
echo "Initializating database" echo "Initializating database"
python init_db.py --initialize_tables --initialize_data python init_db.py --initialize_tables --initialize_data
python manage.py makemigrations fetcher; python manage.py migrate --fake-initial python manage.py makemigrations fetcher; python manage.py migrate --fake-initial
python manage.py migrate django_celery_beat
python manage.py createsuperuser --noinput python manage.py createsuperuser --noinput
python manage.py collectstatic --no-input python manage.py collectstatic --no-input
python manage.py import --filename scheduled_tasks.json python manage.py loaddata scheduled_tasks.json
# #
# python manage.py inspectdb # Debugging model # python manage.py inspectdb # Debugging model
fi fi

View File

@@ -1,5 +1,5 @@
django==5.1 django==5.1
django-tasks-scheduler==4.0.5 django-celery-beat
django-redis django-redis
psycopg[binary] psycopg[binary]
gunicorn gunicorn

View File

@@ -7,7 +7,12 @@ else
echo "Running in PROD mode" echo "Running in PROD mode"
# Multi-worker # Multi-worker
# gunicorn core.wsgi:application --bind 0.0.0.0:8000 --timeout 86400 & while true; do echo "Initializing worker default" >> /opt/logs/warning.log; python manage.py scheduler_worker -v 2 --traceback default high 2>> /opt/logs/warning.log; done & while true; do echo "Initializing worker low" >> /opt/logs/warning.log; python manage.py scheduler_worker -v 2 --without-scheduler --traceback low 2>> /opt/logs/warning.log; done # gunicorn core.wsgi:application --bind 0.0.0.0:8000 --timeout 86400 & while true; do echo "Initializing worker default" >> /opt/logs/warning.log; python manage.py scheduler_worker -v 2 --traceback default high 2>> /opt/logs/warning.log; done & while true; do echo "Initializing worker low" >> /opt/logs/warning.log; python manage.py scheduler_worker -v 2 --without-scheduler --traceback low 2>> /opt/logs/warning.log; done
(sleep 10; while true; do echo "Initializing worker default" >> /opt/logs/info.log; python manage.py scheduler_worker -v 1 --worker-ttl 172800 --traceback --name default default high; sleep 120; done) & #
(sleep 10; while true; do echo "Initializing worker low" >> /opt/logs/info.log; python manage.py scheduler_worker -v 1 --worker-ttl 172800 --traceback --name low low; sleep 120; done) & #(sleep 10; while true; do echo "Initializing worker default" >> /opt/logs/info.log; python manage.py scheduler_worker -v 1 --worker-ttl 172800 --traceback --name default default high; sleep 120; done) &
#(sleep 10; while true; do echo "Initializing worker low" >> /opt/logs/info.log; python manage.py scheduler_worker -v 1 --worker-ttl 172800 --traceback --name low low; sleep 120; done) &
#
celery -A core beat -l info &
celery -A core worker -l info --concurrency=1 -Q default &
celery -A core worker -l info --concurrency=1 -Q low &
gunicorn core.wsgi:application --bind 0.0.0.0:8000 --timeout 172800 gunicorn core.wsgi:application --bind 0.0.0.0:8000 --timeout 172800
fi fi

View File

@@ -1,305 +1,479 @@
[ [
{ {
"model": "RepeatableTaskType", "model": "django_celery_beat.periodictask",
"name": "Process error URLs", "pk": 1,
"callable": "fetcher.tasks.process_error_urls", "fields": {
"callable_args": [], "name": "celery.backend_cleanup",
"callable_kwargs": [], "task": "celery.backend_cleanup",
"enabled": false, "interval": null,
"queue": "default", "crontab": 1,
"repeat": null, "solar": null,
"at_front": false, "clocked": null,
"timeout": 1800, "args": "[]",
"result_ttl": 86400, "kwargs": "{}",
"cron_string": null, "queue": null,
"scheduled_time": "2025-01-01T00:00:00+00:00", "exchange": null,
"interval": 8, "routing_key": null,
"interval_unit": "hours", "headers": "{}",
"successful_runs": 0, "priority": null,
"failed_runs": 0, "expires": null,
"last_successful_run": null, "expire_seconds": 43200,
"last_failed_run": null "one_off": false,
"start_time": null,
"enabled": true,
"last_run_at": null,
"total_run_count": 0,
"date_changed": "2025-07-17T16:07:34.609Z",
"description": ""
}
}, },
{ {
"model": "RepeatableTaskType", "model": "django_celery_beat.periodictask",
"name": "Process raw URLs", "pk": 2,
"callable": "fetcher.tasks.process_raw_urls", "fields": {
"callable_args": [], "name": "Process error URLs",
"callable_kwargs": [], "task": "fetcher.tasks.process_error_urls",
"enabled": false, "interval": 1,
"queue": "default", "crontab": null,
"repeat": null, "solar": null,
"at_front": false, "clocked": null,
"timeout": 1800, "args": "[]",
"result_ttl": 86400, "kwargs": "{}",
"cron_string": null, "queue": null,
"scheduled_time": "2025-01-01T00:00:00+00:00", "exchange": null,
"interval": 10, "routing_key": null,
"interval_unit": "minutes", "headers": "{}",
"successful_runs": 0, "priority": null,
"failed_runs": 0, "expires": null,
"last_successful_run": null, "expire_seconds": null,
"last_failed_run": null "one_off": false,
"start_time": null,
"enabled": true,
"last_run_at": null,
"total_run_count": 0,
"date_changed": "2025-07-17T16:10:08.861Z",
"description": ""
}
}, },
{ {
"model": "RepeatableTaskType", "model": "django_celery_beat.periodictask",
"name": "Process MissingKids URLs", "pk": 3,
"callable": "fetcher.tasks.process_missing_kids_urls", "fields": {
"callable_args": [], "name": "Process raw URLs",
"callable_kwargs": [ "task": "fetcher.tasks.process_raw_urls",
{ "interval": 2,
"arg_type": "int", "crontab": null,
"key": "batch_size", "solar": null,
"val": 50 "clocked": null,
} "args": "[]",
], "kwargs": "{}",
"enabled": false, "queue": null,
"queue": "low", "exchange": null,
"repeat": null, "routing_key": null,
"at_front": false, "headers": "{}",
"timeout": 10800, "priority": null,
"result_ttl": 86400, "expires": null,
"cron_string": null, "expire_seconds": null,
"scheduled_time": "2025-01-01T00:00:00+00:00", "one_off": false,
"interval": 1, "start_time": null,
"interval_unit": "days", "enabled": true,
"successful_runs": 0, "last_run_at": "2025-07-17T16:20:36.751Z",
"failed_runs": 0, "total_run_count": 1,
"last_successful_run": null, "date_changed": "2025-07-17T16:21:17.099Z",
"last_failed_run": null "description": ""
}
}, },
{ {
"model": "RepeatableTaskType", "model": "django_celery_beat.periodictask",
"name": "Process MissingKids URLs ALL - unknown", "pk": 4,
"callable": "fetcher.tasks.process_missing_kids_urls", "fields": {
"callable_args": [], "name": "Process MissingKids URLs - batch=50",
"callable_kwargs": [ "task": "fetcher.tasks.process_missing_kids_urls",
{ "interval": 3,
"arg_type": "str", "crontab": null,
"key": "process_status_only", "solar": null,
"val": "unknown" "clocked": null,
} "args": "[]",
], "kwargs": "{\"batch_size\": 50}",
"enabled": false, "queue": null,
"queue": "low", "exchange": null,
"repeat": null, "routing_key": null,
"at_front": false, "headers": "{}",
"timeout": 86400, "priority": null,
"result_ttl": 86400, "expires": null,
"cron_string": null, "expire_seconds": null,
"scheduled_time": "2025-01-01T00:00:00+00:00", "one_off": false,
"interval": 12, "start_time": null,
"interval_unit": "hours", "enabled": true,
"successful_runs": 0, "last_run_at": null,
"failed_runs": 0, "total_run_count": 0,
"last_successful_run": null, "date_changed": "2025-07-17T16:12:44.533Z",
"last_failed_run": null "description": ""
}
}, },
{ {
"model": "RepeatableTaskType", "model": "django_celery_beat.periodictask",
"name": "Process MissingKids URLs ALL - valid", "pk": 5,
"callable": "fetcher.tasks.process_missing_kids_urls", "fields": {
"callable_args": [], "name": "Process MissingKids URLs ALL - unknown",
"callable_kwargs": [ "task": "fetcher.tasks.process_missing_kids_urls",
{ "interval": 4,
"arg_type": "str", "crontab": null,
"key": "process_status_only", "solar": null,
"val": "valid" "clocked": null,
} "args": "[]",
], "kwargs": "{\"process_status_only\": \"unknown\"}",
"enabled": false, "queue": null,
"queue": "low", "exchange": null,
"repeat": null, "routing_key": null,
"at_front": false, "headers": "{}",
"timeout": 86400, "priority": null,
"result_ttl": 86400, "expires": null,
"cron_string": null, "expire_seconds": null,
"scheduled_time": "2025-01-01T00:00:00+00:00", "one_off": false,
"interval": 2, "start_time": null,
"interval_unit": "days", "enabled": true,
"successful_runs": 0, "last_run_at": null,
"failed_runs": 0, "total_run_count": 0,
"last_successful_run": null, "date_changed": "2025-07-17T16:16:38.258Z",
"last_failed_run": null "description": ""
}
}, },
{ {
"model": "RepeatableTaskType", "model": "django_celery_beat.periodictask",
"name": "Process MissingKids URLs ALL - invalid", "pk": 6,
"callable": "fetcher.tasks.process_missing_kids_urls", "fields": {
"callable_args": [], "name": "Process MissingKids URLs ALL - valid",
"callable_kwargs": [ "task": "fetcher.tasks.process_missing_kids_urls",
{ "interval": 5,
"arg_type": "str", "crontab": null,
"key": "process_status_only", "solar": null,
"val": "invalid" "clocked": null,
} "args": "[]",
], "kwargs": "{\"process_status_only\": \"valid\"}",
"enabled": false, "queue": null,
"queue": "low", "exchange": null,
"repeat": null, "routing_key": null,
"at_front": false, "headers": "{}",
"timeout": 86400, "priority": null,
"result_ttl": 86400, "expires": null,
"cron_string": null, "expire_seconds": null,
"scheduled_time": "2025-01-01T00:00:00+00:00", "one_off": false,
"interval": 8, "start_time": null,
"interval_unit": "weeks", "enabled": true,
"successful_runs": 0, "last_run_at": null,
"failed_runs": 0, "total_run_count": 0,
"last_successful_run": null, "date_changed": "2025-07-17T16:20:19.969Z",
"last_failed_run": null "description": ""
}
}, },
{ {
"model": "RepeatableTaskType", "model": "django_celery_beat.periodictask",
"name": "Fetch Feeds", "pk": 7,
"callable": "fetcher.tasks.fetch_feeds", "fields": {
"callable_args": [], "name": "Process MissingKids URLs ALL - invalid",
"callable_kwargs": [], "task": "fetcher.tasks.process_missing_kids_urls",
"enabled": false, "interval": 6,
"queue": "default", "crontab": null,
"repeat": null, "solar": null,
"at_front": false, "clocked": null,
"timeout": 1800, "args": "[]",
"result_ttl": 86400, "kwargs": "{\"process_status_only\": \"invalid\"}",
"cron_string": null, "queue": null,
"scheduled_time": "2025-01-01T00:00:00+00:00", "exchange": null,
"interval": 10, "routing_key": null,
"interval_unit": "minutes", "headers": "{}",
"successful_runs": 0, "priority": null,
"failed_runs": 0, "expires": null,
"last_successful_run": null, "expire_seconds": null,
"last_failed_run": null "one_off": false,
"start_time": null,
"enabled": true,
"last_run_at": null,
"total_run_count": 0,
"date_changed": "2025-07-17T16:21:30.809Z",
"description": ""
}
}, },
{ {
"model": "RepeatableTaskType", "model": "django_celery_beat.periodictask",
"name": "Fetch Parser", "pk": 8,
"callable": "fetcher.tasks.fetch_parser", "fields": {
"callable_args": [], "name": "Fetch Feeds",
"callable_kwargs": [], "task": "fetcher.tasks.fetch_feeds",
"enabled": false, "interval": 2,
"queue": "default", "crontab": null,
"repeat": null, "solar": null,
"at_front": false, "clocked": null,
"timeout": 3600, "args": "[]",
"result_ttl": 86400, "kwargs": "{}",
"cron_string": null, "queue": null,
"scheduled_time": "2025-01-01T00:00:00+00:00", "exchange": null,
"interval": 8, "routing_key": null,
"interval_unit": "hours", "headers": "{}",
"successful_runs": 0, "priority": null,
"failed_runs": 0, "expires": null,
"last_successful_run": null, "expire_seconds": null,
"last_failed_run": null "one_off": false,
"start_time": null,
"enabled": true,
"last_run_at": null,
"total_run_count": 0,
"date_changed": "2025-07-17T16:22:15.615Z",
"description": ""
}
}, },
{ {
"model": "RepeatableTaskType", "model": "django_celery_beat.periodictask",
"name": "Fetch Search", "pk": 9,
"callable": "fetcher.tasks.fetch_search", "fields": {
"callable_args": [], "name": "Fetch Parser",
"callable_kwargs": [], "task": "fetcher.tasks.fetch_parser",
"enabled": false, "interval": 7,
"queue": "default", "crontab": null,
"repeat": null, "solar": null,
"at_front": false, "clocked": null,
"timeout": 3600, "args": "[]",
"result_ttl": 86400, "kwargs": "{}",
"cron_string": null, "queue": null,
"scheduled_time": "2025-01-01T00:00:00+00:00", "exchange": null,
"interval": 4, "routing_key": null,
"interval_unit": "hours", "headers": "{}",
"successful_runs": 0, "priority": null,
"failed_runs": 0, "expires": null,
"last_successful_run": null, "expire_seconds": null,
"last_failed_run": null "one_off": false,
"start_time": null,
"enabled": true,
"last_run_at": null,
"total_run_count": 0,
"date_changed": "2025-07-17T16:22:40.215Z",
"description": ""
}
}, },
{ {
"model": "RepeatableTaskType", "model": "django_celery_beat.periodictask",
"name": "Fetch Selenium Search", "pk": 10,
"callable": "fetcher.tasks.fetch_selenium_search", "fields": {
"callable_args": [], "name": "Fetch Search",
"callable_kwargs": [], "task": "fetcher.tasks.fetch_search",
"enabled": false, "interval": 8,
"queue": "low", "crontab": null,
"repeat": null, "solar": null,
"at_front": false, "clocked": null,
"timeout": 3600, "args": "[]",
"result_ttl": 86400, "kwargs": "{}",
"cron_string": null, "queue": null,
"scheduled_time": "2025-01-01T00:00:00+00:00", "exchange": null,
"interval": 1, "routing_key": null,
"interval_unit": "days", "headers": "{}",
"successful_runs": 0, "priority": null,
"failed_runs": 0, "expires": null,
"last_successful_run": null, "expire_seconds": null,
"last_failed_run": null "one_off": false,
"start_time": null,
"enabled": true,
"last_run_at": null,
"total_run_count": 0,
"date_changed": "2025-07-17T16:23:00.329Z",
"description": ""
}
}, },
{ {
"model": "RepeatableTaskType", "model": "django_celery_beat.periodictask",
"name": "Fetch MissingKids", "pk": 11,
"callable": "fetcher.tasks.fetch_missing_kids", "fields": {
"callable_args": [], "name": "Fetch Selenium Search",
"callable_kwargs": [], "task": "fetcher.tasks.fetch_selenium_search",
"enabled": false, "interval": 3,
"queue": "low", "crontab": null,
"repeat": null, "solar": null,
"at_front": false, "clocked": null,
"timeout": 1800, "args": "[]",
"result_ttl": 86400, "kwargs": "{}",
"cron_string": null, "queue": null,
"scheduled_time": "2025-01-01T00:00:00+00:00", "exchange": null,
"interval": 12, "routing_key": null,
"interval_unit": "hours", "headers": "{}",
"successful_runs": 0, "priority": null,
"failed_runs": 0, "expires": null,
"last_successful_run": null, "expire_seconds": null,
"last_failed_run": null "one_off": false,
"start_time": null,
"enabled": true,
"last_run_at": null,
"total_run_count": 0,
"date_changed": "2025-07-17T16:24:08.315Z",
"description": ""
}
}, },
{ {
"model": "RepeatableTaskType", "model": "django_celery_beat.periodictask",
"name": "Fetch MissingKids ALL", "pk": 12,
"callable": "fetcher.tasks.fetch_missing_kids", "fields": {
"callable_args": [], "name": "Fetch MissingKids - pages=5",
"callable_kwargs": [ "task": "fetcher.tasks.fetch_missing_kids",
{ "interval": 4,
"arg_type": "int", "crontab": null,
"key": "number_pages", "solar": null,
"val": "-1" "clocked": null,
} "args": "[]",
], "kwargs": "{\"number_pages\": 5}",
"enabled": false, "queue": null,
"queue": "low", "exchange": null,
"repeat": null, "routing_key": null,
"at_front": false, "headers": "{}",
"timeout": 43200, "priority": null,
"result_ttl": 86400, "expires": null,
"cron_string": null, "expire_seconds": null,
"scheduled_time": "2025-01-01T00:00:00+00:00", "one_off": false,
"interval": 1, "start_time": null,
"interval_unit": "weeks", "enabled": true,
"successful_runs": 0, "last_run_at": null,
"failed_runs": 0, "total_run_count": 0,
"last_successful_run": null, "date_changed": "2025-07-17T16:25:02.494Z",
"last_failed_run": null "description": ""
}
}, },
{ {
"model": "RepeatableTaskType", "model": "django_celery_beat.periodictask",
"name": "Clean old URL content", "pk": 13,
"callable": "fetcher.tasks.clean_old_url_content", "fields": {
"callable_args": [], "name": "Fetch MissingKids - ALL",
"callable_kwargs": [], "task": "fetcher.tasks.fetch_missing_kids",
"enabled": false, "interval": 9,
"queue": "default", "crontab": null,
"repeat": null, "solar": null,
"at_front": false, "clocked": null,
"timeout": null, "args": "[]",
"result_ttl": 86400, "kwargs": "{\"number_pages\": -1}",
"cron_string": null, "queue": null,
"scheduled_time": "2025-01-01T00:00:00+00:00", "exchange": null,
"interval": 1, "routing_key": null,
"interval_unit": "weeks", "headers": "{}",
"successful_runs": 0, "priority": null,
"failed_runs": 0, "expires": null,
"last_successful_run": null, "expire_seconds": null,
"last_failed_run": null "one_off": false,
"start_time": null,
"enabled": true,
"last_run_at": null,
"total_run_count": 0,
"date_changed": "2025-07-17T16:25:50.597Z",
"description": ""
}
},
{
"model": "django_celery_beat.periodictask",
"pk": 14,
"fields": {
"name": "Clean old URL content",
"task": "fetcher.tasks.clean_old_url_content",
"interval": 9,
"crontab": null,
"solar": null,
"clocked": null,
"args": "[]",
"kwargs": "{}",
"queue": null,
"exchange": null,
"routing_key": null,
"headers": "{}",
"priority": null,
"expires": null,
"expire_seconds": null,
"one_off": false,
"start_time": null,
"enabled": true,
"last_run_at": null,
"total_run_count": 0,
"date_changed": "2025-07-17T16:26:16.272Z",
"description": ""
}
},
{
"model": "django_celery_beat.intervalschedule",
"pk": 1,
"fields": {
"every": 6,
"period": "hours"
}
},
{
"model": "django_celery_beat.intervalschedule",
"pk": 2,
"fields": {
"every": 10,
"period": "minutes"
}
},
{
"model": "django_celery_beat.intervalschedule",
"pk": 3,
"fields": {
"every": 1,
"period": "days"
}
},
{
"model": "django_celery_beat.intervalschedule",
"pk": 4,
"fields": {
"every": 12,
"period": "hours"
}
},
{
"model": "django_celery_beat.intervalschedule",
"pk": 5,
"fields": {
"every": 2,
"period": "days"
}
},
{
"model": "django_celery_beat.intervalschedule",
"pk": 6,
"fields": {
"every": 28,
"period": "days"
}
},
{
"model": "django_celery_beat.intervalschedule",
"pk": 7,
"fields": {
"every": 8,
"period": "hours"
}
},
{
"model": "django_celery_beat.intervalschedule",
"pk": 8,
"fields": {
"every": 4,
"period": "hours"
}
},
{
"model": "django_celery_beat.intervalschedule",
"pk": 9,
"fields": {
"every": 7,
"period": "days"
}
},
{
"model": "django_celery_beat.crontabschedule",
"pk": 1,
"fields": {
"minute": "0",
"hour": "4",
"day_of_month": "*",
"month_of_year": "*",
"day_of_week": "*",
"timezone": "UTC"
}
} }
] ]

View File

@@ -26,9 +26,9 @@ services:
# - default # This network # - default # This network
# - docker_default # Reverse proxy network # - docker_default # Reverse proxy network
ports: ports:
- 8000:8000 - 8005:8000
volumes: # Development mode ## volumes: # Development mode
- ./app_urls:/opt/app ## - ./app_urls:/opt/app
deploy: deploy:
resources: resources:
limits: limits: