django tasks scheduler update, .env and docker compose towards fetcher sca

Luciano Gervasoni
2025-06-20 00:35:48 +02:00
parent 490f01d66c
commit 03a2949b2b
8 changed files with 149 additions and 51 deletions

View File

@@ -12,6 +12,9 @@ https://docs.djangoproject.com/en/5.1/ref/settings/
from pathlib import Path
import os
from typing import Dict
from scheduler.types import SchedulerConfiguration, Broker, QueueConfiguration
# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent
@@ -107,57 +110,27 @@ CACHES = {
}
}
'''
from scheduler.types import SchedulerConfiguration, QueueConfiguration, Broker
from typing import Dict
# https://django-tasks-scheduler.readthedocs.io/en/latest/configuration/
SCHEDULER_CONFIG = SchedulerConfiguration(
DEFAULT_JOB_TIMEOUT = os.environ.get("JOB_DEFAULT_TIMEOUT", 60*30), # 30 minutes
EXECUTIONS_IN_PAGE=20,
SCHEDULER_INTERVAL=10,
BROKER=Broker.REDIS,
CALLBACK_TIMEOUT=60, # Callback timeout in seconds (success/failure/stopped)
# Default values; can be overridden per task/job
DEFAULT_SUCCESS_TTL=10 * 60, # Time To Live (TTL) in seconds to keep successful job results
DEFAULT_FAILURE_TTL=365 * 24 * 60 * 60, # Time To Live (TTL) in seconds to keep job failure information
DEFAULT_JOB_TTL=10 * 60, # Time To Live (TTL) in seconds to keep job information
DEFAULT_JOB_TIMEOUT=30 * 60, # timeout (seconds) for a job
# General configuration values
DEFAULT_WORKER_TTL=10 * 60, # Time To Live (TTL) in seconds to keep worker information after last heartbeat
DEFAULT_MAINTENANCE_TASK_INTERVAL=10 * 60, # The interval to run maintenance tasks in seconds. 10 minutes.
DEFAULT_JOB_MONITORING_INTERVAL=30, # The interval to monitor jobs in seconds.
SCHEDULER_FALLBACK_PERIOD_SECS=120, # Period (secs) to wait before requiring to reacquire locks
)
SCHEDULER_QUEUES: Dict[str, QueueConfiguration] = {
'default': QueueConfiguration(
HOST = os.environ.get("REDIS_HOST", "localhost"),
PORT = os.environ.get("REDIS_PORT", 6379),
DB = os.environ.get("REDIS_DB", 0),
),
'high': QueueConfiguration(
HOST = os.environ.get("REDIS_HOST", "localhost"),
PORT = os.environ.get("REDIS_PORT", 6379),
DB = os.environ.get("REDIS_DB", 0),
),
'low': QueueConfiguration(
HOST = os.environ.get("REDIS_HOST", "localhost"),
PORT = os.environ.get("REDIS_PORT", 6379),
DB = os.environ.get("REDIS_DB", 0),
),
}
'''
SCHEDULER_QUEUES = {
'default': {
'HOST': os.environ.get("REDIS_HOST", "localhost"),
'PORT': os.environ.get("REDIS_PORT", 6379),
'DB': os.environ.get("REDIS_DB", 0),
},
'high': {
'HOST': os.environ.get("REDIS_HOST", "localhost"),
'PORT': os.environ.get("REDIS_PORT", 6379),
'DB': os.environ.get("REDIS_DB", 0),
},
'low': {
'HOST': os.environ.get("REDIS_HOST", "localhost"),
'PORT': os.environ.get("REDIS_PORT", 6379),
'DB': os.environ.get("REDIS_DB", 0),
}
}
SCHEDULER_CONFIG = {
'DEFAULT_TIMEOUT': os.environ.get("JOB_DEFAULT_TIMEOUT", 60*30), # 30 minutes
'DEFAULT_RESULT_TTL': 60*60*12, # 12 hours
'EXECUTIONS_IN_PAGE': 20,
'SCHEDULER_INTERVAL': 10, # 10 seconds
# 'default': QueueConfiguration(URL='redis://localhost:6379/0'),
'default': QueueConfiguration(URL='redis://{}:{}/{}'.format(os.environ.get("REDIS_HOST", "localhost"), os.environ.get("REDIS_PORT", 6379), os.environ.get("REDIS_DB", 0))),
'high': QueueConfiguration(URL='redis://{}:{}/{}'.format(os.environ.get("REDIS_HOST", "localhost"), os.environ.get("REDIS_PORT", 6379), os.environ.get("REDIS_DB", 0))),
'low': QueueConfiguration(URL='redis://{}:{}/{}'.format(os.environ.get("REDIS_HOST", "localhost"), os.environ.get("REDIS_PORT", 6379), os.environ.get("REDIS_DB", 0))),
}
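The hunk above interleaves the old dict-style settings with the new scheduler.types objects, so the final shape is hard to read at a glance. Below is a consolidated sketch of the new-style configuration, assuming the same environment variables (REDIS_HOST, REDIS_PORT, REDIS_DB, JOB_DEFAULT_TIMEOUT) and the SchedulerConfiguration / QueueConfiguration / Broker API imported above; note that os.environ.get() returns strings, so numeric values are cast explicitly:

import os
from typing import Dict
from scheduler.types import SchedulerConfiguration, QueueConfiguration, Broker

# One Redis URL built from the environment, reused by every queue.
REDIS_URL = "redis://{}:{}/{}".format(
    os.environ.get("REDIS_HOST", "localhost"),
    os.environ.get("REDIS_PORT", "6379"),
    os.environ.get("REDIS_DB", "0"),
)

SCHEDULER_CONFIG = SchedulerConfiguration(
    EXECUTIONS_IN_PAGE=20,
    SCHEDULER_INTERVAL=10,  # scheduler wake-up interval, seconds
    BROKER=Broker.REDIS,
    CALLBACK_TIMEOUT=60,  # success/failure/stopped callback timeout, seconds
    DEFAULT_SUCCESS_TTL=10 * 60,  # keep successful job results for 10 minutes
    DEFAULT_FAILURE_TTL=365 * 24 * 60 * 60,  # keep failure information for a year
    DEFAULT_JOB_TTL=10 * 60,
    DEFAULT_JOB_TIMEOUT=int(os.environ.get("JOB_DEFAULT_TIMEOUT", 60 * 30)),  # 30 minutes
    DEFAULT_WORKER_TTL=10 * 60,
    DEFAULT_MAINTENANCE_TASK_INTERVAL=10 * 60,
    DEFAULT_JOB_MONITORING_INTERVAL=30,
    SCHEDULER_FALLBACK_PERIOD_SECS=120,
)

SCHEDULER_QUEUES: Dict[str, QueueConfiguration] = {
    name: QueueConfiguration(URL=REDIS_URL) for name in ("default", "high", "low")
}

Keying all three queues off a single REDIS_URL keeps the connection settings in one place; per-queue URLs can still be substituted where needed.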

View File

@@ -43,7 +43,6 @@ class DB_Handler():
UrlsSourceSearch.objects.get_or_create(id_url=obj_url, id_source=obj_source, id_search=obj_search)
else:
# Add object to insert
# url_object_to_insert.append(Urls(url=url))
urls_to_insert.append(url)
### Insert URLs & (URL_id, source_id)
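The bulk insert that consumes urls_to_insert falls outside this hunk. A minimal sketch of how it could be done with Django's bulk API, assuming unique constraints on Urls.url and on the (id_url, id_source, id_search) triple so existing rows are skipped rather than raising:

# Insert the new URLs, silently skipping ones that already exist.
Urls.objects.bulk_create(
    [Urls(url=u) for u in urls_to_insert],
    ignore_conflicts=True,
)

# Fetch the rows back (new and pre-existing) and link them to the source/search.
UrlsSourceSearch.objects.bulk_create(
    [
        UrlsSourceSearch(id_url=obj_url, id_source=obj_source, id_search=obj_search)
        for obj_url in Urls.objects.filter(url__in=urls_to_insert)
    ],
    ignore_conflicts=True,
)

bulk_create(ignore_conflicts=True) trades the per-row get_or_create round trips for two queries, which matters when a fetch returns many URLs.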

View File

@@ -9,4 +9,6 @@ else
python manage.py createsuperuser --noinput
python manage.py collectstatic --no-input
python manage.py import --filename scheduled_tasks.json
#
# python manage.py inspectdb # Debugging model
fi

View File

@@ -1,5 +1,5 @@
django==5.1
django-tasks-scheduler==3.0.1
django-tasks-scheduler==4.0.4
django-redis
psycopg[binary]
gunicorn
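The pinned django-redis package is wired up through the CACHES setting, of which the settings hunk above only shows the tail. A sketch that reuses the same REDIS_* environment variables (os is already imported at the top of settings.py); the key names follow the django-redis documentation, not this repository:

CACHES = {
    "default": {
        "BACKEND": "django_redis.cache.RedisCache",
        "LOCATION": "redis://{}:{}/{}".format(
            os.environ.get("REDIS_HOST", "localhost"),
            os.environ.get("REDIS_PORT", "6379"),
            os.environ.get("REDIS_DB", "0"),
        ),
        "OPTIONS": {"CLIENT_CLASS": "django_redis.client.DefaultClient"},
    }
}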

View File

@@ -2,7 +2,7 @@
if [ "${DJANGO_DEBUG}" = true ] | [ "${DJANGO_DEBUG}" == "True" ]; then
echo "Running in DEBUG mode"
gunicorn core.wsgi:application --reload --log-level debug --bind 0.0.0.0:8000 --timeout 600 & python manage.py rqworker high default low
gunicorn core.wsgi:application --reload --log-level debug --bind 0.0.0.0:8000 --timeout 600 & python manage.py scheduler_worker high default low
else
gunicorn core.wsgi:application --bind 0.0.0.0:8000 --timeout 600 & python manage.py rqworker high default low
gunicorn core.wsgi:application --bind 0.0.0.0:8000 --timeout 600 & python manage.py scheduler_worker high default low
fi
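With the bump to django-tasks-scheduler 4.0.4, both branches now launch the worker with the scheduler_worker management command instead of rqworker. For ad-hoc debugging the same worker can also be started from Python via call_command; a sketch, with core.settings assumed from the core.wsgi module used above:

import os
import django
from django.core.management import call_command

# Boot Django outside manage.py, then run the worker on the same three queues.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "core.settings")
django.setup()
call_command("scheduler_worker", "high", "default", "low")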