django tasks scheduler update, .env and docker compose towards fetcher sca
This commit is contained in:
4
.env
4
.env
@@ -1,3 +1,7 @@
|
||||
# AutoSSH DB
|
||||
REMOTE_HOST=''
|
||||
REMOTE_USERNAME=''
|
||||
|
||||
# Initialization
|
||||
INITIALIZE_DB=true
|
||||
DJANGO_SUPERUSER_USERNAME=matitos
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,3 +1,4 @@
|
||||
.env
|
||||
__pycache__/
|
||||
*.pyc
|
||||
**/credentials.py
|
||||
|
||||
@@ -12,6 +12,9 @@ https://docs.djangoproject.com/en/5.1/ref/settings/
|
||||
|
||||
from pathlib import Path
|
||||
import os
|
||||
from typing import Dict
|
||||
from scheduler.types import SchedulerConfiguration, Broker, QueueConfiguration
|
||||
|
||||
|
||||
# Build paths inside the project like this: BASE_DIR / 'subdir'.
|
||||
BASE_DIR = Path(__file__).resolve().parent.parent
|
||||
@@ -107,57 +110,27 @@ CACHES = {
|
||||
}
|
||||
}
|
||||
|
||||
'''
|
||||
from scheduler.types import SchedulerConfiguration, QueueConfiguration, Broker
|
||||
from typing import Dict
|
||||
|
||||
# https://django-tasks-scheduler.readthedocs.io/en/latest/configuration/
|
||||
SCHEDULER_CONFIG = SchedulerConfiguration(
|
||||
DEFAULT_JOB_TIMEOUT = os.environ.get("JOB_DEFAULT_TIMEOUT", 60*30), # 30 minutes
|
||||
EXECUTIONS_IN_PAGE=20,
|
||||
SCHEDULER_INTERVAL=10,
|
||||
BROKER=Broker.REDIS,
|
||||
CALLBACK_TIMEOUT=60, # Callback timeout in seconds (success/failure/stopped)
|
||||
# Default values, can be overridden per task/job
|
||||
DEFAULT_SUCCESS_TTL=10 * 60, # Time To Live (TTL) in seconds to keep successful job results
|
||||
DEFAULT_FAILURE_TTL=365 * 24 * 60 * 60, # Time To Live (TTL) in seconds to keep job failure information
|
||||
DEFAULT_JOB_TTL=10 * 60, # Time To Live (TTL) in seconds to keep job information
|
||||
DEFAULT_JOB_TIMEOUT=30 * 60, # timeout (seconds) for a job
|
||||
# General configuration values
|
||||
DEFAULT_WORKER_TTL=10 * 60, # Time To Live (TTL) in seconds to keep worker information after last heartbeat
|
||||
DEFAULT_MAINTENANCE_TASK_INTERVAL=10 * 60, # The interval to run maintenance tasks in seconds. 10 minutes.
|
||||
DEFAULT_JOB_MONITORING_INTERVAL=30, # The interval to monitor jobs in seconds.
|
||||
SCHEDULER_FALLBACK_PERIOD_SECS=120, # Period (secs) to wait before requiring to reacquire locks
|
||||
)
|
||||
|
||||
SCHEDULER_QUEUES: Dict[str, QueueConfiguration] = {
|
||||
'default': QueueConfiguration(
|
||||
HOST = os.environ.get("REDIS_HOST", "localhost"),
|
||||
PORT = os.environ.get("REDIS_PORT", 6379),
|
||||
DB = os.environ.get("REDIS_DB", 0),
|
||||
),
|
||||
'high': QueueConfiguration(
|
||||
HOST = os.environ.get("REDIS_HOST", "localhost"),
|
||||
PORT = os.environ.get("REDIS_PORT", 6379),
|
||||
DB = os.environ.get("REDIS_DB", 0),
|
||||
),
|
||||
'low': QueueConfiguration(
|
||||
HOST = os.environ.get("REDIS_HOST", "localhost"),
|
||||
PORT = os.environ.get("REDIS_PORT", 6379),
|
||||
DB = os.environ.get("REDIS_DB", 0),
|
||||
),
|
||||
}
|
||||
'''
|
||||
|
||||
SCHEDULER_QUEUES = {
|
||||
'default': {
|
||||
'HOST': os.environ.get("REDIS_HOST", "localhost"),
|
||||
'PORT': os.environ.get("REDIS_PORT", 6379),
|
||||
'DB': os.environ.get("REDIS_DB", 0),
|
||||
},
|
||||
'high': {
|
||||
'HOST': os.environ.get("REDIS_HOST", "localhost"),
|
||||
'PORT': os.environ.get("REDIS_PORT", 6379),
|
||||
'DB': os.environ.get("REDIS_DB", 0),
|
||||
},
|
||||
'low': {
|
||||
'HOST': os.environ.get("REDIS_HOST", "localhost"),
|
||||
'PORT': os.environ.get("REDIS_PORT", 6379),
|
||||
'DB': os.environ.get("REDIS_DB", 0),
|
||||
}
|
||||
}
|
||||
SCHEDULER_CONFIG = {
|
||||
'DEFAULT_TIMEOUT': os.environ.get("JOB_DEFAULT_TIMEOUT", 60*30), # 30 minutes
|
||||
'DEFAULT_RESULT_TTL': 60*60*12, # 12 hours
|
||||
'EXECUTIONS_IN_PAGE': 20,
|
||||
'SCHEDULER_INTERVAL': 10, # 10 seconds
|
||||
# 'default': QueueConfiguration(URL='redis://localhost:6379/0'),
|
||||
'default': QueueConfiguration(URL='redis://{}:{}/{}'.format(os.environ.get("REDIS_HOST", "localhost"), os.environ.get("REDIS_PORT", 6379), os.environ.get("REDIS_DB", 0))),
|
||||
'high': QueueConfiguration(URL='redis://{}:{}/{}'.format(os.environ.get("REDIS_HOST", "localhost"), os.environ.get("REDIS_PORT", 6379), os.environ.get("REDIS_DB", 0))),
|
||||
'low': QueueConfiguration(URL='redis://{}:{}/{}'.format(os.environ.get("REDIS_HOST", "localhost"), os.environ.get("REDIS_PORT", 6379), os.environ.get("REDIS_DB", 0))),
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -43,7 +43,6 @@ class DB_Handler():
|
||||
UrlsSourceSearch.objects.get_or_create(id_url=obj_url, id_source=obj_source, id_search=obj_search)
|
||||
else:
|
||||
# Add object to insert
|
||||
# url_object_to_insert.append(Urls(url=url))
|
||||
urls_to_insert.append(url)
|
||||
|
||||
### Insert URLs & (URL_id, source_id)
|
||||
|
||||
@@ -9,4 +9,6 @@ else
|
||||
python manage.py createsuperuser --noinput
|
||||
python manage.py collectstatic --no-input
|
||||
python manage.py import --filename scheduled_tasks.json
|
||||
#
|
||||
# python manage.py inspectdb # Debugging model
|
||||
fi
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
django==5.1
|
||||
django-tasks-scheduler==3.0.1
|
||||
django-tasks-scheduler==4.0.4
|
||||
django-redis
|
||||
psycopg[binary]
|
||||
gunicorn
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
if [ "${DJANGO_DEBUG}" = true ] | [ "${DJANGO_DEBUG}" == "True" ]; then
|
||||
echo "Running in DEBUG mode"
|
||||
gunicorn core.wsgi:application --reload --log-level debug --bind 0.0.0.0:8000 --timeout 600 & python manage.py rqworker high default low
|
||||
gunicorn core.wsgi:application --reload --log-level debug --bind 0.0.0.0:8000 --timeout 600 & python manage.py scheduler_worker high default low
|
||||
else
|
||||
gunicorn core.wsgi:application --bind 0.0.0.0:8000 --timeout 600 & python manage.py rqworker high default low
|
||||
gunicorn core.wsgi:application --bind 0.0.0.0:8000 --timeout 600 & python manage.py scheduler_worker high default low
|
||||
fi
|
||||
|
||||
119
docker-compose-prod.yml
Normal file
119
docker-compose-prod.yml
Normal file
@@ -0,0 +1,119 @@
|
||||
version: '3.9'
|
||||
|
||||
services:
|
||||
|
||||
fetcher_app_selenium:
|
||||
image: fetcher_app_selenium
|
||||
build:
|
||||
context: ./app_selenium
|
||||
args:
|
||||
- ARCH=${ARCH} # arm64, amd64
|
||||
container_name: fetcher_app_selenium
|
||||
restart: unless-stopped
|
||||
shm_size: 512mb
|
||||
environment:
|
||||
- SELENIUM_SLEEP_PER_PAGE=${SELENIUM_SLEEP_PER_PAGE}
|
||||
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY}
|
||||
ports:
|
||||
- 80
|
||||
dns:
|
||||
- 1.1.1.1
|
||||
- 1.0.0.1
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '${DEPLOY_CPUS}'
|
||||
memory: ${DEPLOY_RAM}
|
||||
|
||||
fetcher_app_urls:
|
||||
image: fetcher_app_urls
|
||||
build:
|
||||
context: ./app_urls
|
||||
container_name: fetcher_app_urls
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
# Initialization
|
||||
- INITIALIZE_DB=${INITIALIZE_DB} # Related to DB persistence
|
||||
- DJANGO_SUPERUSER_USERNAME=${DJANGO_SUPERUSER_USERNAME}
|
||||
- DJANGO_SUPERUSER_PASSWORD=${DJANGO_SUPERUSER_PASSWORD}
|
||||
- DJANGO_SUPERUSER_EMAIL=${DJANGO_SUPERUSER_EMAIL}
|
||||
# Django
|
||||
- DJANGO_ALLOWED_HOSTS=${DJANGO_ALLOWED_HOSTS} # host1,host2
|
||||
- DJANGO_ALLOWED_ORIGINS=${DJANGO_ALLOWED_ORIGINS} # Reverse proxy
|
||||
- DJANGO_SECRET_KEY=${DJANGO_SECRET_KEY}
|
||||
- DJANGO_DEBUG=${DJANGO_DEBUG}
|
||||
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY}
|
||||
# Database
|
||||
- DB_NAME=${DB_NAME}
|
||||
- DB_USER=${DB_USER}
|
||||
- DB_PASSWORD=${DB_PASSWORD}
|
||||
- DB_HOST=${DB_HOST}
|
||||
- DB_PORT=${DB_PORT}
|
||||
- REDIS_HOST=${REDIS_HOST}
|
||||
- REDIS_PORT=${REDIS_PORT}
|
||||
# Job timeout: 30 min
|
||||
- JOB_DEFAULT_TIMEOUT=${JOB_DEFAULT_TIMEOUT}
|
||||
# Fetcher
|
||||
- FETCHER_GNEWS_DECODE_SLEEP=${FETCHER_GNEWS_DECODE_SLEEP}
|
||||
- FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP=${FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP}
|
||||
- FETCHER_BETWEEN_SEARCHES_SLEEP=${FETCHER_BETWEEN_SEARCHES_SLEEP} # Sleep time between each search
|
||||
- FETCHER_URL_HOST_SLEEP=${FETCHER_URL_HOST_SLEEP} # Sleep time between requests to same URL host
|
||||
- FETCHER_LANGUAGE_DETECTION_MIN_CHAR=${FETCHER_LANGUAGE_DETECTION_MIN_CHAR} # Min amount of characters to run language detection
|
||||
- FETCHER_INSERT_URL_CACHE_TIME=${FETCHER_INSERT_URL_CACHE_TIME} # Cache time: Insert raw URL
|
||||
- FETCHER_ERROR_URL_CACHE_TIME=${FETCHER_ERROR_URL_CACHE_TIME} # Cache time: Error on processing URL
|
||||
# Selenium
|
||||
- SELENIUM_ENDPOINT=${SELENIUM_ENDPOINT}
|
||||
- ENDPOINT_OLLAMA=${ENDPOINT_OLLAMA}
|
||||
# Ghost
|
||||
- GHOST_ADMIN_API_KEY=${GHOST_ADMIN_API_KEY}
|
||||
- GHOST_ADMIN_API_URL=${GHOST_ADMIN_API_URL}
|
||||
- PEXELS_API_KEY=${PEXELS_API_KEY}
|
||||
- OLLAMA_MODEL_DEFAULT=${OLLAMA_MODEL_DEFAULT}
|
||||
########################
|
||||
#volumes: # Development mode
|
||||
# - ./app_urls:/opt/app
|
||||
########################
|
||||
ports:
|
||||
- 8000:8000
|
||||
depends_on:
|
||||
- fetcher_db
|
||||
- fetcher_redis
|
||||
dns:
|
||||
- 1.1.1.1
|
||||
- 1.0.0.1
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '${DEPLOY_CPUS}'
|
||||
memory: ${DEPLOY_RAM}
|
||||
|
||||
fetcher_db:
|
||||
container_name: fetcher_db
|
||||
image: alpine:latest
|
||||
restart: unless-stopped
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 256M
|
||||
volumes:
|
||||
# REQUIREMENT: Add the fetcher's SSH public key to .ssh/authorized_keys on the DB machine
|
||||
- ~/.ssh:/root/.ssh:ro
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- |
|
||||
apk add --update openssh autossh
|
||||
autossh -M 15885 -N -o 'GatewayPorts yes' -L 0.0.0.0:5432:127.0.0.1:5432 ${REMOTE_USERNAME}@${REMOTE_HOST}
|
||||
### Alternative:
|
||||
### autossh -M 0 -o "ServerAliveInterval 30" -o "ServerAliveCountMax 3" -o 'GatewayPorts yes' -L 15882:127.0.0.1:15882 matitos@matitos.org
|
||||
### -M 15882 makes autossh monitor on port 15882; this conflicts if that port is already in use!
|
||||
###autossh -M 15882 -N -o 'GatewayPorts yes' -L 15882:127.0.0.1:15882 matitos@matitos.org
|
||||
###ssh -N -o "StrictHostKeyChecking no" -o "ServerAliveInterval 60" -o "ServerAliveCountMax 3" -o 'PasswordAuthentication no' -o 'GatewayPorts yes' -L 15882:127.0.0.1:15882 matitos@matitos.org
|
||||
network_mode: "host"
|
||||
|
||||
fetcher_redis:
|
||||
image: redis:alpine
|
||||
container_name: fetcher_redis
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- 6379 #:6379
|
||||
Reference in New Issue
Block a user