django tasks scheduler update, .env and docker compose towards fetcher sca

.env (4 changed lines)
@@ -1,3 +1,7 @@
+# AutoSSH DB
+REMOTE_HOST=''
+REMOTE_USERNAME=''
+
 # Initialization
 INITIALIZE_DB=true
 DJANGO_SUPERUSER_USERNAME=matitos
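Note: the two blank AutoSSH variables are consumed by the fetcher_db service in docker-compose-prod.yml further down. Hypothetical example values (invented here for illustration, not part of the commit):

    REMOTE_HOST='db.example.org'      # SSH-reachable machine that runs Postgres
    REMOTE_USERNAME='tunnel'          # user whose authorized_keys holds the fetcher's public key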
.gitignore (vendored, 1 changed line)
@@ -1,3 +1,4 @@
+.env
 __pycache__/
 *.pyc
 **/credentials.py
@@ -12,6 +12,9 @@ https://docs.djangoproject.com/en/5.1/ref/settings/
 
 from pathlib import Path
 import os
+from typing import Dict
+from scheduler.types import SchedulerConfiguration, Broker, QueueConfiguration
+
 
 # Build paths inside the project like this: BASE_DIR / 'subdir'.
 BASE_DIR = Path(__file__).resolve().parent.parent
@@ -107,57 +110,27 @@ CACHES = {
     }
 }
 
-'''
-from scheduler.types import SchedulerConfiguration, QueueConfiguration, Broker
-from typing import Dict
-
-# https://django-tasks-scheduler.readthedocs.io/en/latest/configuration/
 SCHEDULER_CONFIG = SchedulerConfiguration(
-    DEFAULT_JOB_TIMEOUT = os.environ.get("JOB_DEFAULT_TIMEOUT", 60*30), # 30 minutes
+    EXECUTIONS_IN_PAGE=20,
+    SCHEDULER_INTERVAL=10,
     BROKER=Broker.REDIS,
+    CALLBACK_TIMEOUT=60,  # Callback timeout in seconds (success/failure/stopped)
+    # Default values, can be overridden per task/job
+    DEFAULT_SUCCESS_TTL=10 * 60,  # Time To Live (TTL) in seconds to keep successful job results
+    DEFAULT_FAILURE_TTL=365 * 24 * 60 * 60,  # Time To Live (TTL) in seconds to keep job failure information
+    DEFAULT_JOB_TTL=10 * 60,  # Time To Live (TTL) in seconds to keep job information
+    DEFAULT_JOB_TIMEOUT=30 * 60,  # Timeout (seconds) for a job
+    # General configuration values
+    DEFAULT_WORKER_TTL=10 * 60,  # Time To Live (TTL) in seconds to keep worker information after last heartbeat
+    DEFAULT_MAINTENANCE_TASK_INTERVAL=10 * 60,  # Interval (seconds) to run maintenance tasks; 10 minutes
+    DEFAULT_JOB_MONITORING_INTERVAL=30,  # Interval (seconds) to monitor jobs
+    SCHEDULER_FALLBACK_PERIOD_SECS=120,  # Period (seconds) to wait before requiring to reacquire locks
 )
 
 SCHEDULER_QUEUES: Dict[str, QueueConfiguration] = {
-    'default': QueueConfiguration(
-        HOST = os.environ.get("REDIS_HOST", "localhost"),
-        PORT = os.environ.get("REDIS_PORT", 6379),
-        DB = os.environ.get("REDIS_DB", 0),
-    ),
-    'high': QueueConfiguration(
-        HOST = os.environ.get("REDIS_HOST", "localhost"),
-        PORT = os.environ.get("REDIS_PORT", 6379),
-        DB = os.environ.get("REDIS_DB", 0),
-    ),
-    'low': QueueConfiguration(
-        HOST = os.environ.get("REDIS_HOST", "localhost"),
-        PORT = os.environ.get("REDIS_PORT", 6379),
-        DB = os.environ.get("REDIS_DB", 0),
-    ),
-}
-'''
-
-SCHEDULER_QUEUES = {
-    'default': {
-        'HOST': os.environ.get("REDIS_HOST", "localhost"),
-        'PORT': os.environ.get("REDIS_PORT", 6379),
-        'DB': os.environ.get("REDIS_DB", 0),
-    },
-    'high': {
-        'HOST': os.environ.get("REDIS_HOST", "localhost"),
-        'PORT': os.environ.get("REDIS_PORT", 6379),
-        'DB': os.environ.get("REDIS_DB", 0),
-    },
-    'low': {
-        'HOST': os.environ.get("REDIS_HOST", "localhost"),
-        'PORT': os.environ.get("REDIS_PORT", 6379),
-        'DB': os.environ.get("REDIS_DB", 0),
-    }
-}
-SCHEDULER_CONFIG = {
-    'DEFAULT_TIMEOUT': os.environ.get("JOB_DEFAULT_TIMEOUT", 60*30), # 30 minutes
-    'DEFAULT_RESULT_TTL': 60*60*12, # 12 hours
-    'EXECUTIONS_IN_PAGE': 20,
-    'SCHEDULER_INTERVAL': 10, # 10 seconds
-}
+    # 'default': QueueConfiguration(URL='redis://localhost:6379/0'),
+    'default': QueueConfiguration(URL='redis://{}:{}/{}'.format(os.environ.get("REDIS_HOST", "localhost"), os.environ.get("REDIS_PORT", 6379), os.environ.get("REDIS_DB", 0))),
+    'high': QueueConfiguration(URL='redis://{}:{}/{}'.format(os.environ.get("REDIS_HOST", "localhost"), os.environ.get("REDIS_PORT", 6379), os.environ.get("REDIS_DB", 0))),
+    'low': QueueConfiguration(URL='redis://{}:{}/{}'.format(os.environ.get("REDIS_HOST", "localhost"), os.environ.get("REDIS_PORT", 6379), os.environ.get("REDIS_DB", 0))),
 }
 
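Note: the three queues now differ only in name; the URL-building expression is repeated verbatim. A possible deduplication (a sketch, not code from the commit; the helper name redis_url_from_env is invented) that uses the same QueueConfiguration(URL=...) call the diff introduces:

    import os
    from typing import Dict
    from scheduler.types import QueueConfiguration

    def redis_url_from_env() -> str:
        # Build redis://HOST:PORT/DB with the same local defaults as above.
        host = os.environ.get("REDIS_HOST", "localhost")
        port = os.environ.get("REDIS_PORT", 6379)
        db = os.environ.get("REDIS_DB", 0)
        return "redis://{}:{}/{}".format(host, port, db)

    SCHEDULER_QUEUES: Dict[str, QueueConfiguration] = {
        name: QueueConfiguration(URL=redis_url_from_env())
        for name in ("default", "high", "low")
    }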
@@ -43,7 +43,6 @@ class DB_Handler():
             UrlsSourceSearch.objects.get_or_create(id_url=obj_url, id_source=obj_source, id_search=obj_search)
         else:
             # Add object to insert
-            # url_object_to_insert.append(Urls(url=url))
             urls_to_insert.append(url)
 
         ### Insert URLs & (URL_id, source_id)
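Note: the collected urls_to_insert list feeds the batched insert announced by the trailing comment. A minimal sketch of that step, assuming the Urls model has a unique url field and Django's bulk_create fits here (not the repository's actual code):

    # Insert all new URLs in one query; rows that would violate the
    # unique constraint are skipped instead of raising.
    Urls.objects.bulk_create(
        [Urls(url=u) for u in urls_to_insert],
        ignore_conflicts=True,
    )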
@@ -9,4 +9,6 @@ else
     python manage.py createsuperuser --noinput
     python manage.py collectstatic --no-input
     python manage.py import --filename scheduled_tasks.json
+    #
+    # python manage.py inspectdb  # Debugging model
 fi
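Note: the scheduled_tasks.json loaded above would normally be produced by the scheduler's matching export command (assuming django-tasks-scheduler's export/import management-command pair, run against an instance where the tasks already exist):

    python manage.py export --filename scheduled_tasks.json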
@@ -1,5 +1,5 @@
 django==5.1
-django-tasks-scheduler==3.0.1
+django-tasks-scheduler==4.0.4
 django-redis
 psycopg[binary]
 gunicorn
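Note: this major-version bump (3.x to 4.x) is presumably what drives the worker-command rename in the entrypoint below; both change in the same commit. After reinstalling, a quick sanity check that the new management command is registered (generic Django, nothing version-specific assumed):

    python manage.py help | grep -i scheduler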
@@ -2,7 +2,7 @@
 
 if [ "${DJANGO_DEBUG}" = true ] | [ "${DJANGO_DEBUG}" == "True" ]; then
     echo "Running in DEBUG mode"
-    gunicorn core.wsgi:application --reload --log-level debug --bind 0.0.0.0:8000 --timeout 600 & python manage.py rqworker high default low
+    gunicorn core.wsgi:application --reload --log-level debug --bind 0.0.0.0:8000 --timeout 600 & python manage.py scheduler_worker high default low
 else
-    gunicorn core.wsgi:application --bind 0.0.0.0:8000 --timeout 600 & python manage.py rqworker high default low
+    gunicorn core.wsgi:application --bind 0.0.0.0:8000 --timeout 600 & python manage.py scheduler_worker high default low
 fi
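Note: a pre-existing wart this diff leaves untouched: the condition joins the two tests with a single |, which is a pipe, not a logical OR. A pipeline's exit status is that of its last command, so only the "True" comparison is actually consulted (and == inside [ ] is a bashism besides). A corrected, POSIX-friendly condition would be:

    if [ "${DJANGO_DEBUG}" = "true" ] || [ "${DJANGO_DEBUG}" = "True" ]; then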
docker-compose-prod.yml (new file, 119 lines)
@@ -0,0 +1,119 @@
+version: '3.9'
+
+services:
+
+  fetcher_app_selenium:
+    image: fetcher_app_selenium
+    build:
+      context: ./app_selenium
+      args:
+        - ARCH=${ARCH}  # arm64, amd64
+    container_name: fetcher_app_selenium
+    restart: unless-stopped
+    shm_size: 512mb
+    environment:
+      - SELENIUM_SLEEP_PER_PAGE=${SELENIUM_SLEEP_PER_PAGE}
+      - PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY}
+    ports:
+      - 80
+    dns:
+      - 1.1.1.1
+      - 1.0.0.1
+    deploy:
+      resources:
+        limits:
+          cpus: '${DEPLOY_CPUS}'
+          memory: ${DEPLOY_RAM}
+
+  fetcher_app_urls:
+    image: fetcher_app_urls
+    build:
+      context: ./app_urls
+    container_name: fetcher_app_urls
+    restart: unless-stopped
+    environment:
+      # Initialization
+      - INITIALIZE_DB=${INITIALIZE_DB}  # Related to DB persistence
+      - DJANGO_SUPERUSER_USERNAME=${DJANGO_SUPERUSER_USERNAME}
+      - DJANGO_SUPERUSER_PASSWORD=${DJANGO_SUPERUSER_PASSWORD}
+      - DJANGO_SUPERUSER_EMAIL=${DJANGO_SUPERUSER_EMAIL}
+      # Django
+      - DJANGO_ALLOWED_HOSTS=${DJANGO_ALLOWED_HOSTS}  # host1,host2
+      - DJANGO_ALLOWED_ORIGINS=${DJANGO_ALLOWED_ORIGINS}  # Reverse proxy
+      - DJANGO_SECRET_KEY=${DJANGO_SECRET_KEY}
+      - DJANGO_DEBUG=${DJANGO_DEBUG}
+      - PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY}
+      # Database
+      - DB_NAME=${DB_NAME}
+      - DB_USER=${DB_USER}
+      - DB_PASSWORD=${DB_PASSWORD}
+      - DB_HOST=${DB_HOST}
+      - DB_PORT=${DB_PORT}
+      - REDIS_HOST=${REDIS_HOST}
+      - REDIS_PORT=${REDIS_PORT}
+      # Job timeout: 30 min
+      - JOB_DEFAULT_TIMEOUT=${JOB_DEFAULT_TIMEOUT}
+      # Fetcher
+      - FETCHER_GNEWS_DECODE_SLEEP=${FETCHER_GNEWS_DECODE_SLEEP}
+      - FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP=${FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP}
+      - FETCHER_BETWEEN_SEARCHES_SLEEP=${FETCHER_BETWEEN_SEARCHES_SLEEP}  # Sleep time between each search
+      - FETCHER_URL_HOST_SLEEP=${FETCHER_URL_HOST_SLEEP}  # Sleep time between requests to same URL host
+      - FETCHER_LANGUAGE_DETECTION_MIN_CHAR=${FETCHER_LANGUAGE_DETECTION_MIN_CHAR}  # Min amount of characters to run language detection
+      - FETCHER_INSERT_URL_CACHE_TIME=${FETCHER_INSERT_URL_CACHE_TIME}  # Cache time: Insert raw URL
+      - FETCHER_ERROR_URL_CACHE_TIME=${FETCHER_ERROR_URL_CACHE_TIME}  # Cache time: Error on processing URL
+      # Selenium
+      - SELENIUM_ENDPOINT=${SELENIUM_ENDPOINT}
+      - ENDPOINT_OLLAMA=${ENDPOINT_OLLAMA}
+      # Ghost
+      - GHOST_ADMIN_API_KEY=${GHOST_ADMIN_API_KEY}
+      - GHOST_ADMIN_API_URL=${GHOST_ADMIN_API_URL}
+      - PEXELS_API_KEY=${PEXELS_API_KEY}
+      - OLLAMA_MODEL_DEFAULT=${OLLAMA_MODEL_DEFAULT}
+    ########################
+    #volumes:  # Development mode
+    #  - ./app_urls:/opt/app
+    ########################
+    ports:
+      - 8000:8000
+    depends_on:
+      - fetcher_db
+      - fetcher_redis
+    dns:
+      - 1.1.1.1
+      - 1.0.0.1
+    deploy:
+      resources:
+        limits:
+          cpus: '${DEPLOY_CPUS}'
+          memory: ${DEPLOY_RAM}
+
+  fetcher_db:
+    container_name: fetcher_db
+    image: alpine:latest
+    restart: unless-stopped
+    deploy:
+      resources:
+        limits:
+          memory: 256M
+    volumes:
+      # REQUIREMENT: add the fetcher's SSH public key to the DB machine's ~/.ssh/authorized_keys
+      - ~/.ssh:/root/.ssh:ro
+    command:
+      - sh
+      - -c
+      - |
+        apk add --update openssh autossh
+        autossh -M 15885 -N -o 'GatewayPorts yes' -L 0.0.0.0:5432:127.0.0.1:5432 ${REMOTE_USERNAME}@${REMOTE_HOST}
+        ### Alternative:
+        ### autossh -M 0 -o "ServerAliveInterval 30" -o "ServerAliveCountMax 3" -o 'GatewayPorts yes' -L 15882:127.0.0.1:15882 matitos@matitos.org
+        ### -M 15882 monitors on that port; if the port is already in use, it conflicts!
+        ### autossh -M 15882 -N -o 'GatewayPorts yes' -L 15882:127.0.0.1:15882 matitos@matitos.org
+        ### ssh -N -o "StrictHostKeyChecking no" -o "ServerAliveInterval 60" -o "ServerAliveCountMax 3" -o 'PasswordAuthentication no' -o 'GatewayPorts yes' -L 15882:127.0.0.1:15882 matitos@matitos.org
+    network_mode: "host"
+
+  fetcher_redis:
+    image: redis:alpine
+    container_name: fetcher_redis
+    restart: unless-stopped
+    ports:
+      - 6379  #:6379
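Note: once fetcher_db is up, the tunnel can be verified from the Docker host (a hedged check, assuming psql is installed and the DB_* values come from .env; with network_mode: "host" the forwarded port 5432 binds directly on the host):

    psql "postgresql://${DB_USER}:${DB_PASSWORD}@127.0.0.1:5432/${DB_NAME}" -c 'SELECT 1;'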