docker env vars, selenium docker simplified, favicon, settings clean

This commit is contained in:
Luciano Gervasoni
2025-04-10 10:54:57 +02:00
parent 842f3175df
commit 0cd84496cf
12 changed files with 177 additions and 103 deletions

48
.env
View File

@@ -1 +1,47 @@
# TODO...
# Initialization
INITIALIZE_DB=true
DJANGO_SUPERUSER_USERNAME=matitos
DJANGO_SUPERUSER_PASSWORD=matitos
DJANGO_SUPERUSER_EMAIL=matitos@matitos.org
# Reverse proxy
REVERSE_PROXY_URL=fetcher.matitos.org
# Django
DJANGO_ALLOWED_ORIGINS=https://fetcher.matitos.org # Reverse proxy
DJANGO_ALLOWED_HOSTS=* # host1,host2
DJANGO_SECRET_KEY=EtKpy7t84GvU4gBwX9z3xKPBXMS75IAV0dkzN7dXVUsMSqy6a5rjY6WNCw3CcRH5
# DJANGO_DEBUG=False
DJANGO_DEBUG=True
PATH_LOGS_DIRECTORY=/opt/logs
# Database
DB_NAME=matitos
DB_PASSWORD=supermatitos
DB_USER=supermatitos
PATH_DB_DATA=.
# Database: Django
DB_HOST=fetcher_db
DB_PORT=5432
REDIS_HOST=fetcher_redis
REDIS_PORT=6379
# Job timeout: 30 min
JOB_DEFAULT_TIMEOUT=1800
# Fetcher
FETCHER_GNEWS_DECODE_SLEEP=1.5
FETCHER_URL_HOST_SLEEP=1.5
FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP=5
FETCHER_BETWEEN_SEARCHES_SLEEP=1
FETCHER_LANGUAGE_DETECTION_MIN_CHAR=100
# Selenium
SELENIUM_ENDPOINT=http://fetcher_app_selenium:80
ENDPOINT_OLLAMA=https://ollamamodel.matitos.org
# APP: Selenium
ARCH=amd64 # arm64, amd64
SELENIUM_SLEEP_PER_PAGE=4
PATH_LOGS_DIRECTORY=/opt/logs

View File

@@ -46,3 +46,12 @@
- Related to child abuse?
- ...
- Merge similar articles?
# Deploy
* Dev mode
```
docker compose -f docker-compose-dev.yml down -v
docker compose -f docker-compose-dev.yml build --progress=plain
docker compose -f docker-compose-dev.yml up
```

View File

@@ -1,18 +1,22 @@
FROM python:3.12
# Architecture: amd64
#ARG ARCH_G=linux64
#ARG ARCH_F=linux-x86_64
# Architecture: arm64
ARG ARCH_G=linux-aarch64
ARG ARCH_F=linux-aarch64
# Architecture: amd64 or arm64
#ARCH=arm64
#ARCH=amd64
ARG ARCH
ARG firefox_ver=137.0
ARG geckodriver_ver=0.36.0
RUN echo "Architecture build: $ARCH"
RUN apt-get update \
RUN if [ "${ARCH}" = "amd64" ] ; then \
ARCH_G="linux64"; ARCH_F="linux-x86_64"; \
else \
ARCH_G="linux-aarch64"; ARCH_F="linux-aarch64"; \
fi \
&& apt-get update \
&& apt-get upgrade -y \
&& apt-get install -y --no-install-recommends --no-install-suggests \
ca-certificates \

View File

@@ -75,7 +75,7 @@ class Meta:
* Deploy
```
# Check environments variables on docker-compose.yml
# Check the environment variables in the .env file
# Remove previous instances
docker compose down -v

View File

@@ -20,17 +20,13 @@ BASE_DIR = Path(__file__).resolve().parent.parent
# Quick-start development settings - unsuitable for production
# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = os.getenv("DJANGO_SECRET_KEY", 'django-insecure-54mqLbW5NlO8OlVDsT3fcbg3Vf6C8Fgcoj8H0hXv3Pr8bpgqvOuiaeqvGn34sGwt')
SECRET_KEY = os.getenv("DJANGO_SECRET_KEY", 'django-insecure-EtKpy7t84GvU4gBwX9z3xKPBXMS75IAV0dkzN7dXVUsMSqy6a5rjY6WNCw3CcRH5')
# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = (os.environ.get('DJANGO_DEBUG') == "True")
ALLOWED_HOSTS = os.environ.get('DJANGO_ALLOWED_HOSTS', "*").split(",")
# Comma-separated list of trusted origins (scheme included, e.g. "https://host").
# Blank entries and surrounding whitespace are dropped, and there is no "*"
# fallback: Django >= 4.0 rejects CSRF_TRUSTED_ORIGINS values without a scheme,
# so an unset variable now yields an empty list instead of an invalid entry.
CSRF_TRUSTED_ORIGINS = [
    origin.strip()
    for origin in os.environ.get('DJANGO_ALLOWED_ORIGINS', "").split(",")
    if origin.strip()
]
#CSRF_TRUSTED_ORIGINS = ["https://fetcher.matitos.org"]
#CSRF_ALLOWED_ORIGINS = ["https://fetcher.matitos.org"]
#CORS_ORIGINS_WHITELIST = ["https://fetcher.matitos.org"]
# Application definition
@@ -55,6 +51,7 @@ MIDDLEWARE = [
'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware',
'fetcher.middleware.login_required.LoginRequiredMiddleware',
'fetcher.middleware.favicon.FaviconMiddleware',
]
STATICFILES_STORAGE = 'whitenoise.storage.CompressedManifestStaticFilesStorage'

View File

@@ -0,0 +1,23 @@
from django.utils.deprecation import MiddlewareMixin
'''
class FaviconMiddleware(MiddlewareMixin):
def process_response(self, request, response):
if 'text/html' in response.get('Content-Type', '') and b'</head>' in response.content:
icon_link = b'<link rel="icon" type="image/png" href="/static/img/mate-icon.png">\n'
response.content = response.content.replace(b'</head>', icon_link + b'</head>')
return response
'''
class FaviconMiddleware(MiddlewareMixin):
    """Inject an inline SVG emoji favicon into HTML responses.

    The ``<link rel='icon'>`` tag is inserted immediately before the first
    ``</head>`` of any non-streaming response whose ``Content-Type`` contains
    ``text/html``. All other responses are returned unchanged.
    """

    # Built once at import time instead of on every request.
    # (UTF-8 encoded 🧉 = \xf0\x9f\xa7\x89 in bytes)
    ICON_LINK = (
        b"<link rel='icon' href=\"data:image/svg+xml,"
        b"<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 120 120'>"
        b"<text y='96' font-size='96'>\xf0\x9f\xa7\x89</text></svg>\">"
        b"\n"
    )

    def process_response(self, request, response):
        """Return ``response``, with the favicon link added when it is an HTML page.

        Args:
            request: The current request (unused).
            response: The response produced by the view or later middleware.

        Returns:
            The same response object, possibly with modified content.
        """
        # Streaming responses expose no ``.content`` attribute; reading it
        # would raise AttributeError, so they are passed through untouched.
        if getattr(response, 'streaming', False):
            return response
        if 'text/html' not in response.get('Content-Type', ''):
            return response
        if b'</head>' not in response.content:
            return response

        # Only the first </head> is the real document head; later occurrences
        # (e.g. inside inline scripts or embedded markup) must not be rewritten.
        response.content = response.content.replace(
            b'</head>', self.ICON_LINK + b'</head>', 1
        )
        # Keep an already-computed Content-Length consistent with the new body.
        if response.has_header('Content-Length'):
            response['Content-Length'] = str(len(response.content))
        return response

View File

@@ -3,12 +3,10 @@ from django.conf import settings
from django.urls import reverse
EXEMPT_URLS = [
# reverse('login'), # or the name of your login view
reverse('admin:login'),
reverse('admin:index'),
# reverse('logout'), # optional
'/admin/', # allow full access to admin
settings.STATIC_URL, # allow static files
'/admin/', # Allow full access to admin
settings.STATIC_URL, # Allow static files
# path('scheduler/', include('scheduler.urls')),
]

View File

@@ -295,7 +295,6 @@ def filtered_urls(request):
####################################################################################################
def content_generation(request):
# https://fetcher.matitos.org/urls/?per_page=100&days=1&valid_content=True&min_sources=1&search=13&status=all&language=all&source=all
'''
# Get list of URLs ID
selected_urls = request.GET.getlist('urls', [])

View File

@@ -21,8 +21,17 @@ def link_list(request):
[ os.path.join(app_url, "admin"), os.path.join(app_url, "urls") ] + \
[ os.path.join(app_url, "logs", log_type) for log_type in ["database", "debug", "info", "warning"] ] + \
[ os.path.join(app_url, "task", l) for l in links_fetch + links_process ]
# Json
return JsonResponse({"links": list_links })
# Links tuple
links = [(l, l) for l in list_links]
# HTML
html = "<html><head><title>Links</title></head><body><h1>Links</h1><ul>"
for text, url in links:
html += f'<li><a href="{url}" target="_blank">{text}</a></li>'
html += "</ul></body></html>"
return HttpResponse(html)
####################################################################################################
def logs(request, log_type):

View File

@@ -9,7 +9,7 @@
"queue": "low",
"repeat": null,
"at_front": false,
"timeout": null,
"timeout": 1800,
"result_ttl": 86400,
"cron_string": null,
"scheduled_time": "2025-01-01T00:00:00+00:00",
@@ -30,7 +30,7 @@
"queue": "low",
"repeat": null,
"at_front": false,
"timeout": null,
"timeout": 1800,
"result_ttl": 86400,
"cron_string": null,
"scheduled_time": "2025-01-01T00:00:00+00:00",
@@ -51,7 +51,7 @@
"queue": "default",
"repeat": null,
"at_front": false,
"timeout": null,
"timeout": 1800,
"result_ttl": 86400,
"cron_string": null,
"scheduled_time": "2025-01-01T00:00:00+00:00",
@@ -72,7 +72,7 @@
"queue": "default",
"repeat": null,
"at_front": false,
"timeout": 3600,
"timeout": 7200,
"result_ttl": 86400,
"cron_string": null,
"scheduled_time": "2025-01-01T00:00:00+00:00",
@@ -93,7 +93,7 @@
"queue": "default",
"repeat": null,
"at_front": false,
"timeout": null,
"timeout": 1800,
"result_ttl": 86400,
"cron_string": null,
"scheduled_time": "2025-01-01T00:00:00+00:00",
@@ -114,7 +114,7 @@
"queue": "default",
"repeat": null,
"at_front": false,
"timeout": null,
"timeout": 3600,
"result_ttl": 86400,
"cron_string": null,
"scheduled_time": "2025-01-01T00:00:00+00:00",
@@ -156,7 +156,7 @@
"queue": "default",
"repeat": null,
"at_front": false,
"timeout": null,
"timeout": 1800,
"result_ttl": 86400,
"cron_string": null,
"scheduled_time": "2025-01-01T00:00:00+00:00",
@@ -177,7 +177,7 @@
"queue": "default",
"repeat": null,
"at_front": false,
"timeout": 3600,
"timeout": 7200,
"result_ttl": 86400,
"cron_string": null,
"scheduled_time": "2025-01-01T00:00:00+00:00",

View File

@@ -1,7 +1,5 @@
version: '3.9'
# docker compose -f docker-compose-dev.yml down -v; docker compose -f docker-compose-dev.yml up -d --build; docker logs fetcher_app_urls -f
services:
fetcher_app_selenium:
@@ -9,18 +7,13 @@ services:
build:
context: ./app_selenium
args:
# arm64
#- ARCH_G=linux-aarch64
#- ARCH_F=linux-aarch64
# amd64
- ARCH_G=linux64
- ARCH_F=linux-x86_64
- ARCH=${ARCH} # arm64, amd64
container_name: fetcher_app_selenium
restart: unless-stopped
shm_size: 512mb
environment:
- SELENIUM_SLEEP_PER_PAGE=${SELENIUM_SLEEP_PER_PAGE:-4}
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY:-/opt/logs}
- SELENIUM_SLEEP_PER_PAGE=${SELENIUM_SLEEP_PER_PAGE}
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY}
ports:
- 80:80
dns:
@@ -40,35 +33,35 @@ services:
restart: unless-stopped
environment:
# Initialization
- INITIALIZE_DB=${INITIALIZE_DB:-true} # Related to DB persistence
- DJANGO_SUPERUSER_USERNAME=${DJANGO_SUPERUSER_USERNAME:-matitos}
- DJANGO_SUPERUSER_PASSWORD=${DJANGO_SUPERUSER_PASSWORD:-matitos}
- DJANGO_SUPERUSER_EMAIL=${DJANGO_SUPERUSER_EMAIL:-matitos@matitos.org}
- INITIALIZE_DB=${INITIALIZE_DB} # Related to DB persistence
- DJANGO_SUPERUSER_USERNAME=${DJANGO_SUPERUSER_USERNAME}
- DJANGO_SUPERUSER_PASSWORD=${DJANGO_SUPERUSER_PASSWORD}
- DJANGO_SUPERUSER_EMAIL=${DJANGO_SUPERUSER_EMAIL}
# Django
- DJANGO_ALLOWED_HOSTS=${DJANGO_ALLOWED_HOSTS:-*} # host1,host2
- DJANGO_ALLOWED_ORIGINS=${ALLOWED_ORIGINS:-https://fetcher.matitos.org} # Reverse proxy
- DJANGO_SECRET_KEY=${DJANGO_SECRET_KEY:-abc123456789qwerty}
- DJANGO_DEBUG=${DJANGO_DEBUG:-True}
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY:-/opt/logs}
- DJANGO_ALLOWED_HOSTS=${DJANGO_ALLOWED_HOSTS} # host1,host2
- DJANGO_ALLOWED_ORIGINS=${DJANGO_ALLOWED_ORIGINS} # Reverse proxy
- DJANGO_SECRET_KEY=${DJANGO_SECRET_KEY}
- DJANGO_DEBUG=${DJANGO_DEBUG}
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY}
# Database
- DB_NAME=${DB_NAME:-matitos}
- DB_USER=${DB_USER:-supermatitos}
- DB_PASSWORD=${DB_PASSWORD:-supermatitos}
- DB_HOST=${DB_HOST:-fetcher_db}
- DB_PORT=${DB_PORT:-5432}
- REDIS_HOST=${REDIS_HOST:-fetcher_redis}
- REDIS_PORT=${REDIS_PORT:-6379}
- DB_NAME=${DB_NAME}
- DB_USER=${DB_USER}
- DB_PASSWORD=${DB_PASSWORD}
- DB_HOST=${DB_HOST}
- DB_PORT=${DB_PORT}
- REDIS_HOST=${REDIS_HOST}
- REDIS_PORT=${REDIS_PORT}
# Job timeout: 30 min
- JOB_DEFAULT_TIMEOUT=${JOB_DEFAULT_TIMEOUT:-1800}
- JOB_DEFAULT_TIMEOUT=${JOB_DEFAULT_TIMEOUT}
# Fetcher
- FETCHER_GNEWS_DECODE_SLEEP=${FETCHER_GNEWS_DECODE_SLEEP-1.5}
- FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP=${FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP:-5}
- FETCHER_BETWEEN_SEARCHES_SLEEP=${FETCHER_BETWEEN_SEARCHES_SLEEP:-1}
- FETCHER_URL_HOST_SLEEP=${FETCHER_URL_HOST_SLEEP:-1.5}
- FETCHER_GNEWS_DECODE_SLEEP=${FETCHER_GNEWS_DECODE_SLEEP}
- FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP=${FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP}
- FETCHER_BETWEEN_SEARCHES_SLEEP=${FETCHER_BETWEEN_SEARCHES_SLEEP}
- FETCHER_URL_HOST_SLEEP=${FETCHER_URL_HOST_SLEEP}
- FETCHER_LANGUAGE_DETECTION_MIN_CHAR=${FETCHER_LANGUAGE_DETECTION_MIN_CHAR} # Taken from .env like the other fetcher settings
# Selenium
- SELENIUM_ENDPOINT=${SELENIUM_ENDPOINT:-http://fetcher_app_selenium:80}
- ENDPOINT_OLLAMA=${ENDPOINT_OLLAMA:-https://ollamamodel.matitos.org}
- SELENIUM_ENDPOINT=${SELENIUM_ENDPOINT}
- ENDPOINT_OLLAMA=${ENDPOINT_OLLAMA}
########################
volumes: # Development mode
- ./app_urls:/opt/app
@@ -88,9 +81,10 @@ services:
memory: 4G
#labels: # Reverse proxy sample
# - "traefik.enable=true"
# - "traefik.http.routers.fetcher.rule=Host(`fetcher.matitos.org`)"
# - "traefik.http.routers.fetcher.rule=Host(`urls.yourdomain.com`)"
# - "traefik.http.routers.fetcher.entrypoints=websecure"
# - "traefik.http.routers.fetcher.tls.certresolver=myresolvercd"
# - "traefik.http.services.fetcher.loadbalancer.server.port=8000"
#networks:
# - default # This network
# - docker_default # Reverse proxy network
@@ -102,12 +96,12 @@ services:
# Set shared memory limit when using docker-compose
shm_size: 128mb
environment:
POSTGRES_DB: ${DB_NAME:-matitos}
POSTGRES_PASSWORD: ${DB_PASSWORD:-supermatitos}
POSTGRES_USER: ${DB_USER:-supermatitos}
POSTGRES_DB: ${DB_NAME}
POSTGRES_PASSWORD: ${DB_PASSWORD}
POSTGRES_USER: ${DB_USER}
POSTGRES_INITDB_ARGS: '--data-checksums'
#volumes: # Persistent DB?
# - ${PATH_DB_DATA:-.}/postgres:/var/lib/postgresql/data
# - ${PATH_DB_DATA}/postgres:/var/lib/postgresql/data
ports:
- 5432 #:5432

View File

@@ -7,18 +7,13 @@ services:
build:
context: ./app_selenium
args:
# arm64
- ARCH_G=linux-aarch64
- ARCH_F=linux-aarch64
# amd64
#- ARCH_G=linux64
#- ARCH_F=linux-x86_64
- ARCH=${ARCH} # arm64, amd64
container_name: fetcher_app_selenium
restart: unless-stopped
shm_size: 512mb
environment:
- SELENIUM_SLEEP_PER_PAGE=${SELENIUM_SLEEP_PER_PAGE:-4}
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY:-/opt/logs}
- SELENIUM_SLEEP_PER_PAGE=${SELENIUM_SLEEP_PER_PAGE}
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY}
ports:
- 80
dns:
@@ -38,35 +33,35 @@ services:
restart: unless-stopped
environment:
# Initialization
- INITIALIZE_DB=${INITIALIZE_DB:-true} # Related to DB persistence
- DJANGO_SUPERUSER_USERNAME=${DJANGO_SUPERUSER_USERNAME:-matitos}
- DJANGO_SUPERUSER_PASSWORD=${DJANGO_SUPERUSER_PASSWORD:-matitos}
- DJANGO_SUPERUSER_EMAIL=${DJANGO_SUPERUSER_EMAIL:-matitos@matitos.org}
- INITIALIZE_DB=${INITIALIZE_DB} # Related to DB persistence
- DJANGO_SUPERUSER_USERNAME=${DJANGO_SUPERUSER_USERNAME}
- DJANGO_SUPERUSER_PASSWORD=${DJANGO_SUPERUSER_PASSWORD}
- DJANGO_SUPERUSER_EMAIL=${DJANGO_SUPERUSER_EMAIL}
# Django
- DJANGO_ALLOWED_HOSTS=${DJANGO_ALLOWED_HOSTS:-*} # host1,host2
- DJANGO_ALLOWED_ORIGINS=${ALLOWED_ORIGINS:-https://fetcher.matitos.org} # Reverse proxy
- DJANGO_SECRET_KEY=${DJANGO_SECRET_KEY:-abc123456789qwerty}
- DJANGO_DEBUG=${DJANGO_DEBUG:-False}
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY:-/opt/logs}
- DJANGO_ALLOWED_HOSTS=${DJANGO_ALLOWED_HOSTS} # host1,host2
- DJANGO_ALLOWED_ORIGINS=${DJANGO_ALLOWED_ORIGINS} # Reverse proxy
- DJANGO_SECRET_KEY=${DJANGO_SECRET_KEY}
- DJANGO_DEBUG=${DJANGO_DEBUG}
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY}
# Database
- DB_NAME=${DB_NAME:-matitos}
- DB_USER=${DB_USER:-supermatitos}
- DB_PASSWORD=${DB_PASSWORD:-supermatitos}
- DB_HOST=${DB_HOST:-fetcher_db}
- DB_PORT=${DB_PORT:-5432}
- REDIS_HOST=${REDIS_HOST:-fetcher_redis}
- REDIS_PORT=${REDIS_PORT:-6379}
- DB_NAME=${DB_NAME}
- DB_USER=${DB_USER}
- DB_PASSWORD=${DB_PASSWORD}
- DB_HOST=${DB_HOST}
- DB_PORT=${DB_PORT}
- REDIS_HOST=${REDIS_HOST}
- REDIS_PORT=${REDIS_PORT}
# Job timeout: 30 min
- JOB_DEFAULT_TIMEOUT=${JOB_DEFAULT_TIMEOUT:-1800}
- JOB_DEFAULT_TIMEOUT=${JOB_DEFAULT_TIMEOUT}
# Fetcher
- FETCHER_GNEWS_DECODE_SLEEP=${FETCHER_GNEWS_DECODE_SLEEP-1.5}
- FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP=${FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP:-5}
- FETCHER_BETWEEN_SEARCHES_SLEEP=${FETCHER_BETWEEN_SEARCHES_SLEEP:-1}
- FETCHER_URL_HOST_SLEEP=${FETCHER_URL_HOST_SLEEP:-1.5}
- FETCHER_GNEWS_DECODE_SLEEP=${FETCHER_GNEWS_DECODE_SLEEP}
- FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP=${FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP}
- FETCHER_BETWEEN_SEARCHES_SLEEP=${FETCHER_BETWEEN_SEARCHES_SLEEP}
- FETCHER_URL_HOST_SLEEP=${FETCHER_URL_HOST_SLEEP}
- FETCHER_LANGUAGE_DETECTION_MIN_CHAR=${FETCHER_LANGUAGE_DETECTION_MIN_CHAR} # Taken from .env like the other fetcher settings
# Selenium
- SELENIUM_ENDPOINT=${SELENIUM_ENDPOINT:-http://fetcher_app_selenium:80}
- ENDPOINT_OLLAMA=${ENDPOINT_OLLAMA:-https://ollamamodel.matitos.org}
- SELENIUM_ENDPOINT=${SELENIUM_ENDPOINT}
- ENDPOINT_OLLAMA=${ENDPOINT_OLLAMA}
########################
#volumes: # Development mode
# - ./app_urls:/opt/app
@@ -86,7 +81,7 @@ services:
memory: 4G
labels: # Reverse proxy sample
- "traefik.enable=true"
- "traefik.http.routers.fetcher.rule=Host(`fetcher.matitos.org`)"
- "traefik.http.routers.fetcher.rule=Host(`${REVERSE_PROXY_URL}`)"
- "traefik.http.routers.fetcher.entrypoints=websecure"
- "traefik.http.routers.fetcher.tls.certresolver=myresolvercd"
- "traefik.http.services.fetcher.loadbalancer.server.port=8000"
@@ -101,12 +96,12 @@ services:
# Set shared memory limit when using docker-compose
shm_size: 128mb
environment:
POSTGRES_DB: ${DB_NAME:-matitos}
POSTGRES_PASSWORD: ${DB_PASSWORD:-supermatitos}
POSTGRES_USER: ${DB_USER:-supermatitos}
POSTGRES_DB: ${DB_NAME}
POSTGRES_PASSWORD: ${DB_PASSWORD}
POSTGRES_USER: ${DB_USER}
POSTGRES_INITDB_ARGS: '--data-checksums'
volumes: # Persistent DB?
- ${PATH_DB_DATA:-.}/postgres:/var/lib/postgresql/data
- ${PATH_DB_DATA}/postgres:/var/lib/postgresql/data
ports:
- 5432 #:5432