docker env vars, selenium docker simplified, favicon, settings clean
This commit is contained in:
48
.env
48
.env
@@ -1 +1,47 @@
|
|||||||
# TODO...
|
# Initialization
|
||||||
|
INITIALIZE_DB=true
|
||||||
|
DJANGO_SUPERUSER_USERNAME=matitos
|
||||||
|
DJANGO_SUPERUSER_PASSWORD=matitos
|
||||||
|
DJANGO_SUPERUSER_EMAIL=matitos@matitos.org
|
||||||
|
|
||||||
|
# Reverse proxy
|
||||||
|
REVERSE_PROXY_URL=fetcher.matitos.org
|
||||||
|
|
||||||
|
# Django
|
||||||
|
DJANGO_ALLOWED_ORIGINS=https://fetcher.matitos.org # Reverse proxy
|
||||||
|
DJANGO_ALLOWED_HOSTS=* # host1,host2
|
||||||
|
DJANGO_SECRET_KEY=EtKpy7t84GvU4gBwX9z3xKPBXMS75IAV0dkzN7dXVUsMSqy6a5rjY6WNCw3CcRH5
|
||||||
|
# DJANGO_DEBUG=False
|
||||||
|
DJANGO_DEBUG=True
|
||||||
|
PATH_LOGS_DIRECTORY=/opt/logs
|
||||||
|
|
||||||
|
# Database
|
||||||
|
DB_NAME=matitos
|
||||||
|
DB_PASSWORD=supermatitos
|
||||||
|
DB_USER=supermatitos
|
||||||
|
PATH_DB_DATA=.
|
||||||
|
|
||||||
|
# Database: Django
|
||||||
|
DB_HOST=fetcher_db
|
||||||
|
DB_PORT=5432
|
||||||
|
REDIS_HOST=fetcher_redis
|
||||||
|
REDIS_PORT=6379
|
||||||
|
|
||||||
|
# Job timeout: 30 min
|
||||||
|
JOB_DEFAULT_TIMEOUT=1800
|
||||||
|
|
||||||
|
# Fetcher
|
||||||
|
FETCHER_GNEWS_DECODE_SLEEP=1.5
|
||||||
|
FETCHER_URL_HOST_SLEEP=1.5
|
||||||
|
FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP=5
|
||||||
|
FETCHER_BETWEEN_SEARCHES_SLEEP=1
|
||||||
|
FETCHER_LANGUAGE_DETECTION_MIN_CHAR=100
|
||||||
|
|
||||||
|
# Selenium
|
||||||
|
SELENIUM_ENDPOINT=http://fetcher_app_selenium:80
|
||||||
|
ENDPOINT_OLLAMA=https://ollamamodel.matitos.org
|
||||||
|
|
||||||
|
# APP: Selenium
|
||||||
|
ARCH=amd64 # arm64, amd64
|
||||||
|
SELENIUM_SLEEP_PER_PAGE=4
|
||||||
|
PATH_LOGS_DIRECTORY=/opt/logs
|
||||||
@@ -46,3 +46,12 @@
|
|||||||
- Related to child abuse?
|
- Related to child abuse?
|
||||||
- ...
|
- ...
|
||||||
- Merge similar articles?
|
- Merge similar articles?
|
||||||
|
|
||||||
|
# Deploy
|
||||||
|
|
||||||
|
* Dev mode
|
||||||
|
```
|
||||||
|
docker compose -f docker-compose-dev.yml down -v
|
||||||
|
docker compose -f docker-compose-dev.yml build --progress=plain
|
||||||
|
docker compose -f docker-compose-dev.yml up
|
||||||
|
```
|
||||||
|
|||||||
@@ -1,18 +1,22 @@
|
|||||||
|
|
||||||
FROM python:3.12
|
FROM python:3.12
|
||||||
|
|
||||||
# Architecture: amd64
|
# Architecture: amd64 or arm64
|
||||||
#ARG ARCH_G=linux64
|
#ARCH=arm64
|
||||||
#ARG ARCH_F=linux-x86_64
|
#ARCH=amd64
|
||||||
# Architecture: arm64
|
ARG ARCH
|
||||||
ARG ARCH_G=linux-aarch64
|
|
||||||
ARG ARCH_F=linux-aarch64
|
|
||||||
|
|
||||||
ARG firefox_ver=137.0
|
ARG firefox_ver=137.0
|
||||||
ARG geckodriver_ver=0.36.0
|
ARG geckodriver_ver=0.36.0
|
||||||
|
|
||||||
|
RUN echo "Architecture build: $ARCH"
|
||||||
|
|
||||||
RUN apt-get update \
|
RUN if [ "${ARCH}" = "amd64" ] ; then \
|
||||||
|
ARCH_G="linux64"; ARCH_F="linux-x86_64"; \
|
||||||
|
else \
|
||||||
|
ARCH_G="linux-aarch64"; ARCH_F="linux-aarch64"; \
|
||||||
|
fi \
|
||||||
|
&& apt-get update \
|
||||||
&& apt-get upgrade -y \
|
&& apt-get upgrade -y \
|
||||||
&& apt-get install -y --no-install-recommends --no-install-suggests \
|
&& apt-get install -y --no-install-recommends --no-install-suggests \
|
||||||
ca-certificates \
|
ca-certificates \
|
||||||
|
|||||||
@@ -75,7 +75,7 @@ class Meta:
|
|||||||
|
|
||||||
* Deploy
|
* Deploy
|
||||||
```
|
```
|
||||||
# Check environments variables on docker-compose.yml
|
# Check the environment variables in the .env file
|
||||||
|
|
||||||
# Remove previous instances
|
# Remove previous instances
|
||||||
docker compose down -v
|
docker compose down -v
|
||||||
|
|||||||
@@ -20,17 +20,13 @@ BASE_DIR = Path(__file__).resolve().parent.parent
|
|||||||
# Quick-start development settings - unsuitable for production
|
# Quick-start development settings - unsuitable for production
|
||||||
|
|
||||||
# SECURITY WARNING: keep the secret key used in production secret!
|
# SECURITY WARNING: keep the secret key used in production secret!
|
||||||
SECRET_KEY = os.getenv("DJANGO_SECRET_KEY", 'django-insecure-54mqLbW5NlO8OlVDsT3fcbg3Vf6C8Fgcoj8H0hXv3Pr8bpgqvOuiaeqvGn34sGwt')
|
SECRET_KEY = os.getenv("DJANGO_SECRET_KEY", 'django-insecure-EtKpy7t84GvU4gBwX9z3xKPBXMS75IAV0dkzN7dXVUsMSqy6a5rjY6WNCw3CcRH5')
|
||||||
|
|
||||||
# SECURITY WARNING: don't run with debug turned on in production!
|
# SECURITY WARNING: don't run with debug turned on in production!
|
||||||
DEBUG = (os.environ.get('DJANGO_DEBUG') == "True")
|
DEBUG = (os.environ.get('DJANGO_DEBUG') == "True")
|
||||||
|
|
||||||
ALLOWED_HOSTS = os.environ.get('DJANGO_ALLOWED_HOSTS', "*").split(",")
|
ALLOWED_HOSTS = os.environ.get('DJANGO_ALLOWED_HOSTS', "*").split(",")
|
||||||
|
|
||||||
CSRF_TRUSTED_ORIGINS = os.environ.get('DJANGO_ALLOWED_ORIGINS', "*").split(",")
|
CSRF_TRUSTED_ORIGINS = os.environ.get('DJANGO_ALLOWED_ORIGINS', "*").split(",")
|
||||||
#CSRF_TRUSTED_ORIGINS = ["https://fetcher.matitos.org"]
|
|
||||||
#CSRF_ALLOWED_ORIGINS = ["https://fetcher.matitos.org"]
|
|
||||||
#CORS_ORIGINS_WHITELIST = ["https://fetcher.matitos.org"]
|
|
||||||
|
|
||||||
# Application definition
|
# Application definition
|
||||||
|
|
||||||
@@ -55,6 +51,7 @@ MIDDLEWARE = [
|
|||||||
'django.contrib.messages.middleware.MessageMiddleware',
|
'django.contrib.messages.middleware.MessageMiddleware',
|
||||||
'django.middleware.clickjacking.XFrameOptionsMiddleware',
|
'django.middleware.clickjacking.XFrameOptionsMiddleware',
|
||||||
'fetcher.middleware.login_required.LoginRequiredMiddleware',
|
'fetcher.middleware.login_required.LoginRequiredMiddleware',
|
||||||
|
'fetcher.middleware.favicon.FaviconMiddleware',
|
||||||
]
|
]
|
||||||
|
|
||||||
STATICFILES_STORAGE = 'whitenoise.storage.CompressedManifestStaticFilesStorage'
|
STATICFILES_STORAGE = 'whitenoise.storage.CompressedManifestStaticFilesStorage'
|
||||||
|
|||||||
23
app_urls/fetcher/middleware/favicon.py
Normal file
23
app_urls/fetcher/middleware/favicon.py
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
from django.utils.deprecation import MiddlewareMixin
|
||||||
|
|
||||||
|
'''
|
||||||
|
class FaviconMiddleware(MiddlewareMixin):
|
||||||
|
def process_response(self, request, response):
|
||||||
|
if 'text/html' in response.get('Content-Type', '') and b'</head>' in response.content:
|
||||||
|
icon_link = b'<link rel="icon" type="image/png" href="/static/img/mate-icon.png">\n'
|
||||||
|
response.content = response.content.replace(b'</head>', icon_link + b'</head>')
|
||||||
|
return response
|
||||||
|
'''
|
||||||
|
|
||||||
|
class FaviconMiddleware(MiddlewareMixin):
    """Inject an inline emoji favicon into outgoing HTML pages.

    For every buffered (non-streaming) response whose ``Content-Type``
    contains ``text/html`` and whose body contains ``</head>``, a
    ``<link rel='icon'>`` tag carrying an inline SVG data URI is inserted
    immediately before the first ``</head>``. All other responses are
    returned untouched.
    """

    # Inline SVG favicon as a data URI. The bytes \xf0\x9f\xa7\x89 are the
    # UTF-8 encoding of the mate emoji (U+1F9C9), so the page is expected to
    # be served as UTF-8 for the glyph to render.
    _ICON_LINK = (
        b"<link rel='icon' href=\"data:image/svg+xml,"
        b"<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 120 120'>"
        b"<text y='96' font-size='96'>\xf0\x9f\xa7\x89</text></svg>\">"
        b"\n"
    )
    _HEAD_CLOSE = b"</head>"

    def process_response(self, request, response):
        """Return ``response``, with the favicon link added when it is an HTML page.

        Args:
            request: The current request (unused).
            response: The response produced by the view or later middleware.

        Returns:
            The same response object, possibly with a modified body.
        """
        # Streaming responses have no ``.content``; reading it raises
        # AttributeError, so leave them alone.
        if getattr(response, "streaming", False):
            return response

        if "text/html" not in response.get("Content-Type", ""):
            return response

        body = response.content
        if self._HEAD_CLOSE not in body:
            return response

        # Only touch the first </head>: later occurrences can only be page
        # content (e.g. inside a script string), not the real head terminator.
        response.content = body.replace(
            self._HEAD_CLOSE, self._ICON_LINK + self._HEAD_CLOSE, 1
        )

        # Keep an already-set Content-Length in sync with the enlarged body.
        if response.has_header("Content-Length"):
            response["Content-Length"] = str(len(response.content))

        return response
|
||||||
@@ -3,12 +3,10 @@ from django.conf import settings
|
|||||||
from django.urls import reverse
|
from django.urls import reverse
|
||||||
|
|
||||||
EXEMPT_URLS = [
|
EXEMPT_URLS = [
|
||||||
# reverse('login'), # or the name of your login view
|
|
||||||
reverse('admin:login'),
|
reverse('admin:login'),
|
||||||
reverse('admin:index'),
|
reverse('admin:index'),
|
||||||
# reverse('logout'), # optional
|
'/admin/', # Allow full access to admin
|
||||||
'/admin/', # allow full access to admin
|
settings.STATIC_URL, # Allow static files
|
||||||
settings.STATIC_URL, # allow static files
|
|
||||||
# path('scheduler/', include('scheduler.urls')),
|
# path('scheduler/', include('scheduler.urls')),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
@@ -295,7 +295,6 @@ def filtered_urls(request):
|
|||||||
####################################################################################################
|
####################################################################################################
|
||||||
|
|
||||||
def content_generation(request):
|
def content_generation(request):
|
||||||
# https://fetcher.matitos.org/urls/?per_page=100&days=1&valid_content=True&min_sources=1&search=13&status=all&language=all&source=all
|
|
||||||
'''
|
'''
|
||||||
# Get list of URLs ID
|
# Get list of URLs ID
|
||||||
selected_urls = request.GET.getlist('urls', [])
|
selected_urls = request.GET.getlist('urls', [])
|
||||||
|
|||||||
@@ -21,8 +21,17 @@ def link_list(request):
|
|||||||
[ os.path.join(app_url, "admin"), os.path.join(app_url, "urls") ] + \
|
[ os.path.join(app_url, "admin"), os.path.join(app_url, "urls") ] + \
|
||||||
[ os.path.join(app_url, "logs", log_type) for log_type in ["database", "debug", "info", "warning"] ] + \
|
[ os.path.join(app_url, "logs", log_type) for log_type in ["database", "debug", "info", "warning"] ] + \
|
||||||
[ os.path.join(app_url, "task", l) for l in links_fetch + links_process ]
|
[ os.path.join(app_url, "task", l) for l in links_fetch + links_process ]
|
||||||
# Json
|
|
||||||
return JsonResponse({"links": list_links })
|
# Links tuple
|
||||||
|
links = [(l, l) for l in list_links]
|
||||||
|
# HTML
|
||||||
|
html = "<html><head><title>Links</title></head><body><h1>Links</h1><ul>"
|
||||||
|
for text, url in links:
|
||||||
|
html += f'<li><a href="{url}" target="_blank">{text}</a></li>'
|
||||||
|
html += "</ul></body></html>"
|
||||||
|
|
||||||
|
return HttpResponse(html)
|
||||||
|
|
||||||
|
|
||||||
####################################################################################################
|
####################################################################################################
|
||||||
def logs(request, log_type):
|
def logs(request, log_type):
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"queue": "low",
|
"queue": "low",
|
||||||
"repeat": null,
|
"repeat": null,
|
||||||
"at_front": false,
|
"at_front": false,
|
||||||
"timeout": null,
|
"timeout": 1800,
|
||||||
"result_ttl": 86400,
|
"result_ttl": 86400,
|
||||||
"cron_string": null,
|
"cron_string": null,
|
||||||
"scheduled_time": "2025-01-01T00:00:00+00:00",
|
"scheduled_time": "2025-01-01T00:00:00+00:00",
|
||||||
@@ -30,7 +30,7 @@
|
|||||||
"queue": "low",
|
"queue": "low",
|
||||||
"repeat": null,
|
"repeat": null,
|
||||||
"at_front": false,
|
"at_front": false,
|
||||||
"timeout": null,
|
"timeout": 1800,
|
||||||
"result_ttl": 86400,
|
"result_ttl": 86400,
|
||||||
"cron_string": null,
|
"cron_string": null,
|
||||||
"scheduled_time": "2025-01-01T00:00:00+00:00",
|
"scheduled_time": "2025-01-01T00:00:00+00:00",
|
||||||
@@ -51,7 +51,7 @@
|
|||||||
"queue": "default",
|
"queue": "default",
|
||||||
"repeat": null,
|
"repeat": null,
|
||||||
"at_front": false,
|
"at_front": false,
|
||||||
"timeout": null,
|
"timeout": 1800,
|
||||||
"result_ttl": 86400,
|
"result_ttl": 86400,
|
||||||
"cron_string": null,
|
"cron_string": null,
|
||||||
"scheduled_time": "2025-01-01T00:00:00+00:00",
|
"scheduled_time": "2025-01-01T00:00:00+00:00",
|
||||||
@@ -72,7 +72,7 @@
|
|||||||
"queue": "default",
|
"queue": "default",
|
||||||
"repeat": null,
|
"repeat": null,
|
||||||
"at_front": false,
|
"at_front": false,
|
||||||
"timeout": 3600,
|
"timeout": 7200,
|
||||||
"result_ttl": 86400,
|
"result_ttl": 86400,
|
||||||
"cron_string": null,
|
"cron_string": null,
|
||||||
"scheduled_time": "2025-01-01T00:00:00+00:00",
|
"scheduled_time": "2025-01-01T00:00:00+00:00",
|
||||||
@@ -93,7 +93,7 @@
|
|||||||
"queue": "default",
|
"queue": "default",
|
||||||
"repeat": null,
|
"repeat": null,
|
||||||
"at_front": false,
|
"at_front": false,
|
||||||
"timeout": null,
|
"timeout": 1800,
|
||||||
"result_ttl": 86400,
|
"result_ttl": 86400,
|
||||||
"cron_string": null,
|
"cron_string": null,
|
||||||
"scheduled_time": "2025-01-01T00:00:00+00:00",
|
"scheduled_time": "2025-01-01T00:00:00+00:00",
|
||||||
@@ -114,7 +114,7 @@
|
|||||||
"queue": "default",
|
"queue": "default",
|
||||||
"repeat": null,
|
"repeat": null,
|
||||||
"at_front": false,
|
"at_front": false,
|
||||||
"timeout": null,
|
"timeout": 3600,
|
||||||
"result_ttl": 86400,
|
"result_ttl": 86400,
|
||||||
"cron_string": null,
|
"cron_string": null,
|
||||||
"scheduled_time": "2025-01-01T00:00:00+00:00",
|
"scheduled_time": "2025-01-01T00:00:00+00:00",
|
||||||
@@ -156,7 +156,7 @@
|
|||||||
"queue": "default",
|
"queue": "default",
|
||||||
"repeat": null,
|
"repeat": null,
|
||||||
"at_front": false,
|
"at_front": false,
|
||||||
"timeout": null,
|
"timeout": 1800,
|
||||||
"result_ttl": 86400,
|
"result_ttl": 86400,
|
||||||
"cron_string": null,
|
"cron_string": null,
|
||||||
"scheduled_time": "2025-01-01T00:00:00+00:00",
|
"scheduled_time": "2025-01-01T00:00:00+00:00",
|
||||||
@@ -177,7 +177,7 @@
|
|||||||
"queue": "default",
|
"queue": "default",
|
||||||
"repeat": null,
|
"repeat": null,
|
||||||
"at_front": false,
|
"at_front": false,
|
||||||
"timeout": 3600,
|
"timeout": 7200,
|
||||||
"result_ttl": 86400,
|
"result_ttl": 86400,
|
||||||
"cron_string": null,
|
"cron_string": null,
|
||||||
"scheduled_time": "2025-01-01T00:00:00+00:00",
|
"scheduled_time": "2025-01-01T00:00:00+00:00",
|
||||||
|
|||||||
@@ -1,7 +1,5 @@
|
|||||||
version: '3.9'
|
version: '3.9'
|
||||||
|
|
||||||
# docker compose -f docker-compose-dev.yml down -v; docker compose -f docker-compose-dev.yml up -d --build; docker logs fetcher_app_urls -f
|
|
||||||
|
|
||||||
services:
|
services:
|
||||||
|
|
||||||
fetcher_app_selenium:
|
fetcher_app_selenium:
|
||||||
@@ -9,18 +7,13 @@ services:
|
|||||||
build:
|
build:
|
||||||
context: ./app_selenium
|
context: ./app_selenium
|
||||||
args:
|
args:
|
||||||
# arm64
|
- ARCH=${ARCH} # arm64, amd64
|
||||||
#- ARCH_G=linux-aarch64
|
|
||||||
#- ARCH_F=linux-aarch64
|
|
||||||
# amd64
|
|
||||||
- ARCH_G=linux64
|
|
||||||
- ARCH_F=linux-x86_64
|
|
||||||
container_name: fetcher_app_selenium
|
container_name: fetcher_app_selenium
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
shm_size: 512mb
|
shm_size: 512mb
|
||||||
environment:
|
environment:
|
||||||
- SELENIUM_SLEEP_PER_PAGE=${SELENIUM_SLEEP_PER_PAGE:-4}
|
- SELENIUM_SLEEP_PER_PAGE=${SELENIUM_SLEEP_PER_PAGE}
|
||||||
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY:-/opt/logs}
|
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY}
|
||||||
ports:
|
ports:
|
||||||
- 80:80
|
- 80:80
|
||||||
dns:
|
dns:
|
||||||
@@ -40,35 +33,35 @@ services:
|
|||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
environment:
|
environment:
|
||||||
# Initialization
|
# Initialization
|
||||||
- INITIALIZE_DB=${INITIALIZE_DB:-true} # Related to DB persistence
|
- INITIALIZE_DB=${INITIALIZE_DB} # Related to DB persistence
|
||||||
- DJANGO_SUPERUSER_USERNAME=${DJANGO_SUPERUSER_USERNAME:-matitos}
|
- DJANGO_SUPERUSER_USERNAME=${DJANGO_SUPERUSER_USERNAME}
|
||||||
- DJANGO_SUPERUSER_PASSWORD=${DJANGO_SUPERUSER_PASSWORD:-matitos}
|
- DJANGO_SUPERUSER_PASSWORD=${DJANGO_SUPERUSER_PASSWORD}
|
||||||
- DJANGO_SUPERUSER_EMAIL=${DJANGO_SUPERUSER_EMAIL:-matitos@matitos.org}
|
- DJANGO_SUPERUSER_EMAIL=${DJANGO_SUPERUSER_EMAIL}
|
||||||
# Django
|
# Django
|
||||||
- DJANGO_ALLOWED_HOSTS=${DJANGO_ALLOWED_HOSTS:-*} # host1,host2
|
- DJANGO_ALLOWED_HOSTS=${DJANGO_ALLOWED_HOSTS} # host1,host2
|
||||||
- DJANGO_ALLOWED_ORIGINS=${ALLOWED_ORIGINS:-https://fetcher.matitos.org} # Reverse proxy
|
- DJANGO_ALLOWED_ORIGINS=${DJANGO_ALLOWED_ORIGINS} # Reverse proxy
|
||||||
- DJANGO_SECRET_KEY=${DJANGO_SECRET_KEY:-abc123456789qwerty}
|
- DJANGO_SECRET_KEY=${DJANGO_SECRET_KEY}
|
||||||
- DJANGO_DEBUG=${DJANGO_DEBUG:-True}
|
- DJANGO_DEBUG=${DJANGO_DEBUG}
|
||||||
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY:-/opt/logs}
|
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY}
|
||||||
# Database
|
# Database
|
||||||
- DB_NAME=${DB_NAME:-matitos}
|
- DB_NAME=${DB_NAME}
|
||||||
- DB_USER=${DB_USER:-supermatitos}
|
- DB_USER=${DB_USER}
|
||||||
- DB_PASSWORD=${DB_PASSWORD:-supermatitos}
|
- DB_PASSWORD=${DB_PASSWORD}
|
||||||
- DB_HOST=${DB_HOST:-fetcher_db}
|
- DB_HOST=${DB_HOST}
|
||||||
- DB_PORT=${DB_PORT:-5432}
|
- DB_PORT=${DB_PORT}
|
||||||
- REDIS_HOST=${REDIS_HOST:-fetcher_redis}
|
- REDIS_HOST=${REDIS_HOST}
|
||||||
- REDIS_PORT=${REDIS_PORT:-6379}
|
- REDIS_PORT=${REDIS_PORT}
|
||||||
# Job timeout: 30 min
|
# Job timeout: 30 min
|
||||||
- JOB_DEFAULT_TIMEOUT=${JOB_DEFAULT_TIMEOUT:-1800}
|
- JOB_DEFAULT_TIMEOUT=${JOB_DEFAULT_TIMEOUT}
|
||||||
# Fetcher
|
# Fetcher
|
||||||
- FETCHER_GNEWS_DECODE_SLEEP=${FETCHER_GNEWS_DECODE_SLEEP-1.5}
|
- FETCHER_GNEWS_DECODE_SLEEP=${FETCHER_GNEWS_DECODE_SLEEP}
|
||||||
- FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP=${FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP:-5}
|
- FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP=${FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP}
|
||||||
- FETCHER_BETWEEN_SEARCHES_SLEEP=${FETCHER_BETWEEN_SEARCHES_SLEEP:-1}
|
- FETCHER_BETWEEN_SEARCHES_SLEEP=${FETCHER_BETWEEN_SEARCHES_SLEEP}
|
||||||
- FETCHER_URL_HOST_SLEEP=${FETCHER_URL_HOST_SLEEP:-1.5}
|
- FETCHER_URL_HOST_SLEEP=${FETCHER_URL_HOST_SLEEP}
|
||||||
- FETCHER_LANGUAGE_DETECTION_MIN_CHAR=100
|
- FETCHER_LANGUAGE_DETECTION_MIN_CHAR=100
|
||||||
# Selenium
|
# Selenium
|
||||||
- SELENIUM_ENDPOINT=${SELENIUM_ENDPOINT:-http://fetcher_app_selenium:80}
|
- SELENIUM_ENDPOINT=${SELENIUM_ENDPOINT}
|
||||||
- ENDPOINT_OLLAMA=${ENDPOINT_OLLAMA:-https://ollamamodel.matitos.org}
|
- ENDPOINT_OLLAMA=${ENDPOINT_OLLAMA}
|
||||||
########################
|
########################
|
||||||
volumes: # Development mode
|
volumes: # Development mode
|
||||||
- ./app_urls:/opt/app
|
- ./app_urls:/opt/app
|
||||||
@@ -88,9 +81,10 @@ services:
|
|||||||
memory: 4G
|
memory: 4G
|
||||||
#labels: # Reverse proxy sample
|
#labels: # Reverse proxy sample
|
||||||
# - "traefik.enable=true"
|
# - "traefik.enable=true"
|
||||||
# - "traefik.http.routers.fetcher.rule=Host(`fetcher.matitos.org`)"
|
# - "traefik.http.routers.fetcher.rule=Host(`urls.yourdomain.com`)"
|
||||||
# - "traefik.http.routers.fetcher.entrypoints=websecure"
|
# - "traefik.http.routers.fetcher.entrypoints=websecure"
|
||||||
# - "traefik.http.routers.fetcher.tls.certresolver=myresolvercd"
|
# - "traefik.http.routers.fetcher.tls.certresolver=myresolvercd"
|
||||||
|
# - "traefik.http.services.fetcher.loadbalancer.server.port=8000"
|
||||||
#networks:
|
#networks:
|
||||||
# - default # This network
|
# - default # This network
|
||||||
# - docker_default # Reverse proxy network
|
# - docker_default # Reverse proxy network
|
||||||
@@ -102,12 +96,12 @@ services:
|
|||||||
# Set shared memory limit when using docker-compose
|
# Set shared memory limit when using docker-compose
|
||||||
shm_size: 128mb
|
shm_size: 128mb
|
||||||
environment:
|
environment:
|
||||||
POSTGRES_DB: ${DB_NAME:-matitos}
|
POSTGRES_DB: ${DB_NAME}
|
||||||
POSTGRES_PASSWORD: ${DB_PASSWORD:-supermatitos}
|
POSTGRES_PASSWORD: ${DB_PASSWORD}
|
||||||
POSTGRES_USER: ${DB_USER:-supermatitos}
|
POSTGRES_USER: ${DB_USER}
|
||||||
POSTGRES_INITDB_ARGS: '--data-checksums'
|
POSTGRES_INITDB_ARGS: '--data-checksums'
|
||||||
#volumes: # Persistent DB?
|
#volumes: # Persistent DB?
|
||||||
# - ${PATH_DB_DATA:-.}/postgres:/var/lib/postgresql/data
|
# - ${PATH_DB_DATA}/postgres:/var/lib/postgresql/data
|
||||||
ports:
|
ports:
|
||||||
- 5432 #:5432
|
- 5432 #:5432
|
||||||
|
|
||||||
|
|||||||
@@ -7,18 +7,13 @@ services:
|
|||||||
build:
|
build:
|
||||||
context: ./app_selenium
|
context: ./app_selenium
|
||||||
args:
|
args:
|
||||||
# arm64
|
- ARCH=${ARCH} # arm64, amd64
|
||||||
- ARCH_G=linux-aarch64
|
|
||||||
- ARCH_F=linux-aarch64
|
|
||||||
# amd64
|
|
||||||
#- ARCH_G=linux64
|
|
||||||
#- ARCH_F=linux-x86_64
|
|
||||||
container_name: fetcher_app_selenium
|
container_name: fetcher_app_selenium
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
shm_size: 512mb
|
shm_size: 512mb
|
||||||
environment:
|
environment:
|
||||||
- SELENIUM_SLEEP_PER_PAGE=${SELENIUM_SLEEP_PER_PAGE:-4}
|
- SELENIUM_SLEEP_PER_PAGE=${SELENIUM_SLEEP_PER_PAGE}
|
||||||
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY:-/opt/logs}
|
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY}
|
||||||
ports:
|
ports:
|
||||||
- 80
|
- 80
|
||||||
dns:
|
dns:
|
||||||
@@ -38,35 +33,35 @@ services:
|
|||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
environment:
|
environment:
|
||||||
# Initialization
|
# Initialization
|
||||||
- INITIALIZE_DB=${INITIALIZE_DB:-true} # Related to DB persistence
|
- INITIALIZE_DB=${INITIALIZE_DB} # Related to DB persistence
|
||||||
- DJANGO_SUPERUSER_USERNAME=${DJANGO_SUPERUSER_USERNAME:-matitos}
|
- DJANGO_SUPERUSER_USERNAME=${DJANGO_SUPERUSER_USERNAME}
|
||||||
- DJANGO_SUPERUSER_PASSWORD=${DJANGO_SUPERUSER_PASSWORD:-matitos}
|
- DJANGO_SUPERUSER_PASSWORD=${DJANGO_SUPERUSER_PASSWORD}
|
||||||
- DJANGO_SUPERUSER_EMAIL=${DJANGO_SUPERUSER_EMAIL:-matitos@matitos.org}
|
- DJANGO_SUPERUSER_EMAIL=${DJANGO_SUPERUSER_EMAIL}
|
||||||
# Django
|
# Django
|
||||||
- DJANGO_ALLOWED_HOSTS=${DJANGO_ALLOWED_HOSTS:-*} # host1,host2
|
- DJANGO_ALLOWED_HOSTS=${DJANGO_ALLOWED_HOSTS} # host1,host2
|
||||||
- DJANGO_ALLOWED_ORIGINS=${ALLOWED_ORIGINS:-https://fetcher.matitos.org} # Reverse proxy
|
- DJANGO_ALLOWED_ORIGINS=${DJANGO_ALLOWED_ORIGINS} # Reverse proxy
|
||||||
- DJANGO_SECRET_KEY=${DJANGO_SECRET_KEY:-abc123456789qwerty}
|
- DJANGO_SECRET_KEY=${DJANGO_SECRET_KEY}
|
||||||
- DJANGO_DEBUG=${DJANGO_DEBUG:-False}
|
- DJANGO_DEBUG=${DJANGO_DEBUG}
|
||||||
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY:-/opt/logs}
|
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY}
|
||||||
# Database
|
# Database
|
||||||
- DB_NAME=${DB_NAME:-matitos}
|
- DB_NAME=${DB_NAME}
|
||||||
- DB_USER=${DB_USER:-supermatitos}
|
- DB_USER=${DB_USER}
|
||||||
- DB_PASSWORD=${DB_PASSWORD:-supermatitos}
|
- DB_PASSWORD=${DB_PASSWORD}
|
||||||
- DB_HOST=${DB_HOST:-fetcher_db}
|
- DB_HOST=${DB_HOST}
|
||||||
- DB_PORT=${DB_PORT:-5432}
|
- DB_PORT=${DB_PORT}
|
||||||
- REDIS_HOST=${REDIS_HOST:-fetcher_redis}
|
- REDIS_HOST=${REDIS_HOST}
|
||||||
- REDIS_PORT=${REDIS_PORT:-6379}
|
- REDIS_PORT=${REDIS_PORT}
|
||||||
# Job timeout: 30 min
|
# Job timeout: 30 min
|
||||||
- JOB_DEFAULT_TIMEOUT=${JOB_DEFAULT_TIMEOUT:-1800}
|
- JOB_DEFAULT_TIMEOUT=${JOB_DEFAULT_TIMEOUT}
|
||||||
# Fetcher
|
# Fetcher
|
||||||
- FETCHER_GNEWS_DECODE_SLEEP=${FETCHER_GNEWS_DECODE_SLEEP-1.5}
|
- FETCHER_GNEWS_DECODE_SLEEP=${FETCHER_GNEWS_DECODE_SLEEP}
|
||||||
- FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP=${FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP:-5}
|
- FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP=${FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP}
|
||||||
- FETCHER_BETWEEN_SEARCHES_SLEEP=${FETCHER_BETWEEN_SEARCHES_SLEEP:-1}
|
- FETCHER_BETWEEN_SEARCHES_SLEEP=${FETCHER_BETWEEN_SEARCHES_SLEEP}
|
||||||
- FETCHER_URL_HOST_SLEEP=${FETCHER_URL_HOST_SLEEP:-1.5}
|
- FETCHER_URL_HOST_SLEEP=${FETCHER_URL_HOST_SLEEP}
|
||||||
- FETCHER_LANGUAGE_DETECTION_MIN_CHAR=100
|
- FETCHER_LANGUAGE_DETECTION_MIN_CHAR=100
|
||||||
# Selenium
|
# Selenium
|
||||||
- SELENIUM_ENDPOINT=${SELENIUM_ENDPOINT:-http://fetcher_app_selenium:80}
|
- SELENIUM_ENDPOINT=${SELENIUM_ENDPOINT}
|
||||||
- ENDPOINT_OLLAMA=${ENDPOINT_OLLAMA:-https://ollamamodel.matitos.org}
|
- ENDPOINT_OLLAMA=${ENDPOINT_OLLAMA}
|
||||||
########################
|
########################
|
||||||
#volumes: # Development mode
|
#volumes: # Development mode
|
||||||
# - ./app_urls:/opt/app
|
# - ./app_urls:/opt/app
|
||||||
@@ -86,7 +81,7 @@ services:
|
|||||||
memory: 4G
|
memory: 4G
|
||||||
labels: # Reverse proxy sample
|
labels: # Reverse proxy sample
|
||||||
- "traefik.enable=true"
|
- "traefik.enable=true"
|
||||||
- "traefik.http.routers.fetcher.rule=Host(`fetcher.matitos.org`)"
|
- "traefik.http.routers.fetcher.rule=Host(`${REVERSE_PROXY_URL}`)"
|
||||||
- "traefik.http.routers.fetcher.entrypoints=websecure"
|
- "traefik.http.routers.fetcher.entrypoints=websecure"
|
||||||
- "traefik.http.routers.fetcher.tls.certresolver=myresolvercd"
|
- "traefik.http.routers.fetcher.tls.certresolver=myresolvercd"
|
||||||
- "traefik.http.services.fetcher.loadbalancer.server.port=8000"
|
- "traefik.http.services.fetcher.loadbalancer.server.port=8000"
|
||||||
@@ -101,12 +96,12 @@ services:
|
|||||||
# Set shared memory limit when using docker-compose
|
# Set shared memory limit when using docker-compose
|
||||||
shm_size: 128mb
|
shm_size: 128mb
|
||||||
environment:
|
environment:
|
||||||
POSTGRES_DB: ${DB_NAME:-matitos}
|
POSTGRES_DB: ${DB_NAME}
|
||||||
POSTGRES_PASSWORD: ${DB_PASSWORD:-supermatitos}
|
POSTGRES_PASSWORD: ${DB_PASSWORD}
|
||||||
POSTGRES_USER: ${DB_USER:-supermatitos}
|
POSTGRES_USER: ${DB_USER}
|
||||||
POSTGRES_INITDB_ARGS: '--data-checksums'
|
POSTGRES_INITDB_ARGS: '--data-checksums'
|
||||||
volumes: # Persistent DB?
|
volumes: # Persistent DB?
|
||||||
- ${PATH_DB_DATA:-.}/postgres:/var/lib/postgresql/data
|
- ${PATH_DB_DATA}/postgres:/var/lib/postgresql/data
|
||||||
ports:
|
ports:
|
||||||
- 5432 #:5432
|
- 5432 #:5432
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user