Compare commits

1 commit

| Author | SHA1 | Date |
|---|---|---|
|  | 32d01a2cd6 |  |

38  .env
@@ -1,23 +1,31 @@
# Initialization
INITIALIZE_DB=true
DJANGO_SUPERUSER_USERNAME=matitos
DJANGO_SUPERUSER_PASSWORD=matitos
DJANGO_SUPERUSER_EMAIL=matitos@matitos.org
# Reverse proxy
TRAEFIK_MAIL=yourmail@protonmail.com
DUCKDNS_TOKEN=
DUCKDNS_SUBDOMAINS=

# Reverse proxy
REVERSE_PROXY_URL=sample.url.com
OLLAMA_WEBUI_REVERSE_PROXY_URL=ollama.steep.duckdns.org
OLLAMA_REVERSE_PROXY_URL=ollamamodel.steep.duckdns.org
REVERSE_PROXY_URL=fetcher.steep.duckdns.org
DJANGO_ALLOWED_ORIGINS=https://fetcher.steep.duckdns.org # Reverse proxy


# Initialization
INITIALIZE_DB=true
DJANGO_SUPERUSER_USERNAME=steep
DJANGO_SUPERUSER_PASSWORD=steep
DJANGO_SUPERUSER_EMAIL=steep@steepnews.org

# Django
DJANGO_ALLOWED_ORIGINS=https://sample.url.com # Reverse proxy
DJANGO_ALLOWED_HOSTS=* # host1,host2
DJANGO_SECRET_KEY=EtKpy7t84GvU4gBwX9z3xKPBXMS75IAV0dkzN7dXVUsMSqy6a5rjY6WNCw3CcRH5
DJANGO_SECRET_KEY=EtKpy7t84GvU4gBwX9z3xKPBXMS75IAV0dkqN7dXVUsMSqy6a5rjY6WNCw3CcRH5
DJANGO_DEBUG=True
PATH_LOGS_DIRECTORY=/opt/logs

# Database
DB_NAME=matitos
DB_PASSWORD=supermatitos
DB_USER=supermatitos
DB_NAME=steep
DB_PASSWORD=supersteep
DB_USER=supersteep
PATH_DB_DATA=.

# Database: Django
@@ -40,7 +48,7 @@ FETCHER_ERROR_URL_CACHE_TIME=172800

# Selenium
SELENIUM_ENDPOINT=http://fetcher_app_selenium:80
ENDPOINT_OLLAMA=https://ollamamodel.matitos.org
ENDPOINT_OLLAMA=http://ollama:11434

# APP: Selenium
ARCH=amd64 # arm64, amd64
@@ -52,6 +60,6 @@ DEPLOY_CPUS=2
DEPLOY_RAM=4G

# Ghost
GHOST_ADMIN_API_URL=https://news.matitos.org/ghost/api/admin/
GHOST_ADMIN_API_KEY=67fffe1b8a57a80001ecec5b:59f580020c196f92e05e208d288702082f8edad6366e2b2c8940b54e41cc355a
PEXELS_API_KEY=Y6clJkY32eihf34ukX4JsINYu9lzxh3xDdNq2HMAmGwXp0a0tt6vr6S9
GHOST_ADMIN_API_URL=
GHOST_ADMIN_API_KEY=
PEXELS_API_KEY=
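The duplicated keys above (two DJANGO_SECRET_KEY lines, two DB_NAME blocks) are the removed and added sides of the diff; the new values win. For reference, a minimal sketch of how a Django settings.py typically consumes the comma-separated formats hinted at by the comments (host1,host2); the actual settings module of app_urls is not shown in this diff:

    # Hypothetical settings.py fragment: how the .env values above could be parsed.
    import os

    # "*" or "host1,host2" -> ["*"] or ["host1", "host2"]
    ALLOWED_HOSTS = os.environ.get("DJANGO_ALLOWED_HOSTS", "*").split(",")

    # "https://fetcher.steep.duckdns.org" -> list form expected by CSRF_TRUSTED_ORIGINS
    CSRF_TRUSTED_ORIGINS = [
        origin.strip()
        for origin in os.environ.get("DJANGO_ALLOWED_ORIGINS", "").split(",")
        if origin.strip()
    ]

    DEBUG = os.environ.get("DJANGO_DEBUG", "False").lower() in ("1", "true", "yes")
    SECRET_KEY = os.environ["DJANGO_SECRET_KEY"]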
@@ -1,34 +1,65 @@
{
    "SEARCH": {
        "rss_feed": [
            "https://api.missingkids.org/missingkids/servlet/XmlServlet?act=rss&LanguageCountry=en_US&orgPrefix=NCMC",
            "https://feeds.feedburner.com/breitbart",
            "https://feeds.feedburner.com/zerohedge/feed",
            "https://moxie.foxnews.com/google-publisher/latest.xml",
            "https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id=15837362",
            "https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id=100727362"
        ],
        "url_host": [
            "missingkids.org/poster",
            "missingkids.org/new-poster",
            "breitbart.com",
            "zerohedge.com",
            "foxnews.com",
            "cnbc.com"
        ],
            "johnpilger.com",
            "lapenseeecologique.com",
            "partage-le.com",
            "reflets.info",
            "rezo.net",
            "consortiumnews.com",
            "disclose.ngo/fr",
            "energieetenvironnement.com",
            "global-climat.com",
            "slashdot.org",
            "lesamisdebartleby.wordpress.com",
            "lundi.am",
            "lvsl.fr",
            "moderndiplomacy.eu",
            "mrmondialisation.org",
            "ourfiniteworld.com",
            "southfront.org",
            "simplicius76.substack.com",
            "smoothiex12.blogspot.com",
            "theintercept.com",
            "wikileaks.org",
            "contretemps.eu",
            "indianpunchline.com",
            "investigaction.net/fr",
            "notechmagazine.com",
            "terrestres.org",
            "truthdig.com",
            "tass.com",
            "bastamag.net",
            "counterpunch.org",
            "energy-daily.com",
            "fakirpresse.info",
            "geopoliticalmonitor.com",
            "huffingtonpost.fr",
            "legrandsoir.info",
            "les-crises.fr",
            "liberation.fr",
            "maitre-eolas.fr",
            "marianne.net",
            "mediapart.fr",
            "metaefficient.com",
            "monde-diplomatique.fr",
            "paulcraigroberts.org",
            "politis.fr",
            "reporterre.net",
            "rue89.com",
            "theguardian.com/international",
            "treehugger.com",
            "unz.com",
            "voltairenet.org",
            "wsws.org"
        ],
        "keyword_search": [
            "child abuse"
            "society collapse"
        ]
    },
    "REGEX_PATTERN_STATUS_PRIORITY": [
        [".*(youtube|tiktok|twitter|reddit)\\.com\\/.*", "invalid", 50],
        [".*cnbc\\.com\\/(video|quotes)\\/.*", "invalid", 75],
        [".*foxnews\\.com\\/(video|category)\\/.*", "invalid", 75],
        [".*radio.foxnews\\.com\\/.*", "invalid", 75],
        [".*breitbart\\.com\\/(tag|author)\\/.*", "invalid", 75],
        [".*zerohedge\\.com\\/(user)\\/.*", "invalid", 75],
        [".*zerohedge\\.com\\/(economics|political|markets|)\\/.*", "valid", 50],
        [".*breitbart\\.com\\/(economy|entertainment|border|crime|clips)\\/.*", "valid", 50],
        [".*foxnews\\.com\\/(lifestyle|opinion|sports|world)\\/.*", "valid", 50]
        [".*(youtube|tiktok|twitter|reddit)\\.com\\/.*", "invalid", 50]
    ]
}
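Each entry in REGEX_PATTERN_STATUS_PRIORITY is a [pattern, status, priority] triple. A minimal sketch of how such rules can be applied, assuming the highest-priority matching rule determines a URL's status (the consuming code is not part of this diff):

    import re

    # Rules copied from the JSON above: [pattern, status, priority].
    RULES = [
        [r".*(youtube|tiktok|twitter|reddit)\.com\/.*", "invalid", 50],
        [r".*foxnews\.com\/(lifestyle|opinion|sports|world)\/.*", "valid", 50],
        [r".*foxnews\.com\/(video|category)\/.*", "invalid", 75],
    ]

    def classify_url(url, rules):
        """Return the status of the highest-priority matching rule (None if no match)."""
        best = None
        for pattern, status, priority in rules:
            if re.match(pattern, url) and (best is None or priority > best[0]):
                best = (priority, status)
        return best[1] if best else None

    print(classify_url("https://www.foxnews.com/video/12345", RULES))         # -> invalid
    print(classify_url("https://www.foxnews.com/opinion/some-piece", RULES))  # -> valid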
@@ -1,65 +0,0 @@
{
    "SEARCH": {
        "rss_feed": [
        ],
        "url_host": [
            "johnpilger.com",
            "lapenseeecologique.com",
            "partage-le.com",
            "reflets.info",
            "rezo.net",
            "consortiumnews.com",
            "disclose.ngo/fr",
            "energieetenvironnement.com",
            "global-climat.com",
            "slashdot.org",
            "lesamisdebartleby.wordpress.com",
            "lundi.am",
            "lvsl.fr",
            "moderndiplomacy.eu",
            "mrmondialisation.org",
            "ourfiniteworld.com",
            "southfront.org",
            "simplicius76.substack.com",
            "smoothiex12.blogspot.com",
            "theintercept.com",
            "wikileaks.org",
            "contretemps.eu",
            "indianpunchline.com",
            "investigaction.net/fr",
            "notechmagazine.com",
            "terrestres.org",
            "truthdig.com",
            "tass.com",
            "bastamag.net",
            "counterpunch.org",
            "energy-daily.com",
            "fakirpresse.info",
            "geopoliticalmonitor.com",
            "huffingtonpost.fr",
            "legrandsoir.info",
            "les-crises.fr",
            "liberation.fr",
            "maitre-eolas.fr",
            "marianne.net",
            "mediapart.fr",
            "metaefficient.com",
            "monde-diplomatique.fr",
            "paulcraigroberts.org",
            "politis.fr",
            "reporterre.net",
            "rue89.com",
            "theguardian.com/international",
            "treehugger.com",
            "unz.com",
            "voltairenet.org",
            "wsws.org"
        ],
        "keyword_search": [
            "society collapse"
        ]
    },
    "REGEX_PATTERN_STATUS_PRIORITY": [
        [".*(youtube|tiktok|twitter|reddit)\\.com\\/.*", "invalid", 50]
    ]
}
@@ -1,34 +0,0 @@
{
    "SEARCH": {
        "rss_feed": [
            "https://api.missingkids.org/missingkids/servlet/XmlServlet?act=rss&LanguageCountry=en_US&orgPrefix=NCMC",
            "https://feeds.feedburner.com/breitbart",
            "https://feeds.feedburner.com/zerohedge/feed",
            "https://moxie.foxnews.com/google-publisher/latest.xml",
            "https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id=15837362",
            "https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id=100727362"
        ],
        "url_host": [
            "missingkids.org/poster",
            "missingkids.org/new-poster",
            "breitbart.com",
            "zerohedge.com",
            "foxnews.com",
            "cnbc.com"
        ],
        "keyword_search": [
            "child abuse"
        ]
    },
    "REGEX_PATTERN_STATUS_PRIORITY": [
        [".*(youtube|tiktok|twitter|reddit)\\.com\\/.*", "invalid", 50],
        [".*cnbc\\.com\\/(video|quotes)\\/.*", "invalid", 75],
        [".*foxnews\\.com\\/(video|category)\\/.*", "invalid", 75],
        [".*radio.foxnews\\.com\\/.*", "invalid", 75],
        [".*breitbart\\.com\\/(tag|author)\\/.*", "invalid", 75],
        [".*zerohedge\\.com\\/(user)\\/.*", "invalid", 75],
        [".*zerohedge\\.com\\/(economics|political|markets|)\\/.*", "valid", 50],
        [".*breitbart\\.com\\/(economy|entertainment|border|crime|clips)\\/.*", "valid", 50],
        [".*foxnews\\.com\\/(lifestyle|opinion|sports|world)\\/.*", "valid", 50]
    ]
}
123  docker-compose-prod.yml
Normal file
@@ -0,0 +1,123 @@
version: '3.9'

services:

  fetcher_app_selenium:
    image: fetcher_app_selenium
    build:
      context: ./app_selenium
      args:
        - ARCH=${ARCH} # arm64, amd64
    container_name: fetcher_app_selenium
    restart: unless-stopped
    shm_size: 512mb
    environment:
      - SELENIUM_SLEEP_PER_PAGE=${SELENIUM_SLEEP_PER_PAGE}
      - PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY}
    ports:
      - 80
    dns:
      - 1.1.1.1
      - 1.0.0.1
    deploy:
      resources:
        limits:
          cpus: '${DEPLOY_CPUS}'
          memory: ${DEPLOY_RAM}

  fetcher_app_urls:
    image: fetcher_app_urls
    build:
      context: ./app_urls
    container_name: fetcher_app_urls
    restart: unless-stopped
    environment:
      # Initialization
      - INITIALIZE_DB=${INITIALIZE_DB} # Related to DB persistence
      - DJANGO_SUPERUSER_USERNAME=${DJANGO_SUPERUSER_USERNAME}
      - DJANGO_SUPERUSER_PASSWORD=${DJANGO_SUPERUSER_PASSWORD}
      - DJANGO_SUPERUSER_EMAIL=${DJANGO_SUPERUSER_EMAIL}
      # Django
      - DJANGO_ALLOWED_HOSTS=${DJANGO_ALLOWED_HOSTS} # host1,host2
      - DJANGO_ALLOWED_ORIGINS=${DJANGO_ALLOWED_ORIGINS} # Reverse proxy
      - DJANGO_SECRET_KEY=${DJANGO_SECRET_KEY}
      - DJANGO_DEBUG=${DJANGO_DEBUG}
      - PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY}
      # Database
      - DB_NAME=${DB_NAME}
      - DB_USER=${DB_USER}
      - DB_PASSWORD=${DB_PASSWORD}
      - DB_HOST=${DB_HOST}
      - DB_PORT=${DB_PORT}
      - REDIS_HOST=${REDIS_HOST}
      - REDIS_PORT=${REDIS_PORT}
      # Job timeout: 30 min
      - JOB_DEFAULT_TIMEOUT=${JOB_DEFAULT_TIMEOUT}
      # Fetcher
      - FETCHER_GNEWS_DECODE_SLEEP=${FETCHER_GNEWS_DECODE_SLEEP}
      - FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP=${FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP}
      - FETCHER_BETWEEN_SEARCHES_SLEEP=${FETCHER_BETWEEN_SEARCHES_SLEEP} # Sleep time between each search
      - FETCHER_URL_HOST_SLEEP=${FETCHER_URL_HOST_SLEEP} # Sleep time between requests to same URL host
      - FETCHER_LANGUAGE_DETECTION_MIN_CHAR=${FETCHER_LANGUAGE_DETECTION_MIN_CHAR} # Min amount of characters to run language detection
      - FETCHER_INSERT_URL_CACHE_TIME=${FETCHER_INSERT_URL_CACHE_TIME} # Cache time: Insert raw URL
      - FETCHER_ERROR_URL_CACHE_TIME=${FETCHER_ERROR_URL_CACHE_TIME} # Cache time: Error on processing URL
      # Selenium
      - SELENIUM_ENDPOINT=${SELENIUM_ENDPOINT}
      - ENDPOINT_OLLAMA=${ENDPOINT_OLLAMA}
      # Ghost
      - GHOST_ADMIN_API_KEY=${GHOST_ADMIN_API_KEY}
      - GHOST_ADMIN_API_URL=${GHOST_ADMIN_API_URL}
      - PEXELS_API_KEY=${PEXELS_API_KEY}
    ########################
    #volumes: # Development mode
    #  - ./app_urls:/opt/app
    ########################
    ports:
      - 8000 # :8000
    depends_on:
      - fetcher_db
      - fetcher_redis
    dns:
      - 1.1.1.1
      - 1.0.0.1
    deploy:
      resources:
        limits:
          cpus: '${DEPLOY_CPUS}'
          memory: ${DEPLOY_RAM}
    labels: # Reverse proxy sample
      - "traefik.enable=true"
      - "traefik.http.routers.fetcher.rule=Host(`${REVERSE_PROXY_URL}`)"
      - "traefik.http.routers.fetcher.entrypoints=websecure"
      - "traefik.http.routers.fetcher.tls.certresolver=myresolvercd"
      - "traefik.http.services.fetcher.loadbalancer.server.port=8000"
    networks:
      - default # This network
      - docker_default # Reverse proxy network

  fetcher_db:
    image: postgres:17
    container_name: fetcher_db
    restart: unless-stopped
    # Set shared memory limit when using docker-compose
    shm_size: 128mb
    environment:
      POSTGRES_DB: ${DB_NAME}
      POSTGRES_PASSWORD: ${DB_PASSWORD}
      POSTGRES_USER: ${DB_USER}
      POSTGRES_INITDB_ARGS: '--data-checksums'
    volumes: # Persistent DB?
      - ${PATH_DB_DATA}/postgres:/var/lib/postgresql/data
    ports:
      - 5432 #:5432

  fetcher_redis:
    image: redis:alpine
    container_name: fetcher_redis
    restart: unless-stopped
    ports:
      - 6379 #:6379

networks:
  docker_default:
    external: true
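Since SELENIUM_ENDPOINT and ENDPOINT_OLLAMA now point at sibling containers, a quick connectivity check from inside fetcher_app_urls can save a debugging round-trip. A sketch: /api/tags is Ollama's model-listing route, while hitting the Selenium app's root path is an assumption here:

    import os
    import requests

    def check(name, url):
        # Report HTTP status or connection failure for one endpoint.
        try:
            r = requests.get(url, timeout=5)
            print(f"{name}: {url} -> HTTP {r.status_code}")
        except requests.RequestException as exc:
            print(f"{name}: {url} -> UNREACHABLE ({exc})")

    # Defaults mirror the compose 'environment' section above.
    check("ollama", os.environ.get("ENDPOINT_OLLAMA", "http://ollama:11434") + "/api/tags")
    check("selenium", os.environ.get("SELENIUM_ENDPOINT", "http://fetcher_app_selenium:80"))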
@@ -2,28 +2,126 @@ version: '3.9'

services:

  fetcher_app_selenium:
    image: fetcher_app_selenium
    build:
      context: ./app_selenium
      args:
        - ARCH=${ARCH} # arm64, amd64
    container_name: fetcher_app_selenium
  duckdns:
    image: lscr.io/linuxserver/duckdns:latest
    container_name: duckdns
    restart: unless-stopped
    shm_size: 512mb
    environment:
      - SELENIUM_SLEEP_PER_PAGE=${SELENIUM_SLEEP_PER_PAGE}
      - PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY}
      - PUID=1000 #optional
      - PGID=1000 #optional
      - TZ=Europe/London
      - SUBDOMAINS=${DUCKDNS_SUBDOMAINS}
      - TOKEN=${DUCKDNS_TOKEN}
      - LOG_FILE=true #optional
    volumes:
      - ${PATH_DB_DATA}/duckdns_config:/config

  traefik:
    image: "traefik:v3.3"
    container_name: "traefik"
    restart: unless-stopped
    command:
      - "--api.insecure=true"
      - "--providers.docker=true"
      - "--providers.docker.exposedbydefault=false"
      # Logs for fail2ban
      - "--log.level=INFO"
      - "--accesslog=true"
      - "--accesslog.filepath=/var/log/traefik/access.log"
      # HTTPS
      - "--entrypoints.websecure.address=:443"
      # HTTPS -> Timeouts
      - "--entrypoints.websecure.transport.respondingTimeouts.readTimeout=1200s"
      - "--entrypoints.websecure.transport.respondingTimeouts.idleTimeout=1200s"
      - "--entrypoints.websecure.transport.respondingTimeouts.writeTimeout=1200s"
      # HTTP -> HTTPS
      - "--entryPoints.web.address=:80"
      - "--entrypoints.web.http.redirections.entryPoint.to=websecure"
      # Let's Encrypt
      - "--certificatesresolvers.myresolver.acme.email=${TRAEFIK_MAIL}"
      - "--certificatesresolvers.myresolver.acme.storage=/letsencrypt/acme.json"
      # TLS challenge to request new certificate
      - "--certificatesresolvers.myresolver.acme.tlschallenge=true"
    ports:
      - 80
    dns:
      - 1.1.1.1
      - 1.0.0.1
    deploy:
      resources:
        limits:
          cpus: '${DEPLOY_CPUS}'
          memory: ${DEPLOY_RAM}
      - "80:80"
      - "443:443"
      - "8080:8080"
    volumes:
      - "${PATH_DB_DATA}/letsencrypt:/letsencrypt"
      - "${PATH_DB_DATA}/traefik_logs:/var/log/traefik"
      - "/var/run/docker.sock:/var/run/docker.sock:ro"


  ollama:
    image: ollama/ollama:latest
    container_name: ollama
    restart: unless-stopped
    ports:
      - '11434:11434'
    volumes:
      - ${PATH_DB_DATA}/ollama:/root/.ollama
    #deploy:
    #  resources:
    #    limits:
    #      memory: 6G
    #      cpus: 6 # 80% for 8 cores
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.ollama-secure.rule=Host(`${OLLAMA_REVERSE_PROXY_URL}`)"
      - "traefik.http.routers.ollama.entrypoints=websecure"
      - "traefik.http.routers.ollama-secure.tls.certresolver=myresolver"
      - "traefik.http.services.ollama.loadbalancer.server.port=11434"

  ollama-webui:
    image: ghcr.io/ollama-webui/ollama-webui:main
    container_name: ollama-webui
    restart: unless-stopped
    ports:
      - 8080:8080
    volumes:
      - ${PATH_DB_DATA}/ollama-webui:/app/backend/data
    depends_on:
      - ollama
    environment:
      - 'OLLAMA_API_BASE_URL=http://ollama:11434/api'
      - 'ENABLE_SIGNUP=false'
      #- 'ENABLE_RAG_WEB_SEARCH=true'
      #- 'RAG_WEB_SEARCH_ENGINE=brave'
      #- 'ENABLE_IMAGE_GENERATION=true'
      #- 'IMAGE_GENERATION_ENGINE=comfyui'
      #- 'COMFYUI_BASE_URL=comfyui.matitos.org'
      #- 'COMFYUI_API_KEY='
      #- 'COMFYUI_WORKFLOW=' # https://docs.openwebui.com/getting-started/env-configuration#comfyui_workflow
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.ollamawebui-secure.rule=Host(`${OLLAMA_WEBUI_REVERSE_PROXY_URL}`)"
      - "traefik.http.routers.ollamawebui.entrypoints=websecure"
      - "traefik.http.routers.ollamawebui-secure.tls.certresolver=myresolver"
      - "traefik.http.services.ollamawebui.loadbalancer.server.port=8080"


  #fetcher_app_selenium:
  #  image: fetcher_app_selenium
  #  build:
  #    context: ./app_selenium
  #    args:
  #      - ARCH=${ARCH} # arm64, amd64
  #  container_name: fetcher_app_selenium
  #  restart: unless-stopped
  #  shm_size: 512mb
  #  environment:
  #    - SELENIUM_SLEEP_PER_PAGE=${SELENIUM_SLEEP_PER_PAGE}
  #    - PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY}
  #  ports:
  #    - 80
  #  dns:
  #    - 1.1.1.1
  #    - 1.0.0.1
  #  deploy:
  #    resources:
  #      limits:
  #        cpus: '${DEPLOY_CPUS}'
  #        memory: ${DEPLOY_RAM}

  fetcher_app_urls:
    image: fetcher_app_urls
@@ -73,7 +171,7 @@ services:
    # - ./app_urls:/opt/app
    ########################
    ports:
      - 8000 # :8000
      - 8000:8000
    depends_on:
      - fetcher_db
      - fetcher_redis
@@ -89,11 +187,8 @@ services:
      - "traefik.enable=true"
      - "traefik.http.routers.fetcher.rule=Host(`${REVERSE_PROXY_URL}`)"
      - "traefik.http.routers.fetcher.entrypoints=websecure"
      - "traefik.http.routers.fetcher.tls.certresolver=myresolvercd"
      - "traefik.http.routers.fetcher.tls.certresolver=myresolver"
      - "traefik.http.services.fetcher.loadbalancer.server.port=8000"
    networks:
      - default # This network
      - docker_default # Reverse proxy network

  fetcher_db:
    image: postgres:17
@@ -117,7 +212,3 @@ services:
    restart: unless-stopped
    ports:
      - 6379 #:6379

networks:
  docker_default:
    external: true
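Note the ollama and ollama-webui labels mix router names: ollama-secure carries the rule and TLS resolver while ollama carries the entrypoint, leaving Traefik with two half-configured routers per service (the fetcher labels, by contrast, use one name consistently). A small hypothetical pre-deploy check that surfaces such mismatches from labels of the shape shown above:

    import re
    from collections import defaultdict

    LABELS = [
        "traefik.http.routers.ollama-secure.rule=Host(`ollamamodel.steep.duckdns.org`)",
        "traefik.http.routers.ollama.entrypoints=websecure",
        "traefik.http.routers.ollama-secure.tls.certresolver=myresolver",
    ]

    def router_options(labels):
        """Group router option names by router name."""
        routers = defaultdict(set)
        for label in labels:
            m = re.match(r"traefik\.http\.routers\.([^.]+)\.(.+?)=", label)
            if m:
                routers[m.group(1)].add(m.group(2))
        return routers

    for name, opts in router_options(LABELS).items():
        missing = {"rule", "entrypoints", "tls.certresolver"} - opts
        if missing:
            print(f"router '{name}' is missing: {sorted(missing)}")
    # router 'ollama-secure' is missing: ['entrypoints']
    # router 'ollama' is missing: ['rule', 'tls.certresolver']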
@@ -1,335 +0,0 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "from urllib.parse import urljoin\n",
    "import pandas as pd\n",
    "import os\n",
    "\n",
    "headers = {\"User-Agent\": \"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36\"}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Function to decode Cloudflare-protected emails\n",
    "def decode_email(encoded_email):\n",
    "    \"\"\"\n",
    "    Decode an email protected by Cloudflare's email protection.\n",
    "    :param encoded_email: The encoded email string from the data-cfemail attribute.\n",
    "    :return: The decoded email address.\n",
    "    \"\"\"\n",
    "    email = \"\"\n",
    "    key = int(encoded_email[:2], 16) # Extract the key (first two characters)\n",
    "    for i in range(2, len(encoded_email), 2):\n",
    "        # XOR each pair of hex characters with the key\n",
    "        email += chr(int(encoded_email[i:i + 2], 16) ^ key)\n",
    "    return email\n",
    "\n",
    "def extract_emails(soup):\n",
    "    # Find all visible email links (mailto:)\n",
    "    visible_emails = []\n",
    "    for link in soup.find_all('a', href=lambda href: href and href.startswith('mailto:')):\n",
    "        email = link['href'].replace('mailto:', '')\n",
    "        visible_emails.append(email)\n",
    "\n",
    "    # Find all Cloudflare-protected emails\n",
    "    protected_emails = []\n",
    "    for span in soup.find_all('span', class_='__cf_email__', attrs={'data-cfemail': True}):\n",
    "        encoded_email = span['data-cfemail']\n",
    "        decoded_email = decode_email(encoded_email)\n",
    "        protected_emails.append(decoded_email)\n",
    "\n",
    "    # Combine all emails\n",
    "    all_emails = visible_emails + protected_emails\n",
    "    all_emails = list(set(all_emails))\n",
    "    if (len(all_emails) == 0):\n",
    "        return None\n",
    "    elif (len(all_emails) == 1):\n",
    "        return all_emails[0]\n",
    "    else:\n",
    "        return all_emails\n",
    "\n",
    "def find_website(soup_school):\n",
    "    # Find all <a> tags with href attributes\n",
    "    for link in soup_school.find(class_=\"dl-horizontal dl-icons\").find_all('a', href=True):\n",
    "        href = link['href']\n",
    "        # Filter out only valid URLs (e.g., starting with http or https)\n",
    "        if href.startswith(('http://', 'https://')):\n",
    "            # websites.append(href)\n",
    "            return href\n",
    "\n",
    "\n",
    "def main():\n",
    "    list_urls = [\n",
    "        \"https://scholenopdekaart.nl/Basisscholen/\",\n",
    "        \"https://scholenopdekaart.nl/middelbare-scholen/\"\n",
    "    ]\n",
    "\n",
    "    list_school_data_dicts = []\n",
    "\n",
    "    # For each category\n",
    "    for url in list_urls:\n",
    "        # Fetch the HTML content of the page\n",
    "        response = requests.get(url, headers=headers)\n",
    "        response.raise_for_status() # Raise an exception for HTTP errors\n",
    "        # Parse the HTML content using BeautifulSoup\n",
    "        soup = BeautifulSoup(response.text, 'html.parser')\n",
    "\n",
    "        # Get category\n",
    "        category = url.strip(\"/\").split(\"/\")[-1].lower()\n",
    "\n",
    "        # Find all <a> tags with href attributes\n",
    "        links_areas = []\n",
    "        for a_tag in soup.find_all('a', href=True):\n",
    "            href = a_tag['href']\n",
    "\n",
    "            if (category not in href):\n",
    "                continue\n",
    "\n",
    "            # Convert relative URLs to absolute URLs\n",
    "            area_full_url = urljoin(url, href)\n",
    "            links_areas.append(area_full_url)\n",
    "\n",
    "            # Area\n",
    "            area = href.rstrip(\"/\").split(\"/\")[-1]\n",
    "\n",
    "            ###############################################\n",
    "            # Fetch the HTML content of the page\n",
    "            print(\".\", end=\"\")\n",
    "            response = requests.get(area_full_url, headers=headers)\n",
    "            response.raise_for_status() # Raise an exception for HTTP errors\n",
    "\n",
    "            # Parse the HTML content using BeautifulSoup\n",
    "            soup_area = BeautifulSoup(response.text, 'html.parser')\n",
    "\n",
    "            # Get schools in area\n",
    "            for a_tag in soup_area.find_all('a', href=True):\n",
    "                href = a_tag['href']\n",
    "\n",
    "                school_url = urljoin(url, href)\n",
    "                if (area_full_url not in school_url):\n",
    "                    continue\n",
    "\n",
    "                school_name = a_tag.text.rstrip(\".\")\n",
    "                school_data = {\n",
    "                    \"category\": category,\n",
    "                    \"area\": area,\n",
    "                    \"name\": school_name,\n",
    "                    \"url\": school_url,\n",
    "                }\n",
    "\n",
    "                try:\n",
    "                    # Process school (request contact details)\n",
    "                    response = requests.get(os.path.join(school_url, \"contact/#inhoud\"), headers=headers)\n",
    "                    response.raise_for_status() # Raise an exception for HTTP errors\n",
    "\n",
    "                    # Parse the HTML content using BeautifulSoup\n",
    "                    soup_school = BeautifulSoup(response.text, 'html.parser')\n",
    "\n",
    "                    # School details\n",
    "                    school_details = soup_school.find(class_=\"school-details\")\n",
    "                    for category_idx, li_detail in enumerate(school_details.find_all(\"li\")):\n",
    "                        data = li_detail.find('span', class_='infotip-term')['data-dfn']\n",
    "                        text = li_detail.get_text(strip=True)\n",
    "                        # Set data\n",
    "                        school_data[\"category_{}\".format(category_idx)] = text\n",
    "                        school_data[\"category_{}_description\".format(category_idx)] = data\n",
    "\n",
    "                    school_address = soup_school.find(class_=\"school-adres\").get_text(strip=True)\n",
    "                    school_postcode_city = soup_school.find(class_=\"school-postcode-woonplaats\").get_text(strip=True)\n",
    "                    school_postcode = \"\".join(school_postcode_city.split(\" \")[:2])\n",
    "                    school_city = \" \".join(school_postcode_city.split(\" \")[2:])\n",
    "\n",
    "                    school_data[\"city\"] = school_city\n",
    "                    school_data[\"postcode\"] = school_postcode\n",
    "                    school_data[\"address\"] = school_address\n",
    "\n",
    "                    try:\n",
    "                        school_data[\"website\"] = find_website(soup_school) # soup_school.find(class_=\"button button-primary\").get('href')\n",
    "                    except Exception as e:\n",
    "                        pass\n",
    "                    try:\n",
    "                        school_data[\"phone\"] = soup_school.find('a', href=lambda href: href and href.startswith('tel:')).text\n",
    "                    except Exception as e:\n",
    "                        pass\n",
    "                    try:\n",
    "                        school_data[\"email\"] = extract_emails(soup_school)\n",
    "                    except Exception as e:\n",
    "                        pass\n",
    "\n",
    "                except Exception as e:\n",
    "                    print(school_url, str(e))\n",
    "                    # assert False\n",
    "\n",
    "                list_school_data_dicts.append(school_data)\n",
    "\n",
    "    df = pd.DataFrame(list_school_data_dicts)\n",
    "    df.to_csv(\"scholenopdekaart.csv\")\n",
    "\n",
    "\"\"\" # Issues with URL:\n",
    "https://scholenopdekaart.nl/middelbare-scholen/grave/1900/merletcollege-grave/\n",
    "https://scholenopdekaart.nl/middelbare-scholen/lent/4386/citadel-college-locatie-griftdijk/\n",
    "https://scholenopdekaart.nl/middelbare-scholen/nijmegen/24527/montessori-college-k33-nijmegen/\n",
    "https://scholenopdekaart.nl/middelbare-scholen/nijmegen/26368/aventurijn-park-neerbosch/\n",
    "https://scholenopdekaart.nl/middelbare-scholen/nijmegen/26187/kandinsky-college-voor-lyceum-havo-mavo-vbo-lwoo/\n",
    "https://scholenopdekaart.nl/middelbare-scholen/nijmegen/1791/karel-de-grote-college/\n",
    "https://scholenopdekaart.nl/middelbare-scholen/nijmegen/2040/mondial-college-locatie-leuvensbroek/\n",
    "https://scholenopdekaart.nl/middelbare-scholen/nijmegen/2041/mondial-college-meeuwse-acker/\n",
    "https://scholenopdekaart.nl/middelbare-scholen/nijmegen/2036/stedelijk-gymnasium-nijmegen/\n",
    "https://scholenopdekaart.nl/middelbare-scholen/nijmegen/2038/stedelijke-scholengemeenschap-nijmegen/\n",
    "https://scholenopdekaart.nl/middelbare-scholen/nijmegen/26184/yuverta-vmbo-het-groene-lyceum-nijmegen/\n",
    "https://scholenopdekaart.nl/middelbare-scholen/oss/23719/het-hooghuis-locatie-mondriaan-college/\n",
    "https://scholenopdekaart.nl/middelbare-scholen/oss/943/het-hooghuis-locatie-oss-stadion/\n",
    "https://scholenopdekaart.nl/middelbare-scholen/oss/947/het-hooghuis-zuidwest-gebouw-west/\n",
    "https://scholenopdekaart.nl/middelbare-scholen/oss/946/het-hooghuis-zuidwest-gebouw-zuid/\n",
    "https://scholenopdekaart.nl/middelbare-scholen/oss/1929/het-maaslandcollege-scholengemeenschap-voor-tweetalig-mavo-havo-vwo/\n",
    "https://scholenopdekaart.nl/middelbare-scholen/oss/25783/sonnewijser-unit-route-arbeid/\n",
    "https://scholenopdekaart.nl/middelbare-scholen/oss/11432/sonnewijser-unit-vervolgonderwijs-oss/\n",
    "https://scholenopdekaart.nl/middelbare-scholen/oss/942/titus-brandsmalyceum/\n",
    "https://scholenopdekaart.nl/middelbare-scholen/velp-noord-brabant/24545/merletcollege-eerste-opvang-anderstaligen-eoa/\n",
    "https://scholenopdekaart.nl/middelbare-scholen/wijchen/2018/maaswaal-college-havo-atheneum-gymnasium/\n",
    "https://scholenopdekaart.nl/middelbare-scholen/wijchen/2020/maaswaal-college-vmbo-basis-kader-mavo/\n",
    "https://scholenopdekaart.nl/middelbare-scholen/wijchen/1781/pro-college-wijchen/\n",
    "\"\"\"\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    main()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "'''\n",
    "school_url = \"https://scholenopdekaart.nl/basisscholen/aalden/9661/christelijke-basisschool-de-schutse/\"\n",
    "response = requests.get(os.path.join(school_url, \"contact/#inhoud\"), headers=headers)\n",
    "# Parse the HTML content using BeautifulSoup\n",
    "soup_school = BeautifulSoup(response.text, 'html.parser')\n",
    "soup_school\n",
    "'''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv(\"scholenopdekaart.csv\", index_col=0)\n",
    "df.loc[0, \"category_3\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "\n",
    "# Step 1: Fetch the webpage\n",
    "url = \"https://scholenopdekaart.nl/basisscholen/aagtekerke/25963/jhr-willem-versluijsschool/\"\n",
    "headers = {\n",
    "    \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36\"\n",
    "}\n",
    "response = requests.get(url, headers=headers)\n",
    "\n",
    "# Check if the request was successful\n",
    "if response.status_code != 200:\n",
    "    print(f\"Failed to retrieve the page. Status code: {response.status_code}\")\n",
    "    exit()\n",
    "\n",
    "# Step 2: Parse the HTML content\n",
    "soup = BeautifulSoup(response.text, 'html.parser')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Aantal per Leerjaar:\n",
      "Groep 1: 29 leerlingen\n",
      "Groep 2: 28 leerlingen\n",
      "Groep 3: 30 leerlingen\n",
      "Groep 4: 25 leerlingen\n",
      "Groep 5: 19 leerlingen\n",
      "Groep 6: 26 leerlingen\n",
      "Groep 7: 22 leerlingen\n",
      "Groep 8: 20 leerlingen\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "\n",
    "# Step 1: Locate the <aantal-leerlingen-leerjaar-bar-chart> tag\n",
    "chart_tag = soup.find('aantal-leerlingen-leerjaar-bar-chart', attrs={'aantal-per-leerjaar': True})\n",
    "\n",
    "if not chart_tag:\n",
    "    print(\"Could not find the 'aantal per leerjaar' section.\")\n",
    "else:\n",
    "    # Step 2: Extract the 'aantal-per-leerjaar' attribute\n",
    "    raw_data = chart_tag['aantal-per-leerjaar']\n",
    "\n",
    "    # Step 3: Parse the JSON data\n",
    "    try:\n",
    "        data = json.loads(raw_data)\n",
    "\n",
    "        # Step 4: Print the extracted data\n",
    "        print(\"Aantal per Leerjaar:\")\n",
    "        for entry in data:\n",
    "            print(f\"Groep {entry['key']}: {entry['aantal']} leerlingen\")\n",
    "    except json.JSONDecodeError as e:\n",
    "        print(f\"Failed to parse JSON data: {e}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "matitos_urls",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
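The notebook deleted above implements Cloudflare's data-cfemail obfuscation in decode_email: the first hex byte is an XOR key applied to every following byte. A self-contained roundtrip sketch of the scheme (the encode_email helper is illustrative, not from the notebook):

    def encode_email(email: str, key: int = 0x2A) -> str:
        """Build a data-cfemail style hex string: key byte, then key-XORed chars."""
        return f"{key:02x}" + "".join(f"{ord(c) ^ key:02x}" for c in email)

    def decode_email(encoded: str) -> str:
        """Inverse of encode_email; mirrors the notebook's decoder."""
        key = int(encoded[:2], 16)
        return "".join(
            chr(int(encoded[i:i + 2], 16) ^ key) for i in range(2, len(encoded), 2)
        )

    token = encode_email("info@example.org")
    assert decode_email(token) == "info@example.org"
    print(token, "->", decode_email(token))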
@@ -1,3 +0,0 @@
<clickhouse>
    <listen_host>0.0.0.0</listen_host>
</clickhouse>
@@ -1,28 +0,0 @@
<clickhouse>
    <logger>
        <level>warning</level>
        <console>true</console>
    </logger>

    <query_log replace="1">
        <database>system</database>
        <table>query_log</table>
        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
        <engine>
            ENGINE = MergeTree
            PARTITION BY event_date
            ORDER BY (event_time)
            TTL event_date + interval 30 day
            SETTINGS ttl_only_drop_parts=1
        </engine>
    </query_log>

    <!-- Stops unnecessary logging -->
    <metric_log remove="remove" />
    <asynchronous_metric_log remove="remove" />
    <query_thread_log remove="remove" />
    <text_log remove="remove" />
    <trace_log remove="remove" />
    <session_log remove="remove" />
    <part_log remove="remove" />
</clickhouse>
@@ -1,23 +0,0 @@
<!-- https://clickhouse.com/docs/en/operations/tips#using-less-than-16gb-of-ram -->
<clickhouse>
    <!-- https://clickhouse.com/docs/en/operations/server-configuration-parameters/settings#mark_cache_size -->
    <mark_cache_size>524288000</mark_cache_size>

    <profile>
        <default>
            <!-- https://clickhouse.com/docs/en/operations/settings/settings#max_threads -->
            <max_threads>1</max_threads>
            <!-- https://clickhouse.com/docs/en/operations/settings/settings#max_block_size -->
            <max_block_size>8192</max_block_size>
            <!-- https://clickhouse.com/docs/en/operations/settings/settings#max_download_threads -->
            <max_download_threads>1</max_download_threads>
            <!-- https://clickhouse.com/docs/en/operations/settings/settings#input_format_parallel_parsing -->
            <input_format_parallel_parsing>0</input_format_parallel_parsing>
            <!-- https://clickhouse.com/docs/en/operations/settings/settings#output_format_parallel_formatting -->
            <output_format_parallel_formatting>0</output_format_parallel_formatting>
        </default>
    </profile>
</clickhouse>
@@ -1,147 +0,0 @@
services:

  ghost:
    image: ghost:5-alpine
    container_name: ghost
    restart: unless-stopped
    ports:
      - 2368 #- 8080:2368
    environment:
      # see https://ghost.org/docs/config/#configuration-options
      database__client: mysql
      database__connection__host: ghost_db
      database__connection__user: root
      database__connection__password: example
      database__connection__database: ghost
      url: https://news.matitos.org
      # contrary to the default mentioned in the linked documentation, this image defaults to NODE_ENV=production (so development mode needs to be explicitly specified if desired)
      #NODE_ENV: development
    volumes:
      - ./docker_data/ghost:/var/lib/ghost/content
    labels: # Reverse proxy sample
      - "traefik.enable=true"
      - "traefik.http.routers.news.rule=Host(`news.matitos.org`)"
      - "traefik.http.routers.news.entrypoints=websecure"
      - "traefik.http.routers.news.tls.certresolver=myresolvercd"
      - "traefik.http.services.news.loadbalancer.server.port=2368"
    networks:
      - default # This network
      - docker_default # Reverse proxy network

  ghost_db:
    image: mysql:8.0
    container_name: ghost_db
    restart: unless-stopped
    environment:
      MYSQL_ROOT_PASSWORD: example
    volumes:
      - ./docker_data/ghost_db:/var/lib/mysql

  plausible_db:
    image: postgres:16-alpine
    restart: unless-stopped
    container_name: plausible_db
    volumes:
      - ./docker_data/plausible_db_data:/var/lib/postgresql/data
    environment:
      - POSTGRES_PASSWORD=postgres
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U postgres"]
      start_period: 1m

  plausible_events_db:
    image: clickhouse/clickhouse-server:24.12-alpine
    restart: unless-stopped
    container_name: plausible_events_db
    volumes:
      - ./docker_data/event-data:/var/lib/clickhouse
      - ./docker_data/event-logs:/var/log/clickhouse-server
      - ./clickhouse/logs.xml:/etc/clickhouse-server/config.d/logs.xml:ro
      # This makes ClickHouse bind to IPv4 only, since Docker doesn't enable IPv6 in bridge networks by default.
      # Fixes "Listen [::]:9000 failed: Address family for hostname not supported" warnings.
      - ./clickhouse/ipv4-only.xml:/etc/clickhouse-server/config.d/ipv4-only.xml:ro
      # This makes ClickHouse consume less resources, which is useful for small setups.
      # https://clickhouse.com/docs/en/operations/tips#using-less-than-16gb-of-ram
      - ./clickhouse/low-resources.xml:/etc/clickhouse-server/config.d/low-resources.xml:ro
    ulimits:
      nofile:
        soft: 262144
        hard: 262144
    environment:
      - CLICKHOUSE_SKIP_USER_SETUP=1
    healthcheck:
      test: ["CMD-SHELL", "wget --no-verbose --tries=1 -O - http://127.0.0.1:8123/ping || exit 1"]
      start_period: 1m

  plausible:
    image: ghcr.io/plausible/community-edition:v3.0.1
    restart: unless-stopped
    container_name: plausible
    command: sh -c "/entrypoint.sh db createdb && /entrypoint.sh db migrate && /entrypoint.sh run"
    depends_on:
      plausible_db:
        condition: service_healthy
      plausible_events_db:
        condition: service_healthy
    #volumes:
    #  - ./docker_data/plausible_data:/var/lib/plausible # https://github.com/plausible/community-edition/issues/163
    ulimits:
      nofile:
        soft: 65535
        hard: 65535
    ports:
      - 8000 # :8000
    environment:
      - TMPDIR=/var/lib/plausible/tmp
      # required: https://github.com/plausible/community-edition/wiki/configuration#required
      #- BASE_URL=${BASE_URL}
      #- SECRET_KEY_BASE=${SECRET_KEY_BASE}
      - BASE_URL=https://plausible.matitos.org
      - SECRET_KEY_BASE=KKfwEjeK3Xp6NdH7eCJ2szWliTueiB0vcCT4XpHvEE8ZHgvRg0Vle90wOrETQZoC
      # optional: https://github.com/plausible/community-edition/wiki/configuration#optional
      # registration: https://github.com/plausible/community-edition/wiki/configuration#registration
      - TOTP_VAULT_KEY
      - DISABLE_REGISTRATION
      - ENABLE_EMAIL_VERIFICATION
      # web: https://github.com/plausible/community-edition/wiki/configuration#web
      - HTTP_PORT=8000
      - HTTPS_PORT
      # databases: https://github.com/plausible/community-edition/wiki/configuration#database
      - DATABASE_URL
      - CLICKHOUSE_DATABASE_URL
      # Google: https://github.com/plausible/community-edition/wiki/configuration#google
      - GOOGLE_CLIENT_ID
      - GOOGLE_CLIENT_SECRET
      # geolocation: https://github.com/plausible/community-edition/wiki/configuration#ip-geolocation
      - IP_GEOLOCATION_DB
      - GEONAMES_SOURCE_FILE
      - MAXMIND_LICENSE_KEY
      - MAXMIND_EDITION
      # email: https://github.com/plausible/community-edition/wiki/configuration#email
      - MAILER_ADAPTER
      - MAILER_EMAIL
      - MAILER_NAME
      - SMTP_HOST_ADDR
      - SMTP_HOST_PORT
      - SMTP_USER_NAME
      - SMTP_USER_PWD
      - SMTP_HOST_SSL_ENABLED
      - POSTMARK_API_KEY
      - MAILGUN_API_KEY
      - MAILGUN_DOMAIN
      - MAILGUN_BASE_URI
      - MANDRILL_API_KEY
      - SENDGRID_API_KEY
    labels: # Reverse proxy sample
      - "traefik.enable=true"
      - "traefik.http.routers.plausible.rule=Host(`plausible.matitos.org`)"
      - "traefik.http.routers.plausible.entrypoints=websecure"
      - "traefik.http.routers.plausible.tls.certresolver=myresolvercd"
      - "traefik.http.services.plausible.loadbalancer.server.port=8000"
    networks:
      - default # This network
      - docker_default # Reverse proxy network

networks:
  docker_default:
    external: true