From c7cdc559f2bd56c4c34f23ba98facda8043e86e4 Mon Sep 17 00:00:00 2001 From: Luciano Gervasoni Date: Fri, 4 Apr 2025 21:13:52 +0200 Subject: [PATCH] Compose dev mode, shuffle searches, reordering url filters --- app_urls/fetcher/src/fetch_search.py | 3 +- app_urls/fetcher/templates/filtered_urls.html | 42 +++---- docker-compose-dev.yml | 115 ++++++++++++++++++ docker-compose.yml | 24 ++-- 4 files changed, 150 insertions(+), 34 deletions(-) create mode 100644 docker-compose-dev.yml diff --git a/app_urls/fetcher/src/fetch_search.py b/app_urls/fetcher/src/fetch_search.py index 728e165..cc28d68 100644 --- a/app_urls/fetcher/src/fetch_search.py +++ b/app_urls/fetcher/src/fetch_search.py @@ -4,6 +4,7 @@ from django.db.models import Q import traceback import time import os +import random from .fetch_search_instances import ListSearchInstances from .logger import get_logger logger = get_logger() @@ -21,7 +22,7 @@ class FetchSearcher(): logger.debug("Fetching from search: {}".format(["{} ({})".format(e.search, e.type) for e in list_search_obj])) # Search - for obj_search in list_search_obj: + for obj_search in random.sample(list(list_search_obj), k=len(list_search_obj)): # TODO: language & country customization # Search diff --git a/app_urls/fetcher/templates/filtered_urls.html b/app_urls/fetcher/templates/filtered_urls.html index 040a32b..3cb86fb 100644 --- a/app_urls/fetcher/templates/filtered_urls.html +++ b/app_urls/fetcher/templates/filtered_urls.html @@ -298,7 +298,7 @@ input[type="checkbox"] {
{% endfor %} - +

Valid content


{% for vc in valid_contents %} @@ -309,17 +309,23 @@ input[type="checkbox"] {
{% endfor %} - -

Search

-
- {% for search in searches %} + +

Language

+
+ {% for lang in languages %}
{% endfor %} + +

Min #Sources

+
+ +
+

Source


@@ -331,20 +337,14 @@ input[type="checkbox"] {
{% endfor %} - -

Min #Sources

-
- -
- - -

Language

-
- {% for lang in languages %} + +

Search

+
+ {% for search in searches %}
{% endfor %} diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml new file mode 100644 index 0000000..12c8bce --- /dev/null +++ b/docker-compose-dev.yml @@ -0,0 +1,115 @@ +version: '3.9' + +# docker compose -f docker-compose-dev.yml down -v; docker compose -f docker-compose-dev.yml up -d --build; docker logs fetcher_app_urls -f + +services: + + fetcher_app_selenium: + image: fetcher_app_selenium + build: + context: ./app_selenium + container_name: fetcher_app_selenium + restart: unless-stopped + shm_size: 512mb + environment: + - SELENIUM_SLEEP_PER_PAGE=${SELENIUM_SLEEP_PER_PAGE:-4} + - PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY:-logs} + ports: + - 80 + dns: + - 1.1.1.1 + - 1.0.0.1 + deploy: + resources: + limits: + cpus: '4' + memory: 4G + + fetcher_app_urls: + image: fetcher_app_urls + build: + context: ./app_urls + container_name: fetcher_app_urls + restart: unless-stopped + environment: + # Initialization + - INITIALIZE_DB=${INITIALIZE_DB:-true} # Related to DB persistence + - DJANGO_SUPERUSER_USERNAME=${DJANGO_SUPERUSER_USERNAME:-matitos} + - DJANGO_SUPERUSER_PASSWORD=${DJANGO_SUPERUSER_PASSWORD:-matitos} + - DJANGO_SUPERUSER_EMAIL=${DJANGO_SUPERUSER_EMAIL:-matitos@matitos.org} + # Django + - DJANGO_ALLOWED_HOSTS=${DJANGO_ALLOWED_HOSTS:-*} # host1,host2 + - DJANGO_ALLOWED_ORIGINS=${ALLOWED_ORIGINS:-https://fetcher.matitos.org} # Reverse proxy + - DJANGO_SECRET_KEY=${DJANGO_SECRET_KEY:-abc123456789qwerty} + - DJANGO_DEBUG=${DJANGO_DEBUG:-False} + - PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY:-/opt/logs} + # Database + - DB_NAME=${DB_NAME:-matitos} + - DB_USER=${DB_USER:-supermatitos} + - DB_PASSWORD=${DB_PASSWORD:-supermatitos} + - DB_HOST=${DB_HOST:-fetcher_db} + - DB_PORT=${DB_PORT:-5432} + - REDIS_HOST=${REDIS_HOST:-fetcher_redis} + - REDIS_PORT=${REDIS_PORT:-6379} + # Job timeout: 30 min + - JOB_DEFAULT_TIMEOUT=${JOB_DEFAULT_TIMEOUT:-1800} + # Fetcher + - FETCHER_GNEWS_DECODE_SLEEP=${FETCHER_GNEWS_DECODE_SLEEP:-2} + - 
FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP=${FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP:-5} + - FETCHER_BETWEEN_SEARCHES_SLEEP=${FETCHER_BETWEEN_SEARCHES_SLEEP:-1} + - FETCHER_URL_HOST_SLEEP=${FETCHER_URL_HOST_SLEEP:-2} + # Selenium + - SELENIUM_ENDPOINT=${SELENIUM_ENDPOINT:-http://fetcher_app_selenium:80} + - ENDPOINT_OLLAMA=${ENDPOINT_OLLAMA:-https://ollamamodel.matitos.org} + ######################## + volumes: # Development mode + - ./app_urls:/opt/app + ######################## + ports: + - 8000:8000 + depends_on: + - fetcher_db + - fetcher_redis + dns: + - 1.1.1.1 + - 1.0.0.1 + deploy: + resources: + limits: + cpus: '4' + memory: 4G + #labels: # Reverse proxy sample + # - "traefik.enable=true" + # - "traefik.http.routers.fetcher.rule=Host(`fetcher.matitos.org`)" + # - "traefik.http.routers.fetcher.entrypoints=websecure" + # - "traefik.http.routers.fetcher.tls.certresolver=myresolvercd" + #networks: + # - default # This network + # - docker_default # Reverse proxy network + + fetcher_db: + image: postgres:17 + container_name: fetcher_db + restart: unless-stopped + # Set shared memory limit when using docker-compose + shm_size: 128mb + environment: + POSTGRES_DB: ${DB_NAME:-matitos} + POSTGRES_PASSWORD: ${DB_PASSWORD:-supermatitos} + POSTGRES_USER: ${DB_USER:-supermatitos} + POSTGRES_INITDB_ARGS: '--data-checksums' + volumes: # Persistent DB? 
+ - ${PATH_DB_DATA:-.}/postgres:/var/lib/postgresql/data + ports: + - 5432 #:5432 + + fetcher_redis: + image: redis:alpine + container_name: fetcher_redis + restart: unless-stopped + ports: + - 6379 #:6379 + +#networks: +# docker_default: +# external: true diff --git a/docker-compose.yml b/docker-compose.yml index 60e077f..888d6e1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -64,7 +64,7 @@ services: # - ./app_urls:/opt/app ######################## ports: - - 8000:8000 + - 8000 # :8000 depends_on: - fetcher_db - fetcher_redis @@ -76,14 +76,14 @@ services: limits: cpus: '4' memory: 4G - #labels: # Reverse proxy sample - # - "traefik.enable=true" - # - "traefik.http.routers.fetcher.rule=Host(`fetcher.matitos.org`)" - # - "traefik.http.routers.fetcher.entrypoints=websecure" - # - "traefik.http.routers.fetcher.tls.certresolver=myresolvercd" - #networks: - # - default # This network - # - docker_default # Reverse proxy network + labels: # Reverse proxy sample + - "traefik.enable=true" + - "traefik.http.routers.fetcher.rule=Host(`fetcher.matitos.org`)" + - "traefik.http.routers.fetcher.entrypoints=websecure" + - "traefik.http.routers.fetcher.tls.certresolver=myresolvercd" + networks: + - default # This network + - docker_default # Reverse proxy network fetcher_db: image: postgres:17 @@ -108,6 +108,6 @@ services: ports: - 6379 #:6379 -#networks: -# docker_default: -# external: true +networks: + docker_default: + external: true