diff --git a/app_urls/fetcher/src/fetch_search.py b/app_urls/fetcher/src/fetch_search.py
index 728e165..cc28d68 100644
--- a/app_urls/fetcher/src/fetch_search.py
+++ b/app_urls/fetcher/src/fetch_search.py
@@ -4,6 +4,7 @@ from django.db.models import Q
import traceback
import time
import os
+import random
from .fetch_search_instances import ListSearchInstances
from .logger import get_logger
logger = get_logger()
@@ -21,7 +22,7 @@ class FetchSearcher():
logger.debug("Fetching from search: {}".format(["{} ({})".format(e.search, e.type) for e in list_search_obj]))
# Search
- for obj_search in list_search_obj:
+ for obj_search in random.sample(list(list_search_obj), k=len(list_search_obj)):  # shuffled copy; random.shuffle() works in place and returns None
# TODO: language & country customization
# Search
diff --git a/app_urls/fetcher/templates/filtered_urls.html b/app_urls/fetcher/templates/filtered_urls.html
index 040a32b..3cb86fb 100644
--- a/app_urls/fetcher/templates/filtered_urls.html
+++ b/app_urls/fetcher/templates/filtered_urls.html
@@ -298,7 +298,7 @@ input[type="checkbox"] {
{% endfor %}
-
+
Valid content
{% for vc in valid_contents %}
@@ -309,17 +309,23 @@ input[type="checkbox"] {
{% endfor %}
-
- Search
-
- {% for search in searches %}
+
+ Language
+
+ {% for lang in languages %}
{% endfor %}
+
+ Min #Sources
+
+
+
+
Source
@@ -331,20 +337,14 @@ input[type="checkbox"] {
{% endfor %}
-
- Min #Sources
-
-
-
-
-
- Language
-
- {% for lang in languages %}
+
+ Search
+
+ {% for search in searches %}
{% endfor %}
diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml
new file mode 100644
index 0000000..12c8bce
--- /dev/null
+++ b/docker-compose-dev.yml
@@ -0,0 +1,115 @@
+version: '3.9'
+
+# docker compose -f docker-compose-dev.yml down -v; docker compose -f docker-compose-dev.yml up -d --build; docker logs fetcher_app_urls -f
+
+services:
+
+ fetcher_app_selenium:
+ image: fetcher_app_selenium
+ build:
+ context: ./app_selenium
+ container_name: fetcher_app_selenium
+ restart: unless-stopped
+ shm_size: 512mb
+ environment:
+ - SELENIUM_SLEEP_PER_PAGE=${SELENIUM_SLEEP_PER_PAGE:-4}
+ - PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY:-logs}
+ ports:
+ - 80
+ dns:
+ - 1.1.1.1
+ - 1.0.0.1
+ deploy:
+ resources:
+ limits:
+ cpus: '4'
+ memory: 4G
+
+ fetcher_app_urls:
+ image: fetcher_app_urls
+ build:
+ context: ./app_urls
+ container_name: fetcher_app_urls
+ restart: unless-stopped
+ environment:
+ # Initialization
+ - INITIALIZE_DB=${INITIALIZE_DB:-true} # Related to DB persistence
+ - DJANGO_SUPERUSER_USERNAME=${DJANGO_SUPERUSER_USERNAME:-matitos}
+ - DJANGO_SUPERUSER_PASSWORD=${DJANGO_SUPERUSER_PASSWORD:-matitos}
+ - DJANGO_SUPERUSER_EMAIL=${DJANGO_SUPERUSER_EMAIL:-matitos@matitos.org}
+ # Django
+ - DJANGO_ALLOWED_HOSTS=${DJANGO_ALLOWED_HOSTS:-*} # host1,host2
+ - DJANGO_ALLOWED_ORIGINS=${ALLOWED_ORIGINS:-https://fetcher.matitos.org} # Reverse proxy origin. NOTE: read from host variable ALLOWED_ORIGINS (no DJANGO_ prefix)
+ - DJANGO_SECRET_KEY=${DJANGO_SECRET_KEY:-abc123456789qwerty}
+ - DJANGO_DEBUG=${DJANGO_DEBUG:-False}
+ - PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY:-/opt/logs}
+ # Database
+ - DB_NAME=${DB_NAME:-matitos}
+ - DB_USER=${DB_USER:-supermatitos}
+ - DB_PASSWORD=${DB_PASSWORD:-supermatitos}
+ - DB_HOST=${DB_HOST:-fetcher_db}
+ - DB_PORT=${DB_PORT:-5432}
+ - REDIS_HOST=${REDIS_HOST:-fetcher_redis}
+ - REDIS_PORT=${REDIS_PORT:-6379}
+ # Job timeout: 30 min
+ - JOB_DEFAULT_TIMEOUT=${JOB_DEFAULT_TIMEOUT:-1800}
+ # Fetcher
+ - FETCHER_GNEWS_DECODE_SLEEP=${FETCHER_GNEWS_DECODE_SLEEP:-2}
+ - FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP=${FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP:-5}
+ - FETCHER_BETWEEN_SEARCHES_SLEEP=${FETCHER_BETWEEN_SEARCHES_SLEEP:-1}
+ - FETCHER_URL_HOST_SLEEP=${FETCHER_URL_HOST_SLEEP:-2}
+ # Selenium
+ - SELENIUM_ENDPOINT=${SELENIUM_ENDPOINT:-http://fetcher_app_selenium:80}
+ - ENDPOINT_OLLAMA=${ENDPOINT_OLLAMA:-https://ollamamodel.matitos.org}
+ ########################
+ volumes: # Development mode
+ - ./app_urls:/opt/app
+ ########################
+ ports:
+ - 8000:8000
+ depends_on:
+ - fetcher_db
+ - fetcher_redis
+ dns:
+ - 1.1.1.1
+ - 1.0.0.1
+ deploy:
+ resources:
+ limits:
+ cpus: '4'
+ memory: 4G
+ #labels: # Reverse proxy sample
+ # - "traefik.enable=true"
+ # - "traefik.http.routers.fetcher.rule=Host(`fetcher.matitos.org`)"
+ # - "traefik.http.routers.fetcher.entrypoints=websecure"
+ # - "traefik.http.routers.fetcher.tls.certresolver=myresolvercd"
+ #networks:
+ # - default # This network
+ # - docker_default # Reverse proxy network
+
+ fetcher_db:
+ image: postgres:17
+ container_name: fetcher_db
+ restart: unless-stopped
+ # Set shared memory limit when using docker-compose
+ shm_size: 128mb
+ environment:
+ POSTGRES_DB: ${DB_NAME:-matitos}
+ POSTGRES_PASSWORD: ${DB_PASSWORD:-supermatitos}
+ POSTGRES_USER: ${DB_USER:-supermatitos}
+ POSTGRES_INITDB_ARGS: '--data-checksums'
+ volumes: # Persistent DB?
+ - ${PATH_DB_DATA:-.}/postgres:/var/lib/postgresql/data
+ ports:
+ - 5432 #:5432
+
+ fetcher_redis:
+ image: redis:alpine
+ container_name: fetcher_redis
+ restart: unless-stopped
+ ports:
+ - 6379 #:6379
+
+#networks:
+# docker_default:
+# external: true
diff --git a/docker-compose.yml b/docker-compose.yml
index 60e077f..888d6e1 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -64,7 +64,7 @@ services:
# - ./app_urls:/opt/app
########################
ports:
- - 8000:8000
+ - 8000 # :8000
depends_on:
- fetcher_db
- fetcher_redis
@@ -76,14 +76,14 @@ services:
limits:
cpus: '4'
memory: 4G
- #labels: # Reverse proxy sample
- # - "traefik.enable=true"
- # - "traefik.http.routers.fetcher.rule=Host(`fetcher.matitos.org`)"
- # - "traefik.http.routers.fetcher.entrypoints=websecure"
- # - "traefik.http.routers.fetcher.tls.certresolver=myresolvercd"
- #networks:
- # - default # This network
- # - docker_default # Reverse proxy network
+ labels: # Traefik reverse proxy routing (enabled)
+ - "traefik.enable=true"
+ - "traefik.http.routers.fetcher.rule=Host(`fetcher.matitos.org`)"
+ - "traefik.http.routers.fetcher.entrypoints=websecure"
+ - "traefik.http.routers.fetcher.tls.certresolver=myresolvercd"
+ networks:
+ - default # This network
+ - docker_default # Reverse proxy network
fetcher_db:
image: postgres:17
@@ -108,6 +108,6 @@ services:
ports:
- 6379 #:6379
-#networks:
-# docker_default:
-# external: true
+networks:
+ docker_default:
+ external: true