Add Compose dev mode, shuffle searches, reorder URL filters

This commit is contained in:
Luciano Gervasoni
2025-04-04 21:13:52 +02:00
parent e87f10e7d4
commit c7cdc559f2
4 changed files with 150 additions and 34 deletions

View File

@@ -4,6 +4,7 @@ from django.db.models import Q
import traceback
import time
import os
import random
from .fetch_search_instances import ListSearchInstances
from .logger import get_logger
logger = get_logger()
@@ -21,7 +22,7 @@ class FetchSearcher():
logger.debug("Fetching from search: {}".format(["{} ({})".format(e.search, e.type) for e in list_search_obj]))
# Search
for obj_search in list_search_obj:
for obj_search in random.shuffle(list(list_search_obj)):
# TODO: language & country customization
# Search

View File

@@ -298,7 +298,7 @@ input[type="checkbox"] {
</label><br>
{% endfor %}
<!-- Filter by valid content -->
<!-- Filter by Valid content -->
<h3>Valid content</h3>
<button type="button" class="toggle-all-btn" data-toggle="valid_content">Toggle All</button><br>
{% for vc in valid_contents %}
@@ -309,17 +309,23 @@ input[type="checkbox"] {
</label><br>
{% endfor %}
<!-- Filter by Search -->
<h3>Search</h3>
<button type="button" class="toggle-all-btn" data-toggle="search">Toggle All</button><br>
{% for search in searches %}
<!-- Filter by language -->
<h3>Language</h3>
<button type="button" class="toggle-all-btn" data-toggle="language">Toggle All</button><br>
{% for lang in languages %}
<label>
<input type="checkbox" name="search" value="{{ search.id }}"
{% if search.id|stringformat:"s" in selected_search or 'all' in selected_search %}checked{% endif %}>
[{{ search.type }}] {{ search.search|truncatechars:50 }}
<input type="checkbox" name="language" value="{{ lang }}"
{% if lang|stringformat:"s" in selected_language or 'all' in selected_language%}checked{% endif %}>
{{ lang|truncatechars:50 }}
</label><br>
{% endfor %}
<!-- Minimum Sources Count Box -->
<h3>Min #Sources</h3>
<div>
<input type="number" id="minSourceCount" name="min_sources" value="{{ selected_min_sources }}" min="1" style="width: 60px; text-align: center;">
</div>
<!-- Filter by Source -->
<h3>Source</h3>
<button type="button" class="toggle-all-btn" data-toggle="source">Toggle All</button><br>
@@ -331,20 +337,14 @@ input[type="checkbox"] {
</label><br>
{% endfor %}
<!-- Minimum Sources Count Box -->
<h3>Min #Sources</h3>
<div>
<input type="number" id="minSourceCount" name="min_sources" value="{{ selected_min_sources }}" min="1" style="width: 60px; text-align: center;">
</div>
<!-- Filter by language -->
<h3>Language</h3>
<button type="button" class="toggle-all-btn" data-toggle="language">Toggle All</button><br>
{% for lang in languages %}
<!-- Filter by Search -->
<h3>Search</h3>
<button type="button" class="toggle-all-btn" data-toggle="search">Toggle All</button><br>
{% for search in searches %}
<label>
<input type="checkbox" name="language" value="{{ lang }}"
{% if lang|stringformat:"s" in selected_language or 'all' in selected_language%}checked{% endif %}>
{{ lang|truncatechars:50 }}
<input type="checkbox" name="search" value="{{ search.id }}"
{% if search.id|stringformat:"s" in selected_search or 'all' in selected_search %}checked{% endif %}>
[{{ search.type }}] {{ search.search|truncatechars:50 }}
</label><br>
{% endfor %}

115
docker-compose-dev.yml Normal file
View File

@@ -0,0 +1,115 @@
version: '3.9'

# Development variant of the stack: ./app_urls is bind-mounted into the
# fetcher_app_urls container at /opt/app and the app is published on host
# port 8000.
#
# Rebuild from scratch and follow the app logs:
#   docker compose -f docker-compose-dev.yml down -v; docker compose -f docker-compose-dev.yml up -d --build; docker logs fetcher_app_urls -f

services:

  fetcher_app_selenium:
    image: fetcher_app_selenium
    build:
      context: ./app_selenium
    container_name: fetcher_app_selenium
    restart: unless-stopped
    shm_size: 512mb
    environment:
      - SELENIUM_SLEEP_PER_PAGE=${SELENIUM_SLEEP_PER_PAGE:-4}
      - PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY:-logs}
    ports:
      - 80 # Container port only; the host port is chosen by Docker
    dns:
      - 1.1.1.1
      - 1.0.0.1
    deploy:
      resources:
        limits:
          cpus: '4'
          memory: 4G

  fetcher_app_urls:
    image: fetcher_app_urls
    build:
      context: ./app_urls
    container_name: fetcher_app_urls
    restart: unless-stopped
    environment:
      # Initialization
      - INITIALIZE_DB=${INITIALIZE_DB:-true} # Related to DB persistence
      - DJANGO_SUPERUSER_USERNAME=${DJANGO_SUPERUSER_USERNAME:-matitos}
      - DJANGO_SUPERUSER_PASSWORD=${DJANGO_SUPERUSER_PASSWORD:-matitos}
      - DJANGO_SUPERUSER_EMAIL=${DJANGO_SUPERUSER_EMAIL:-matitos@matitos.org}
      # Django
      - DJANGO_ALLOWED_HOSTS=${DJANGO_ALLOWED_HOSTS:-*} # host1,host2
      # NOTE(review): the value is read from ALLOWED_ORIGINS, not
      # DJANGO_ALLOWED_ORIGINS, unlike the neighbouring settings; confirm this
      # naming is intended.
      - DJANGO_ALLOWED_ORIGINS=${ALLOWED_ORIGINS:-https://fetcher.matitos.org} # Reverse proxy
      - DJANGO_SECRET_KEY=${DJANGO_SECRET_KEY:-abc123456789qwerty}
      - DJANGO_DEBUG=${DJANGO_DEBUG:-False}
      - PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY:-/opt/logs}
      # Database
      - DB_NAME=${DB_NAME:-matitos}
      - DB_USER=${DB_USER:-supermatitos}
      - DB_PASSWORD=${DB_PASSWORD:-supermatitos}
      - DB_HOST=${DB_HOST:-fetcher_db}
      - DB_PORT=${DB_PORT:-5432}
      - REDIS_HOST=${REDIS_HOST:-fetcher_redis}
      - REDIS_PORT=${REDIS_PORT:-6379}
      # Job timeout: 30 min
      - JOB_DEFAULT_TIMEOUT=${JOB_DEFAULT_TIMEOUT:-1800}
      # Fetcher
      # ":-" (not "-") so the default also applies when the variable is set
      # but empty, matching every other setting in this file.
      - FETCHER_GNEWS_DECODE_SLEEP=${FETCHER_GNEWS_DECODE_SLEEP:-2}
      - FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP=${FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP:-5}
      - FETCHER_BETWEEN_SEARCHES_SLEEP=${FETCHER_BETWEEN_SEARCHES_SLEEP:-1}
      - FETCHER_URL_HOST_SLEEP=${FETCHER_URL_HOST_SLEEP:-2}
      # Selenium
      - SELENIUM_ENDPOINT=${SELENIUM_ENDPOINT:-http://fetcher_app_selenium:80}
      - ENDPOINT_OLLAMA=${ENDPOINT_OLLAMA:-https://ollamamodel.matitos.org}
    ########################
    volumes: # Development mode
      - ./app_urls:/opt/app
    ########################
    ports:
      - 8000:8000
    depends_on:
      - fetcher_db
      - fetcher_redis
    dns:
      - 1.1.1.1
      - 1.0.0.1
    deploy:
      resources:
        limits:
          cpus: '4'
          memory: 4G
    #labels: # Reverse proxy sample
    #  - "traefik.enable=true"
    #  - "traefik.http.routers.fetcher.rule=Host(`fetcher.matitos.org`)"
    #  - "traefik.http.routers.fetcher.entrypoints=websecure"
    #  - "traefik.http.routers.fetcher.tls.certresolver=myresolvercd"
    #networks:
    #  - default # This network
    #  - docker_default # Reverse proxy network

  fetcher_db:
    image: postgres:17
    container_name: fetcher_db
    restart: unless-stopped
    # Set shared memory limit when using docker-compose
    shm_size: 128mb
    environment:
      POSTGRES_DB: ${DB_NAME:-matitos}
      POSTGRES_PASSWORD: ${DB_PASSWORD:-supermatitos}
      POSTGRES_USER: ${DB_USER:-supermatitos}
      POSTGRES_INITDB_ARGS: '--data-checksums'
    volumes: # Persistent DB?
      - ${PATH_DB_DATA:-.}/postgres:/var/lib/postgresql/data
    ports:
      - 5432 #:5432

  fetcher_redis:
    image: redis:alpine
    container_name: fetcher_redis
    restart: unless-stopped
    ports:
      - 6379 #:6379

#networks:
#  docker_default:
#    external: true

View File

@@ -64,7 +64,7 @@ services:
# - ./app_urls:/opt/app
########################
ports:
- 8000:8000
- 8000 # :8000
depends_on:
- fetcher_db
- fetcher_redis
@@ -76,14 +76,14 @@ services:
limits:
cpus: '4'
memory: 4G
#labels: # Reverse proxy sample
# - "traefik.enable=true"
# - "traefik.http.routers.fetcher.rule=Host(`fetcher.matitos.org`)"
# - "traefik.http.routers.fetcher.entrypoints=websecure"
# - "traefik.http.routers.fetcher.tls.certresolver=myresolvercd"
#networks:
# - default # This network
# - docker_default # Reverse proxy network
labels: # Reverse proxy sample
- "traefik.enable=true"
- "traefik.http.routers.fetcher.rule=Host(`fetcher.matitos.org`)"
- "traefik.http.routers.fetcher.entrypoints=websecure"
- "traefik.http.routers.fetcher.tls.certresolver=myresolvercd"
networks:
- default # This network
- docker_default # Reverse proxy network
fetcher_db:
image: postgres:17
@@ -108,6 +108,6 @@ services:
ports:
- 6379 #:6379
#networks:
# docker_default:
# external: true
networks:
docker_default:
external: true