Compose dev mode, shuffle searches, reordering url filters
This commit is contained in:
@@ -4,6 +4,7 @@ from django.db.models import Q
|
|||||||
import traceback
|
import traceback
|
||||||
import time
|
import time
|
||||||
import os
|
import os
|
||||||
|
import random
|
||||||
from .fetch_search_instances import ListSearchInstances
|
from .fetch_search_instances import ListSearchInstances
|
||||||
from .logger import get_logger
|
from .logger import get_logger
|
||||||
logger = get_logger()
|
logger = get_logger()
|
||||||
@@ -21,7 +22,7 @@ class FetchSearcher():
|
|||||||
logger.debug("Fetching from search: {}".format(["{} ({})".format(e.search, e.type) for e in list_search_obj]))
|
logger.debug("Fetching from search: {}".format(["{} ({})".format(e.search, e.type) for e in list_search_obj]))
|
||||||
|
|
||||||
# Search
|
# Search
|
||||||
for obj_search in list_search_obj:
|
for obj_search in random.shuffle(list(list_search_obj)):
|
||||||
# TODO: language & country customization
|
# TODO: language & country customization
|
||||||
|
|
||||||
# Search
|
# Search
|
||||||
|
|||||||
@@ -298,7 +298,7 @@ input[type="checkbox"] {
|
|||||||
</label><br>
|
</label><br>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
|
||||||
<!-- Filter by valid content -->
|
<!-- Filter by Valid content -->
|
||||||
<h3>Valid content</h3>
|
<h3>Valid content</h3>
|
||||||
<button type="button" class="toggle-all-btn" data-toggle="valid_content">Toggle All</button><br>
|
<button type="button" class="toggle-all-btn" data-toggle="valid_content">Toggle All</button><br>
|
||||||
{% for vc in valid_contents %}
|
{% for vc in valid_contents %}
|
||||||
@@ -309,17 +309,23 @@ input[type="checkbox"] {
|
|||||||
</label><br>
|
</label><br>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
|
||||||
<!-- Filter by Search -->
|
<!-- Filter by language -->
|
||||||
<h3>Search</h3>
|
<h3>Language</h3>
|
||||||
<button type="button" class="toggle-all-btn" data-toggle="search">Toggle All</button><br>
|
<button type="button" class="toggle-all-btn" data-toggle="language">Toggle All</button><br>
|
||||||
{% for search in searches %}
|
{% for lang in languages %}
|
||||||
<label>
|
<label>
|
||||||
<input type="checkbox" name="search" value="{{ search.id }}"
|
<input type="checkbox" name="language" value="{{ lang }}"
|
||||||
{% if search.id|stringformat:"s" in selected_search or 'all' in selected_search %}checked{% endif %}>
|
{% if lang|stringformat:"s" in selected_language or 'all' in selected_language%}checked{% endif %}>
|
||||||
[{{ search.type }}] {{ search.search|truncatechars:50 }}
|
{{ lang|truncatechars:50 }}
|
||||||
</label><br>
|
</label><br>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
|
||||||
|
<!-- Minimum Sources Count Box -->
|
||||||
|
<h3>Min #Sources</h3>
|
||||||
|
<div>
|
||||||
|
<input type="number" id="minSourceCount" name="min_sources" value="{{ selected_min_sources }}" min="1" style="width: 60px; text-align: center;">
|
||||||
|
</div>
|
||||||
|
|
||||||
<!-- Filter by Source -->
|
<!-- Filter by Source -->
|
||||||
<h3>Source</h3>
|
<h3>Source</h3>
|
||||||
<button type="button" class="toggle-all-btn" data-toggle="source">Toggle All</button><br>
|
<button type="button" class="toggle-all-btn" data-toggle="source">Toggle All</button><br>
|
||||||
@@ -331,20 +337,14 @@ input[type="checkbox"] {
|
|||||||
</label><br>
|
</label><br>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
|
||||||
<!-- Minimum Sources Count Box -->
|
<!-- Filter by Search -->
|
||||||
<h3>Min #Sources</h3>
|
<h3>Search</h3>
|
||||||
<div>
|
<button type="button" class="toggle-all-btn" data-toggle="search">Toggle All</button><br>
|
||||||
<input type="number" id="minSourceCount" name="min_sources" value="{{ selected_min_sources }}" min="1" style="width: 60px; text-align: center;">
|
{% for search in searches %}
|
||||||
</div>
|
|
||||||
|
|
||||||
<!-- Filter by language -->
|
|
||||||
<h3>Language</h3>
|
|
||||||
<button type="button" class="toggle-all-btn" data-toggle="language">Toggle All</button><br>
|
|
||||||
{% for lang in languages %}
|
|
||||||
<label>
|
<label>
|
||||||
<input type="checkbox" name="language" value="{{ lang }}"
|
<input type="checkbox" name="search" value="{{ search.id }}"
|
||||||
{% if lang|stringformat:"s" in selected_language or 'all' in selected_language%}checked{% endif %}>
|
{% if search.id|stringformat:"s" in selected_search or 'all' in selected_search %}checked{% endif %}>
|
||||||
{{ lang|truncatechars:50 }}
|
[{{ search.type }}] {{ search.search|truncatechars:50 }}
|
||||||
</label><br>
|
</label><br>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
|
||||||
|
|||||||
115
docker-compose-dev.yml
Normal file
115
docker-compose-dev.yml
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
version: '3.9'
|
||||||
|
|
||||||
|
# docker compose -f docker-compose-dev.yml down -v; docker compose -f docker-compose-dev.yml up -d --build; docker logs fetcher_app_urls -f
|
||||||
|
|
||||||
|
services:
|
||||||
|
|
||||||
|
fetcher_app_selenium:
|
||||||
|
image: fetcher_app_selenium
|
||||||
|
build:
|
||||||
|
context: ./app_selenium
|
||||||
|
container_name: fetcher_app_selenium
|
||||||
|
restart: unless-stopped
|
||||||
|
shm_size: 512mb
|
||||||
|
environment:
|
||||||
|
- SELENIUM_SLEEP_PER_PAGE=${SELENIUM_SLEEP_PER_PAGE:-4}
|
||||||
|
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY:-logs}
|
||||||
|
ports:
|
||||||
|
- 80
|
||||||
|
dns:
|
||||||
|
- 1.1.1.1
|
||||||
|
- 1.0.0.1
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: '4'
|
||||||
|
memory: 4G
|
||||||
|
|
||||||
|
fetcher_app_urls:
|
||||||
|
image: fetcher_app_urls
|
||||||
|
build:
|
||||||
|
context: ./app_urls
|
||||||
|
container_name: fetcher_app_urls
|
||||||
|
restart: unless-stopped
|
||||||
|
environment:
|
||||||
|
# Initialization
|
||||||
|
- INITIALIZE_DB=${INITIALIZE_DB:-true} # Related to DB persistence
|
||||||
|
- DJANGO_SUPERUSER_USERNAME=${DJANGO_SUPERUSER_USERNAME:-matitos}
|
||||||
|
- DJANGO_SUPERUSER_PASSWORD=${DJANGO_SUPERUSER_PASSWORD:-matitos}
|
||||||
|
- DJANGO_SUPERUSER_EMAIL=${DJANGO_SUPERUSER_EMAIL:-matitos@matitos.org}
|
||||||
|
# Django
|
||||||
|
- DJANGO_ALLOWED_HOSTS=${DJANGO_ALLOWED_HOSTS:-*} # host1,host2
|
||||||
|
- DJANGO_ALLOWED_ORIGINS=${ALLOWED_ORIGINS:-https://fetcher.matitos.org} # Reverse proxy
|
||||||
|
- DJANGO_SECRET_KEY=${DJANGO_SECRET_KEY:-abc123456789qwerty}
|
||||||
|
- DJANGO_DEBUG=${DJANGO_DEBUG:-False}
|
||||||
|
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY:-/opt/logs}
|
||||||
|
# Database
|
||||||
|
- DB_NAME=${DB_NAME:-matitos}
|
||||||
|
- DB_USER=${DB_USER:-supermatitos}
|
||||||
|
- DB_PASSWORD=${DB_PASSWORD:-supermatitos}
|
||||||
|
- DB_HOST=${DB_HOST:-fetcher_db}
|
||||||
|
- DB_PORT=${DB_PORT:-5432}
|
||||||
|
- REDIS_HOST=${REDIS_HOST:-fetcher_redis}
|
||||||
|
- REDIS_PORT=${REDIS_PORT:-6379}
|
||||||
|
# Job timeout: 30 min
|
||||||
|
- JOB_DEFAULT_TIMEOUT=${JOB_DEFAULT_TIMEOUT:-1800}
|
||||||
|
# Fetcher
|
||||||
|
- FETCHER_GNEWS_DECODE_SLEEP=${FETCHER_GNEWS_DECODE_SLEEP:-2} # ":-" also falls back to the default when the variable is set but empty
|
||||||
|
- FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP=${FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP:-5}
|
||||||
|
- FETCHER_BETWEEN_SEARCHES_SLEEP=${FETCHER_BETWEEN_SEARCHES_SLEEP:-1}
|
||||||
|
- FETCHER_URL_HOST_SLEEP=${FETCHER_URL_HOST_SLEEP:-2}
|
||||||
|
# Selenium
|
||||||
|
- SELENIUM_ENDPOINT=${SELENIUM_ENDPOINT:-http://fetcher_app_selenium:80}
|
||||||
|
- ENDPOINT_OLLAMA=${ENDPOINT_OLLAMA:-https://ollamamodel.matitos.org}
|
||||||
|
########################
|
||||||
|
volumes: # Development mode
|
||||||
|
- ./app_urls:/opt/app
|
||||||
|
########################
|
||||||
|
ports:
|
||||||
|
- 8000:8000
|
||||||
|
depends_on:
|
||||||
|
- fetcher_db
|
||||||
|
- fetcher_redis
|
||||||
|
dns:
|
||||||
|
- 1.1.1.1
|
||||||
|
- 1.0.0.1
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: '4'
|
||||||
|
memory: 4G
|
||||||
|
#labels: # Reverse proxy sample
|
||||||
|
# - "traefik.enable=true"
|
||||||
|
# - "traefik.http.routers.fetcher.rule=Host(`fetcher.matitos.org`)"
|
||||||
|
# - "traefik.http.routers.fetcher.entrypoints=websecure"
|
||||||
|
# - "traefik.http.routers.fetcher.tls.certresolver=myresolvercd"
|
||||||
|
#networks:
|
||||||
|
# - default # This network
|
||||||
|
# - docker_default # Reverse proxy network
|
||||||
|
|
||||||
|
fetcher_db:
|
||||||
|
image: postgres:17
|
||||||
|
container_name: fetcher_db
|
||||||
|
restart: unless-stopped
|
||||||
|
# Set shared memory limit when using docker-compose
|
||||||
|
shm_size: 128mb
|
||||||
|
environment:
|
||||||
|
POSTGRES_DB: ${DB_NAME:-matitos}
|
||||||
|
POSTGRES_PASSWORD: ${DB_PASSWORD:-supermatitos}
|
||||||
|
POSTGRES_USER: ${DB_USER:-supermatitos}
|
||||||
|
POSTGRES_INITDB_ARGS: '--data-checksums'
|
||||||
|
volumes: # Persistent DB?
|
||||||
|
- ${PATH_DB_DATA:-.}/postgres:/var/lib/postgresql/data
|
||||||
|
ports:
|
||||||
|
- 5432 #:5432
|
||||||
|
|
||||||
|
fetcher_redis:
|
||||||
|
image: redis:alpine
|
||||||
|
container_name: fetcher_redis
|
||||||
|
restart: unless-stopped
|
||||||
|
ports:
|
||||||
|
- 6379 #:6379
|
||||||
|
|
||||||
|
#networks:
|
||||||
|
# docker_default:
|
||||||
|
# external: true
|
||||||
@@ -64,7 +64,7 @@ services:
|
|||||||
# - ./app_urls:/opt/app
|
# - ./app_urls:/opt/app
|
||||||
########################
|
########################
|
||||||
ports:
|
ports:
|
||||||
- 8000:8000
|
- 8000 # :8000
|
||||||
depends_on:
|
depends_on:
|
||||||
- fetcher_db
|
- fetcher_db
|
||||||
- fetcher_redis
|
- fetcher_redis
|
||||||
@@ -76,14 +76,14 @@ services:
|
|||||||
limits:
|
limits:
|
||||||
cpus: '4'
|
cpus: '4'
|
||||||
memory: 4G
|
memory: 4G
|
||||||
#labels: # Reverse proxy sample
|
labels: # Reverse proxy sample
|
||||||
# - "traefik.enable=true"
|
- "traefik.enable=true"
|
||||||
# - "traefik.http.routers.fetcher.rule=Host(`fetcher.matitos.org`)"
|
- "traefik.http.routers.fetcher.rule=Host(`fetcher.matitos.org`)"
|
||||||
# - "traefik.http.routers.fetcher.entrypoints=websecure"
|
- "traefik.http.routers.fetcher.entrypoints=websecure"
|
||||||
# - "traefik.http.routers.fetcher.tls.certresolver=myresolvercd"
|
- "traefik.http.routers.fetcher.tls.certresolver=myresolvercd"
|
||||||
#networks:
|
networks:
|
||||||
# - default # This network
|
- default # This network
|
||||||
# - docker_default # Reverse proxy network
|
- docker_default # Reverse proxy network
|
||||||
|
|
||||||
fetcher_db:
|
fetcher_db:
|
||||||
image: postgres:17
|
image: postgres:17
|
||||||
@@ -108,6 +108,6 @@ services:
|
|||||||
ports:
|
ports:
|
||||||
- 6379 #:6379
|
- 6379 #:6379
|
||||||
|
|
||||||
#networks:
|
networks:
|
||||||
# docker_default:
|
docker_default:
|
||||||
# external: true
|
external: true
|
||||||
|
|||||||
Reference in New Issue
Block a user