Refactor searches, env vars fetcher config, urls webpage update
This commit is contained in:
@@ -10,6 +10,10 @@ pip install feedparser python-dateutil newspaper4k[all] lxml[html_clean] googlen
|
||||
pip install ollama
|
||||
```
|
||||
|
||||
* Database
|
||||
* Database initialization -> 1-DB.ipynb
|
||||
|
||||
|
||||
* From automated inspectdb
|
||||
```
|
||||
# 1) Inspect DB, generate models.py
|
||||
@@ -72,23 +76,26 @@ class Meta:
|
||||
|
||||
* Environment variables
|
||||
```
|
||||
# Database
|
||||
DB_NAME=${DB_NAME:-matitos}
|
||||
DB_USER=${DB_USER:-supermatitos}
|
||||
DB_PASSWORD=${DB_PASSWORD:-supermatitos}
|
||||
DB_HOST=${DB_HOST:-localhost}
|
||||
DB_PORT=${DB_PORT:-5432}
|
||||
|
||||
REDIS_HOST=${REDIS_HOST:-localhost}
|
||||
REDIS_PORT=${REDIS_PORT:-6379}
|
||||
|
||||
# Default RQ job timeout
|
||||
RQ_DEFAULT_TIMEOUT=${RQ_DEFAULT_TIMEOUT:-900}
|
||||
# Default RQ job result TTL
|
||||
RQ_DEFAULT_RESULT_TTL=${RQ_DEFAULT_RESULT_TTL:-3600}
|
||||
# Job timeout: 30 min
|
||||
JOB_DEFAULT_TIMEOUT=${JOB_DEFAULT_TIMEOUT:-1800}
|
||||
|
||||
# Logs path
|
||||
PATH_LOGS_ERROR=logs/log_app_fetcher_error.log
|
||||
PATH_LOGS=logs/log_app_fetcher.log
|
||||
PATH_LOGS_PARAMETERIZATION="logs/log_app_fetcher_{}.log"
|
||||
|
||||
# Fetcher
|
||||
FETCHER_GNEWS_DECODE_SLEEP=2
|
||||
FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP=4
|
||||
FETCHER_BETWEEN_SEARCHES_SLEEP=5
|
||||
FETCHER_URL_HOST_SLEEP=5
|
||||
```
|
||||
|
||||
* Deploy
|
||||
@@ -110,30 +117,14 @@ http://localhost:8080/?pgsql=matitos_db&username=supermatitos&db=matitos&ns=publ
|
||||
|
||||
* Scheduled tasks
|
||||
```
|
||||
# 1) Modify the scheduled tasks on the admin panel:
|
||||
|
||||
Names: Fetch Feeds, Fetch Parser, Fetch Search
|
||||
Callable: api.tasks.fetch_feeds, api.tasks.fetch_parser, api.tasks.fetch_search
|
||||
Task type: Repeatable task (or cron...)
|
||||
Queue: Default
|
||||
Interval: 15min, 2h, 30min
|
||||
|
||||
Names: Process raw URLs, Process error URLs, Process MissingKids URLs
|
||||
Callable: api.tasks.process_raw_urls, api.tasks.process_error_urls, api.tasks.process_missing_kids_urls_50
|
||||
Task type: Repeatable task (or cron...)
|
||||
Queue: Low, Low, Default
|
||||
Interval: 1h, 4h, 2h
|
||||
|
||||
# 2) Export
|
||||
# python manage.py export > scheduled_tasks.json
|
||||
|
||||
|
||||
# Or simply import saved definitions
|
||||
# Import tasks
|
||||
python manage.py import --filename scheduled_tasks.json
|
||||
|
||||
# Modify using the admin panel, then save
|
||||
# python manage.py export > scheduled_tasks.json
|
||||
```
|
||||
|
||||
* Utils
|
||||
* Utils. TODO: To endpoint...
|
||||
```
|
||||
python manage.py rqstats
|
||||
python manage.py rqstats --interval=1 # Refreshes every second
|
||||
```
|
||||
Reference in New Issue
Block a user