Urls source search, cleaning code

This commit is contained in:
Luciano Gervasoni
2025-03-20 17:19:52 +01:00
parent 05e17266f1
commit f84c7729f8
13 changed files with 241 additions and 300 deletions

View File

@@ -2,7 +2,7 @@
```
conda create -n matitos_urls python=3.12
conda activate matitos_urls
pip install django psycopg[binary] django-rq
pip install django psycopg[binary] django-redis django-rq
pip install feedparser python-dateutil newspaper4k lxml[html_clean] googlenewsdecoder gnews duckduckgo_search GoogleNews
```
@@ -77,8 +77,10 @@ DB_PORT=${DB_PORT:-5432}
REDIS_HOST=${REDIS_HOST:-localhost}
REDIS_PORT=${REDIS_PORT:-6379}
# Default RQ queue timeout
# Default RQ job timeout
RQ_DEFAULT_TIMEOUT=${RQ_DEFAULT_TIMEOUT:-900}
# Default RQ job result TTL
RQ_DEFAULT_RESULT_TTL=${RQ_DEFAULT_RESULT_TTL:-3600}
```
* Django DB
@@ -94,9 +96,9 @@ python manage.py makemigrations api; python manage.py migrate --fake-initial
# Server
python manage.py runserver
# Worker
python manage.py rqworker default
while true; do python manage.py rqworker default --burst -v 0; sleep 5; done
# Workers
# python manage.py rqworker high default low
python manage.py rqworker high default low
# Visualize DB
http://localhost:8080/?pgsql=matitos_db&username=supermatitos&db=matitos&ns=public&select=urls&order%5B0%5D=id