84 lines
2.4 KiB
Markdown
84 lines
2.4 KiB
Markdown
* Dependencies
|
|
```
|
|
conda create -n matitos_urls python=3.12
|
|
conda activate matitos_urls
|
|
pip install django "psycopg[binary]" django-rq
|
|
pip install feedparser python-dateutil newspaper4k "lxml[html_clean]"
|
|
```
|
|
|
|
* From automated inspectdb
|
|
```
|
|
# 1) Inspect DB, generate models.py
|
|
python manage.py inspectdb
|
|
|
|
# 2) models.py, within class Urls, add:
|
|
|
|
class STATUS_ENUM(models.TextChoices):
|
|
RAW = "raw"
|
|
ERROR = "error"
|
|
VALID = "valid"
|
|
UNKNOWN = "unknown"
|
|
INVALID = "invalid"
|
|
DUPLICATE = "duplicate"
|
|
|
|
# Update status
|
|
status = models.TextField(choices=STATUS_ENUM, default=STATUS_ENUM.RAW) # This field type is a guess.
|
|
|
|
# To class Meta, add default ordering
|
|
class Meta:
|
|
managed = False
|
|
db_table = 'urls' # db_table = '{}_urls'.format(project_name)
|
|
ordering = ["-ts_fetch"]
|
|
|
|
# Fields default:
|
|
ts_fetch = models.DateTimeField(auto_now_add=True)
|
|
status = models.TextField(default='raw') # This field type is a guess.
|
|
|
|
# URLContent:
|
|
from django.contrib.postgres.fields import ArrayField
|
|
|
|
keywords = ArrayField(models.TextField(blank=True, null=True)) # This field type is a guess.
|
|
tags = ArrayField(models.TextField(blank=True, null=True)) # This field type is a guess.
|
|
authors = ArrayField(models.TextField(blank=True, null=True)) # This field type is a guess.
|
|
image_main_url = models.TextField(blank=True, null=True)
|
|
images_url = ArrayField(models.TextField(blank=True, null=True)) # This field type is a guess.
|
|
videos_url = ArrayField(models.TextField(blank=True, null=True)) # This field type is a guess.
|
|
```
|
|
|
|
* Environment variables
|
|
```
|
|
DB_NAME=${DB_NAME:-matitos}
|
|
DB_USER=${DB_USER:-supermatitos}
|
|
DB_PASSWORD=${DB_PASSWORD:-supermatitos}
|
|
DB_HOST=${DB_HOST:-localhost}
|
|
DB_PORT=${DB_PORT:-5432}
|
|
|
|
REDIS_HOST=${REDIS_HOST:-localhost}
|
|
REDIS_PORT=${REDIS_PORT:-6379}
|
|
```
|
|
|
|
* Django DB
|
|
```
|
|
# Generate content for models.py
|
|
python manage.py inspectdb
|
|
# Migrations
|
|
python manage.py makemigrations api; python manage.py migrate --fake-initial
|
|
```
|
|
|
|
* Deploy
|
|
```
|
|
# Server
|
|
python manage.py runserver
|
|
|
|
# Worker
|
|
python manage.py rqworker default
|
|
while true; do python manage.py rqworker default --burst; sleep 5; done
|
|
```
|
|
|
|
* Utils
|
|
```
|
|
python manage.py rqstats
|
|
python manage.py rqstats --interval=1 # Refreshes every second
|
|
python manage.py rqstats --json # Output as JSON
|
|
python manage.py rqstats --yaml # Output as YAML
|
|
``` |