- Dependencies
conda create -n matitos_urls python=3.12
conda activate matitos_urls
pip install django psycopg[binary] django-rq
pip install feedparser python-dateutil newspaper4k lxml[html_clean]
- From automated inspectdb
# 1) Inspect DB, generate models.py
python manage.py inspectdb
# 2) models.py, within class Urls, add:
class STATUS_ENUM(models.TextChoices):
RAW = "raw"
ERROR = "error"
VALID = "valid"
UNKNOWN = "unknown"
INVALID = "invalid"
DUPLICATE = "duplicate"
# Update status
status = models.TextField(choices=STATUS_ENUM, default=STATUS_ENUM.RAW) # This field type is a guess.
# To class Meta, add default ordering
class Meta:
managed = False
db_table = 'urls' # db_table = '{}_urls'.format(project_name)
ordering = ["-ts_fetch"]
# Fields default:
ts_fetch = models.DateTimeField(auto_now_add=True)
status = models.TextField(default='raw') # This field type is a guess.
# URLContent:
from django.contrib.postgres.fields import ArrayField
keywords = ArrayField(models.TextField(blank=True, null=True)) # This field type is a guess.
tags = ArrayField(models.TextField(blank=True, null=True)) # This field type is a guess.
authors = ArrayField(models.TextField(blank=True, null=True)) # This field type is a guess.
image_main_url = models.TextField(blank=True, null=True)
images_url = ArrayField(models.TextField(blank=True, null=True)) # This field type is a guess.
videos_url = ArrayField(models.TextField(blank=True, null=True)) # This field type is a guess.
- Environment variables
DB_NAME=${DB_NAME:-matitos}
DB_USER=${DB_NAME:-supermatitos}
DB_PASSWORD=${DB_NAME:-supermatitos}
DB_HOST=${DB_NAME:-localhost}
DB_PORT=${DB_NAME:-5432}
REDIS_HOST=${REDIS_HOST:-localhost}
REDIS_PORT=${REDIS_PORT:-6379}
- Django DB
# Generate content for models.py
python manage.py inspectdb
# Migrations
python manage.py makemigrations api; python manage.py migrate --fake-initial
- Deploy
# Server
python manage.py runserver
# Worker
python manage.py rqworker default
while true; do python manage.py rqworker default --burst -v 0; sleep 5; done
# Visualize DB
http://localhost:8080/?pgsql=matitos_db&username=supermatitos&db=matitos&ns=public&select=urls&order%5B0%5D=id
- Utils
python manage.py rqstats
python manage.py rqstats --interval=1 # Refreshes every second
python manage.py rqstats --json # Output as JSON
python manage.py rqstats --yaml # Output as YAML