Refactoring fetcher, working feeds and raw url writer
This commit is contained in:
@@ -3,6 +3,36 @@
|
||||
conda create -n matitos_urls python=3.12
|
||||
conda activate matitos_urls
|
||||
pip install django psycopg[binary] django-rq
|
||||
pip install feedparser python-dateutil newspaper4k lxml[html_clean]
|
||||
```
|
||||
|
||||
* Models generated automatically with inspectdb
|
||||
```
|
||||
# 1) Inspect the DB to generate models.py (inspectdb prints to stdout; redirect the output into your app's models.py)
|
||||
python manage.py inspectdb
|
||||
|
||||
# 2) models.py, within class Urls, add:
|
||||
|
||||
class STATUS_ENUM(models.TextChoices):
|
||||
RAW = "raw"
|
||||
ERROR = "error"
|
||||
VALID = "valid"
|
||||
UNKNOWN = "unknown"
|
||||
INVALID = "invalid"
|
||||
DUPLICATE = "duplicate"
|
||||
|
||||
# Update status
|
||||
status = models.TextField(choices=STATUS_ENUM, default=STATUS_ENUM.RAW) # This field type is a guess.
|
||||
|
||||
# In class Meta, add a default ordering:
|
||||
class Meta:
|
||||
managed = False
|
||||
db_table = 'urls' # db_table = '{}_urls'.format(project_name)
|
||||
ordering = ["-ts_fetch"]
|
||||
|
||||
# Field defaults:
|
||||
ts_fetch = models.DateTimeField(auto_now_add=True)
|
||||
status = models.TextField(default='raw') # This field type is a guess.
|
||||
```
|
||||
|
||||
* Environment variables
|
||||
@@ -25,10 +55,20 @@ python manage.py makemigrations
|
||||
python manage.py migrate --fake
|
||||
```
|
||||
|
||||
|
||||
* Deploy
|
||||
```
|
||||
# Server
|
||||
python manage.py runserver
|
||||
|
||||
# Worker
|
||||
python manage.py rqworker default
|
||||
while true; do python manage.py rqworker default --burst; sleep 5; done
|
||||
```
|
||||
|
||||
* Utils
|
||||
```
|
||||
python manage.py rqstats
|
||||
python manage.py rqstats --interval=1 # Refreshes every second
|
||||
python manage.py rqstats --json # Output as JSON
|
||||
python manage.py rqstats --yaml # Output as YAML
|
||||
```
|
||||
Reference in New Issue
Block a user