Towards django RQ

This commit is contained in:
Luciano Gervasoni
2025-03-10 12:17:31 +01:00
parent e024b200bb
commit e124dbc21a
20 changed files with 722 additions and 4643 deletions

34
app_urls/README.md Normal file
View File

@@ -0,0 +1,34 @@
* Dependencies
```
conda create -n matitos_urls python=3.12
conda activate matitos_urls
pip install django psycopg[binary] django-rq
```
* Environment variables
```
DB_NAME=${DB_NAME:-matitos}
DB_USER=${DB_USER:-supermatitos}
DB_PASSWORD=${DB_PASSWORD:-supermatitos}
DB_HOST=${DB_HOST:-localhost}
DB_PORT=${DB_PORT:-5432}
REDIS_HOST=${REDIS_HOST:-localhost}
REDIS_PORT=${REDIS_PORT:-6379}
```
* Django DB
```
# Generate content for models.py
python manage.py inspectdb
python manage.py makemigrations
python manage.py migrate --fake
```
```
# Server
python manage.py runserver
# Worker
python manage.py rqworker default
```

0
app_urls/api/__init__.py Normal file
View File

3
app_urls/api/admin.py Normal file
View File

@@ -0,0 +1,3 @@
from django.contrib import admin
# Register your models here.

6
app_urls/api/apps.py Normal file
View File

@@ -0,0 +1,6 @@
from django.apps import AppConfig
class ApiConfig(AppConfig):
    """Application configuration for the ``api`` Django app."""

    # Dotted path of the app this configuration belongs to.
    name = "api"
    # Auto primary-key field type declared for this app.
    default_auto_field = "django.db.models.BigAutoField"

View File

@@ -0,0 +1,132 @@
# Generated by Django 5.1.7 on 2025-03-07 16:56
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
    # Initial migration of the `api` app. Every model below is declared with
    # 'managed': False, so each entry only records the model state for a table
    # whose lifecycle Django is told not to manage.
    # NOTE(review): presumably the tables already exist in the database, which
    # would explain why the README applies migrations with `migrate --fake`
    # -- confirm.
    initial = True
    # No dependencies: nothing has to be applied before this migration.
    dependencies = [
    ]
    operations = [
        # --- Lookup tables: small auto id plus one unique text column --------
        migrations.CreateModel(
            name='Feed',
            fields=[
                ('id', models.SmallAutoField(primary_key=True, serialize=False)),
                ('rss_feed', models.TextField(unique=True)),
            ],
            options={
                'db_table': 'feed',
                'managed': False,
            },
        ),
        migrations.CreateModel(
            name='Search',
            fields=[
                ('id', models.SmallAutoField(primary_key=True, serialize=False)),
                ('keyword_search', models.TextField(unique=True)),
            ],
            options={
                'db_table': 'search',
                'managed': False,
            },
        ),
        migrations.CreateModel(
            name='Source',
            fields=[
                ('id', models.SmallAutoField(primary_key=True, serialize=False)),
                ('source', models.TextField(unique=True)),
            ],
            options={
                'db_table': 'source',
                'managed': False,
            },
        ),
        # Keyed by the pattern text itself rather than by a numeric id.
        migrations.CreateModel(
            name='StatusPatternMatching',
            fields=[
                ('pattern', models.TextField(primary_key=True, serialize=False)),
                ('priority', models.SmallIntegerField()),
                ('status', models.TextField()),
            ],
            options={
                'db_table': 'status_pattern_matching',
                'managed': False,
            },
        ),
        # Central table; the three relation models at the end point at it
        # through to='api.urls'.
        migrations.CreateModel(
            name='Urls',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('url', models.TextField(unique=True)),
                ('ts_fetch', models.DateTimeField()),
                ('status', models.TextField()),
            ],
            options={
                'db_table': 'urls',
                'managed': False,
            },
        ),
        migrations.CreateModel(
            name='WebsiteOfInterest',
            fields=[
                ('id', models.SmallAutoField(primary_key=True, serialize=False)),
                ('url_host', models.TextField(unique=True)),
            ],
            options={
                'db_table': 'website_of_interest',
                'managed': False,
            },
        ),
        migrations.CreateModel(
            name='WebsiteToFilter',
            fields=[
                ('id', models.SmallAutoField(primary_key=True, serialize=False)),
                ('url_host', models.TextField(unique=True)),
            ],
            options={
                'db_table': 'website_to_filter',
                'managed': False,
            },
        ),
        # --- Models whose primary key is a one-to-one link to Urls -----------
        migrations.CreateModel(
            name='UrlContent',
            fields=[
                ('id_url', models.OneToOneField(db_column='id_url', on_delete=django.db.models.deletion.DO_NOTHING, primary_key=True, serialize=False, to='api.urls')),
                ('date_published', models.DateTimeField(blank=True, null=True)),
                ('title', models.TextField(blank=True, null=True)),
                ('description', models.TextField(blank=True, null=True)),
                ('content', models.TextField(blank=True, null=True)),
                ('tags', models.TextField(blank=True, null=True)),
                ('authors', models.TextField(blank=True, null=True)),
                ('image_urls', models.TextField(blank=True, null=True)),
            ],
            options={
                'db_table': 'url_content',
                'managed': False,
            },
        ),
        # Only the primary-key relation is recorded here; models.py additionally
        # declares an `id_url_duplicated` foreign key on this model.
        migrations.CreateModel(
            name='UrlsDuplicate',
            fields=[
                ('id_url_canonical', models.OneToOneField(db_column='id_url_canonical', on_delete=django.db.models.deletion.DO_NOTHING, primary_key=True, serialize=False, to='api.urls')),
            ],
            options={
                'db_table': 'urls_duplicate',
                'managed': False,
            },
        ),
        # Only the primary-key relation is recorded here; models.py additionally
        # declares an `id_source` foreign key on this model.
        migrations.CreateModel(
            name='UrlsSource',
            fields=[
                ('id_url', models.OneToOneField(db_column='id_url', on_delete=django.db.models.deletion.DO_NOTHING, primary_key=True, serialize=False, to='api.urls')),
            ],
            options={
                'db_table': 'urls_source',
                'managed': False,
            },
        ),
    ]

View File

101
app_urls/api/models.py Normal file
View File

@@ -0,0 +1,101 @@
from django.db import models
# Create your models here.
class Feed(models.Model):
    """Unmanaged mapping of the ``feed`` table: an id plus a unique ``rss_feed`` text."""

    id = models.SmallAutoField(primary_key=True)
    rss_feed = models.TextField(unique=True)

    class Meta:
        # Django maps onto the table but is told not to manage its lifecycle.
        db_table = "feed"
        managed = False
class Search(models.Model):
    """Unmanaged mapping of the ``search`` table: an id plus a unique ``keyword_search`` text."""

    id = models.SmallAutoField(primary_key=True)
    keyword_search = models.TextField(unique=True)

    class Meta:
        # Django maps onto the table but is told not to manage its lifecycle.
        db_table = "search"
        managed = False
class Source(models.Model):
    """Unmanaged mapping of the ``source`` table: an id plus a unique ``source`` text."""

    id = models.SmallAutoField(primary_key=True)
    source = models.TextField(unique=True)

    class Meta:
        # Django maps onto the table but is told not to manage its lifecycle.
        db_table = "source"
        managed = False
class StatusPatternMatching(models.Model):
    """Unmanaged mapping of ``status_pattern_matching``, keyed by the ``pattern`` text."""

    pattern = models.TextField(primary_key=True)
    priority = models.SmallIntegerField()
    # This field type is a guess -- TODO confirm the real column type.
    status = models.TextField()

    class Meta:
        # Django maps onto the table but is told not to manage its lifecycle.
        db_table = "status_pattern_matching"
        managed = False
class UrlContent(models.Model):
    """Unmanaged mapping of ``url_content``; its primary key is a one-to-one link to ``Urls``."""

    # 'Urls' is referenced by name because that class is defined further down.
    id_url = models.OneToOneField(
        to="Urls",
        on_delete=models.DO_NOTHING,
        db_column="id_url",
        primary_key=True,
    )
    date_published = models.DateTimeField(blank=True, null=True)
    title = models.TextField(blank=True, null=True)
    description = models.TextField(blank=True, null=True)
    content = models.TextField(blank=True, null=True)
    # The three field types below are guesses -- TODO confirm the real column types.
    tags = models.TextField(blank=True, null=True)
    authors = models.TextField(blank=True, null=True)
    image_urls = models.TextField(blank=True, null=True)

    class Meta:
        # Django maps onto the table but is told not to manage its lifecycle.
        db_table = "url_content"
        managed = False
class Urls(models.Model):
    """Unmanaged mapping of the ``urls`` table; no explicit primary key is declared here."""

    url = models.TextField(unique=True)
    ts_fetch = models.DateTimeField()
    # This field type is a guess -- TODO confirm the real column type.
    status = models.TextField()

    class Meta:
        # Django maps onto the table but is told not to manage its lifecycle.
        db_table = "urls"
        managed = False
class UrlsDuplicate(models.Model):
    """Unmanaged mapping of ``urls_duplicate``, pairing a canonical URL with a duplicated one."""

    # The composite primary key (id_url_canonical, id_url_duplicated) found,
    # that is not supported. The first column is selected.
    id_url_canonical = models.OneToOneField(
        to=Urls,
        on_delete=models.DO_NOTHING,
        db_column="id_url_canonical",
        primary_key=True,
    )
    # Explicit related_name on this second relation to Urls.
    id_url_duplicated = models.ForeignKey(
        to=Urls,
        on_delete=models.DO_NOTHING,
        db_column="id_url_duplicated",
        related_name="urlsduplicate_id_url_duplicated_set",
    )

    class Meta:
        # Django maps onto the table but is told not to manage its lifecycle.
        db_table = "urls_duplicate"
        managed = False
        unique_together = (
            ("id_url_canonical", "id_url_duplicated"),
        )
class UrlsSource(models.Model):
    """Unmanaged mapping of ``urls_source``, linking a ``Urls`` row to a ``Source`` row."""

    # The composite primary key (id_url, id_source) found, that is not
    # supported. The first column is selected.
    id_url = models.OneToOneField(
        to=Urls,
        on_delete=models.DO_NOTHING,
        db_column="id_url",
        primary_key=True,
    )
    id_source = models.ForeignKey(
        to=Source,
        on_delete=models.DO_NOTHING,
        db_column="id_source",
    )

    class Meta:
        # Django maps onto the table but is told not to manage its lifecycle.
        db_table = "urls_source"
        managed = False
        unique_together = (
            ("id_url", "id_source"),
        )
class WebsiteOfInterest(models.Model):
    """Unmanaged mapping of ``website_of_interest``: an id plus a unique ``url_host`` text."""

    id = models.SmallAutoField(primary_key=True)
    url_host = models.TextField(unique=True)

    class Meta:
        # Django maps onto the table but is told not to manage its lifecycle.
        db_table = "website_of_interest"
        managed = False
class WebsiteToFilter(models.Model):
    """Unmanaged mapping of ``website_to_filter``: an id plus a unique ``url_host`` text."""

    id = models.SmallAutoField(primary_key=True)
    url_host = models.TextField(unique=True)

    class Meta:
        # Django maps onto the table but is told not to manage its lifecycle.
        db_table = "website_to_filter"
        managed = False

13
app_urls/api/tasks.py Normal file
View File

@@ -0,0 +1,13 @@
from django_rq import job
import time
import logging
logger = logging.getLogger(__name__)
@job
def task_1(message):
    """Run a placeholder background job that reports *message* after a delay.

    Args:
        message: Value interpolated into the start log line and the
            completion line printed to stdout.

    Raises:
        Exception: Any error raised while the task body runs is logged with
            its traceback and then re-raised, so the failure is visible to
            whoever executes the job instead of looking like a success.
    """
    # Lazy %-style arguments: the text is only formatted if the record is emitted.
    logger.info("Message: %s", message)
    try:
        time.sleep(5)  # Simulate a long-running task
        print(f"Task completed: {message}")
    except Exception:
        # logger.exception records the traceback; the bare `raise` keeps the
        # original exception so the job is not reported as completed.
        logger.exception("Task failed for message: %s", message)
        raise

3
app_urls/api/tests.py Normal file
View File

@@ -0,0 +1,3 @@
from django.test import TestCase
# Create your tests here.

6
app_urls/api/urls.py Normal file
View File

@@ -0,0 +1,6 @@
from django.urls import path
from .views import trigger_task
# Routes served by the api app.
urlpatterns = [
    path(route="trigger_task/", view=trigger_task, name="trigger_task"),
]

10
app_urls/api/views.py Normal file
View File

@@ -0,0 +1,10 @@
import django_rq
from django.http import JsonResponse
from .tasks import task_1
def trigger_task(request):
    """Enqueue ``task_1`` on the 'default' queue and return the job id as JSON.

    Args:
        request: Incoming HTTP request. It is not inspected.

    Returns:
        JsonResponse carrying a confirmation ``message`` and the ``job_id``
        of the enqueued job.
    """
    # NOTE(review): nothing restricts the HTTP method, so a plain GET also
    # enqueues a job -- consider limiting this view to POST.
    enqueued = django_rq.get_queue('default').enqueue(task_1, "Hello from Django RQ!")
    payload = {
        "message": "Task has been enqueued!",
        "job_id": enqueued.id,
    }
    return JsonResponse(payload)

View File

16
app_urls/core/asgi.py Normal file
View File

@@ -0,0 +1,16 @@
"""
ASGI config for core project.
It exposes the ASGI callable as a module-level variable named ``application``.
For more information on this file, see
https://docs.djangoproject.com/en/5.1/howto/deployment/asgi/
"""
import os
from django.core.asgi import get_asgi_application
# Select the project settings unless DJANGO_SETTINGS_MODULE is already set in
# the environment (setdefault never overwrites an existing value).
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
# Module-level ASGI callable, created after the settings module is selected.
application = get_asgi_application()

142
app_urls/core/settings.py Normal file
View File

@@ -0,0 +1,142 @@
"""
Django settings for core project.
Generated by 'django-admin startproject' using Django 5.1.7.
For more information on this file, see
https://docs.djangoproject.com/en/5.1/topics/settings/
For the full list of settings and their values, see
https://docs.djangoproject.com/en/5.1/ref/settings/
"""
from pathlib import Path
import os
# Build paths inside the project like this: BASE_DIR / 'subdir'.
# Resolves to the directory two levels above this file (the parent of core/).
BASE_DIR = Path(__file__).resolve().parent.parent
# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/5.1/howto/deployment/checklist/
# SECURITY WARNING: keep the secret key used in production secret!
# NOTE(review): this key is hard-coded in the source file; load it from the
# environment (and rotate it) before any non-development deployment.
SECRET_KEY = 'django-insecure-kc0jj#_=7i$_79p(n5)p3taxvhnq=w*ori-%%iu_a6wye@$(*n'
# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True
# Empty list: no host names are explicitly allowed.
# NOTE(review): must be populated before running with DEBUG disabled -- confirm
# against the deployment checklist linked above.
ALLOWED_HOSTS = []
# Application definition
INSTALLED_APPS = [
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    # 'rest_framework',
    # Third-party queue integration; configured through RQ_QUEUES in this file.
    'django_rq',
    # Local application (models, tasks, views).
    'api',
]
MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
]
# Dotted path of the project's root URL configuration module.
ROOT_URLCONF = 'core.urls'
TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        # No project-level template directories are configured.
        'DIRS': [],
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]
# Dotted path of the WSGI callable defined in core/wsgi.py.
WSGI_APPLICATION = 'core.wsgi.application'
# Database
# https://docs.djangoproject.com/en/5.1/ref/settings/#databases
# Connection parameters other than ENGINE come from DB_* environment
# variables, each with a local-development default.
DATABASES = {
    "default": {
        "ENGINE": "django.db.backends.postgresql",
        "NAME": os.getenv("DB_NAME", "matitos"),
        "USER": os.getenv("DB_USER", "supermatitos"),
        "PASSWORD": os.getenv("DB_PASSWORD", "supermatitos"),
        "HOST": os.getenv("DB_HOST", "localhost"),
        "PORT": os.getenv("DB_PORT", "5432"),
        # Disabled option kept for reference (read-only default transactions):
        # 'OPTIONS': {
        #     'options': '-c default_transaction_read_only=on'
        # }
    },
}
# Queue definitions read by django-rq; connection values come from REDIS_*
# environment variables with local-development defaults.
# Environment values are always strings, so PORT and DB are converted to int:
# the setting then has the same type whether or not the variable is set, and a
# malformed value fails at startup instead of at first connection.
RQ_QUEUES = {
    'default': {
        'HOST': os.environ.get("REDIS_HOST", "localhost"),
        'PORT': int(os.environ.get("REDIS_PORT", 6379)),
        'DB': int(os.environ.get("REDIS_DB", 0)),
        # Left unconverted on purpose so values that are not plain integers
        # keep being passed through unchanged.
        # NOTE(review): a value taken from the environment is a string here --
        # confirm the queue library accepts that form.
        'DEFAULT_TIMEOUT': os.environ.get("REDIS_DEFAULT_TIMEOUT", 360),
    }
}
# Password validation
# https://docs.djangoproject.com/en/5.1/ref/settings/#auth-password-validators
# Each entry names one validator class by dotted path; none is given options.
AUTH_PASSWORD_VALIDATORS = [
    {
        'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
    },
]
# Internationalization
# https://docs.djangoproject.com/en/5.1/topics/i18n/
LANGUAGE_CODE = 'en-us'
TIME_ZONE = 'UTC'
USE_I18N = True
USE_TZ = True
# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/5.1/howto/static-files/
STATIC_URL = 'static/'
# Default primary key field type
# https://docs.djangoproject.com/en/5.1/ref/settings/#default-auto-field
# Same value as ApiConfig.default_auto_field in api/apps.py.
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'

23
app_urls/core/urls.py Normal file
View File

@@ -0,0 +1,23 @@
"""
URL configuration for core project.
The `urlpatterns` list routes URLs to views. For more information please see:
https://docs.djangoproject.com/en/5.1/topics/http/urls/
Examples:
Function views
1. Add an import: from my_app import views
2. Add a URL to urlpatterns: path('', views.home, name='home')
Class-based views
1. Add an import: from other_app.views import Home
2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
Including another URLconf
1. Import the include() function: from django.urls import include, path
2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
"""
from django.contrib import admin
from django.urls import path, include
# Project-level routes: the admin site and the api app's own URLconf.
urlpatterns = [
    path(route="admin/", view=admin.site.urls),
    path(route="api/", view=include("api.urls")),
]

16
app_urls/core/wsgi.py Normal file
View File

@@ -0,0 +1,16 @@
"""
WSGI config for core project.
It exposes the WSGI callable as a module-level variable named ``application``.
For more information on this file, see
https://docs.djangoproject.com/en/5.1/howto/deployment/wsgi/
"""
import os
from django.core.wsgi import get_wsgi_application
# Select the project settings unless DJANGO_SETTINGS_MODULE is already set in
# the environment (setdefault never overwrites an existing value).
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
# Module-level WSGI callable; settings.WSGI_APPLICATION refers to it by dotted path.
application = get_wsgi_application()

22
app_urls/manage.py Executable file
View File

@@ -0,0 +1,22 @@
#!/usr/bin/env python
"""Django's command-line utility for administrative tasks."""
import os
import sys
def main():
    """Select the project settings and hand the command line over to Django.

    Raises:
        ImportError: If Django cannot be imported; the original import error
            is chained as the cause.
    """
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "core.settings")
    try:
        # Imported lazily so a missing Django produces the hint below.
        from django.core.management import execute_from_command_line
    except ImportError as import_failure:
        hint = (
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        )
        raise ImportError(hint) from import_failure
    execute_from_command_line(sys.argv)


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()