This commit is contained in:
Luciano Gervasoni
2025-03-06 21:53:04 +01:00
parent a65d4a4289
commit 4453a51d6d
50 changed files with 1916 additions and 0 deletions

197
1-DB.ipynb Normal file
View File

@@ -0,0 +1,197 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# !pip install psycopg[binary]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"INSERT_TABLES = False\n",
"INSERT_SAMPLE_DATA = False\n",
"\n",
"import psycopg\n",
"connection_info = \"host={} port={} user={} password={} dbname={}\".format(\"localhost\", \"5432\", \"supermatitos\", \"supermatitos\", \"matitos\")\n",
"\n",
"\n",
"if INSERT_TABLES:\n",
" # Connect to an existing database\n",
" with psycopg.connect(connection_info) as conn:\n",
" # Open a cursor to perform database operations\n",
" with conn.cursor() as cur:\n",
" # Autocommit at end of transaction (Atomic insert of URLs and sources)\n",
" with conn.transaction() as tx:\n",
" # Create URLs table\n",
" c = cur.execute(\"\"\"\n",
" CREATE TYPE URL_STATUS AS ENUM ('raw', 'error', 'valid', 'unknown', 'invalid', 'duplicate');\n",
"\n",
" CREATE TABLE URLS (\n",
" id SERIAL PRIMARY KEY,\n",
" url TEXT NOT NULL UNIQUE,\n",
" ts_fetch TIMESTAMPTZ NOT NULL DEFAULT NOW(),\n",
" status URL_STATUS NOT NULL DEFAULT 'raw' -- ,\n",
" -- status_wendy WENDY_STATUS DEFAULT NULL,\n",
" -- ts_wendy TIMESTAMPTZ DEFAULT NULL\n",
" );\n",
" CREATE INDEX idx_urls_status ON urls(status);\n",
" CREATE INDEX idx_urls_ts_fetch ON urls(ts_fetch);\n",
"\n",
" CREATE TABLE URLS_DUPLICATE (\n",
" id_url_canonical INTEGER REFERENCES URLS(id),\n",
" id_url_duplicated INTEGER REFERENCES URLS(id),\n",
" PRIMARY KEY (id_url_canonical, id_url_duplicated)\n",
" );\n",
"\n",
" CREATE TABLE FEED (\n",
" id SMALLSERIAL PRIMARY KEY,\n",
" rss_feed TEXT NOT NULL UNIQUE\n",
" );\n",
" CREATE TABLE WEBSITE_OF_INTEREST (\n",
" id SMALLSERIAL PRIMARY KEY,\n",
" url_host TEXT NOT NULL UNIQUE\n",
" );\n",
" CREATE TABLE SEARCH (\n",
" id SMALLSERIAL PRIMARY KEY,\n",
" keyword_search TEXT NOT NULL UNIQUE\n",
" );\n",
" CREATE TABLE SOURCE (\n",
" id SMALLSERIAL PRIMARY KEY,\n",
" source TEXT NOT NULL UNIQUE\n",
" );\n",
"\n",
" CREATE TABLE URLS_SOURCE (\n",
" id_url INTEGER REFERENCES URLS(id),\n",
" id_source SMALLINT REFERENCES SOURCE(id) ON UPDATE CASCADE ON DELETE RESTRICT, -- Source encodes search information\n",
" PRIMARY KEY(id_url, id_source)\n",
" );\n",
" CREATE INDEX idx_source ON urls_source(id_source);\n",
"\n",
" CREATE TABLE WEBSITE_TO_FILTER (\n",
" id SMALLSERIAL PRIMARY KEY,\n",
" url_host TEXT NOT NULL UNIQUE\n",
" );\n",
"\n",
" CREATE TABLE STATUS_PATTERN_MATCHING (\n",
" pattern TEXT PRIMARY KEY,\n",
" priority SMALLINT NOT NULL,\n",
" status URL_STATUS NOT NULL\n",
" );\n",
" \n",
" \n",
" CREATE TABLE URL_CONTENT (\n",
" id_url INTEGER REFERENCES URLS(id),\n",
" date_published TIMESTAMPTZ NOT NULL DEFAULT NOW(),\n",
" title TEXT,\n",
" description TEXT,\n",
" content TEXT,\n",
" tags TEXT[],\n",
" authors TEXT[],\n",
" image_urls TEXT[]\n",
" );\n",
" CREATE INDEX idx_tags ON URL_CONTENT USING GIN(tags);\n",
" CREATE INDEX idx_authors ON URL_CONTENT USING GIN(authors);\n",
" \"\"\")\n",
"\n",
" # Feeds\n",
" cur.execute( \"INSERT INTO FEED (rss_feed) VALUES ('https://api.missingkids.org/missingkids/servlet/XmlServlet?act=rss&LanguageCountry=en_US&orgPrefix=NCMC');\" )\n",
" # Websites of interest\n",
" cur.execute( \"INSERT INTO WEBSITE_OF_INTEREST (url_host) VALUES ('www.unicef.org');\" )\n",
" # Search keywords\n",
" cur.execute( \"INSERT INTO SEARCH (keyword_search) VALUES ('child abuse');\" )\n",
" # Domains to filter\n",
" cur.execute( \"INSERT INTO WEBSITE_TO_FILTER (url_host) VALUES ('yewtu.be');\" )\n",
" cur.execute( \"INSERT INTO WEBSITE_TO_FILTER (url_host) VALUES ('twitter.com');\" )\n",
" cur.execute( \"INSERT INTO WEBSITE_TO_FILTER (url_host) VALUES ('libreddit.de');\" )\n",
" cur.execute( \"INSERT INTO WEBSITE_TO_FILTER (url_host) VALUES ('youtube.com');\" )\n",
" cur.execute( \"INSERT INTO WEBSITE_TO_FILTER (url_host) VALUES ('tiktok.com');\" )\n",
" cur.execute( \"INSERT INTO WEBSITE_TO_FILTER (url_host) VALUES ('radio.foxnews.com');\" )\n",
" # Status update based on pattern matching (with priority to apply in order)\n",
" cur.execute( \"INSERT INTO STATUS_PATTERN_MATCHING (pattern, priority, status) VALUES ('.*missingkids.org/poster/.*', 50, 'valid');\" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"if INSERT_SAMPLE_DATA:\n",
" # Connect to an existing database\n",
" with psycopg.connect(connection_info) as conn:\n",
" # Open a cursor to perform database operations\n",
" with conn.cursor() as cur:\n",
" # Autocommit at end of transaction (Atomic insert of URLs and sources)\n",
" with conn.transaction() as tx:\n",
" # Valid\n",
" cur.execute(\"INSERT INTO URLS (url, status) values ('https://www.foxnews.com/us/husband-ruby-franke-utah-mommy-blogger-convicted-child-abuse-regrets-wifes-fall-fame', 'valid')\")\n",
" cur.execute(\"INSERT INTO URLS (url, status) values ('https://www.bbc.com/news/articles/ckg843y8y7no', 'valid')\")\n",
" cur.execute(\"INSERT INTO URLS (url, status) values ('https://www.wilx.com/2025/03/05/lenawee-county-man-arrested-possessing-child-abuse-material/', 'valid')\")\n",
" cur.execute(\"INSERT INTO URLS (url, status) values ('https://www.dw.com/en/trauma-how-child-abuse-victims-deal-with-parenthood/a-71833895', 'valid')\")\n",
" cur.execute(\"INSERT INTO URLS (url, status) values ('https://nypost.com/2025/03/06/us-news/colorado-day-care-worker-hit-with-51-charges-of-child-abuse-harassment-for-slapping-toddler/', 'valid')\")\n",
" cur.execute(\"INSERT INTO URLS (url, status) values ('https://www.fox35orlando.com/news/tavares-police-florida-boys-10-9-abused-sheer-brutality', 'valid')\")\n",
" # Invalid\n",
" cur.execute(\"INSERT INTO URLS (url, status) values ('https://www.google.com', 'invalid')\")\n",
"\n",
" cur.execute(\"INSERT INTO SOURCE (source) values ('news.google.com')\")\n",
" cur.execute(\"INSERT INTO SOURCE (source) values ('qwant.com')\")\n",
"\n",
" cur.execute(\"INSERT INTO URLS_SOURCE (id_url, id_source) values (1, 1)\")\n",
" cur.execute(\"INSERT INTO URLS_SOURCE (id_url, id_source) values (2, 1)\")\n",
" cur.execute(\"INSERT INTO URLS_SOURCE (id_url, id_source) values (3, 1)\")\n",
" cur.execute(\"INSERT INTO URLS_SOURCE (id_url, id_source) values (4, 1)\")\n",
" cur.execute(\"INSERT INTO URLS_SOURCE (id_url, id_source) values (5, 1)\")\n",
" cur.execute(\"INSERT INTO URLS_SOURCE (id_url, id_source) values (6, 1)\")\n",
" cur.execute(\"INSERT INTO URLS_SOURCE (id_url, id_source) values (7, 1)\")\n",
"\n",
" cur.execute(\"INSERT INTO URLS_SOURCE (id_url, id_source) values (1, 2)\")\n",
" cur.execute(\"INSERT INTO URLS_SOURCE (id_url, id_source) values (2, 2)\")\n",
" cur.execute(\"INSERT INTO URLS_SOURCE (id_url, id_source) values (3, 2)\")\n",
"\n",
" for j in range(15):\n",
" import time\n",
" time.sleep(1)\n",
" cur.execute(\"INSERT INTO URLS (url, status) values ('www.super_{}.org', 'invalid')\".format(j))\n",
" \n",
" # Long URLs \n",
" cur.execute(\"INSERT INTO URLS (url, status) values ('www.super_url.org/superextrakmsdimsdf/349mvlsdfsdfwr/akivsdmimnsdifmisdf_23dj9sdgj9sdgj8sdf8ds8f.html', 'invalid')\".format(j))\n",
" cur.execute(\"INSERT INTO URLS (url, status) values ('www.super_url.org/superextrakmsdimsdf/349mvlsdfsdfwr/akivsdmimnsdifmisdf.html', 'invalid')\".format(j))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from pprint import pprint\n",
"\n",
"# Connect to an existing database\n",
"with psycopg.connect(connection_info) as conn:\n",
" # Open a cursor to perform database operations\n",
" with conn.cursor() as cur:\n",
" # Get tables\n",
" cur.execute(\"SELECT table_name FROM information_schema.tables WHERE table_schema='public';\")\n",
" tables = [t[0] for t in cur.fetchall()]\n",
"\n",
" for t in tables:\n",
" print(\"\\t\", t)\n",
" pprint( cur.execute(\"SELECT * FROM {} LIMIT 50;\".format(t)).fetchall() )"
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

247
A_Development.ipynb Normal file
View File

@@ -0,0 +1,247 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import ollama\n",
"\n",
"#model = \"llama3.2:1b\"\n",
"client = ollama.Client(\n",
" host = 'https://ollamamodel.matitos.org',\n",
")\n",
"l = client.list()\n",
"list_models = [m.get(\"model\") for m in l.model_dump().get(\"models\")]\n",
"\n",
"list_models"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for m in list_models:\n",
" context_key = [ k for k in client.show(m).model_dump().get(\"modelinfo\").keys() if \"context_length\" in k]\n",
" if (len(context_key) != 1):\n",
" print(\"Problem!!!\")\n",
" print(m, client.show(m).model_dump().get(\"modelinfo\").get(context_key[0]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"text = \"...\"\n",
"model = \"falcon3:1b\"\n",
"\n",
"msg_content = {\n",
" \"role\": \"user\", \n",
" \"content\": text,\n",
"}\n",
"response = client.chat(model=model, messages=[msg_content], stream=False)\n",
"print(response[\"message\"][\"content\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import cv2\n",
"import base64\n",
"import numpy as np\n",
"\n",
"endpoint = \"http://192.168.2.64:12343/image\"\n",
"\n",
"\n",
"\n",
"prompt = \"Majestic mountain landscape with snow-capped peaks, autumn foliage in vibrant reds and oranges, a turquoise river winding through a valley, crisp and serene atmosphere, ultra-realistic style.\"\n",
"prompt = \"A group of kids happily playing in a joy environment\"\n",
"#prompt = \"A bitcoin behaving like a king, surrounded by small alternative coins. Detailed, geometric style\"\n",
"\n",
"json = {\n",
" \"prompt\": prompt,\n",
" \"num_inference_steps\": 10,\n",
" \"size\": \"512x512\",\n",
" \"seed\": 123456,\n",
"}\n",
"\n",
"for inf_step in [1, 4, 10, 20, 25, 30, 35, 40, 45, 50, 60, 70, 80, 90, 100]:\n",
" json[\"num_inference_steps\"] = inf_step\n",
"\n",
" %time r = requests.post(endpoint, json=json)\n",
" print(\"Status code\", r.status_code)\n",
"\n",
" # Image\n",
" png_as_np = np.frombuffer(base64.b64decode(r.text), dtype=np.uint8)\n",
" image_bgr = cv2.imdecode(png_as_np, cv2.IMREAD_COLOR)\n",
"\n",
" cv2.imwrite(\"sample_img_{}.png\".format(json[\"num_inference_steps\"]), image_bgr)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# !pip install trafilatura trafilatura[all] cchardet\n",
"import courlan\n",
"url = \"https://www.foxnews.com/us/utah-mommy-blogger-ruby-franke-power-public-image-allowed-child-abuse-go-unchecked-expert\"\n",
"courlan.check_url(url)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# !pip install trafilatura\n",
"import trafilatura\n",
"from pprint import pprint\n",
"\n",
"url = \"https://www.foxnews.com/us/utah-mommy-blogger-ruby-franke-power-public-image-allowed-child-abuse-go-unchecked-expert\"\n",
"url = \"https://www.missingkids.org/poster/USVA/VA25-0820/1\"\n",
"\n",
"# Fetch\n",
"doc = trafilatura.fetch_url(url)\n",
"# Content & metadata\n",
"metadata = trafilatura.extract_metadata(doc)\n",
"content = trafilatura.extract(doc)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pprint(metadata.as_dict())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(content)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# !pip install newspaper4k"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import newspaper\n",
"\n",
"url = \"https://www.foxnews.com/us/utah-mommy-blogger-ruby-franke-power-public-image-allowed-child-abuse-go-unchecked-expert\"\n",
"url = \"https://www.missingkids.org/poster/USVA/VA25-0820/1\"\n",
"\n",
"article = newspaper.article(url)\n",
"\n",
"url_photo = set([i for i in article.images if \"api.missingkids.org/photographs\" in i])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# !pip install news-please\n",
"from newsplease import NewsPlease\n",
"\n",
"url = \"https://variety.com/2025/film/news/gene-hackman-death-suspicious-gas-leak-search-warrant-1236322610/\"\n",
"url = \"https://www.bbc.com/news/articles/cewkkkvkzn9o\"\n",
"url = \"https://www.foxnews.com/us/utah-mommy-blogger-ruby-franke-power-public-image-allowed-child-abuse-go-unchecked-expert\"\n",
"article = NewsPlease.from_url(url)\n",
"print(article.title)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(article.maintext)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "matitos",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

27
docker/docker-compose.yml Normal file
View File

@@ -0,0 +1,27 @@
version: '3.9'
services:
matitos_db:
image: postgres:17
container_name: db_postgres
restart: unless-stopped
# Set shared memory limit when using docker-compose
shm_size: 128mb
environment:
POSTGRES_PASSWORD: ${DB_PASSWORD:-supermatitos}
POSTGRES_USER: ${DB_USERNAME:-supermatitos}
POSTGRES_DB: ${DB_DATABASE_NAME:-matitos}
POSTGRES_INITDB_ARGS: '--data-checksums'
#volumes:
# - ${PATH_BASE:-.}/postgres:/var/lib/postgresql/data
ports:
- 5432:5432
# django:
# Env: DB_HOST=matitos_db
# DJANGO_DB_NAME=${DB_DATABASE_NAME:-matitos}
# DJANGO_DB_USER=${DB_USERNAME:-supermatitos}
# DJANGO_DB_PASSWORD=${DB_PASSWORD:-supermatitos}
# DJANGO_DB_HOST=${DB_HOST:-localhost}
# DJANGO_DB_PORT=${DB_PORT:-5432}

View File

@@ -0,0 +1,36 @@
FROM continuumio/anaconda3
# Based on:
# https://www.reddit.com/r/StableDiffusion/comments/1gxbwp1/npu_accelerated_sd15_lcm_on_130_rk3588_sbc_30/
WORKDIR /home
# Install git-lfs, fetch the RKNN runtime library and the pre-converted Stable
# Diffusion model, then install the Python dependencies.
# `-y` keeps apt-get non-interactive: a docker build has no TTY to answer the
# confirmation prompt.
RUN apt-get update && \
    apt-get install -y git-lfs && \
    # RKNN lib
    git clone https://github.com/airockchip/rknn-toolkit2.git && \
    cp rknn-toolkit2/rknpu2/runtime/Linux/librknn_api/aarch64/librknnrt.so /usr/lib && \
    # Stable Diffusion
    git clone https://huggingface.co/happyme531/Stable-Diffusion-1.5-LCM-ONNX-RKNN2 && \
    # Dependencies
    pip install diffusers pillow "numpy<2" rknn-toolkit-lite2 torch transformers
WORKDIR /home/Stable-Diffusion-1.5-LCM-ONNX-RKNN2
# FastAPI
RUN conda install -c conda-forge libgl
RUN pip install fastapi[standard] opencv-python
COPY ./app /home/app
# Replace writing image path
RUN sed -i '/return os.path.join(out_folder, out_fname + ".png")/c \ \ \ \ return "images/image.png"' ./run_rknn-lcm.py
RUN sed -i '/os.makedirs(out_folder, exist_ok=True)/c \ \ \ \ os.makedirs("images", exist_ok=True)' ./run_rknn-lcm.py
# Multi core NPU
RUN sed -i 's/RKNNLite.NPU_CORE_AUTO/RKNNLite.NPU_CORE_0_1_2/g' ./run_rknn-lcm.py
# CMD ["/bin/bash"]
CMD ["fastapi", "run", "/home/app/main.py", "--port", "80"]
# docker build -t image_generation .
# docker run --rm --privileged --device /dev/rknpu:/dev/rknpu --device /dev/dri:/dev/dri --security-opt systempaths=unconfined -p 12343:80 image_generation

View File

View File

@@ -0,0 +1,48 @@
from fastapi import FastAPI, Response
from fastapi.responses import FileResponse
from pydantic import BaseModel
import cv2
import subprocess
import base64
# Request body of the POST /image endpoint. Every field is optional and falls
# back to the default declared here.
class Item(BaseModel):
    # Text prompt, forwarded to the generation script as --prompt.
    prompt: str | None = None
    # Forwarded to the generation script as -s.
    size: str | None = "512x512"
    # Forwarded to the generation script as --num-inference-steps.
    num_inference_steps: int | None = 4
    # Forwarded to the generation script as --seed.
    seed: int | None = 123456
def generate_image(item):
    """Run the RKNN Stable Diffusion script for *item* and load the result.

    Args:
        item: Object exposing ``seed``, ``num_inference_steps``, ``size`` and
            ``prompt`` attributes (for example an ``Item`` request body).

    Returns:
        Whatever ``cv2.imread`` returns for ``./images/image.png``.

    Raises:
        RuntimeError: If the generation script exits with a non-zero status.
    """
    print(item)

    # Pass the arguments as a list with shell=False. The prompt comes straight
    # from the HTTP request, so it must never be interpreted by a shell.
    command = [
        "python", "./run_rknn-lcm.py",
        "--seed", str(item.seed),
        "-i", "./model",
        "-o", "./images",
        "--num-inference-steps", str(item.num_inference_steps),
        "-s", str(item.size),
        "--prompt", str(item.prompt),
    ]

    # Inference
    output = subprocess.run(command, shell=False, capture_output=True)
    print(output, "\n")

    # The output path is fixed, so after a failed run the file on disk would
    # still be the image of an earlier request. Fail loudly instead.
    if output.returncode != 0:
        raise RuntimeError(
            "Image generation failed with exit code {}".format(output.returncode)
        )

    # Path to image
    path_img = "./images/image.png"  # glob.glob("./images/*")[0]
    # Read
    return cv2.imread(path_img)
app = FastAPI()
@app.get("/")
def read_root():
    # Fixed greeting payload returned for GET /.
    greeting = {"Hello": "World"}
    return greeting
@app.post("/image")
def get_image(item: Item):
    # Generate the image for the request, PNG-encode it in memory and send the
    # base64 form of those bytes back as the response body.
    rendered = generate_image(item)
    _encoded_ok, png_buffer = cv2.imencode('.png', rendered)
    return Response(base64.b64encode(png_buffer))

22
web_app/manage.py Executable file
View File

@@ -0,0 +1,22 @@
#!/usr/bin/env python
"""Django's command-line utility for administrative tasks."""
import os
import sys
def main():
    """Select the project settings and hand ``sys.argv`` to Django's CLI."""
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'mysite.settings')
    try:
        from django.core import management
    except ImportError as import_error:
        hint = (
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        )
        raise ImportError(hint) from import_error
    management.execute_from_command_line(sys.argv)


if __name__ == '__main__':
    main()

View File

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

16
web_app/mysite/asgi.py Normal file
View File

@@ -0,0 +1,16 @@
"""
ASGI config for mysite project.
It exposes the ASGI callable as a module-level variable named ``application``.
For more information on this file, see
https://docs.djangoproject.com/en/5.1/howto/deployment/asgi/
"""
import os
from django.core.asgi import get_asgi_application
# Only supplies a default: a DJANGO_SETTINGS_MODULE already present in the
# environment is left untouched.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'mysite.settings')
# Module-level ASGI callable, as described in the module docstring.
application = get_asgi_application()

132
web_app/mysite/settings.py Normal file
View File

@@ -0,0 +1,132 @@
"""
Django settings for mysite project.
Generated by 'django-admin startproject' using Django 5.1.6.
For more information on this file, see
https://docs.djangoproject.com/en/5.1/topics/settings/
For the full list of settings and their values, see
https://docs.djangoproject.com/en/5.1/ref/settings/
"""
import os
from pathlib import Path
# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent
# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/5.1/howto/deployment/checklist/
# SECURITY WARNING: keep the secret key used in production secret!
# Read from DJANGO_SECRET_KEY, following the DJANGO_DB_* pattern used for the
# database settings in this file. The development key is only a fallback.
SECRET_KEY = os.environ.get(
    "DJANGO_SECRET_KEY",
    'django-insecure-0+jg0u+%s@sj759i7@jn*%-#jl)8&#=siclb5908pwe!7=*$qb',
)

# SECURITY WARNING: don't run with debug turned on in production!
# DJANGO_DEBUG accepts 1/true/yes/on (case-insensitive). Leaving it unset keeps
# the previous behaviour (DEBUG enabled).
DEBUG = os.environ.get("DJANGO_DEBUG", "true").strip().lower() in {"1", "true", "yes", "on"}

# Comma-separated host names from DJANGO_ALLOWED_HOSTS; an empty list when the
# variable is unset, exactly as before.
ALLOWED_HOSTS = [
    host.strip()
    for host in os.environ.get("DJANGO_ALLOWED_HOSTS", "").split(",")
    if host.strip()
]
# Application definition
INSTALLED_APPS = [
'news.apps.NewsConfig',
'django.contrib.admin',
'django.contrib.auth',
'django.contrib.contenttypes',
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
]
MIDDLEWARE = [
'django.middleware.security.SecurityMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware',
]
ROOT_URLCONF = 'mysite.urls'
TEMPLATES = [
{
'BACKEND': 'django.template.backends.django.DjangoTemplates',
'DIRS': [],
'APP_DIRS': True,
'OPTIONS': {
'context_processors': [
'django.template.context_processors.debug',
'django.template.context_processors.request',
'django.contrib.auth.context_processors.auth',
'django.contrib.messages.context_processors.messages',
],
},
},
]
WSGI_APPLICATION = 'mysite.wsgi.application'
# Database
# https://docs.djangoproject.com/en/5.1/ref/settings/#databases
# Every connection parameter is read from a DJANGO_DB_* environment variable
# and falls back to the literal given here when that variable is unset.
DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.postgresql',
        'NAME': os.environ.get("DJANGO_DB_NAME", "matitos"),
        'USER': os.environ.get("DJANGO_DB_USER", "supermatitos"),
        'PASSWORD': os.environ.get("DJANGO_DB_PASSWORD", "supermatitos"),
        'HOST': os.environ.get("DJANGO_DB_HOST", "localhost"),
        'PORT': os.environ.get("DJANGO_DB_PORT", "5432"),
        # Disabled: would open every connection with read-only transactions.
        #'OPTIONS': {
        #    'options': '-c default_transaction_read_only=on'
        #}
    }
}
# Password validation
# https://docs.djangoproject.com/en/5.1/ref/settings/#auth-password-validators
AUTH_PASSWORD_VALIDATORS = [
{
'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
},
]
# Internationalization
# https://docs.djangoproject.com/en/5.1/topics/i18n/
LANGUAGE_CODE = 'en-us'
TIME_ZONE = 'UTC'
USE_I18N = True
USE_TZ = True
# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/5.1/howto/static-files/
STATIC_URL = 'static/'
# Default primary key field type
# https://docs.djangoproject.com/en/5.1/ref/settings/#default-auto-field
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'

26
web_app/mysite/urls.py Normal file
View File

@@ -0,0 +1,26 @@
"""
URL configuration for mysite project.
The `urlpatterns` list routes URLs to views. For more information please see:
https://docs.djangoproject.com/en/5.1/topics/http/urls/
Examples:
Function views
1. Add an import: from my_app import views
2. Add a URL to urlpatterns: path('', views.home, name='home')
Class-based views
1. Add an import: from other_app.views import Home
2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
Including another URLconf
1. Import the include() function: from django.urls import include, path
2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
"""
from django.contrib import admin
from django.urls import include, path
from django.views.generic.base import RedirectView
urlpatterns = [
    # Non-permanent redirect from the site root to the news app.
    path("", RedirectView.as_view(url='news/', permanent=False)),
    # Everything under news/ is delegated to the news app's URLconf.
    path("news/", include("news.urls")),
    path('admin/', admin.site.urls),
    # Disabled route for a facerecognition app:
    # path("facerecognition", include("facerecognition.urls")),
]

16
web_app/mysite/wsgi.py Normal file
View File

@@ -0,0 +1,16 @@
"""
WSGI config for mysite project.
It exposes the WSGI callable as a module-level variable named ``application``.
For more information on this file, see
https://docs.djangoproject.com/en/5.1/howto/deployment/wsgi/
"""
import os
from django.core.wsgi import get_wsgi_application
# Only supplies a default: a DJANGO_SETTINGS_MODULE already present in the
# environment is left untouched.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'mysite.settings')
# Module-level WSGI callable, as described in the module docstring.
application = get_wsgi_application()

0
web_app/news/__init__.py Normal file
View File

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

9
web_app/news/admin.py Normal file
View File

@@ -0,0 +1,9 @@
from django.contrib import admin
# Register your models here.
from .models import Urls, UrlsSource, Source
# Make the news models editable from the Django admin, in the same order as
# before.
for model in (Urls, UrlsSource, Source):
    admin.site.register(model)

6
web_app/news/apps.py Normal file
View File

@@ -0,0 +1,6 @@
from django.apps import AppConfig
class NewsConfig(AppConfig):
    """Application configuration for the ``news`` app."""

    # Primary-key field type used by default for this app's models.
    default_auto_field = 'django.db.models.BigAutoField'
    name = 'news'

View File

@@ -0,0 +1,38 @@
# Generated by Django 5.1.6 on 2025-02-20 15:36
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
    """Initial migration of the news app.

    Creates the SOURCE, URL and URL_SOURCE models. URL_SOURCE links a URL to
    a SOURCE through two foreign keys that use RESTRICT on delete.
    """

    initial = True

    dependencies = [
    ]

    operations = [
        migrations.CreateModel(
            name='SOURCE',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('source', models.TextField()),
            ],
        ),
        migrations.CreateModel(
            name='URL',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('url', models.TextField()),
                ('pub_date', models.DateTimeField(verbose_name='date published')),
            ],
        ),
        migrations.CreateModel(
            name='URL_SOURCE',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('source', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, to='news.source')),
                ('url', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, to='news.url')),
            ],
        ),
    ]

View File

@@ -0,0 +1,25 @@
# Generated by Django 5.1.6 on 2025-02-20 16:11
from django.db import migrations
class Migration(migrations.Migration):
    """Point the three models at explicit table names.

    source -> ``source``, url -> ``urls``, url_source -> ``urls_source``.
    """

    dependencies = [
        ('news', '0001_initial'),
    ]

    operations = [
        migrations.AlterModelTable(
            name='source',
            table='source',
        ),
        migrations.AlterModelTable(
            name='url',
            table='urls',
        ),
        migrations.AlterModelTable(
            name='url_source',
            table='urls_source',
        ),
    ]

View File

@@ -0,0 +1,33 @@
# Generated by Django 5.1.6 on 2025-02-20 16:18
import django.db.models.functions.datetime
from django.db import migrations, models
class Migration(migrations.Migration):
    """Reshape the ``url`` model.

    Drops ``pub_date``, adds ``status`` (default ``'raw'``) and ``ts_fetch``
    (database default ``Now()``), and sets the verbose name of ``url``.
    """

    dependencies = [
        ('news', '0002_alter_source_table_alter_url_table_and_more'),
    ]

    operations = [
        migrations.RemoveField(
            model_name='url',
            name='pub_date',
        ),
        migrations.AddField(
            model_name='url',
            name='status',
            field=models.CharField(choices=[('raw', 'Raw'), ('error', 'Error'), ('valid', 'Valid'), ('unknown', 'Unknown'), ('invalid', 'Invalid'), ('duplicate', 'Duplicate')], default='raw'),
        ),
        migrations.AddField(
            model_name='url',
            name='ts_fetch',
            field=models.DateTimeField(db_default=django.db.models.functions.datetime.Now(), verbose_name='Date fetched'),
        ),
        migrations.AlterField(
            model_name='url',
            name='url',
            field=models.TextField(verbose_name='URL'),
        ),
    ]

View File

@@ -0,0 +1,17 @@
# Generated by Django 5.1.6 on 2025-02-20 16:32
from django.db import migrations
class Migration(migrations.Migration):
    """Make each (url, source) pair unique on ``url_source``."""

    dependencies = [
        ('news', '0003_remove_url_pub_date_url_status_url_ts_fetch_and_more'),
    ]

    operations = [
        migrations.AlterUniqueTogether(
            name='url_source',
            unique_together={('url', 'source')},
        ),
    ]

View File

@@ -0,0 +1,59 @@
# Generated by Django 5.1.6 on 2025-02-20 16:53
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
    """Switch the app to unmanaged models.

    Adds the unmanaged ``Urls`` and ``UrlsSource`` models (tables ``urls`` and
    ``urls_source``), marks ``source`` as unmanaged, and removes the earlier
    ``URL`` and ``URL_SOURCE`` models after stripping the latter's fields.
    """

    dependencies = [
        ('news', '0004_alter_url_source_unique_together'),
    ]

    operations = [
        migrations.CreateModel(
            name='Urls',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('url', models.TextField(unique=True)),
                ('ts_fetch', models.DateTimeField()),
                ('status', models.TextField(choices=[('raw', 'Raw'), ('error', 'Error'), ('valid', 'Valid'), ('unknown', 'Unknown'), ('invalid', 'Invalid'), ('duplicate', 'Duplicate')], default='raw')),
            ],
            options={
                'db_table': 'urls',
                'managed': False,
            },
        ),
        migrations.RemoveField(
            model_name='url_source',
            name='url',
        ),
        migrations.AlterUniqueTogether(
            name='url_source',
            unique_together=None,
        ),
        migrations.RemoveField(
            model_name='url_source',
            name='source',
        ),
        migrations.AlterModelOptions(
            name='source',
            options={'managed': False},
        ),
        migrations.CreateModel(
            name='UrlsSource',
            fields=[
                ('id_url', models.OneToOneField(db_column='id_url', on_delete=django.db.models.deletion.DO_NOTHING, primary_key=True, serialize=False, to='news.urls')),
            ],
            options={
                'db_table': 'urls_source',
                'managed': False,
            },
        ),
        migrations.DeleteModel(
            name='URL',
        ),
        migrations.DeleteModel(
            name='URL_SOURCE',
        ),
    ]

View File

@@ -0,0 +1,17 @@
# Generated by Django 5.1.6 on 2025-03-06 09:36
from django.db import migrations
class Migration(migrations.Migration):
    """Order ``urls`` by ``ts_fetch`` descending; the model stays unmanaged."""

    dependencies = [
        ('news', '0005_urls_remove_url_source_url_and_more'),
    ]

    operations = [
        migrations.AlterModelOptions(
            name='urls',
            options={'managed': False, 'ordering': ['-ts_fetch']},
        ),
    ]

View File

46
web_app/news/models.py Normal file
View File

@@ -0,0 +1,46 @@
from django.db import models
# Create your models here.
class Urls(models.Model):
    """Unmanaged model over the ``urls`` table.

    Instances are ordered by ``ts_fetch`` descending by default and render as
    their ``url``.
    """

    class STATUS_ENUM(models.TextChoices):
        # Values accepted by the ``status`` field.
        RAW = "raw"
        ERROR = "error"
        VALID = "valid"
        UNKNOWN = "unknown"
        INVALID = "invalid"
        DUPLICATE = "duplicate"

    url = models.TextField(unique=True)
    ts_fetch = models.DateTimeField()
    status = models.TextField(choices=STATUS_ENUM, default=STATUS_ENUM.RAW)  # This field type is a guess.

    def __str__(self) -> str:
        return self.url

    class Meta:
        # managed = False: migrations do not create or alter this table.
        managed = False
        db_table = 'urls'  # db_table = '{}_urls'.format(project_name)
        ordering = ["-ts_fetch"]
class Source(models.Model):
    """Unmanaged model over the ``source`` table; renders as its ``source`` text."""

    id = models.SmallAutoField(primary_key=True)
    source = models.TextField(unique=True)

    def __str__(self) -> str:
        return self.source

    class Meta:
        # managed = False: migrations do not create or alter this table.
        managed = False
        db_table = 'source'
class UrlsSource(models.Model):
    """Unmanaged model over the ``urls_source`` link table (URL <-> source)."""

    # NOTE(review): id_url alone is declared as the primary key, while
    # uniqueness is on the (id_url, id_source) pair. Confirm how rows that
    # share an id_url behave through the ORM and the admin.
    id_url = models.OneToOneField(Urls, models.DO_NOTHING, db_column='id_url', primary_key=True)  # The composite primary key (id_url, id_source) found, that is not supported. The first column is selected.
    id_source = models.ForeignKey(Source, models.DO_NOTHING, db_column='id_source')

    def __str__(self) -> str:
        return "Source: {}, URL: {}".format(self.id_source, self.id_url)

    class Meta:
        # managed = False: migrations do not create or alter this table.
        managed = False
        db_table = 'urls_source'
        unique_together = (('id_url', 'id_source'),)

View File

@@ -0,0 +1,508 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>News</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
<script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
<script>
function getQueryString(pageNumber, itemsNumber, sources, statuses){
    // Start from the current query string so parameters not handled here survive.
    const params = new URLSearchParams(window.location.search);

    // [parameter name, explicitly requested value, default when neither
    // the caller nor the current URL provides one]
    const fields = [
        ["page", pageNumber, 1],
        ["items", itemsNumber, 15],
        ["sources", sources, "all"],
        ["status", statuses, "all"],
    ];
    for (const [key, requested, fallback] of fields) {
        // A null/undefined argument means "keep the most recent value".
        const value = requested != null ? requested : (params.get(key) ?? fallback);
        params.set(key, value);
    }

    // URLSearchParams percent-encodes commas; put them back: %2C -> ,
    return params.toString().split("%2C").join(",");
}
// Fetch one page of items via AJAX and swap it into #item-list.
// Null arguments keep the value currently in the URL (see getQueryString).
function loadPage(pageNumber, itemsNumber, sources, statuses) {
    // Dim the list and show the spinner while the request is in flight.
    $("#item-list").fadeTo(100, 0.5); // Smooth fade effect
    $("#loading").show();

    // Declared locally: assigning without a declaration created an implicit global.
    const queryParamsString = getQueryString(pageNumber, itemsNumber, sources, statuses);

    $.ajax({
        url: "?" + queryParamsString,
        type: "GET",
        headers: { "X-Requested-With": "XMLHttpRequest" },
        success: function (data) {
            $("#item-list").fadeTo(0, 1).html(data.items_html); // Restore opacity smoothly
            $("#loading").hide();
            // Update URL without reloading
            window.history.pushState({}, "", "?" + queryParamsString);
        },
        error: function () {
            // On failure, restore the list and hide the spinner so the page
            // does not stay stuck in its "loading" look.
            $("#item-list").fadeTo(0, 1);
            $("#loading").hide();
        }
    });
}
////////////////////////////////////////////////////////////////////////////
// Pagination
////////////////////////////////////////////////////////////////////////////
// Delegated handler: pagination links load their page via AJAX instead of navigating.
$(document).on("click", ".pagination a", function (event) {
    event.preventDefault();
    const page = $(this).attr("data-page");
    // Positional arguments only: `name=value` inside a call is an assignment
    // in JavaScript and would create implicit globals. The nulls keep the
    // current items/sources/status filters.
    loadPage(page, null, null, null);
});
$(document).ready(function () {
    ////////////////////////////////////////////////////////////////////////////
    // Filter controls
    ////////////////////////////////////////////////////////////////////////////
    const allSourcesBox = $("#toggle-all-sources");
    const sourceBoxes = $(".source-checkbox");
    const allStatusesBox = $("#toggle-all-status");
    const statusBoxes = $(".status-checkbox");
    const DEFAULT_ITEMS = 15;

    // Comma-separated values of the ticked boxes within a checkbox set
    function checkedValues(boxes) {
        return boxes.filter(":checked").map(function (_, box) {
            return $(box).val();
        }).get().join(",");
    }

    // True when every box of the set is ticked
    function allChecked(boxes) {
        return boxes.length === boxes.filter(":checked").length;
    }

    // Collect the current selection and reload the list from its first page
    function updateFilters() {
        const chosenSources = checkedValues(sourceBoxes);
        const chosenStatuses = checkedValues(statusBoxes);
        const chosenItems = $("input[name='items']:checked").val();
        loadPage(1, chosenItems, chosenSources, chosenStatuses);
    }

    ////////////////////////////////////////////////////////////////////////////
    // Initial state: everything selected, default page size
    ////////////////////////////////////////////////////////////////////////////
    sourceBoxes.prop("checked", true);
    allSourcesBox.prop("checked", true);
    statusBoxes.prop("checked", true);
    allStatusesBox.prop("checked", true);
    $("input[name='items'][value='" + DEFAULT_ITEMS + "']").prop("checked", true);

    ////////////////////////////////////////////////////////////////////////////
    // Change triggers
    ////////////////////////////////////////////////////////////////////////////
    // Sources: the master box drives the individual boxes ...
    allSourcesBox.on("change", function () {
        sourceBoxes.prop("checked", allSourcesBox.prop("checked"));
        updateFilters();
    });
    // ... and the individual boxes keep the master box in sync
    sourceBoxes.on("change", function () {
        allSourcesBox.prop("checked", allChecked(sourceBoxes));
        updateFilters();
    });
    // Statuses: same two-way coupling
    allStatusesBox.on("change", function () {
        statusBoxes.prop("checked", allStatusesBox.prop("checked"));
        updateFilters();
    });
    statusBoxes.on("change", function () {
        allStatusesBox.prop("checked", allChecked(statusBoxes));
        updateFilters();
    });
    // Items per page
    $(".items").on("change", updateFilters);
});
////////////////////////////////////////////////////////////////////////////
// Theme logic
////////////////////////////////////////////////////////////////////////////
/**
 * Apply a colour theme to the page and remember it in localStorage.
 *
 * @param {string} mode Theme name; "dark" selects the dark variant, any other
 *                      value is treated as the light variant for icon/class.
 */
function setTheme(mode) {
    const isDark = mode === "dark";
    const root = document.documentElement;
    // data-theme drives the custom CSS variables, data-bs-theme drives Bootstrap
    root.setAttribute("data-theme", mode);
    root.setAttribute("data-bs-theme", mode);
    localStorage.setItem("theme", mode);
    // The icon shows the theme the button would switch TO
    document.getElementById("theme-icon").innerHTML = isDark ? "🌞" : "🌙";
    document.body.classList.toggle("dark-mode", isDark);
}
/** Switch between the light and dark themes based on the active data-theme. */
function toggleTheme() {
    const isDark = document.documentElement.getAttribute("data-theme") === "dark";
    setTheme(isDark ? "light" : "dark");
}
// On load, restore the stored theme; without one, follow the OS preference.
document.addEventListener("DOMContentLoaded", function () {
    let initialTheme = localStorage.getItem("theme");
    if (!initialTheme) {
        const prefersDark = window.matchMedia("(prefers-color-scheme: dark)").matches;
        initialTheme = prefersDark ? "dark" : "light";
    }
    setTheme(initialTheme);
});
////////////////////////////////////////////////////////////////////////////
</script>
<style>
/* Content Area */
#content {
margin-left: 170px; /* Match sidebar width */
min-width: calc(100vw - 170px); /* Ensure it doesn't shrink into the sidebar */
width: calc(100vw - 170px); /* Expands based on screen size */
padding: 20px;
overflow-x: auto; /* Prevent content from being squeezed */
transition: margin-left 0.3s ease;
}
/* Sidebar Styles */
#sidebar {
height: 100vh;
position: fixed;
top: 0;
left: 0;
width: 170px; /* Default width */
background-color: var(--bg-color);
box-shadow: 2px 0 5px rgba(0, 0, 0, 0.1);
padding: 15px;
transition: width 0.3s ease;
}
#sidebar .nav-link {
color: var(--text-color);
}
#sidebar .nav-link:hover {
background-color: var(--pagination-hover-bg);
}
/* ============================= */
/* Responsive Enhancements */
/* ============================= */
@media (min-width: 1200px) {
.table {
width: 95%; /* Allows table to take more space */
margin: 0 auto; /* Centers the table */
}
}
@media (max-width: 768px) {
#sidebar {
width: 70px; /* Collapse sidebar to smaller width */
/*padding: 10px;*/
}
#content {
margin-left: 70px; /* Adjust margin to match collapsed sidebar */
min-width: calc(100vw - 70px); /* Prevent overlap */
/*padding: 10px;*/
}
/* Adjust table for small screens */
.table-responsive {
overflow-x: auto;
}
.table th,
.table td {
white-space: nowrap; /* Prevent text wrapping in cells */
}
.table a {
word-break: break-word; /* Ensure long URLs break properly */
}
}
/* ============================= */
/* Global Styles */
/* ============================= */
body {
background-color: var(--bg-color);
color: var(--text-color);
transition: background-color 0.3s, color 0.3s;
}
/* ============================= */
/* Light & Dark Mode Variables */
/* ============================= */
:root {
--bg-color: #ffffff;
--text-color: #212529;
--table-bg: #ffffff;
--table-text: #000000;
--table-border: #dee2e6;
--link-color: #007bff;
--pagination-bg: #ffffff;
--pagination-border: #dee2e6;
--pagination-hover-bg: #f8f9fa;
--pagination-active-bg: #007bff;
--pagination-active-text: #ffffff;
--button-bg: #f8f9fa;
--button-border: #ced4da;
--button-text: #212529;
}
[data-theme="dark"] {
--bg-color: #121212;
--text-color: #e0e0e0;
--table-bg: #1e1e1e;
--table-text: #ffffff;
--table-border: #2c2c2c;
--link-color: #9ec5fe;
--pagination-bg: #1e1e1e;
--pagination-border: #444;
--pagination-hover-bg: #333;
--pagination-active-bg: #007bff;
--pagination-active-text: #ffffff;
--button-bg: #1e1e1e;
--button-border: #444;
--button-text: #e0e0e0;
}
/* ============================= */
/* Table Styling */
/* ============================= */
.table-responsive {
width: 100%; /* Ensure it spans the full width of its container */
max-width: 100%;
overflow-x: auto;
}
.table {
background-color: var(--table-bg);
color: var(--table-text);
border: 1px solid var(--table-border);
transition: background-color 0.3s, color 0.3s;
width: 100%; /* Ensures it takes full width of its container */
table-layout: auto; /* Allows columns to adjust dynamically */
/*white-space: nowrap;*/ /* Prevents text wrapping in cells */
}
.table th,
.table td {
border-color: var(--table-border);
}
.table thead {
background-color: var(--pagination-active-bg);
color: var(--pagination-active-text);
}
[data-theme="dark"] .table {
background-color: var(--table-bg);
color: var(--table-text);
}
[data-theme="dark"] .table th,
[data-theme="dark"] .table td {
border-color: var(--table-border);
}
[data-theme="dark"] .table thead {
background-color: #333;
color: #fff;
}
th:nth-child(1), td:nth-child(1) { width: 50%; } /* URL column */
th:nth-child(2), td:nth-child(2) { width: 20%; } /* Fetch Date */
th:nth-child(3), td:nth-child(3) { width: 20%; } /* Sources */
th:nth-child(4), td:nth-child(4) { width: 5%; } /* Status */
th:nth-child(5), td:nth-child(5) { width: 5%; } /* Action */
/* ============================= */
/* Pagination Styling */
/* ============================= */
.pagination {
display: flex;
justify-content: center;
padding: 10px 0;
}
.pagination .page-link {
background-color: var(--pagination-bg);
border-color: var(--pagination-border);
color: var(--text-color);
padding: 10px 14px;
margin: 0 5px;
border-radius: 8px;
transition: background-color 0.3s, color 0.3s, transform 0.2s;
}
.pagination .page-link:hover {
background-color: var(--pagination-hover-bg);
transform: scale(1.05);
}
.pagination .active .page-link {
background-color: var(--pagination-active-bg);
color: var(--pagination-active-text);
border-color: var(--pagination-active-bg);
}
/* ============================= */
/* Theme Toggle Button */
/* ============================= */
.theme-toggle-btn {
background-color: var(--button-bg);
border: 1px solid var(--button-border);
color: var(--button-text);
border-radius: 50%;
width: 40px;
height: 40px;
font-size: 20px;
display: flex;
align-items: center;
justify-content: center;
transition: background-color 0.3s, color 0.3s, transform 0.2s;
cursor: pointer;
}
.theme-toggle-btn:hover {
background-color: var(--pagination-hover-bg);
transform: rotate(20deg);
}
.theme-toggle-btn:active {
transform: scale(0.95);
}
/* ============================= */
/* Loading Spinner Styling */
/* ============================= */
#loading {
position: fixed;
left: 50%;
top: 50%;
transform: translate(-50%, -50%);
z-index: 1050;
display: none;
}
.spinner-border {
width: 4rem;
height: 4rem;
}
</style>
</head>
<body>
<!-- Left Sidebar -->
<div id="sidebar" class="d-flex flex-column">
<ul class="nav flex-column">
<!-- Theme Toggle Button -->
<div class="nav-item">
<button onclick="toggleTheme()" class="theme-toggle-btn">
<span id="theme-icon">🌙</span>
</button>
</div>
<!-- Sources -->
<div class="nav-item mt-3">
<strong>Select sources</strong>
<form id="source-filter-form">
<!-- Toggle All Checkbox -->
<div class="form-check">
<input class="form-check-input" type="checkbox" id="toggle-all-sources">
<label class="form-check-label fw-bold" for="toggle-all-sources">
Toggle all
</label>
</div>
<!-- Individual Source Checkboxes -->
{% for source in sources %}
<div class="form-check">
    <input class="form-check-input source-checkbox" type="checkbox" value="{{ source.id }}" id="source-{{ source.id }}">
    <label class="form-check-label" for="source-{{ source.id }}">
        {{ source.source }}
    </label>
</div>
{% empty %}
{# This list lives inside a form, not a table, so table rows are invalid here #}
<div class="text-muted text-center">No sources available.</div>
{% endfor %}
</form>
</div>
<!-- Status -->
<div class="nav-item mt-3">
<strong>Select status</strong>
<form id="status-filter-form">
<!-- Toggle All Checkbox -->
<div class="status-form-check">
<input class="form-check-input" type="checkbox" id="toggle-all-status">
<label class="form-check-label fw-bold" for="toggle-all-status">
Toggle all
</label>
</div>
<!-- Individual Status Checkboxes -->
{% for status in list_status %}
<div class="status-form-check">
    <input class="form-check-input status-checkbox" type="checkbox" value="{{ status }}" id="status-{{ status }}">
    <label class="form-check-label" for="status-{{ status }}">
        {{ status }}
    </label>
</div>
{% empty %}
{# This list lives inside a form, not a table, so table rows are invalid here #}
<div class="text-muted text-center">No statuses available.</div>
{% endfor %}
</form>
</div>
<!-- URLs per page -->
<div class="nav-item mt-3">
<strong>URLs per page</strong>
<div class="card-body">
<!-- Individual items-per-page radio buttons -->
{% for url_per_page in list_urls_per_page %}
<div class="items-form-check">
    <input class="form-check-input items" type="radio" name="items" id="value-{{ url_per_page }}" value="{{ url_per_page }}">
    <label class="form-check-label" for="value-{{ url_per_page }}">{{ url_per_page }}</label>
</div>
{% empty %}
{# This list is not inside a table, so table rows are invalid here #}
<div class="text-muted text-center">No options available.</div>
{% endfor %}
</div>
</div>
</ul>
</div>
<!-- Main Content Area -->
<div id="content" class="main-content">
<div class="container mt-4">
<!-- Table -->
<div id="item-list">
{% include 'item_list_partial.html' %}
</div>
<!-- Loading... -->
<div id="loading" class="text-center mt-3" style="display:none;">
<div class="spinner-border text-primary" role="status">
<span class="visually-hidden">Loading...</span>
</div>
</div>
</div>
</div>
</body>
</html>

View File

@@ -0,0 +1,87 @@
{% load custom_filters %}
<div class="table-responsive">
<table class="table table-hover">
<thead>
<tr>
<th scope="col"><strong>URL</strong></th>
<th scope="col"><strong>Fetch date</strong></th>
<th scope="col"><strong>Sources</strong></th>
<th scope="col"><strong>Status</strong></th>
<th scope="col"><strong>Action</strong></th>
</tr>
</thead>
<tbody>
{% for item in page_obj %}
<tr>
<td><a href="https://{{ item.url }}/">{{ item.url }}</a></td>
<td>{{ item.ts_fetch }}</td>
<td>
{% with sources_map|dict_get:item.id as sources %}
{% if sources %}
{% for source in sources %}
<span class="badge bg-secondary">{{ source }}</span>
{% endfor %}
{% else %}
<span class="text-muted">No sources</span>
{% endif %}
{% endwith %}
</td>
<td>
{% if item.status == 'raw' %}
<span class="badge bg-secondary">{{ item.status|capfirst }}</span>
{% elif item.status == 'error' %}
<span class="badge bg-danger">{{ item.status|capfirst }}</span>
{% elif item.status == 'valid' %}
<span class="badge bg-success">{{ item.status|capfirst }}</span>
{% elif item.status == 'unknown' %}
<span class="badge bg-warning">{{ item.status|capfirst }}</span>
{% elif item.status == 'invalid' %}
<span class="badge bg-danger">{{ item.status|capfirst }}</span>
{% elif item.status == 'duplicate' %}
<span class="badge bg-info">{{ item.status|capfirst }}</span>
{% else %}
<span class="badge bg-light">Unknown</span>
{% endif %}
</td>
<td>
<a href="url/{{ item.id }}" class="btn btn-primary btn-sm" target="_blank">Details</a>
</td>
</tr>
{% empty %}
<tr>
    {# Span all five columns: URL, Fetch date, Sources, Status, Action #}
    <td colspan="5" class="text-center">No items available.</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
<div class="d-flex justify-content-center mt-3">
<nav>
<ul class="pagination">
{% if page_obj.has_previous %}
<li class="page-item">
<a class="page-link" href="#" data-page="1">First</a>
</li>
<li class="page-item">
<a class="page-link" href="#" data-page="{{ page_obj.previous_page_number }}">Previous</a>
</li>
{% endif %}
<li class="page-item active">
<span class="page-link">Page {{ page_obj.number }} of {{ page_obj.paginator.num_pages }}</span>
</li>
{% if page_obj.has_next %}
<li class="page-item">
<a class="page-link" href="#" data-page="{{ page_obj.next_page_number }}">Next</a>
</li>
<li class="page-item">
<a class="page-link" href="#" data-page="{{ page_obj.paginator.num_pages }}">Last</a>
</li>
{% endif %}
</ul>
</nav>
</div>

View File

@@ -0,0 +1,188 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{% block title %}News{% endblock %}</title>
<!-- Bootstrap CSS -->
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
<!-- Add jQuery from CDN (before other scripts) -->
<script src="https://code.jquery.com/jquery-3.6.4.min.js"></script>
<!-- Markdown -->
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
<!-- Custom Styles -->
<style>
body {
background-color: #f4f4f4;
}
.navbar-dark .navbar-nav .nav-link {
color: rgba(255,255,255,0.75);
}
.chat-box {
background-color: #fff;
border: 1px solid #ddd;
padding: 15px;
border-radius: 8px;
overflow-y: auto; /* Enable vertical scrolling */
max-width: 100%;
min-height: 150px;
max-height: 450px;
white-space: normal;
word-wrap: break-word;
word-break: break-word;
}
</style>
</head>
<script>
/**
 * Send the prompt for one URL to the selected LLM and render the streamed
 * answer as Markdown inside #chat-output.
 *
 * @param {number} urlId Id of the URL record; selects the prompt/model inputs
 *                       and is part of the fetch endpoint path.
 * @param {string} url   The URL itself, forwarded as the "url" query parameter.
 */
function fetchDetails(urlId, url) {
    // Get the prompt text and the chosen model
    const inputText = document.getElementById(`custom-input-${urlId}`).value;
    const selectedModel = document.getElementById(`options-${urlId}`).value;
    // Validate BEFORE touching the UI: returning early after the spinner was
    // shown would leave it spinning forever.
    if (!selectedModel) {
        alert("Please select a model before fetching details.");
        return;
    }
    const spinner = document.getElementById("loading-spinner");
    spinner.style.display = "block";
    // Fetch URL
    const fetchUrl = `/news/url/${urlId}/fetch/?url=${encodeURIComponent(url)}&model=${encodeURIComponent(selectedModel)}&text=${encodeURIComponent(inputText)}`;
    const resultContainer = $("#chat-output");
    // The scrollable element is the surrounding .chat-box (it carries the
    // overflow-y rule), not #chat-output itself.
    const chatBox = resultContainer.closest(".chat-box");
    resultContainer.html(""); // Clear previous content before fetching
    const fetchButton = $("button[onclick^='fetchDetails']");
    fetchButton.prop("disabled", true); // Prevent concurrent requests
    fetch(fetchUrl)
        .then(response => {
            if (!response.ok) {
                throw new Error("Error on network response");
            }
            const reader = response.body.getReader();
            const decoder = new TextDecoder();
            let accumulatedText = ""; // Full text received so far (Markdown source)
            // Container that is re-rendered as chunks arrive
            const messageContainer = $('<div class="chat-message"></div>');
            resultContainer.append(messageContainer);

            function render() {
                // NOTE(review): marked.parse() does not sanitize its output, so
                // the model's answer is inserted as raw HTML. Consider running
                // the result through an HTML sanitizer before insertion.
                messageContainer.html(marked.parse(accumulatedText));
            }

            function read() {
                return reader.read().then(({ done, value }) => {
                    if (done) {
                        // Flush any bytes still buffered inside the decoder
                        accumulatedText += decoder.decode();
                        render();
                        return;
                    }
                    // stream:true keeps an incomplete multi-byte UTF-8 sequence
                    // buffered until the next chunk instead of emitting U+FFFD
                    accumulatedText += decoder.decode(value, { stream: true });
                    render();
                    if (chatBox.length) {
                        chatBox.scrollTop(chatBox[0].scrollHeight); // Auto-scroll to bottom
                    }
                    return read();
                });
            }
            return read();
        })
        .catch(error => {
            // Insert the message as text so it can never be interpreted as markup
            const errorLine = $('<p class="text-danger"></p>').text("Error fetching details: " + error.message);
            resultContainer.empty().append(errorLine);
        })
        .finally(() => {
            // Runs once the stream has finished or failed
            fetchButton.prop("disabled", false);
            spinner.style.display = "none";
        });
}
</script>
<body>
<!-- Main Content -->
<div class="container mt-4">
<h2>URL Details</h2>
<table class="table table-bordered">
<tr>
<th>URL</th>
<td>{{ url_item.url }}</td>
</tr>
<tr>
<th>Fetch Date</th>
<td>{{ url_item.ts_fetch }}</td>
</tr>
<tr>
<th>Sources</th>
<td>{{ sources|join:", " }}</td>
</tr>
<tr>
<th>Status</th>
<td>{{ url_item.status }}</td>
</tr>
</table>
<!-- Independent form for optional values -->
{# Submitting must not reload the page; it triggers the same action as the "Fetch Details" button #}
<form onsubmit="event.preventDefault(); fetchDetails({{ url_item.id }}, '{{ url_item.url|escapejs }}');">
    <label for="options-{{ url_item.id }}">Model:</label>
    <select id="options-{{ url_item.id }}" class="form-control mb-2">
        <!-- <option value="">-- Select an option --</option> -->
        {% for model in models %}
        <option value="{{ model }}">{{ model }}</option>
        {% endfor %}
    </select>
</form>
<!-- Input field with a default value -->
<label for="custom-input-{{ url_item.id }}">Prompt:</label>
<textarea id="custom-input-{{ url_item.id }}" class="form-control mb-2" rows="3">{{ prompt }} {{ url_item.url }}</textarea>
<!-- Fetch details button -->
{# escapejs: the URL ends up inside a JavaScript string literal; HTML auto-escaping alone is undone by the attribute parser #}
<button class="btn btn-primary" onclick="fetchDetails({{ url_item.id }}, '{{ url_item.url|escapejs }}')">
    Fetch Details
</button>
<!-- Chatbot-style response box -->
<div class="chat-box mt-3 p-3 border rounded">
<div id="chat-output"></div>
</div>
<!-- Loading Spinner (Hidden by Default) -->
<div id="loading-spinner" class="spinner-border text-primary mt-3" role="status" style="display: none;">
<span class="visually-hidden">Loading...</span>
</div>
</div>
<!-- Bootstrap JS -->
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
{% block extra_js %}{% endblock %}
</body>
</html>

View File

View File

@@ -0,0 +1,8 @@
from django import template
register = template.Library()
@register.filter
def dict_get(dictionary, key):
    """Return the value stored under *key*, or an empty list.

    Template usage: ``{{ mapping|dict_get:some_key }}``.

    An empty list is returned both when *key* is absent and when *dictionary*
    offers no ``.get`` method. The latter matters because the template engine
    can hand a filter a non-mapping value (for instance the placeholder used
    for a context variable that is not defined), and an ``AttributeError``
    raised here would abort rendering of the whole page.
    """
    try:
        return dictionary.get(key, [])
    except AttributeError:
        return []

3
web_app/news/tests.py Normal file
View File

@@ -0,0 +1,3 @@
from django.test import TestCase
# Create your tests here.

8
web_app/news/urls.py Normal file
View File

@@ -0,0 +1,8 @@
from django.urls import path
from . import views
# Routes of the news app, resolved relative to wherever the app is included.
urlpatterns = [
    # Paginated, filterable list of URLs
    path("", views.news, name="home"),
    # Detail page of a single URL
    path("url/<int:id>/", views.url_detail_view, name="url_detail"),
    # Streamed LLM answer for a single URL
    path("url/<int:id>/fetch/", views.fetch_details, name="fetch_details"),
]

97
web_app/news/views.py Normal file
View File

@@ -0,0 +1,97 @@
from django.http import StreamingHttpResponse, HttpResponse, JsonResponse
from django.shortcuts import render, get_object_or_404
from django.core.paginator import Paginator
import requests
from django.http import StreamingHttpResponse
import json
import time
import ollama
from .models import Urls, Source, UrlsSource
# Create your views here.
def index(request):
    """Respond with a fixed greeting string."""
    greeting = "Hello, world. You're at the news index."
    return HttpResponse(greeting)
def news(request):
    """Render the paginated, filterable list of URLs.

    Query parameters (all optional):
        page:    Page number; passed to ``Paginator.get_page``.
        items:   URLs per page. Defaults to 15; values that are not positive
                 integers fall back to the default, and the value is capped
                 at the largest offered page size.
        sources: Comma-separated source ids, or ``"all"`` for no source
                 filter. When absent, the ids of all known sources are used.
                 Entries that are not integers are ignored.
        status:  Comma-separated status values, or ``"all"`` for no status
                 filter (also the behaviour when absent). Values outside
                 ``Urls.STATUS_ENUM`` are ignored.

    Returns:
        A ``JsonResponse`` ``{"items_html": <rendered table>}`` when the
        request carries ``X-Requested-With: XMLHttpRequest``; otherwise the
        full ``item_list.html`` page.
    """
    items_choices = [15, 50, 100]
    default_items = items_choices[0]

    # URLs
    urls = Urls.objects.all()
    # Sources
    sources = Source.objects.all()

    # Parameters
    page_number = request.GET.get("page", 1)
    # Paginator needs a positive integer: anything else would surface as a
    # ValueError / ZeroDivisionError (HTTP 500), and an unbounded value would
    # let a client request the whole table in a single page.
    try:
        num_items = int(request.GET.get("items", default_items))
    except (TypeError, ValueError):
        num_items = default_items
    if num_items < 1:
        num_items = default_items
    num_items = min(num_items, max(items_choices))

    source_ids = request.GET.get("sources")
    if source_ids is None:
        source_ids = ",".join(str(s.id) for s in sources)
    status_filters = request.GET.get("status", None)

    # Filters
    if status_filters and status_filters != "all":
        known_statuses = set(Urls.STATUS_ENUM.values)
        wanted_statuses = [s for s in status_filters.split(",") if s in known_statuses]
        urls = urls.filter(status__in=wanted_statuses)
    if source_ids and source_ids != "all":
        # Keep only well-formed integer ids; the ORM raises on anything else
        wanted_sources = [
            s for s in (part.strip() for part in source_ids.split(","))
            if s.isascii() and s.isdigit()
        ]
        # distinct(): the join yields one row per (url, source) pair, so a URL
        # attached to several selected sources would otherwise repeat.
        urls = urls.filter(urlssource__id_source__in=wanted_sources).distinct()

    # Pagination over an unordered queryset can repeat or skip rows between
    # pages; impose a stable order when the model does not define one.
    if not urls.ordered:
        urls = urls.order_by("-ts_fetch", "-id")
    paginator = Paginator(urls, num_items)
    page_obj = paginator.get_page(page_number)

    # Map URL IDs to their sources, only for subset of URLs (page of interest)
    # NOTE(review): this issues one query per URL on the page.
    sources_map = {
        url.id: list(Source.objects.filter(urlssource__id_url=url).values_list('source', flat=True))
        for url in page_obj.object_list
    }

    context = {
        "page_obj": page_obj,
        "sources": sources,
        "sources_map": sources_map,
        "list_status": Urls.STATUS_ENUM.values,
        "list_urls_per_page": items_choices,
    }

    # If request is AJAX, return JSON response
    if request.headers.get("X-Requested-With") == "XMLHttpRequest":
        return JsonResponse({'items_html': render(request, 'item_list_partial.html', context).content.decode('utf-8')})

    return render(request, "item_list.html", context)
def url_detail_view(request, id):
    """Render the detail page of one URL, including the LLM models on offer.

    Responds with 404 when no ``Urls`` row has the given ``id``. The template
    receives the URL record, the names of its sources, the model names listed
    by the Ollama server, and the default prompt text.
    """
    url_item = get_object_or_404(Urls, id=id)
    source_names = list(
        Source.objects.filter(urlssource__id_url=url_item).values_list('source', flat=True)
    )

    # TODO: https://github.com/ollama/ollama-python?tab=readme-ov-file#async-client
    # Ask the Ollama server which models it can serve
    ollama_client = ollama.Client(host='https://ollamamodel.matitos.org')
    model_names = []
    for entry in ollama_client.list().models:
        model_names.append(entry.model)

    # Alternative prompts that were tried:
    #   "Image you are a journalist, TLDR in a paragraph:"
    #   "Below you will find the whole content of a news article:\n{}\nProvide a concise summary of one paragraph maximum of the content.".format(content)
    default_prompt = "Provide in one paragraph the what, why, when, where, who, and how of the content below. Also provide a one paragraph summary of the content:"

    return render(request, 'url_detail.html', {
        'url_item': url_item,
        'sources': source_names,
        'models': model_names,
        "prompt": default_prompt,
    })
def fetch_details(request, id):
    """Stream an LLM answer to the prompt submitted for one URL.

    Query parameters:
        model: Name of the Ollama model to use (required).
        text:  Prompt sent to the model as a single user message.
        url:   Accepted for compatibility with the client; not used here.

    Returns:
        404 when no ``Urls`` row has the given ``id``; 400 (plain text) when
        ``model`` is missing; otherwise a plain-text ``StreamingHttpResponse``
        that yields the model's answer chunk by chunk.
    """
    # Existence check only: unknown ids are answered with 404
    get_object_or_404(Urls, id=id)
    model = request.GET.get("model", "")  # Get LLM model
    text = request.GET.get("text", "")  # Get LLM prompt

    # Reject up front: once streaming has begun the status line is already
    # sent, so a failure inside the generator can only truncate the body.
    if not model:
        return HttpResponse("Missing 'model' query parameter.", status=400, content_type="text/plain")

    # LLM
    client = ollama.Client(host='https://ollamamodel.matitos.org')

    def stream_response():
        """Yield the text of each chunk as the model produces it."""
        user_message = {
            "role": "user",
            "content": text,
        }
        response = client.chat(model=model, messages=[user_message], stream=True)
        for chunk in response:
            yield chunk["message"]["content"]  # Stream each chunk of text

    return StreamingHttpResponse(stream_response(), content_type="text/plain")