Url content

This commit is contained in:
Luciano Gervasoni
2025-03-07 00:34:46 +01:00
parent 4453a51d6d
commit 54ebd58070
66 changed files with 2072 additions and 21 deletions

22
app_web/manage.py Executable file
View File

@@ -0,0 +1,22 @@
#!/usr/bin/env python
"""Django's command-line utility for administrative tasks."""
import os
import sys
def main():
    """Select the project settings module and hand ``sys.argv`` to Django's CLI.

    Raises:
        ImportError: if Django cannot be imported; the original error is
            chained as the cause.
    """
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'mysite.settings')
    try:
        # Imported lazily so a missing Django yields the friendly message below.
        from django.core import management
    except ImportError as import_error:
        hint = (
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        )
        raise ImportError(hint) from import_error
    management.execute_from_command_line(sys.argv)


if __name__ == '__main__':
    main()

View File

16
app_web/mysite/asgi.py Normal file
View File

@@ -0,0 +1,16 @@
"""
ASGI config for mysite project.
It exposes the ASGI callable as a module-level variable named ``application``.
For more information on this file, see
https://docs.djangoproject.com/en/5.1/howto/deployment/asgi/
"""
import os
from django.core.asgi import get_asgi_application
# Use the project settings unless DJANGO_SETTINGS_MODULE is already set in the environment.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'mysite.settings')
# Module-level ASGI callable returned by get_asgi_application().
application = get_asgi_application()

132
app_web/mysite/settings.py Normal file
View File

@@ -0,0 +1,132 @@
"""
Django settings for mysite project.
Generated by 'django-admin startproject' using Django 5.1.6.
For more information on this file, see
https://docs.djangoproject.com/en/5.1/topics/settings/
For the full list of settings and their values, see
https://docs.djangoproject.com/en/5.1/ref/settings/
"""
import os
from pathlib import Path
# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent

# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/5.1/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
# DJANGO_SECRET_KEY overrides the committed development key, following the
# same environment-variable convention as the DJANGO_DB_* settings.
SECRET_KEY = os.environ.get(
    'DJANGO_SECRET_KEY',
    'django-insecure-0+jg0u+%s@sj759i7@jn*%-#jl)8&#=siclb5908pwe!7=*$qb',
)

# SECURITY WARNING: don't run with debug turned on in production!
# DJANGO_DEBUG accepts 1/true/yes/on (case-insensitive); anything else turns
# debug off. When the variable is unset, debug stays on as before.
DEBUG = os.environ.get('DJANGO_DEBUG', 'true').strip().lower() in {'1', 'true', 'yes', 'on'}

# DJANGO_ALLOWED_HOSTS is a comma-separated host list; unset or blank keeps
# the original empty list.
ALLOWED_HOSTS = [
    host.strip()
    for host in os.environ.get('DJANGO_ALLOWED_HOSTS', '').split(',')
    if host.strip()
]
# Application definition

# The project's own `news` app is listed first, followed by Django's contrib apps.
INSTALLED_APPS = [
    'news.apps.NewsConfig',
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
]

# Middleware classes enabled for this project, in the order they are listed.
MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
]

# Module that holds the root `urlpatterns`.
ROOT_URLCONF = 'mysite.urls'

# A single Django template engine; no project-level template directories are
# configured (DIRS is empty) and APP_DIRS is enabled.
TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'DIRS': [],
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]

# Dotted path to the WSGI callable defined in mysite/wsgi.py.
WSGI_APPLICATION = 'mysite.wsgi.application'
# Database
# https://docs.djangoproject.com/en/5.1/ref/settings/#databases
# PostgreSQL connection; every parameter can be overridden through its
# DJANGO_DB_* environment variable and otherwise falls back to a local default.
DATABASES = dict(
    default=dict(
        ENGINE='django.db.backends.postgresql',
        NAME=os.environ.get("DJANGO_DB_NAME", "matitos"),
        USER=os.environ.get("DJANGO_DB_USER", "supermatitos"),
        PASSWORD=os.environ.get("DJANGO_DB_PASSWORD", "supermatitos"),
        HOST=os.environ.get("DJANGO_DB_HOST", "localhost"),
        PORT=os.environ.get("DJANGO_DB_PORT", "5432"),
        # Disabled: read-only sessions
        # OPTIONS={'options': '-c default_transaction_read_only=on'},
    ),
)
# Password validation
# https://docs.djangoproject.com/en/5.1/ref/settings/#auth-password-validators
# All four validators live in django.contrib.auth.password_validation and are
# enabled without extra options.
_password_validator_classes = (
    'UserAttributeSimilarityValidator',
    'MinimumLengthValidator',
    'CommonPasswordValidator',
    'NumericPasswordValidator',
)
AUTH_PASSWORD_VALIDATORS = [
    {'NAME': f'django.contrib.auth.password_validation.{class_name}'}
    for class_name in _password_validator_classes
]

# Internationalization
# https://docs.djangoproject.com/en/5.1/topics/i18n/
LANGUAGE_CODE, TIME_ZONE = 'en-us', 'UTC'
USE_I18N = USE_TZ = True

# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/5.1/howto/static-files/
STATIC_URL = 'static/'

# Default primary key field type
# https://docs.djangoproject.com/en/5.1/ref/settings/#default-auto-field
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'

26
app_web/mysite/urls.py Normal file
View File

@@ -0,0 +1,26 @@
"""
URL configuration for mysite project.
The `urlpatterns` list routes URLs to views. For more information please see:
https://docs.djangoproject.com/en/5.1/topics/http/urls/
Examples:
Function views
1. Add an import: from my_app import views
2. Add a URL to urlpatterns: path('', views.home, name='home')
Class-based views
1. Add an import: from other_app.views import Home
2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
Including another URLconf
1. Import the include() function: from django.urls import include, path
2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
"""
from django.contrib import admin
from django.urls import include, path
from django.views.generic.base import RedirectView
# Root URL table: the bare site root redirects (non-permanently) to the news
# app, whose own URLconf is mounted under "news/"; the Django admin is under "admin/".
urlpatterns = [
    path("", RedirectView.as_view(url='news/', permanent=False)),
    path("news/", include("news.urls")),
    path('admin/', admin.site.urls),
    # path("facerecognition", include("facerecognition.urls")),
]

16
app_web/mysite/wsgi.py Normal file
View File

@@ -0,0 +1,16 @@
"""
WSGI config for mysite project.
It exposes the WSGI callable as a module-level variable named ``application``.
For more information on this file, see
https://docs.djangoproject.com/en/5.1/howto/deployment/wsgi/
"""
import os
from django.core.wsgi import get_wsgi_application
# Use the project settings unless DJANGO_SETTINGS_MODULE is already set in the environment.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'mysite.settings')
# Module-level WSGI callable returned by get_wsgi_application().
application = get_wsgi_application()

0
app_web/news/__init__.py Normal file
View File

9
app_web/news/admin.py Normal file
View File

@@ -0,0 +1,9 @@
from django.contrib import admin
# Register your models here.
from .models import Urls, UrlsSource, Source
# Expose the news models in the Django admin using the default ModelAdmin;
# register() accepts an iterable and registers the models in the given order.
admin.site.register([Urls, UrlsSource, Source])

6
app_web/news/apps.py Normal file
View File

@@ -0,0 +1,6 @@
from django.apps import AppConfig
class NewsConfig(AppConfig):
    """App configuration for the ``news`` application."""

    # Dotted module name of the application.
    name = 'news'
    # Primary-key field type used when a model does not declare its own.
    default_auto_field = 'django.db.models.BigAutoField'

View File

@@ -0,0 +1,38 @@
# Generated by Django 5.1.6 on 2025-02-20 15:36
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
    """Initial migration of the news app.

    Creates the SOURCE, URL and URL_SOURCE models; URL_SOURCE links a URL to
    a SOURCE through two RESTRICT foreign keys.
    """

    initial = True

    dependencies = [
    ]

    operations = [
        migrations.CreateModel(
            name='SOURCE',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('source', models.TextField()),
            ],
        ),
        migrations.CreateModel(
            name='URL',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('url', models.TextField()),
                ('pub_date', models.DateTimeField(verbose_name='date published')),
            ],
        ),
        migrations.CreateModel(
            name='URL_SOURCE',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('source', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, to='news.source')),
                ('url', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, to='news.url')),
            ],
        ),
    ]

View File

@@ -0,0 +1,25 @@
# Generated by Django 5.1.6 on 2025-02-20 16:11
from django.db import migrations
class Migration(migrations.Migration):
    """Set explicit table names: ``source``, ``urls`` and ``urls_source``."""

    dependencies = [
        ('news', '0001_initial'),
    ]

    operations = [
        migrations.AlterModelTable(
            name='source',
            table='source',
        ),
        migrations.AlterModelTable(
            name='url',
            table='urls',
        ),
        migrations.AlterModelTable(
            name='url_source',
            table='urls_source',
        ),
    ]

View File

@@ -0,0 +1,33 @@
# Generated by Django 5.1.6 on 2025-02-20 16:18
import django.db.models.functions.datetime
from django.db import migrations, models
class Migration(migrations.Migration):
    """Reshape the ``url`` model.

    Drops ``pub_date``, adds a ``status`` choice field (default ``raw``) and a
    ``ts_fetch`` timestamp with a database-side ``Now()`` default, and gives
    the ``url`` field a verbose name.
    """

    dependencies = [
        ('news', '0002_alter_source_table_alter_url_table_and_more'),
    ]

    operations = [
        migrations.RemoveField(
            model_name='url',
            name='pub_date',
        ),
        migrations.AddField(
            model_name='url',
            name='status',
            field=models.CharField(choices=[('raw', 'Raw'), ('error', 'Error'), ('valid', 'Valid'), ('unknown', 'Unknown'), ('invalid', 'Invalid'), ('duplicate', 'Duplicate')], default='raw'),
        ),
        migrations.AddField(
            model_name='url',
            name='ts_fetch',
            field=models.DateTimeField(db_default=django.db.models.functions.datetime.Now(), verbose_name='Date fetched'),
        ),
        migrations.AlterField(
            model_name='url',
            name='url',
            field=models.TextField(verbose_name='URL'),
        ),
    ]

View File

@@ -0,0 +1,17 @@
# Generated by Django 5.1.6 on 2025-02-20 16:32
from django.db import migrations
class Migration(migrations.Migration):
    """Make each (url, source) pair unique on the ``url_source`` model."""

    dependencies = [
        ('news', '0003_remove_url_pub_date_url_status_url_ts_fetch_and_more'),
    ]

    operations = [
        migrations.AlterUniqueTogether(
            name='url_source',
            unique_together={('url', 'source')},
        ),
    ]

View File

@@ -0,0 +1,59 @@
# Generated by Django 5.1.6 on 2025-02-20 16:53
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
    """Replace the managed URL/URL_SOURCE models with unmanaged ones.

    Introduces ``Urls`` and ``UrlsSource`` with ``managed = False`` on the
    ``urls`` and ``urls_source`` tables, marks ``source`` as unmanaged, and
    removes the old ``URL`` and ``URL_SOURCE`` models.
    """

    dependencies = [
        ('news', '0004_alter_url_source_unique_together'),
    ]

    operations = [
        migrations.CreateModel(
            name='Urls',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('url', models.TextField(unique=True)),
                ('ts_fetch', models.DateTimeField()),
                ('status', models.TextField(choices=[('raw', 'Raw'), ('error', 'Error'), ('valid', 'Valid'), ('unknown', 'Unknown'), ('invalid', 'Invalid'), ('duplicate', 'Duplicate')], default='raw')),
            ],
            options={
                'db_table': 'urls',
                'managed': False,
            },
        ),
        migrations.RemoveField(
            model_name='url_source',
            name='url',
        ),
        migrations.AlterUniqueTogether(
            name='url_source',
            unique_together=None,
        ),
        migrations.RemoveField(
            model_name='url_source',
            name='source',
        ),
        migrations.AlterModelOptions(
            name='source',
            options={'managed': False},
        ),
        migrations.CreateModel(
            name='UrlsSource',
            fields=[
                ('id_url', models.OneToOneField(db_column='id_url', on_delete=django.db.models.deletion.DO_NOTHING, primary_key=True, serialize=False, to='news.urls')),
            ],
            options={
                'db_table': 'urls_source',
                'managed': False,
            },
        ),
        migrations.DeleteModel(
            name='URL',
        ),
        migrations.DeleteModel(
            name='URL_SOURCE',
        ),
    ]

View File

@@ -0,0 +1,17 @@
# Generated by Django 5.1.6 on 2025-03-06 09:36
from django.db import migrations
class Migration(migrations.Migration):
    """Order ``Urls`` by ``ts_fetch`` descending by default (model stays unmanaged)."""

    dependencies = [
        ('news', '0005_urls_remove_url_source_url_and_more'),
    ]

    operations = [
        migrations.AlterModelOptions(
            name='urls',
            options={'managed': False, 'ordering': ['-ts_fetch']},
        ),
    ]

View File

61
app_web/news/models.py Normal file
View File

@@ -0,0 +1,61 @@
from django.db import models
from django.contrib.postgres.fields import ArrayField
# Create your models here.
class Urls(models.Model):
    """A fetched URL stored in the existing ``urls`` table.

    The model is unmanaged (``managed = False``), and instances are ordered
    by ``ts_fetch`` descending by default.
    """

    class STATUS_ENUM(models.TextChoices):
        """Allowed values of the ``status`` column."""
        RAW = "raw"
        ERROR = "error"
        VALID = "valid"
        UNKNOWN = "unknown"
        INVALID = "invalid"
        DUPLICATE = "duplicate"

    # Unique URL string.
    url = models.TextField(unique=True)
    # Fetch timestamp; no default is declared on the model.
    ts_fetch = models.DateTimeField()
    status = models.TextField(choices=STATUS_ENUM, default=STATUS_ENUM.RAW)  # This field type is a guess.

    def __str__(self):
        """Return the URL itself as the display string."""
        return self.url

    class Meta:
        managed = False
        db_table = 'urls' # db_table = '{}_urls'.format(project_name)
        ordering = ["-ts_fetch"]
class Source(models.Model):
    """A named source, stored in the existing (unmanaged) ``source`` table."""

    # Explicit small auto-incrementing primary key.
    id = models.SmallAutoField(primary_key=True)
    # Unique source name.
    source = models.TextField(unique=True)

    def __str__(self):
        """Return the source name as the display string."""
        return self.source

    class Meta:
        managed = False
        db_table = 'source'
class UrlsSource(models.Model):
    """Link between a URL and a source in the unmanaged ``urls_source`` table.

    The table has a composite primary key (id_url, id_source), which is not
    supported here, so only the first column is declared as the primary key.
    NOTE(review): with ``id_url`` as the sole key, rows that share a URL but
    differ in source presumably look like the same object to the ORM - confirm
    before relying on per-row identity.
    """

    class Meta:
        managed = False
        db_table = 'urls_source'
        unique_together = (('id_url', 'id_source'),)

    id_url = models.OneToOneField(Urls, models.DO_NOTHING, db_column='id_url', primary_key=True)
    id_source = models.ForeignKey(Source, models.DO_NOTHING, db_column='id_source')

    def __str__(self):
        """Describe the link using the related source and URL."""
        return f"Source: {self.id_source}, URL: {self.id_url}"
class UrlContent(models.Model):
    """Extracted content of one URL, stored in the unmanaged ``url_content`` table.

    Each row is keyed by its URL (one-to-one with ``Urls``). The scalar text
    and date columns are nullable; tags, authors and image URLs are arrays of
    text.
    """

    id_url = models.OneToOneField(Urls, models.DO_NOTHING, db_column='id_url', primary_key=True)
    date_published = models.DateTimeField(blank=True, null=True)
    title = models.TextField(blank=True, null=True)
    description = models.TextField(blank=True, null=True)
    content = models.TextField(blank=True, null=True)
    tags = ArrayField(models.TextField(blank=True, null=True))
    authors = ArrayField(models.TextField(blank=True, null=True))
    image_urls = ArrayField(models.TextField(blank=True, null=True))

    def __str__(self):
        """Return the title, or a label built from the primary key when there is none."""
        # The other models in this module all define __str__; without one the
        # admin and shell show the generic "UrlContent object (pk)".
        # self.pk is used rather than self.id_url to avoid loading the related row.
        return self.title or "Content of URL {}".format(self.pk)

    class Meta:
        managed = False
        db_table = 'url_content'

View File

@@ -0,0 +1,508 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>News</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
<script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
<script>
function getQueryString(pageNumber, itemsNumber, sources, statuses){
    // Build the query string for a list request. Any argument that is
    // null/undefined keeps the value currently in the page URL, or a built-in
    // default when the URL does not carry it either.
    const params = new URLSearchParams(window.location.search);
    const fields = [
        ["page", pageNumber, 1],
        ["items", itemsNumber, 15],
        ["sources", sources, "all"],
        ["status", statuses, "all"],
    ];
    for (const [key, requested, fallback] of fields) {
        const value = requested == null ? (params.get(key) ?? fallback) : requested;
        params.set(key, value);
    }
    // Encoding fix: keep commas readable (%2C -> ,)
    return params.toString().split("%2C").join(",");
}
function loadPage(pageNumber, itemsNumber, sources, statuses) {
    // Fetch one page of items via AJAX and swap it into #item-list.
    // Null arguments fall back to the values in the current URL (see getQueryString).
    const itemList = $("#item-list");
    const spinner = $("#loading");
    itemList.fadeTo(100, 0.5); // Smooth fade effect
    spinner.show();
    // Declared locally: the bare assignment used before created a global.
    const queryParamsString = getQueryString(pageNumber, itemsNumber, sources, statuses);
    $.ajax({
        url: "?" + queryParamsString,
        type: "GET",
        headers: { "X-Requested-With": "XMLHttpRequest" },
        success: function (data) {
            itemList.html(data.items_html);
            // Update URL without reloading
            window.history.pushState({}, "", "?" + queryParamsString);
        },
        error: function (xhr, textStatus, errorThrown) {
            // Leave the previous list in place; the URL is not updated.
            console.error("Failed to load items:", textStatus, errorThrown);
        },
        complete: function () {
            // Runs after success AND error, so a failed request no longer
            // leaves the list dimmed with the spinner showing.
            itemList.fadeTo(0, 1);
            spinner.hide();
        }
    });
}
////////////////////////////////////////////////////////////////////////////
// Pagination
////////////////////////////////////////////////////////////////////////////
// Delegated handler: pagination links are re-rendered on every AJAX update.
$(document).on("click", ".pagination a", function (event) {
    event.preventDefault();
    const page = $(this).attr("data-page");
    // Positional arguments: JavaScript has no keyword arguments, so the former
    // `pageNumber=page, itemsNumber=null, ...` assigned four implicit globals.
    // null means "keep the current value from the URL".
    loadPage(page, null, null, null);
});
$(document).ready(function () {
    ////////////////////////////////////////////////////////////////////////////
    // Filter updates
    ////////////////////////////////////////////////////////////////////////////
    const sourcesToggleAll = $("#toggle-all-sources");
    const sourcesCheckboxes = $(".source-checkbox");
    const statusesToggleAll = $("#toggle-all-status");
    const statusCheckboxes = $(".status-checkbox");
    const itemsRadios = $("input[name='items']");

    // Comma-separated values of the checked boxes ("" when none is checked).
    function checkedValues(checkboxes) {
        return checkboxes.filter(":checked").map(function () {
            return $(this).val();
        }).get().join(",");
    }

    // True when every box of the group is checked.
    function allChecked(checkboxes) {
        return checkboxes.length === checkboxes.filter(":checked").length;
    }

    // Reload the list from page 1 using the current filter selection.
    function updateFilters() {
        const selectedItems = itemsRadios.filter(":checked").val();
        loadPage(1, selectedItems, checkedValues(sourcesCheckboxes), checkedValues(statusCheckboxes));
    }

    ////////////////////////////////////////////////////////////////////////////
    // Change triggers
    ////////////////////////////////////////////////////////////////////////////
    // Sources and statuses share the same wiring: "toggle all" drives every
    // box, and each box keeps "toggle all" in sync.
    function bindGroup(toggleAll, checkboxes) {
        toggleAll.on("change", function () {
            checkboxes.prop("checked", toggleAll.prop("checked"));
            updateFilters();
        });
        checkboxes.on("change", function () {
            toggleAll.prop("checked", allChecked(checkboxes));
            updateFilters();
        });
    }
    bindGroup(sourcesToggleAll, sourcesCheckboxes);
    bindGroup(statusesToggleAll, statusCheckboxes);
    // Items change trigger update
    $(".items").on("change", updateFilters);

    ////////////////////////////////////////////////////////////////////////////
    // Initial values
    ////////////////////////////////////////////////////////////////////////////
    // Restore the controls from the query string that loadPage() pushes into
    // the URL, so a reloaded or shared URL shows the filters it was built with
    // instead of "everything checked".
    // NOTE(review): assumes the server applies these parameters on a full page
    // load the same way as on the AJAX request - confirm against the view.
    const initialParams = new URLSearchParams(window.location.search);

    function restoreGroup(toggleAll, checkboxes, rawValue) {
        if (rawValue === null || rawValue === "all") {
            // Parameter absent or "all": original default, everything checked.
            checkboxes.prop("checked", true);
        } else {
            const wanted = new Set(rawValue.split(","));
            checkboxes.each(function () {
                $(this).prop("checked", wanted.has($(this).val()));
            });
        }
        toggleAll.prop("checked", allChecked(checkboxes));
    }
    restoreGroup(sourcesToggleAll, sourcesCheckboxes, initialParams.get("sources"));
    restoreGroup(statusesToggleAll, statusCheckboxes, initialParams.get("status"));

    // Items per page: take the URL value when it matches a radio, else 15.
    // Matching by value avoids building a selector from URL input.
    const requestedItems = initialParams.get("items") ?? "15";
    let activeRadio = itemsRadios.filter(function () {
        return $(this).val() === requestedItems;
    });
    if (activeRadio.length === 0) {
        activeRadio = itemsRadios.filter(function () {
            return $(this).val() === "15";
        });
    }
    activeRadio.prop("checked", true);
});
////////////////////////////////////////////////////////////////////////////
// Theme logic
////////////////////////////////////////////////////////////////////////////
function setTheme(mode) {
    // Apply `mode` to the root element (custom and Bootstrap theme attributes),
    // persist it in localStorage, and update the toggle icon and body class.
    const root = document.documentElement;
    const isDark = mode === "dark";
    root.setAttribute("data-theme", mode);
    root.setAttribute("data-bs-theme", mode);
    localStorage.setItem("theme", mode);
    // The icon shows the theme the button switches TO.
    document.getElementById("theme-icon").innerHTML = isDark ? "🌞" : "🌙";
    document.body.classList.toggle("dark-mode", isDark);
}
function toggleTheme() {
    // Switch to light when the root element is currently dark, otherwise to dark.
    const isDarkNow = document.documentElement.getAttribute("data-theme") === "dark";
    setTheme(isDarkNow ? "light" : "dark");
}
// On load, apply the stored theme; with nothing stored, follow the
// browser's prefers-color-scheme setting.
document.addEventListener("DOMContentLoaded", function () {
    let initialTheme = localStorage.getItem("theme");
    if (!initialTheme) {
        const prefersDark = window.matchMedia("(prefers-color-scheme: dark)").matches;
        initialTheme = prefersDark ? "dark" : "light";
    }
    setTheme(initialTheme);
});
////////////////////////////////////////////////////////////////////////////
</script>
<style>
/* Content Area */
#content {
margin-left: 170px; /* Match sidebar width */
min-width: calc(100vw - 170px); /* Ensure it doesn't shrink into the sidebar */
width: calc(100vw - 170px); /* Expands based on screen size */
padding: 20px;
overflow-x: auto; /* Prevent content from being squeezed */
transition: margin-left 0.3s ease;
}
/* Sidebar Styles */
#sidebar {
height: 100vh;
position: fixed;
top: 0;
left: 0;
width: 170px; /* Default width */
background-color: var(--bg-color);
box-shadow: 2px 0 5px rgba(0, 0, 0, 0.1);
padding: 15px;
transition: width 0.3s ease;
}
#sidebar .nav-link {
color: var(--text-color);
}
#sidebar .nav-link:hover {
background-color: var(--pagination-hover-bg);
}
/* ============================= */
/* Responsive Enhancements */
/* ============================= */
@media (min-width: 1200px) {
.table {
width: 95%; /* Allows table to take more space */
margin: 0 auto; /* Centers the table */
}
}
@media (max-width: 768px) {
#sidebar {
width: 70px; /* Collapse sidebar to smaller width */
/*padding: 10px;*/
}
#content {
margin-left: 70px; /* Adjust margin to match collapsed sidebar */
min-width: calc(100vw - 70px); /* Prevent overlap */
/*padding: 10px;*/
}
/* Adjust table for small screens */
.table-responsive {
overflow-x: auto;
}
.table th,
.table td {
white-space: nowrap; /* Prevent text wrapping in cells */
}
.table a {
word-break: break-word; /* Ensure long URLs break properly */
}
}
/* ============================= */
/* Global Styles */
/* ============================= */
body {
background-color: var(--bg-color);
color: var(--text-color);
transition: background-color 0.3s, color 0.3s;
}
/* ============================= */
/* Light & Dark Mode Variables */
/* ============================= */
:root {
--bg-color: #ffffff;
--text-color: #212529;
--table-bg: #ffffff;
--table-text: #000000;
--table-border: #dee2e6;
--link-color: #007bff;
--pagination-bg: #ffffff;
--pagination-border: #dee2e6;
--pagination-hover-bg: #f8f9fa;
--pagination-active-bg: #007bff;
--pagination-active-text: #ffffff;
--button-bg: #f8f9fa;
--button-border: #ced4da;
--button-text: #212529;
}
[data-theme="dark"] {
--bg-color: #121212;
--text-color: #e0e0e0;
--table-bg: #1e1e1e;
--table-text: #ffffff;
--table-border: #2c2c2c;
--link-color: #9ec5fe;
--pagination-bg: #1e1e1e;
--pagination-border: #444;
--pagination-hover-bg: #333;
--pagination-active-bg: #007bff;
--pagination-active-text: #ffffff;
--button-bg: #1e1e1e;
--button-border: #444;
--button-text: #e0e0e0;
}
/* ============================= */
/* Table Styling */
/* ============================= */
.table-responsive {
width: 100%; /* Ensure it spans the full width of its container */
max-width: 100%;
overflow-x: auto;
}
.table {
background-color: var(--table-bg);
color: var(--table-text);
border: 1px solid var(--table-border);
transition: background-color 0.3s, color 0.3s;
width: 100%; /* Ensures it takes full width of its container */
table-layout: auto; /* Allows columns to adjust dynamically */
/*white-space: nowrap;*/ /* Prevents text wrapping in cells */
}
.table th,
.table td {
border-color: var(--table-border);
}
.table thead {
background-color: var(--pagination-active-bg);
color: var(--pagination-active-text);
}
[data-theme="dark"] .table {
background-color: var(--table-bg);
color: var(--table-text);
}
[data-theme="dark"] .table th,
[data-theme="dark"] .table td {
border-color: var(--table-border);
}
[data-theme="dark"] .table thead {
background-color: #333;
color: #fff;
}
th:nth-child(1), td:nth-child(1) { width: 50%; } /* URL column */
th:nth-child(2), td:nth-child(2) { width: 20%; } /* Fetch Date */
th:nth-child(3), td:nth-child(3) { width: 20%; } /* Sources */
th:nth-child(4), td:nth-child(4) { width: 5%; } /* Status */
th:nth-child(5), td:nth-child(5) { width: 5%; } /* Action */
/* ============================= */
/* Pagination Styling */
/* ============================= */
.pagination {
display: flex;
justify-content: center;
padding: 10px 0;
}
.pagination .page-link {
background-color: var(--pagination-bg);
border-color: var(--pagination-border);
color: var(--text-color);
padding: 10px 14px;
margin: 0 5px;
border-radius: 8px;
transition: background-color 0.3s, color 0.3s, transform 0.2s;
}
.pagination .page-link:hover {
background-color: var(--pagination-hover-bg);
transform: scale(1.05);
}
.pagination .active .page-link {
background-color: var(--pagination-active-bg);
color: var(--pagination-active-text);
border-color: var(--pagination-active-bg);
}
/* ============================= */
/* Theme Toggle Button */
/* ============================= */
.theme-toggle-btn {
background-color: var(--button-bg);
border: 1px solid var(--button-border);
color: var(--button-text);
border-radius: 50%;
width: 40px;
height: 40px;
font-size: 20px;
display: flex;
align-items: center;
justify-content: center;
transition: background-color 0.3s, color 0.3s, transform 0.2s;
cursor: pointer;
}
.theme-toggle-btn:hover {
background-color: var(--pagination-hover-bg);
transform: rotate(20deg);
}
.theme-toggle-btn:active {
transform: scale(0.95);
}
/* ============================= */
/* Loading Spinner Styling */
/* ============================= */
#loading {
position: fixed;
left: 50%;
top: 50%;
transform: translate(-50%, -50%);
z-index: 1050;
display: none;
}
.spinner-border {
width: 4rem;
height: 4rem;
}
</style>
</head>
<body>
<!-- Left Sidebar -->
<div id="sidebar" class="d-flex flex-column">
    {# Children of the <ul> are <li> elements, and empty-list fallbacks are plain text: the former <div>/<tr>/<td> children were invalid outside a list item / table. #}
    <ul class="nav flex-column">
        <!-- Theme Toggle Button -->
        <li class="nav-item">
            <button type="button" onclick="toggleTheme()" class="theme-toggle-btn" aria-label="Toggle theme">
                <span id="theme-icon">🌙</span>
            </button>
        </li>

        <!-- Sources -->
        <li class="nav-item mt-3">
            <strong>Select sources</strong>
            <form id="source-filter-form">
                <!-- Toggle All Checkbox -->
                <div class="form-check">
                    <input class="form-check-input" type="checkbox" id="toggle-all-sources">
                    <label class="form-check-label fw-bold" for="toggle-all-sources">
                        Toggle all
                    </label>
                </div>
                <!-- Individual Source Checkboxes -->
                {% for source in sources %}
                <div class="form-check">
                    <input class="form-check-input source-checkbox" type="checkbox" value="{{ source.id }}" id="source-{{ source.id }}">
                    <label class="form-check-label" for="source-{{ source.id }}">
                        {{ source.source }}
                    </label>
                </div>
                {% empty %}
                <p class="text-muted mb-0">No sources available.</p>
                {% endfor %}
            </form>
        </li>

        <!-- Status -->
        <li class="nav-item mt-3">
            <strong>Select status</strong>
            <form id="status-filter-form">
                <!-- Toggle All Checkbox -->
                <div class="status-form-check">
                    <input class="form-check-input" type="checkbox" id="toggle-all-status">
                    <label class="form-check-label fw-bold" for="toggle-all-status">
                        Toggle all
                    </label>
                </div>
                <!-- Individual Status Checkboxes -->
                {% for status in list_status %}
                <div class="status-form-check">
                    <input class="form-check-input status-checkbox" type="checkbox" value="{{ status }}" id="status-{{ status }}">
                    <label class="form-check-label" for="status-{{ status }}">
                        {{ status }}
                    </label>
                </div>
                {% empty %}
                <p class="text-muted mb-0">No statuses available.</p>
                {% endfor %}
            </form>
        </li>

        <!-- URLs per page -->
        <li class="nav-item mt-3">
            <strong>URLs per page</strong>
            <div class="card-body">
                <!-- Individual page-size radio buttons -->
                {% for url_per_page in list_urls_per_page %}
                <div class="items-form-check">
                    <input class="form-check-input items" type="radio" name="items" id="value-{{ url_per_page }}" value="{{ url_per_page }}">
                    <label class="form-check-label" for="value-{{ url_per_page }}">{{ url_per_page }}</label>
                </div>
                {% empty %}
                <p class="text-muted mb-0">No options available.</p>
                {% endfor %}
            </div>
        </li>
    </ul>
</div>
<!-- Main Content Area -->
<div id="content" class="main-content">
<div class="container mt-4">
<!-- Table -->
<div id="item-list">
{% include 'item_list_partial.html' %}
</div>
<!-- Loading... -->
<div id="loading" class="text-center mt-3" style="display:none;">
<div class="spinner-border text-primary" role="status">
<span class="visually-hidden">Loading...</span>
</div>
</div>
</div>
</div>
</body>
</html>

View File

@@ -0,0 +1,87 @@
{% load custom_filters %}
{# Items table: one row per URL on the current page, with its sources, a colour-coded status badge and a link to the detail page. #}
<div class="table-responsive">
    <table class="table table-hover">
        <thead>
            <tr>
                <th scope="col"><strong>URL</strong></th>
                <th scope="col"><strong>Fetch date</strong></th>
                <th scope="col"><strong>Sources</strong></th>
                <th scope="col"><strong>Status</strong></th>
                <th scope="col"><strong>Action</strong></th>
            </tr>
        </thead>
        <tbody>
            {% for item in page_obj %}
            <tr>
                {# Link to the stored URL unchanged: the former trailing "/" altered the target. #}
                <td><a href="{{ item.url }}" target="_blank" rel="noopener noreferrer">{{ item.url }}</a></td>
                <td>{{ item.ts_fetch }}</td>
                <td>
                    {% with sources_map|dict_get:item.id as sources %}
                    {% if sources %}
                    {% for source in sources %}
                    <span class="badge bg-secondary">{{ source }}</span>
                    {% endfor %}
                    {% else %}
                    <span class="text-muted">No sources</span>
                    {% endif %}
                    {% endwith %}
                </td>
                <td>
                    {% if item.status == 'raw' %}
                    <span class="badge bg-secondary">{{ item.status|capfirst }}</span>
                    {% elif item.status == 'error' %}
                    <span class="badge bg-danger">{{ item.status|capfirst }}</span>
                    {% elif item.status == 'valid' %}
                    <span class="badge bg-success">{{ item.status|capfirst }}</span>
                    {% elif item.status == 'unknown' %}
                    <span class="badge bg-warning">{{ item.status|capfirst }}</span>
                    {% elif item.status == 'invalid' %}
                    <span class="badge bg-danger">{{ item.status|capfirst }}</span>
                    {% elif item.status == 'duplicate' %}
                    <span class="badge bg-info">{{ item.status|capfirst }}</span>
                    {% else %}
                    <span class="badge bg-light">Unknown</span>
                    {% endif %}
                </td>
                <td>
                    <a href="url/{{ item.id }}" class="btn btn-primary btn-sm" target="_blank" rel="noopener noreferrer">Details</a>
                </td>
            </tr>
            {% empty %}
            <tr>
                {# Spans all five columns of the table. #}
                <td colspan="5" class="text-center">No items available.</td>
            </tr>
            {% endfor %}
        </tbody>
    </table>
</div>
{# Pagination bar. Links carry the target page in data-page; href is "#" because a delegated click handler on ".pagination a" performs the navigation. #}
<div class="d-flex justify-content-center mt-3">
    <nav>
        <ul class="pagination">
            {# First / Previous are shown only when a previous page exists. #}
            {% if page_obj.has_previous %}
            <li class="page-item">
                <a class="page-link" href="#" data-page="1">First</a>
            </li>
            <li class="page-item">
                <a class="page-link" href="#" data-page="{{ page_obj.previous_page_number }}">Previous</a>
            </li>
            {% endif %}
            {# Current position (not a link). #}
            <li class="page-item active">
                <span class="page-link">Page {{ page_obj.number }} of {{ page_obj.paginator.num_pages }}</span>
            </li>
            {# Next / Last are shown only when a next page exists. #}
            {% if page_obj.has_next %}
            <li class="page-item">
                <a class="page-link" href="#" data-page="{{ page_obj.next_page_number }}">Next</a>
            </li>
            <li class="page-item">
                <a class="page-link" href="#" data-page="{{ page_obj.paginator.num_pages }}">Last</a>
            </li>
            {% endif %}
        </ul>
    </nav>
</div>

View File

@@ -0,0 +1,211 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{% block title %}News{% endblock %}</title>
<!-- Bootstrap CSS -->
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
<!-- Add jQuery from CDN (before other scripts) -->
<script src="https://code.jquery.com/jquery-3.6.4.min.js"></script>
<!-- Markdown -->
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
<!-- Custom Styles -->
<style>
body {
background-color: #f4f4f4;
}
.navbar-dark .navbar-nav .nav-link {
color: rgba(255,255,255,0.75);
}
.chat-box {
background-color: #fff;
border: 1px solid #ddd;
padding: 15px;
border-radius: 8px;
overflow-y: auto; /* Enable vertical scrolling */
max-width: 100%;
min-height: 150px;
max-height: 450px;
white-space: normal;
word-wrap: break-word;
word-break: break-word;
}
</style>
</head>
<script>
function fetchDetails(urlId, url) {
    // Stream the model's answer for this URL into #chat-output, rendering the
    // accumulated Markdown as chunks arrive.
    //   urlId - id used in the element ids and in the request path
    //   url   - URL passed to the endpoint as the `url` query parameter
    const spinner = document.getElementById("loading-spinner");
    // Get the input value
    const inputText = document.getElementById(`custom-input-${urlId}`).value;
    // Get the input model
    const selectedModel = document.getElementById(`options-${urlId}`).value;
    // Validate BEFORE showing the spinner: the early return used to leave it visible.
    if (!selectedModel) {
        alert("Please select a model before fetching details.");
        return;
    }
    // Show the loading spinner
    spinner.style.display = "block";

    // Fetch URL
    const fetchUrl = `/news/url/${urlId}/fetch/?url=${encodeURIComponent(url)}&model=${encodeURIComponent(selectedModel)}&text=${encodeURIComponent(inputText)}`;
    const resultContainer = $("#chat-output");
    resultContainer.html(""); // Clear previous content before fetching
    const fetchButton = $("button[onclick^='fetchDetails']"); // Select the button
    fetchButton.prop("disabled", true); // Disable button

    fetch(fetchUrl)
        .then(response => {
            if (!response.ok) {
                throw new Error("Error on network response");
            }
            const reader = response.body.getReader();
            const decoder = new TextDecoder();
            let accumulatedText = ""; // Streamed text so far, re-rendered as Markdown
            // Container for the streaming response
            const messageContainer = $('<div class="chat-message"></div>');
            resultContainer.append(messageContainer);

            function render() {
                // NOTE(review): marked does not sanitize its output; if the streamed
                // text can contain untrusted HTML, sanitize before inserting it.
                messageContainer.html(marked.parse(accumulatedText));
            }

            function read() {
                return reader.read().then(({ done, value }) => {
                    if (done) {
                        // Flush any bytes the decoder is still holding.
                        accumulatedText += decoder.decode();
                        render();
                        return;
                    }
                    // stream:true keeps a multi-byte character that is split
                    // across two chunks from being decoded as garbage.
                    accumulatedText += decoder.decode(value, { stream: true });
                    render();
                    resultContainer.scrollTop(resultContainer[0].scrollHeight); // Auto-scroll to bottom
                    return read();
                });
            }
            return read();
        })
        .catch(error => {
            // Insert the message as text so it is never interpreted as HTML.
            const errorLine = $('<p class="text-danger"></p>').text(`Error fetching details: ${error.message}`);
            resultContainer.empty().append(errorLine);
        })
        .finally(() => {
            // Runs after success and failure alike: hide the spinner, re-enable the button.
            spinner.style.display = "none";
            fetchButton.prop("disabled", false);
        });
}
</script>
<body>
<!-- Main Content -->
<div class="container mt-4">
<h2>URL Details</h2>
<table class="table table-bordered">
<tr>
<th>URL</th>
<td><a href="{{ url_item.url }}" target="_blank">{{ url_item.url }}</a></td>
</tr>
<tr>
<th>Fetch Date</th>
<td>{{ url_item.ts_fetch }}</td>
</tr>
<tr>
<th>Sources</th>
<td>{{ sources|join:", " }}</td>
</tr>
<tr>
<th>Status</th>
<td>{{ url_item.status }}</td>
</tr>
<tr>
<th>Title</th>
<td>{{ url_content.title }}</td>
</tr>
<tr>
<th>Description</th>
<td>{{ url_content.description }}</td>
</tr>
<tr>
<th>Content</th>
<td>{{ url_content.content }}</td>
</tr>
<tr>
<th>Tags</th>
<td>{{ url_content.tags }}</td>
</tr>
<tr>
<th>Authors</th>
<td>{{ url_content.authors }}</td>
</tr>
<tr>
<th>Image URLs</th>
<td>{{ url_content.image_urls }}</td>
</tr>
</table>
<!-- Independent form for optional values -->
{# escapejs: the URL is embedded in a single-quoted JavaScript string inside the handler, so it must be JS-escaped (HTML autoescaping alone does not protect this context). #}
<form onsubmit="fetchDetailsWithSelection(event, {{ url_item.id }}, '{{ url_item.url|escapejs }}')">
<label for="options-{{ url_item.id }}">Model:</label>
<select id="options-{{ url_item.id }}" class="form-control mb-2">
<!-- <option value="">-- Select an option --</option> -->
{% for model in models %}
<option value="{{ model }}">{{ model }}</option>
{% endfor %}
</select>
</form>
<!-- Input field with a default value -->
<label for="custom-input-{{ url_item.id }}">Prompt:</label>
<textarea id="custom-input-{{ url_item.id }}" class="form-control mb-2" rows="3">{{ prompt }} {{ url_item.url }}</textarea>
<!-- Fetch details button -->
{# escapejs: the URL is embedded in a single-quoted JavaScript string inside onclick, so quotes and backslashes must be JS-escaped to keep the handler valid and prevent script injection. #}
<button class="btn btn-primary" onclick="fetchDetails({{ url_item.id }}, '{{ url_item.url|escapejs }}')">
Fetch Details
</button>
<!-- Chatbot-style response box -->
<div class="chat-box mt-3 p-3 border rounded">
<div id="chat-output"></div>
</div>
<!-- Loading Spinner (Hidden by Default) -->
<div id="loading-spinner" class="spinner-border text-primary mt-3" role="status" style="display: none;">
<span class="visually-hidden">Loading...</span>
</div>
</div>
<!-- Bootstrap JS -->
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
{% block extra_js %}{% endblock %}
</body>
</html>

View File

View File

@@ -0,0 +1,8 @@
from django import template
register = template.Library()
@register.filter
def dict_get(dictionary, key):
    """Return ``dictionary[key]`` for use in Django templates, or ``[]``.

    Usage in a template: ``{{ my_dict|dict_get:some_key }}``.

    An empty list is returned when ``key`` is absent. It is also returned
    when ``dictionary`` has no ``get`` method (for example ``None``, or the
    empty string Django substitutes for an unresolved variable), so that
    the filter does not raise while the template is rendering.
    """
    getter = getattr(dictionary, "get", None)
    if not callable(getter):
        return []
    return getter(key, [])

3
app_web/news/tests.py Normal file
View File

@@ -0,0 +1,3 @@
from django.test import TestCase
# Create your tests here.

8
app_web/news/urls.py Normal file
View File

@@ -0,0 +1,8 @@
from django.urls import path
from . import views
# Routes of the news app: the listing page, the detail page of a single
# URL entry, and the endpoint that streams the LLM output for that entry.
urlpatterns = [
    path("", views.news, name="home"),
    path("url/<int:id>/", views.url_detail_view, name="url_detail"),
    path("url/<int:id>/fetch/", views.fetch_details, name="fetch_details"),
]

105
app_web/news/views.py Normal file
View File

@@ -0,0 +1,105 @@
from django.http import StreamingHttpResponse, HttpResponse, JsonResponse
from django.shortcuts import render, get_object_or_404
from django.core.paginator import Paginator
import requests
from django.http import StreamingHttpResponse
import json
import time
import ollama
from .models import Urls, Source, UrlsSource, UrlContent
# Create your views here.
def index(request):
    """Return a fixed plain greeting for the news index."""
    greeting = "Hello, world. You're at the news index."
    return HttpResponse(greeting)
def news(request):
    """Render the paginated, filterable list of URLs.

    Query parameters (all optional):
        page: page number to display (default 1).
        items: number of URLs per page (default 15). Values that are not
            positive integers fall back to the default instead of making
            the paginator raise.
        sources: comma-separated source ids to keep, or "all" to skip the
            source filter (default: the ids of every ``Source`` row).
        status: comma-separated status values to keep, or "all".

    Returns:
        A ``JsonResponse`` holding the rendered partial under ``items_html``
        when the request carries ``X-Requested-With: XMLHttpRequest``;
        otherwise the full ``item_list.html`` page.
    """
    default_items = 15

    # URLs
    urls = Urls.objects.all()
    # Sources
    sources = Source.objects.all()

    # Parameters
    page_number = request.GET.get("page", 1)
    source_ids = request.GET.get("sources", ','.join([str(s.id) for s in sources]))
    status_filters = request.GET.get("status", None)

    # "items" comes straight from the query string: Paginator calls int() on
    # it and divides by it, so reject anything that is not a positive integer.
    try:
        num_items = int(request.GET.get("items", default_items))
    except (TypeError, ValueError):
        num_items = default_items
    if num_items < 1:
        num_items = default_items

    # Filters
    if status_filters and status_filters != "all":
        urls = urls.filter(status__in=status_filters.split(","))
    if source_ids and source_ids != "all":
        # TODO: Distinct needed?
        urls = urls.filter(urlssource__id_source__in=source_ids.split(",")).distinct()

    # Pagination
    paginator = Paginator(urls, num_items)
    page_obj = paginator.get_page(page_number)

    # Map URL IDs to their sources, only for subset of URLs (page of interest)
    sources_map = {
        url.id: list(Source.objects.filter(urlssource__id_url=url).values_list('source', flat=True))
        for url in page_obj.object_list
    }

    context = {
        "page_obj": page_obj,
        "sources": sources,
        "sources_map": sources_map,
        "list_status": Urls.STATUS_ENUM.values,
        "list_urls_per_page": [15, 50, 100],
    }

    # If request is AJAX, return JSON response
    if request.headers.get("X-Requested-With") == "XMLHttpRequest":
        return JsonResponse({'items_html': render(request, 'item_list_partial.html', context).content.decode('utf-8')})

    return render(request, "item_list.html", context)
def url_detail_view(request, id):
    """Render the detail page for a single URL entry.

    Args:
        request: the incoming HTTP request.
        id: primary key of the ``Urls`` row (name fixed by the URL pattern).

    The context carries the URL row, the names of its sources, its
    ``UrlContent`` (an empty dict when none exists), the default prompt, and
    the list of model names reported by the Ollama server. If that server
    cannot be queried the page is still rendered, with an empty model list.

    Raises:
        Http404: when no ``Urls`` row has the given ``id``.
    """
    import logging  # local import keeps this change self-contained

    url_item = get_object_or_404(Urls, id=id)
    url_sources = list(
        Source.objects.filter(urlssource__id_url=url_item).values_list('source', flat=True)
    )

    try:
        url_content = UrlContent.objects.get(pk=id)
    except UrlContent.DoesNotExist:
        url_content = {}

    # TODO: https://github.com/ollama/ollama-python?tab=readme-ov-file#async-client
    # LLM models available. The model list is optional for this page, so any
    # failure talking to the external server is logged and degraded to an
    # empty list rather than turned into a 500 for the whole page.
    try:
        client = ollama.Client(host='https://ollamamodel.matitos.org')
        models = sorted(m.model for m in client.list().models)
    except Exception:
        logging.getLogger(__name__).exception("Could not list Ollama models")
        models = []

    context = {
        'url_item': url_item,
        'sources': url_sources,
        'models': models,
        'prompt': "Provide in one paragraph the what, why, when, where, who, and how of the content below. Also provide a one paragraph summary of the content:",
        #"prompt": "Image you are a journalist, TLDR in a paragraph:",
        #"prompt": "Below you will find the whole content of a news article:\n{}\nProvide a concise summary of one paragraph maximum of the content.".format(content)
        'url_content': url_content,
    }
    return render(request, 'url_detail.html', context)
def fetch_details(request, id):
    """Stream an LLM answer for a URL entry as plain text.

    Args:
        request: the incoming HTTP request.
        id: primary key of the ``Urls`` row (name fixed by the URL pattern).

    Query parameters:
        model: name of the Ollama model to query (required).
        text: prompt sent to the model as a single user message.

    Returns:
        A 400 plain-text response when ``model`` is missing; otherwise a
        ``StreamingHttpResponse`` that emits the model output chunk by chunk.

    Raises:
        Http404: when no ``Urls`` row has the given ``id``.
    """
    # The row itself is not needed; the lookup only enforces that it exists.
    get_object_or_404(Urls, id=id)

    model = request.GET.get("model", "")  # Get LLM model
    text = request.GET.get("text", "")  # Get LLM prompt

    # Fail before streaming starts: once the 200 headers are sent the client
    # can no longer be told that the request was invalid.
    if not model:
        return HttpResponse(
            "Missing required 'model' parameter.",
            status=400,
            content_type="text/plain",
        )

    # LLM
    client = ollama.Client(host='https://ollamamodel.matitos.org')

    def stream_response():
        messages = [{"role": "user", "content": text}]
        for chunk in client.chat(model=model, messages=messages, stream=True):
            piece = chunk["message"]["content"]
            # Content may be empty or None on some chunks; skip those so the
            # literal text "None" is never written into the stream.
            if piece:
                yield piece

    return StreamingHttpResponse(stream_response(), content_type="text/plain")