Urls web visualization, cleaning obsolete code

This commit is contained in:
Luciano Gervasoni
2025-03-25 02:51:16 +01:00
parent 0c6b5f1ea4
commit 24b4614049
52 changed files with 371 additions and 3293 deletions

View File

@@ -18,64 +18,80 @@ def link_list(request):
prefix = "http://localhost:8000/api/task"
links = ["fetch_feeds", "fetch_parser", "fetch_search", "process_raw_urls_50", "process_error_urls_50", "process_missing_kids_urls_50", "process_missing_kids_urls_500000"]
db_links = ["http://localhost:8080/?pgsql=matitos_db&username=supermatitos&db=matitos&ns=public&select=urls&order%5B0%5D=id&limit=500"]
return JsonResponse({"links": ["http://localhost:8000/api/url"] + db_links + [os.path.join(prefix, l) for l in links]})
list_links = [
# DB
"http://localhost:8080/?pgsql=matitos_db&username=supermatitos&db=matitos&ns=public&select=urls&order%5B0%5D=id&limit=500",
# Admin panel
"http://localhost:8000/admin",
# URLs
"http://localhost:8000/api/url",
# API tasks
] + [os.path.join(prefix, l) for l in links]
# Json
return JsonResponse({"links": list_links })
from django.http import StreamingHttpResponse, HttpResponse, JsonResponse
from django.http import StreamingHttpResponse, JsonResponse
from django.shortcuts import render, get_object_or_404
from django.core.paginator import Paginator
import requests
from django.http import StreamingHttpResponse
import json
import time
import ollama
from .models import Urls, Source, Search, UrlsSourceSearch, UrlContent
from .models import Urls, Source, Search, UrlContent, UrlsSourceSearch
# Create your views here.
def urls(request):
    """Render the paginated, filterable list of URLs.

    Query parameters (all optional):
        page:     page number to display (default 1).
        items:    URLs per page (default 15; any value that is not a
                  positive integer falls back to 15).
        sources:  comma-separated Source ids, "all" or "none"
                  (default: the ids of every Source).
        searches: comma-separated Search ids, "all" or "none"
                  (default: the ids of every Search).
        status:   comma-separated status values, "all" or "none"
                  (default: no status filter).

    Returns:
        The rendered "item_list.html" page, or, when the request carries the
        "X-Requested-With: XMLHttpRequest" header, a JsonResponse whose
        "items_html" key holds the rendered "item_list_partial.html".
    """
    sources = Source.objects.all()
    searches = Search.objects.all()

    # Parameters
    page_number = request.GET.get("page", 1)
    try:
        num_items = int(request.GET.get("items", 15))
    except (TypeError, ValueError):
        num_items = 15
    if num_items < 1:
        # Paginator cannot work with a zero or negative page size.
        num_items = 15
    source_ids = request.GET.get("sources", ",".join(str(s.id) for s in sources))
    search_ids = request.GET.get("searches", ",".join(str(s.id) for s in searches))
    status_filters = request.GET.get("status", None)

    # Filters, applied in order: status, sources, searches.
    # NOTE(review): the two urlssourcesearch lookups join a multi-valued
    # relation and may yield duplicate rows; confirm whether .distinct() is
    # wanted before enabling it.
    url_qs = Urls.objects.all()
    requested_filters = (
        ("status__in", status_filters),
        ("urlssourcesearch__id_source__in", source_ids),
        ("urlssourcesearch__id_search__in", search_ids),
    )
    for lookup, raw_value in requested_filters:
        if not raw_value or raw_value == "all":
            continue
        if raw_value == "none":
            # Stay a QuerySet (not a plain list) so later filters still work.
            url_qs = url_qs.none()
        else:
            url_qs = url_qs.filter(**{lookup: raw_value.split(",")})

    # Pagination
    paginator = Paginator(url_qs, num_items)
    page_obj = paginator.get_page(page_number)

    # Map URL IDs to their sources & searches, only for subset of URLs (page of interest)
    sources_map = {
        url.id: list(Source.objects.filter(urlssourcesearch__id_url=url).distinct())
        for url in page_obj.object_list
    }
    searches_map = {
        url.id: list(Search.objects.filter(urlssourcesearch__id_url=url).distinct())
        for url in page_obj.object_list
    }

    context = {
        "page_obj": page_obj,
        "sources": sources,
        "searches": searches,
        "sources_map": sources_map,
        "searches_map": searches_map,
        "list_status": Urls.STATUS_ENUM.values,
        "list_urls_per_page": [15, 100, 500],
    }

    # If request is AJAX, return JSON response
    if request.headers.get("X-Requested-With") == "XMLHttpRequest":
        return JsonResponse({'items_html': render(request, 'item_list_partial.html', context).content.decode('utf-8')})

    return render(request, "item_list.html", context)
class OllamaClient():
    """Small wrapper around ``ollama.Client`` used by the views.

    The host is read from the ENDPOINT_OLLAMA environment variable, with a
    hard-coded fallback URL when the variable is not set.
    """

    def __init__(self):
        endpoint = os.getenv("ENDPOINT_OLLAMA", "https://ollamamodel.matitos.org")
        self.client = ollama.Client(host=endpoint)

    def _get_default_model(self):
        """Return the name of the model that should be listed first."""
        return "gemma3:1b"

    def get_models(self):
        """Return the model names in sorted order, default model first.

        When the default model is not among the names reported by the
        client, the plain sorted list is returned.
        """
        available = sorted(entry.model for entry in self.client.list().models)
        preferred = self._get_default_model()
        if preferred not in available:
            return available
        remaining = [name for name in available if name != preferred]
        return [preferred] + remaining

    def get_prompt(self):
        """Return the summarization prompt offered to the user."""
        # Alternative prompts kept for reference:
        #return "Explain in a single and compact paragraph the what, why, when, where, who, and how of the content below. Also provide a single paragraph summary of the content:"
        #return "Provide in one paragraph the what, why, when, where, who, and how of the content below. Also provide a one paragraph summary of the content:"
        #return "Provide two summaries of the content below, and avoid mentioning the source of information. First, provide a very brief and compact paragraph summary. Second, provide a larger and more detailed summary, which describe the what, why, when, where, who, and how of the content:"
        # return "Imagine you are a journalist, TLDR in a paragraph. Only answer with the summary:"
        #return "Below you will find the whole content of a news article:\n{}\nProvide a concise summary of one paragraph maximum of the content.".format(content)
        return "Provide a summary of the content below, avoid mentioning the source of information, and only answer with the summary. The summary needs to be brief and compact, consisting of one paragraph."
def url_detail_view(request, id):
    """Render the detail page for a single URL.

    Args:
        request: the incoming HTTP request.
        id: primary key of the ``Urls`` row to display; a missing row
            results in a 404 via ``get_object_or_404``.

    Returns:
        The rendered "url_detail.html" page. Its context carries the URL
        item, its distinct sources and searches, the model names and prompt
        supplied by ``OllamaClient``, and the URL content (an empty dict
        when no ``UrlContent`` row exists for this id).
    """
    url_item = get_object_or_404(Urls, id=id)
    url_sources = list(Source.objects.filter(urlssourcesearch__id_url=url_item).distinct())
    url_searches = list(Search.objects.filter(urlssourcesearch__id_url=url_item).distinct())
    # url_source_search = UrlsSourceSearch.objects.filter(id_url=url_item)

    try:
        url_content = UrlContent.objects.get(pk=id)
    except UrlContent.DoesNotExist:
        url_content = {}

    # TODO: https://github.com/ollama/ollama-python?tab=readme-ov-file#async-client
    # Named llm_client (not "ollama") so the imported ollama module is not
    # shadowed by a function-local name.
    llm_client = OllamaClient()

    context = {
        'url_item': url_item,
        'sources': url_sources,
        'searches': url_searches,
        'models': llm_client.get_models(),
        'prompt': llm_client.get_prompt(),
        'url_content': url_content,
    }
    return render(request, 'url_detail.html', context)
# TODO: move to ollamajs...
def fetch_details(request, id):
url_item = get_object_or_404(Urls, id=id)
url_param = request.GET.get("url", "") # Get URL
@@ -116,14 +154,14 @@ def fetch_details(request, id):
text = request.GET.get("text", "") # Get LLM prompt
# LLM
client = ollama.Client(host = 'https://ollamamodel.matitos.org')
ollama = OllamaClient()
def stream_response():
msg_content = {
"role": "user",
"content": text,
}
response = client.chat(model=model, messages=[msg_content], stream=True)
response = ollama.client.chat(model=model, messages=[msg_content], stream=True)
for chunk in response:
yield chunk["message"]["content"] # Stream each chunk of text