Urls web visualization, cleaning obsolete code

2025-03-25 02:51:16 +01:00
parent 0c6b5f1ea4
commit 24b4614049
52 changed files with 371 additions and 3293 deletions
--- a/app_urls/api/views.py
+++ b/app_urls/api/views.py
@@ -18,64 +18,80 @@ def link_list(request):
    prefix = "http://localhost:8000/api/task"
    links = ["fetch_feeds", "fetch_parser", "fetch_search", "process_raw_urls_50", "process_error_urls_50", "process_missing_kids_urls_50", "process_missing_kids_urls_500000"]

-    db_links = ["http://localhost:8080/?pgsql=matitos_db&username=supermatitos&db=matitos&ns=public&select=urls&order%5B0%5D=id&limit=500"]
-    return JsonResponse({"links": ["http://localhost:8000/api/url"] + db_links + [os.path.join(prefix, l) for l in links]})
+    list_links = [
+        # DB
+        "http://localhost:8080/?pgsql=matitos_db&username=supermatitos&db=matitos&ns=public&select=urls&order%5B0%5D=id&limit=500",
+        # Admin panel
+        "http://localhost:8000/admin",
+        # URLs
+        "http://localhost:8000/api/url",
+        # API tasks
+    ] + [os.path.join(prefix, l) for l in links]
+    # Json
+    return JsonResponse({"links": list_links })


-from django.http import StreamingHttpResponse, HttpResponse, JsonResponse
+from django.http import StreamingHttpResponse, JsonResponse
 from django.shortcuts import render, get_object_or_404
 from django.core.paginator import Paginator
-import requests
-from django.http import StreamingHttpResponse
-import json
-import time
 import ollama

-from .models import Urls, Source, Search, UrlsSourceSearch, UrlContent
+from .models import Urls, Source, Search, UrlContent, UrlsSourceSearch

 # Create your views here.
-def news(request):
+def urls(request):
    # URLs
    urls = Urls.objects.all()
    # Sources
    sources = Source.objects.all()
-    seaerches = Search.objects.all()
+    searches = Search.objects.all()

    # Parameters
    page_number = request.GET.get("page", 1)
    num_items = request.GET.get("items", 15)
    source_ids = request.GET.get("sources", ','.join([str(s.id) for s in sources]))
+    search_ids = request.GET.get("searches", ','.join([str(s.id) for s in searches]))
    status_filters = request.GET.get("status", None)

    # Filters
    if (status_filters) and (status_filters != "all"):
-        urls = urls.filter(status__in=status_filters.split(","))
+        if (status_filters == "none"):
+            urls = []
+        else:
+            urls = urls.filter(status__in=status_filters.split(","))
    if (source_ids) and (source_ids != "all"):
-        # TODO: Distinct needed?
-        # urls = urls.filter(urlssource__id_source__in=source_ids.split(",")).distinct()
-        pass
+        if (source_ids == "none"):
+            urls = []
+        else:
+            urls = urls.filter(urlssourcesearch__id_source__in=source_ids.split(",")) # .distinct()
+    if (search_ids) and (search_ids != "all"):
+        if (search_ids == "none"):
+            urls = []
+        else:
+            urls = urls.filter(urlssourcesearch__id_search__in=search_ids.split(",")) # .distinct()

    # Pagination
    paginator = Paginator(urls, num_items)
    page_obj = paginator.get_page(page_number)

-    # Map URL IDs to their sources, only for subset of URLs (page of interest)
-    sources_map= {}
-    """
+    # Map URL IDs to their sources & searches, only for subset of URLs (page of interest)
    sources_map = {
-        url.id: list(Source.objects.filter(urlssource__id_url=url).values_list('source', flat=True))
-        for url in page_obj.object_list
+        url.id: list(Source.objects.filter(urlssourcesearch__id_url=url).distinct()) for url in page_obj.object_list
+    }
+    searches_map = {
+        url.id: list(Search.objects.filter(urlssourcesearch__id_url=url).distinct()) for url in page_obj.object_list
    }
-    """

    context = {
        "page_obj": page_obj,
        "sources": sources,
+        "searches": searches,
        "sources_map": sources_map,
+        "searches_map": searches_map,
        "list_status": Urls.STATUS_ENUM.values,
-        "list_urls_per_page": [15, 50, 100],
+        "list_urls_per_page": [15, 100, 500],
    }
-    
+
    # If request is AJAX, return JSON response
    if request.headers.get("X-Requested-With") == "XMLHttpRequest":
        return JsonResponse({'items_html': render(request, 'item_list_partial.html', context).content.decode('utf-8')})
@@ -83,32 +99,54 @@ def news(request):
    return render(request, "item_list.html", context)


+class OllamaClient():
+    def __init__(self):
+        self.client = ollama.Client(host=os.getenv("ENDPOINT_OLLAMA", "https://ollamamodel.matitos.org"))
+    
+    def _get_default_model(self):
+        return "gemma3:1b"
+
+    def get_models(self):
+        models = sorted([m.model for m in self.client.list().models])
+        if (self._get_default_model() in models):
+            return [self._get_default_model()] + [m for m in models if m != self._get_default_model()]
+        else:
+            return models
+    
+    def get_prompt(self):
+        return "Provide a summary of the content below, avoid mentioning the source of information, and only answer with the summary. The summary needs to be brief and compact, consisting of one paragraph."
+        #return "Explain in a single and compact paragraph the what, why, when, where, who, and how of the content below. Also provide a single paragraph summary of the content:"
+        #return "Provide in one paragraph the what, why, when, where, who, and how of the content below. Also provide a one paragraph summary of the content:"
+        #return "Provide two summaries of the content below, and avoid mentioning the source of information. First, provide a very brief and compact paragraph summary. Second, provide a larger and more detailed summary, which describe the what, why, when, where, who, and how of the content:"
+        # return "Imagine you are a journalist, TLDR in a paragraph. Only answer with the summary:"
+        #return "Below you will find the whole content of a news article:\n{}\nProvide a concise summary of one paragraph maximum of the content.".format(content)
+
+
 def url_detail_view(request, id):
    url_item = get_object_or_404(Urls, id=id)
-    url_sources = list(Source.objects.filter(urlssource__id_url=url_item).values_list('source', flat=True))
+    url_sources = list(Source.objects.filter(urlssourcesearch__id_url=url_item).distinct())
+    url_searches = list(Search.objects.filter(urlssourcesearch__id_url=url_item).distinct())
+    # url_source_search = UrlsSourceSearch.objects.filter(id_url=url_item)
+    
    try:
        url_content = UrlContent.objects.get(pk=id)
    except UrlContent.DoesNotExist:
        url_content = {}
    
    # TODO: https://github.com/ollama/ollama-python?tab=readme-ov-file#async-client
-    # LLM models available
-    client = ollama.Client(host = 'https://ollamamodel.matitos.org')
-    models = sorted([m.model for m in client.list().models])
-    # default_model = "llama3.2:3b"
+    ollama = OllamaClient()

    context = {
        'url_item': url_item,
        'sources': url_sources,
-        'models': models,
-        #'default_model': default_model,
-        'prompt': "Provide in one paragraph the what, why, when, where, who, and how of the content below. Also provide a one paragraph summary of the content:",
-        #"prompt": "Image you are a journalist, TLDR in a paragraph:",
-        #"prompt": "Below you will find the whole content of a news article:\n{}\nProvide a concise summary of one paragraph maximum of the content.".format(content)
+        'searches': url_searches,
+        'models': ollama.get_models(),
+        'prompt': ollama.get_prompt(),
        'url_content': url_content,
    }
    return render(request, 'url_detail.html', context)

+# TODO: move to ollamajs...
 def fetch_details(request, id):
    url_item = get_object_or_404(Urls, id=id)
    url_param = request.GET.get("url", "")  # Get URL
@@ -116,14 +154,14 @@ def fetch_details(request, id):
    text = request.GET.get("text", "")  # Get LLM prompt

    # LLM
-    client = ollama.Client(host = 'https://ollamamodel.matitos.org')
+    ollama = OllamaClient()

    def stream_response():
        msg_content = {
            "role": "user", 
            "content": text,
        }
-        response = client.chat(model=model, messages=[msg_content], stream=True)
+        response = ollama.client.chat(model=model, messages=[msg_content], stream=True)
        for chunk in response:
            yield chunk["message"]["content"]  # Stream each chunk of text