# matitos_news/app_urls/api/views.py

# import django_rq
from .tasks import background_task
from django.http import JsonResponse
import os

def trigger_task(request, task):
    """Enqueue ``task`` via ``background_task.delay`` and acknowledge it as JSON.

    The response only confirms that the task name was handed over for
    background execution; it carries no information about the task's outcome.
    """
    background_task.delay(task)

    payload = {"message": "Task has been enqueued!", "task": task}
    return JsonResponse(payload)

    # Earlier django_rq-based variant, kept for reference:
    # queue = django_rq.get_queue('default')  # Get the default queue
    # job = queue.enqueue(background_task, task, job_timeout="30m")
    # return JsonResponse({"message": "Task has been enqueued!", "job_id": job.id})

def link_list(request):
    """Return a JSON index of handy local links (DB UI, admin, views, task triggers).

    The response has a single ``"links"`` key holding the list of URLs. Task
    trigger URLs are built as ``<prefix>/<task name>``.
    """
    prefix = "http://localhost:8000/api/task"
    task_names = [
        "fetch_feeds",
        "fetch_parser",
        "fetch_search",
        "process_raw_urls_50",
        "process_error_urls_50",
        "process_missing_kids_urls_50",
        "process_missing_kids_urls_500000",
    ]

    list_links = [
        # DB
        "http://localhost:8080/?pgsql=matitos_db&username=supermatitos&db=matitos&ns=public&select=urls&order%5B0%5D=id&limit=500",
        # Admin panel
        "http://localhost:8000/admin",
        # URLs
        "http://localhost:8000/api/url",
        # Charts
        "http://localhost:8000/api/charts",
        # Logs
        "http://localhost:8000/api/logs",
        "http://localhost:8000/api/logs_error",
    ]
    # API tasks. URLs always use "/" as separator, so join explicitly instead
    # of using os.path.join, which would emit "\" on Windows.
    list_links += [f"{prefix}/{name}" for name in task_names]

    # Json
    return JsonResponse({"links": list_links})


from django.http import StreamingHttpResponse, JsonResponse
from django.shortcuts import render, get_object_or_404
from django.core.paginator import Paginator
import ollama

from .models import Urls, Source, Search, UrlContent, UrlsSourceSearch

# Create your views here.
def urls(request):
    """Render the paginated URL listing, optionally filtered by status, source and search.

    Query parameters:
        page: page number to display (default 1).
        items: number of URLs per page (default 15). Values that are not a
            positive integer fall back to the default.
        status, sources, searches: comma-separated values to filter on.
            ``"all"`` (or an empty value) disables that filter and ``"none"``
            yields an empty result. ``sources`` and ``searches`` default to
            every known id; ``status`` defaults to no filtering.

    Returns the full ``urls.html`` page, or for AJAX requests (header
    ``X-Requested-With: XMLHttpRequest``) a JSON object whose ``"urls"`` key
    holds the rendered ``urls_partial.html`` fragment.
    """
    default_items = 15

    url_qs = Urls.objects.all()
    sources = Source.objects.all()
    searches = Search.objects.all()

    # Parameters
    page_number = request.GET.get("page", 1)
    num_items = request.GET.get("items", default_items)
    source_ids = request.GET.get("sources", ','.join([str(s.id) for s in sources]))
    search_ids = request.GET.get("searches", ','.join([str(s.id) for s in searches]))
    status_filters = request.GET.get("status", None)

    # Filters, applied in order as separate .filter() calls.
    filters = (
        ("status__in", status_filters),
        ("urlssourcesearch__id_source__in", source_ids),
        ("urlssourcesearch__id_search__in", search_ids),
    )
    for lookup, raw_value in filters:
        if not raw_value or raw_value == "all":
            continue
        if raw_value == "none":
            # Keep a QuerySet (not a plain list) so that any following
            # filter can still be chained without raising AttributeError.
            url_qs = url_qs.none()
        else:
            url_qs = url_qs.filter(**{lookup: raw_value.split(",")})

    # Pagination: guard against non-numeric or non-positive page sizes, which
    # would otherwise make the Paginator raise.
    try:
        per_page = int(num_items)
    except (TypeError, ValueError):
        per_page = default_items
    if per_page < 1:
        per_page = default_items
    paginator = Paginator(url_qs, per_page)
    page_obj = paginator.get_page(page_number)

    # Map URL IDs to their sources & searches, only for the URLs on this page.
    # NOTE(review): this issues two queries per displayed URL.
    sources_map = {
        url.id: list(Source.objects.filter(urlssourcesearch__id_url=url).distinct())
        for url in page_obj.object_list
    }
    searches_map = {
        url.id: list(Search.objects.filter(urlssourcesearch__id_url=url).distinct())
        for url in page_obj.object_list
    }

    context = {
        "page_obj": page_obj,
        "sources": sources,
        "searches": searches,
        "sources_map": sources_map,
        "searches_map": searches_map,
        "list_status": Urls.STATUS_ENUM.values,
        "list_urls_per_page": [15, 100, 500],
    }

    # If request is AJAX, return the rendered partial wrapped in JSON
    if request.headers.get("X-Requested-With") == "XMLHttpRequest":
        partial_html = render(request, 'urls_partial.html', context).content.decode('utf-8')
        return JsonResponse({'urls': partial_html})

    return render(request, "urls.html", context)

####################################################################################################
class OllamaClient():
    """Small convenience wrapper around ``ollama.Client`` used by the URL views."""

    def __init__(self):
        # Endpoint is configurable through the ENDPOINT_OLLAMA environment variable.
        endpoint = os.getenv("ENDPOINT_OLLAMA", "https://ollamamodel.matitos.org")
        self.client = ollama.Client(host=endpoint)

    def _get_default_model(self):
        """Return the name of the model that should be listed first."""
        return "llama3.2:3b"

    def get_models(self):
        """Return the server's model names sorted, with the default model first if present."""
        names = sorted(entry.model for entry in self.client.list().models)
        preferred = self._get_default_model()
        if preferred not in names:
            return names
        # Move the default model to the front; the rest stay in sorted order.
        return [preferred, *(name for name in names if name != preferred)]

    def get_prompt(self):
        """Return the instruction text that precedes the article content in summary requests."""
        # Earlier variants (5W1H explanation, two-level summary, "TLDR as a
        # journalist") were replaced by this wording.
        prompt = "Rewrite the text below into a clear and concise summary, presenting the key points as if they are newly written insights. Do not mention or reference the original text, its source, or any phrases like 'According to' or 'The text states'. Instead, write in a natural, standalone format that feels like an original explanation. Keep it brief, engaging, informative, in the style of a news article, and no longer than a paragraph:"
        return prompt


def url_detail_view(request, id):
    """Render ``url_detail.html`` for a single URL.

    Responds with 404 when no ``Urls`` row has the given ``id``. The context
    carries the URL, its distinct sources and searches, its stored content
    (an empty dict when there is none), and the model list and prompt
    obtained from ``OllamaClient``.
    """
    url_item = get_object_or_404(Urls, id=id)

    # Both lookups go through the same relation to this URL.
    relation = {"urlssourcesearch__id_url": url_item}
    url_sources = list(Source.objects.filter(**relation).distinct())
    url_searches = list(Search.objects.filter(**relation).distinct())

    try:
        url_content = UrlContent.objects.get(pk=id)
    except UrlContent.DoesNotExist:
        # No content stored for this URL: hand the template an empty mapping.
        url_content = {}

    # TODO: https://github.com/ollama/ollama-python?tab=readme-ov-file#async-client
    llm = OllamaClient()
    available_models = llm.get_models()
    prompt = llm.get_prompt()

    return render(
        request,
        'url_detail.html',
        {
            'url_item': url_item,
            'sources': url_sources,
            'searches': url_searches,
            'models': available_models,
            'prompt': prompt,
            'url_content': url_content,
        },
    )

# TODO: move to ollamajs...
def fetch_details(request, id):
    """Stream an LLM chat completion for the ``text`` query parameter as plain text.

    Query parameters ``model`` and ``text`` select the model and supply the
    user message; both default to an empty string. Responds with 404 when no
    ``Urls`` row has the given ``id``.
    """
    # Only used to reject unknown ids; the row itself is not needed below.
    get_object_or_404(Urls, id=id)

    model = request.GET.get("model", "")  # LLM model name
    text = request.GET.get("text", "")  # LLM prompt

    llm = OllamaClient()

    def stream_response():
        # Single-turn conversation: the whole prompt is one user message.
        messages = [{"role": "user", "content": text}]
        for chunk in llm.client.chat(model=model, messages=messages, stream=True):
            yield chunk["message"]["content"]  # Forward each chunk of text as it arrives

    return StreamingHttpResponse(stream_response(), content_type="text/plain")


####################################################################################################
from django.shortcuts import render
from django.http import JsonResponse
from django.db.models import Count
from datetime import timedelta
from django.utils import timezone
from .models import Urls, UrlsSourceSearch

def charts(request):
    """Render the charts page."""
    template_name = 'charts.html'
    return render(request, template_name)

def urls_by_fetch_date(request):
    """Return, as JSON, the number of URLs fetched per day over the last 30 days.

    The payload has two parallel lists, ``dates`` and ``counts``, ordered by date.
    """
    window_start = timezone.now() - timedelta(days=30)

    # One row per fetch date with the number of URLs fetched on that date.
    per_day = (
        Urls.objects.filter(ts_fetch__gte=window_start)
        .values('ts_fetch__date')
        .annotate(count=Count('id'))
        .order_by('ts_fetch__date')
    )

    dates, counts = [], []
    for row in per_day:
        dates.append(row['ts_fetch__date'])
        counts.append(row['count'])

    return JsonResponse({'dates': dates, 'counts': counts})

def urls_per_status(request):
    """Return, as JSON, the number of URLs per status fetched within a recent window.

    Query parameters:
        days: size of the window in days (default 30). A value that is not an
            integer, or that is too large to form a date, falls back to the
            default instead of failing the request.

    The payload has two parallel lists, ``statuses`` and ``counts``, ordered
    by status.
    """
    default_days = 30
    try:
        days = int(request.GET.get('days', default_days))
        start_date = timezone.now() - timedelta(days=days)
    except (TypeError, ValueError, OverflowError):
        # Non-numeric input (ValueError) or an out-of-range day count
        # (OverflowError) should not turn into a server error.
        start_date = timezone.now() - timedelta(days=default_days)

    # Count the number of URLs grouped by status within the date range
    urls_data = (
        Urls.objects.filter(ts_fetch__gte=start_date)
        .values('status')
        .annotate(count=Count('id'))
        .order_by('status')
    )

    # Format data for JSON
    data = {
        'statuses': [item['status'] for item in urls_data],
        'counts': [item['count'] for item in urls_data],
    }

    return JsonResponse(data)

def urls_per_source(request):
    """Return, as JSON, the number of URL links per source.

    The payload has two parallel lists, ``sources`` and ``counts``, ordered
    by source.
    """
    grouped = UrlsSourceSearch.objects.values('id_source__source')
    grouped = grouped.annotate(count=Count('id_url'))
    rows = list(grouped.order_by('id_source__source'))

    return JsonResponse({
        'sources': [row['id_source__source'] for row in rows],
        'counts': [row['count'] for row in rows],
    })

def urls_per_search(request):
    """Return, as JSON, the number of URL links per search.

    The payload has two parallel lists, ``searches`` and ``counts``, ordered
    by search.
    """
    per_search = (
        UrlsSourceSearch.objects
        .values('id_search__search')
        .annotate(count=Count('id_url'))
        .order_by('id_search__search')
    )

    labels = []
    totals = []
    for entry in per_search:
        labels.append(entry['id_search__search'])
        totals.append(entry['count'])

    return JsonResponse({'searches': labels, 'counts': totals})

####################################################################################################
from django.http import HttpResponse

def logs_error(request):
    """Return the error log file as a plain-text response.

    The path comes from the ``PATH_LOGS_ERROR`` environment variable
    (default ``logs/log_app_fetcher_error.log``). Responds with 404 when the
    file does not exist, instead of failing with an unhandled exception.
    """
    log_path = os.getenv("PATH_LOGS_ERROR", "logs/log_app_fetcher_error.log")
    try:
        # Explicit encoding so the result does not depend on the platform
        # default; undecodable bytes are replaced rather than aborting the read.
        with open(log_path, "r", encoding="utf-8", errors="replace") as f:
            file_content = f.read()
    except FileNotFoundError:
        return HttpResponse("Log file not found.", content_type="text/plain", status=404)
    return HttpResponse(file_content, content_type="text/plain")

def logs(request):
    """Return the application log file as a plain-text response.

    The path comes from the ``PATH_LOGS`` environment variable (default
    ``logs/log_app_fetcher.log``). Responds with 404 when the file does not
    exist, instead of failing with an unhandled exception.
    """
    log_path = os.getenv("PATH_LOGS", "logs/log_app_fetcher.log")
    try:
        # Explicit encoding so the result does not depend on the platform
        # default; undecodable bytes are replaced rather than aborting the read.
        with open(log_path, "r", encoding="utf-8", errors="replace") as f:
            file_content = f.read()
    except FileNotFoundError:
        return HttpResponse("Log file not found.", content_type="text/plain", status=404)
    return HttpResponse(file_content, content_type="text/plain")

####################################################################################################
from django.shortcuts import render
from .models import Urls, Search, Source
from django.db.models import Q
from django.utils.timezone import now, timedelta

def filtered_urls(request):
    """Render ``filtered_urls.html`` with URLs filtered by status, search, source and age.

    Query parameters:
        status, search, source: repeated parameters listing the selected
            values; each defaults to every available value.
        selected_days: only URLs fetched within this many days are shown
            (default 30). Invalid or out-of-range values fall back to the default.
        per_page: URLs per page (default 25). Values that are not a positive
            integer fall back to the default.
        page: page number to display.
    """
    default_days = 30
    default_per_page = 25

    statuses = Urls.STATUS_ENUM.choices
    searches = Search.objects.all()
    sources = Source.objects.all()

    # Check if filters are applied; if not, select all by default
    selected_status = request.GET.getlist('status', [str(status[0]) for status in statuses])
    selected_search = request.GET.getlist('search', [str(search.id) for search in searches])
    selected_source = request.GET.getlist('source', [str(source.id) for source in sources])

    # A malformed or huge day count must not turn into a server error.
    try:
        selected_days = int(request.GET.get("selected_days", default_days))
        oldest_fetch = now() - timedelta(days=selected_days)
    except (TypeError, ValueError, OverflowError):
        selected_days = default_days
        oldest_fetch = now() - timedelta(days=selected_days)

    # Filter URLs based on selected filters. All conditions stay in a single
    # filter() call, as in the original query.
    urls = Urls.objects.filter(
        Q(urlssourcesearch__id_source__in=selected_source) &
        Q(urlssourcesearch__id_search__in=selected_search) &
        Q(status__in=selected_status) &
        Q(ts_fetch__gte=oldest_fetch)
    ).distinct() # .order_by('-ts_fetch')

    # Shorten the search type labels on the in-memory objects passed to the template
    for s in searches:
        s.type = s.type.replace("rss_feed", "rss").replace("url_host", "url").replace("keyword_search", "keyword")

    # Pagination. The raw per_page value is kept when valid so the template
    # receives it unchanged; otherwise use the default of 25 URLs per page.
    per_page = request.GET.get('per_page', default_per_page)
    try:
        if int(per_page) < 1:
            per_page = default_per_page
    except (TypeError, ValueError):
        per_page = default_per_page
    paginator = Paginator(urls, per_page)  # Paginate the filtered URLs
    page_number = request.GET.get('page')  # Get the current page number
    page_obj = paginator.get_page(page_number)  # Get the current page object

    # Map URL IDs to their sources & searches, only for the URLs on this page
    sources_map = {
        url.id: list(Source.objects.filter(urlssourcesearch__id_url=url).distinct())
        for url in page_obj.object_list
    }
    searches_map = {
        url.id: list(Search.objects.filter(urlssourcesearch__id_url=url).distinct())
        for url in page_obj.object_list
    }

    context = {
        'urls': page_obj,  # Pass the paginated URLs
        'per_page': per_page,  # Send per_page value for dynamic pagination
        'statuses': statuses,
        'searches': searches,
        'sources': sources,
        'selected_status': selected_status,
        'selected_search': selected_search,
        'selected_source': selected_source,
        "selected_days": selected_days,
        "sources_map": sources_map,
        "searches_map": searches_map,
    }

    return render(request, 'filtered_urls.html', context)

####################################################################################################