import os

import ollama

from django.core.paginator import Paginator
from django.http import JsonResponse, StreamingHttpResponse
from django.shortcuts import get_object_or_404, render

from .models import Urls, Source, Search, UrlContent, UrlsSourceSearch
from .tasks import background_task
# import django_rq


def trigger_task(request, task):
    # View that enqueues a task in the "default" queue
    background_task.delay(task)
    return JsonResponse({"message": "Task has been enqueued!", "task": task})

    # Alternative: enqueue through django_rq directly, with a custom timeout
    # queue = django_rq.get_queue('default')  # Get the default queue
    # job = queue.enqueue(background_task, task, job_timeout="30m")
    # return JsonResponse({"message": "Task has been enqueued!", "job_id": job.id})
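
# A minimal sketch of what .tasks.background_task might look like; the real
# implementation lives in tasks.py and may differ. django_rq's @job decorator
# is what attaches .delay(), which enqueues the call for an RQ worker:
#
#   # tasks.py (illustrative)
#   from django_rq import job
#
#   @job("default")  # queue name; a timeout could also be passed here
#   def background_task(task):
#       # Long-running work happens on the worker process, not in the request cycle
#       print(f"Processing task: {task}")
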
def link_list(request):
    # Convenience index of useful local links (DB admin, Django admin, API, tasks)
    prefix = "http://localhost:8000/api/task"
    links = [
        "fetch_feeds",
        "fetch_parser",
        "fetch_search",
        "process_raw_urls_50",
        "process_error_urls_50",
        "process_missing_kids_urls_50",
        "process_missing_kids_urls_500000",
    ]

    list_links = [
        # DB
        "http://localhost:8080/?pgsql=matitos_db&username=supermatitos&db=matitos&ns=public&select=urls&order%5B0%5D=id&limit=500",
        # Admin panel
        "http://localhost:8000/admin",
        # URLs
        "http://localhost:8000/api/url",
        # API tasks
    ] + [f"{prefix}/{link}" for link in links]  # Build URLs with string formatting, not os.path.join

    # JSON
    return JsonResponse({"links": list_links})
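
# Illustrative routing for the views above (assumption: the actual urls.py may
# differ; the task links built in link_list suggest /api/task/<task>):
#
#   # urls.py (sketch)
#   from django.urls import path
#   from . import views
#
#   urlpatterns = [
#       path("api/task/<str:task>", views.trigger_task),
#       path("api/links", views.link_list),
#   ]
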
# Create your views here.
def urls(request):
    # URLs (explicit ordering keeps pagination stable; Paginator warns on unordered querysets)
    urls = Urls.objects.all().order_by("id")
    # Sources & searches
    sources = Source.objects.all()
    searches = Search.objects.all()

    # Parameters (defaults: page 1, 15 items per page, all sources and searches)
    page_number = request.GET.get("page", 1)
    num_items = request.GET.get("items", 15)
    source_ids = request.GET.get("sources", ','.join(str(s.id) for s in sources))
    search_ids = request.GET.get("searches", ','.join(str(s.id) for s in searches))
    status_filters = request.GET.get("status", None)

    # Filters: each accepts "all" (no-op), "none" (empty result), or a comma-separated list
    if status_filters and status_filters != "all":
        if status_filters == "none":
            urls = Urls.objects.none()  # Empty queryset, not [], so later .filter() calls still work
        else:
            urls = urls.filter(status__in=status_filters.split(","))
    if source_ids and source_ids != "all":
        if source_ids == "none":
            urls = Urls.objects.none()
        else:
            urls = urls.filter(urlssourcesearch__id_source__in=source_ids.split(","))  # .distinct()
    if search_ids and search_ids != "all":
        if search_ids == "none":
            urls = Urls.objects.none()
        else:
            urls = urls.filter(urlssourcesearch__id_search__in=search_ids.split(","))  # .distinct()

    # Pagination
    paginator = Paginator(urls, num_items)
    page_obj = paginator.get_page(page_number)

    # Map URL IDs to their sources & searches, only for the subset of URLs on the current page
    sources_map = {
        url.id: list(Source.objects.filter(urlssourcesearch__id_url=url).distinct())
        for url in page_obj.object_list
    }
    searches_map = {
        url.id: list(Search.objects.filter(urlssourcesearch__id_url=url).distinct())
        for url in page_obj.object_list
    }

    context = {
        "page_obj": page_obj,
        "sources": sources,
        "searches": searches,
        "sources_map": sources_map,
        "searches_map": searches_map,
        "list_status": Urls.STATUS_ENUM.values,
        "list_urls_per_page": [15, 100, 500],
    }

    # If request is AJAX, return the rendered partial wrapped in a JSON response
    if request.headers.get("X-Requested-With") == "XMLHttpRequest":
        return JsonResponse({'urls': render(request, 'urls_partial.html', context).content.decode('utf-8')})

    return render(request, "urls.html", context)

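# Quick way to exercise both response modes of the urls view above from a shell
# (illustrative; assumes the view is routed at /urls on the dev server):
#
#   import requests
#
#   # Regular request: full urls.html page
#   html = requests.get("http://localhost:8000/urls", params={"page": 2, "items": 100}).text
#
#   # AJAX request: same view, but returns the rendered partial wrapped in JSON
#   data = requests.get(
#       "http://localhost:8000/urls",
#       params={"status": "error", "sources": "all"},
#       headers={"X-Requested-With": "XMLHttpRequest"},
#   ).json()
#   print(data["urls"][:200])  # HTML fragment rendered from urls_partial.html
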
class OllamaClient:
    def __init__(self):
        self.client = ollama.Client(host=os.getenv("ENDPOINT_OLLAMA", "https://ollamamodel.matitos.org"))

    def _get_default_model(self):
        return "gemma3:1b"

    def get_models(self):
        # List available models, with the default model first when present
        models = sorted(m.model for m in self.client.list().models)
        default = self._get_default_model()
        if default in models:
            return [default] + [m for m in models if m != default]
        return models

    def get_prompt(self):
        return "Rewrite the text below into a clear and concise summary, presenting the key points as if they are newly written insights. Do not mention or reference the original text, its source, or any phrases like 'According to' or 'The text states'. Instead, write in a natural, standalone format that feels like an original explanation. Keep it brief, engaging, informative, in the style of a news article, and no longer than a paragraph:"
        # Earlier prompt variants, kept for reference:
        #return "Provide a summary of the content below, avoid mentioning the source of information, and only answer with the summary. The summary needs to be brief and compact, consisting of one paragraph."
        #return "Explain in a single and compact paragraph the what, why, when, where, who, and how of the content below. Also provide a single paragraph summary of the content:"
        #return "Provide in one paragraph the what, why, when, where, who, and how of the content below. Also provide a one paragraph summary of the content:"
        #return "Provide two summaries of the content below, and avoid mentioning the source of information. First, provide a very brief and compact paragraph summary. Second, provide a larger and more detailed summary, which describes the what, why, when, where, who, and how of the content:"
        #return "Imagine you are a journalist, TLDR in a paragraph. Only answer with the summary:"
        #return "Below you will find the whole content of a news article:\n{}\nProvide a concise summary of one paragraph maximum of the content.".format(content)

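# Minimal usage sketch for OllamaClient (assumes the Ollama endpoint is
# reachable and the model is pulled on the server):
#
#   oc = OllamaClient()
#   print(oc.get_models())  # Default model listed first, when available
#   reply = oc.client.chat(
#       model=oc._get_default_model(),
#       messages=[{"role": "user", "content": oc.get_prompt() + "\n<article text>"}],
#   )
#   print(reply["message"]["content"])
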
def url_detail_view(request, id):
    url_item = get_object_or_404(Urls, id=id)
    url_sources = list(Source.objects.filter(urlssourcesearch__id_url=url_item).distinct())
    url_searches = list(Search.objects.filter(urlssourcesearch__id_url=url_item).distinct())
    # url_source_search = UrlsSourceSearch.objects.filter(id_url=url_item)

    try:
        url_content = UrlContent.objects.get(pk=id)
    except UrlContent.DoesNotExist:
        url_content = {}  # No content row yet for this URL

    # TODO: https://github.com/ollama/ollama-python?tab=readme-ov-file#async-client
    ollama_client = OllamaClient()  # Named so it does not shadow the ollama module

    context = {
        'url_item': url_item,
        'sources': url_sources,
        'searches': url_searches,
        'models': ollama_client.get_models(),
        'prompt': ollama_client.get_prompt(),
        'url_content': url_content,
    }
    return render(request, 'url_detail.html', context)

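# Sketch of the async variant referenced in the TODO above (assumption: this
# would replace the blocking client when handling concurrent summaries):
#
#   import asyncio
#   from ollama import AsyncClient
#
#   async def summarize(model, prompt):
#       client = AsyncClient(host=os.getenv("ENDPOINT_OLLAMA", "https://ollamamodel.matitos.org"))
#       response = await client.chat(model=model, messages=[{"role": "user", "content": prompt}])
#       return response["message"]["content"]
#
#   # summary = asyncio.run(summarize("gemma3:1b", "..."))
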
# TODO: move to ollamajs...
def fetch_details(request, id):
    url_item = get_object_or_404(Urls, id=id)  # 404 if the URL does not exist
    url_param = request.GET.get("url", "")  # Get URL (currently unused)
    model = request.GET.get("model", "")  # Get LLM model
    text = request.GET.get("text", "")  # Get LLM prompt

    # LLM
    ollama_client = OllamaClient()

    def stream_response():
        msg_content = {
            "role": "user",
            "content": text,
        }
        response = ollama_client.client.chat(model=model, messages=[msg_content], stream=True)
        for chunk in response:
            yield chunk["message"]["content"]  # Stream each chunk of text

    return StreamingHttpResponse(stream_response(), content_type="text/plain")
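
# Consuming the streamed response (illustrative; assumes a route such as
# /fetch_details/<id> and a model available on the Ollama server):
#
#   import requests
#
#   with requests.get(
#       "http://localhost:8000/fetch_details/1",
#       params={"model": "gemma3:1b", "text": "Summarize: ..."},
#       stream=True,
#   ) as r:
#       for chunk in r.iter_content(chunk_size=None, decode_unicode=True):
#           print(chunk, end="", flush=True)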