Add process-all task for missing-kids URLs, clean up urls views, add language filter (WIP)

Luciano Gervasoni
2025-03-28 16:21:35 +01:00
parent e34284abbe
commit b3f896b35a
11 changed files with 284 additions and 196 deletions


@@ -1,108 +1,53 @@
# import django_rq
from .tasks import background_task
from django.http import JsonResponse
from django.core.paginator import Paginator
from django.shortcuts import render, get_object_or_404
from django.http import StreamingHttpResponse, JsonResponse, HttpResponse
import ollama
from .models import Urls, Source, Search, UrlContent, UrlsSourceSearch
import os
####################################################################################################
def trigger_task(request, task):
    # View that enqueues a task in the "default" queue
    background_task.delay(task)
    return JsonResponse({"message": "Task has been enqueued!", "task": task})
    # django_rq alternative:
    # queue = django_rq.get_queue('default')  # Get the default queue
    # job = queue.enqueue(background_task, task, job_timeout="30m")
    # return JsonResponse({"message": "Task has been enqueued!", "job_id": job.id})
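The `from .tasks import background_task` import together with the `.delay(...)` call suggests Celery (the django_rq path is kept commented out). A minimal sketch of a matching tasks.py, assuming Celery; the real implementation is not shown in this diff:

# tasks.py -- minimal sketch, assuming a Celery shared task (not part of this diff)
from celery import shared_task

@shared_task
def background_task(task):
    # Dispatch on the task name passed from trigger_task,
    # e.g. "fetch_feeds" or "process_raw_urls_50".
    print(f"Running task: {task}")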
####################################################################################################
def link_list(request):
    prefix = "http://localhost:8000/api/task"
    links = ["fetch_feeds", "fetch_parser", "fetch_search", "process_raw_urls_50", "process_error_urls_50", "process_missing_kids_urls_50", "process_missing_kids_urls_500000"]
    prefix = "http://localhost:8000/task"
    links = ["fetch_feeds", "fetch_parser", "fetch_search", "process_raw_urls_50", "process_error_urls_50", "process_missing_kids_urls_50", "process_missing_kids_urls_all"]
    list_links = [
        # DB
        "http://localhost:8080/?pgsql=matitos_db&username=supermatitos&db=matitos&ns=public&select=urls&order%5B0%5D=id&limit=500",
        # Admin panel
        "http://localhost:8000/admin",
        # URLs
        "http://localhost:8000/api/url",
        # Charts
        "http://localhost:8000/api/charts",
        # Logs
        "http://localhost:8000/api/logs_debug",
        "http://localhost:8000/api/logs_info",
        "http://localhost:8000/api/logs_error",
        "http://localhost:8000/logs_debug",
        "http://localhost:8000/logs_info",
        "http://localhost:8000/logs_error",
        # URLs
        "http://localhost:8000/urls",
        # Charts
        "http://localhost:8000/charts",
        # API tasks
    ] + [os.path.join(prefix, l) for l in links]
    # Json
    return JsonResponse({"links": list_links})
from django.http import StreamingHttpResponse, JsonResponse
from django.shortcuts import render, get_object_or_404
from django.core.paginator import Paginator
import ollama
from .models import Urls, Source, Search, UrlContent, UrlsSourceSearch
# Create your views here.
def urls(request):
    # URLs
    urls = Urls.objects.all()
    # Sources & searches
    sources = Source.objects.all()
    searches = Search.objects.all()
    # Parameters
    page_number = request.GET.get("page", 1)
    num_items = request.GET.get("items", 15)
    source_ids = request.GET.get("sources", ','.join([str(s.id) for s in sources]))
    search_ids = request.GET.get("searches", ','.join([str(s.id) for s in searches]))
    status_filters = request.GET.get("status", None)
    # Filters (use an empty queryset, not a plain list, so later .filter() calls still work)
    if (status_filters) and (status_filters != "all"):
        if (status_filters == "none"):
            urls = Urls.objects.none()
        else:
            urls = urls.filter(status__in=status_filters.split(","))
    if (source_ids) and (source_ids != "all"):
        if (source_ids == "none"):
            urls = Urls.objects.none()
        else:
            urls = urls.filter(urlssourcesearch__id_source__in=source_ids.split(","))  # .distinct()
    if (search_ids) and (search_ids != "all"):
        if (search_ids == "none"):
            urls = Urls.objects.none()
        else:
            urls = urls.filter(urlssourcesearch__id_search__in=search_ids.split(","))  # .distinct()
    # Pagination
    paginator = Paginator(urls, num_items)
    page_obj = paginator.get_page(page_number)
    # Map URL IDs to their sources & searches, only for the current page of URLs
    sources_map = {
        url.id: list(Source.objects.filter(urlssourcesearch__id_url=url).distinct()) for url in page_obj.object_list
    }
    searches_map = {
        url.id: list(Search.objects.filter(urlssourcesearch__id_url=url).distinct()) for url in page_obj.object_list
    }
    context = {
        "page_obj": page_obj,
        "sources": sources,
        "searches": searches,
        "sources_map": sources_map,
        "searches_map": searches_map,
        "list_status": Urls.STATUS_ENUM.values,
        "list_urls_per_page": [15, 100, 500],
    }
    # If the request is AJAX, return the rendered partial as JSON
    if request.headers.get("X-Requested-With") == "XMLHttpRequest":
        return JsonResponse({'urls': render(request, 'urls_partial.html', context).content.decode('utf-8')})
    return render(request, "urls.html", context)
####################################################################################################
def logs_error(request):
    with open(os.getenv("PATH_LOGS_ERROR", "logs/log_app_fetcher_error.log"), "r") as f:
        file_content = f.read()
    return HttpResponse(file_content, content_type="text/plain")

def logs_info(request):
    with open(os.getenv("PATH_LOGS_INFO", "logs/log_app_fetcher_info.log"), "r") as f:
        file_content = f.read()
    return HttpResponse(file_content, content_type="text/plain")

def logs_debug(request):
    with open(os.getenv("PATH_LOGS_DEBUG", "logs/log_app_fetcher_debug.log"), "r") as f:
        file_content = f.read()
    return HttpResponse(file_content, content_type="text/plain")
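These views read the whole log file into memory; for large logs, returning only the tail keeps responses bounded. A minimal sketch (the tail_log helper is an assumption, not in the diff):

from collections import deque

def tail_log(path, n=500):
    # Scan the file once, keeping only the last n lines in memory.
    with open(path, "r") as f:
        return "".join(deque(f, maxlen=n))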
####################################################################################################
class OllamaClient():
@@ -128,31 +73,6 @@ class OllamaClient():
# return "Imagine you are a journalist, TLDR in a paragraph. Only answer with the summary:"
#return "Below you will find the whole content of a news article:\n{}\nProvide a concise summary of one paragraph maximum of the content.".format(content)
def url_detail_view(request, id):
    url_item = get_object_or_404(Urls, id=id)
    url_sources = list(Source.objects.filter(urlssourcesearch__id_url=url_item).distinct())
    url_searches = list(Search.objects.filter(urlssourcesearch__id_url=url_item).distinct())
    # url_source_search = UrlsSourceSearch.objects.filter(id_url=url_item)
    try:
        url_content = UrlContent.objects.get(pk=id)
    except UrlContent.DoesNotExist:
        url_content = {}
    # TODO: https://github.com/ollama/ollama-python?tab=readme-ov-file#async-client
    ollama = OllamaClient()
    context = {
        'url_item': url_item,
        'sources': url_sources,
        'searches': url_searches,
        'models': ollama.get_models(),
        'prompt': ollama.get_prompt(),
        'url_content': url_content,
    }
    return render(request, 'url_detail.html', context)
# TODO: move to ollamajs...
def fetch_details(request, id):
    url_item = get_object_or_404(Urls, id=id)
@@ -178,6 +98,30 @@ def fetch_details(request, id):
    return StreamingHttpResponse(stream_response(), content_type="text/plain")
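The stream_response generator itself is elided by the diff hunk above. A minimal sketch of what such a streaming helper can look like with the ollama Python client (the model name and prompt are illustrative assumptions, not taken from the diff):

import ollama

def stream_response():
    # Stream the model's answer chunk by chunk to the HTTP response.
    stream = ollama.chat(
        model="llama3",  # assumption: any locally pulled model
        messages=[{"role": "user", "content": "Summarize the article in one paragraph."}],
        stream=True,
    )
    for chunk in stream:
        yield chunk["message"]["content"]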
def url_detail_view(request, id):
    url_item = get_object_or_404(Urls, id=id)
    url_sources = list(Source.objects.filter(urlssourcesearch__id_url=url_item).distinct())
    url_searches = list(Search.objects.filter(urlssourcesearch__id_url=url_item).distinct())
    # url_source_search = UrlsSourceSearch.objects.filter(id_url=url_item)
    try:
        url_content = UrlContent.objects.get(pk=id)
    except UrlContent.DoesNotExist:
        url_content = {}
    # TODO: https://github.com/ollama/ollama-python?tab=readme-ov-file#async-client
    ollama = OllamaClient()
    context = {
        'url_item': url_item,
        'sources': url_sources,
        'searches': url_searches,
        'models': ollama.get_models(),
        'prompt': ollama.get_prompt(),
        'url_content': url_content,
    }
    return render(request, 'url_detail.html', context)
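The OllamaClient body is collapsed by the diff; url_detail_view only relies on get_models() and get_prompt(). A minimal sketch of a client satisfying that interface, assuming the ollama Python package (both method bodies are illustrative assumptions, not the elided code):

import ollama

class OllamaClient():
    def get_models(self):
        # List locally available model names via the ollama API
        # (the key name may vary across ollama-python versions).
        return [m["model"] for m in ollama.list()["models"]]

    def get_prompt(self):
        # Default summarization prompt; an assumption, the real prompt is elided above.
        return "Provide a concise one-paragraph summary of the article below:"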
####################################################################################################
from django.shortcuts import render
from django.http import JsonResponse
@@ -256,23 +200,7 @@ def urls_per_search(request):
    return JsonResponse(data)
####################################################################################################
from django.http import HttpResponse

def logs_error(request):
    with open(os.getenv("PATH_LOGS_ERROR", "logs/log_app_fetcher_error.log"), "r") as f:
        file_content = f.read()
    return HttpResponse(file_content, content_type="text/plain")

def logs_info(request):
    with open(os.getenv("PATH_LOGS_INFO", "logs/log_app_fetcher_info.log"), "r") as f:
        file_content = f.read()
    return HttpResponse(file_content, content_type="text/plain")

def logs_debug(request):
    with open(os.getenv("PATH_LOGS_DEBUG", "logs/log_app_fetcher_debug.log"), "r") as f:
        file_content = f.read()
    return HttpResponse(file_content, content_type="text/plain")
####################################################################################################
from django.shortcuts import render
@@ -284,33 +212,39 @@ def filtered_urls(request):
    statuses = Urls.STATUS_ENUM.choices
    searches = Search.objects.all()
    sources = Source.objects.all()
    # TODO: Cache languages, update once every N
    # Note: .distinct('language') (DISTINCT ON) is PostgreSQL-only
    languages = UrlContent.objects.distinct('language').values_list('language', flat=True)
    # languages = [l for l in languages if l is not None]
    # Get selected parameters
    selected_status = request.GET.getlist('status')
    selected_search = request.GET.getlist('search')
    selected_source = request.GET.getlist('source')
    selected_language = request.GET.getlist('language')
    selected_days = request.GET.get("days", 30)
    per_page = request.GET.get('per_page', 100)  # Default: 100 URLs per page
    page_number = request.GET.get('page')  # Current page number
    # charts = request.GET.get('charts', False)
    # "Home" -> No parameters -> Override filter with default values
    if ( len(request.GET.keys()) == 0 ):
    # Override with default filters? [Case: no params update on URL] -> Only on "Home" click, or "Next page"
    if (len(request.GET.keys()) == 0) or ((len(request.GET.keys()) == 1) and ("page" in request.GET.keys())):
        selected_status = [str(status[0]) for status in statuses]
        selected_search = [str(search.id) for search in searches]
        selected_source = [str(source.id) for source in sources]
        selected_language = languages
    # Filter URLs based on selected filters
    if ('' in selected_status) or ('' in selected_search) or ('' in selected_source):
        urls = []
    else:
        urls = Urls.objects.filter(
            Q(urlssourcesearch__id_source__in=selected_source) &
            Q(urlssourcesearch__id_search__in=selected_search) &
            Q(status__in=selected_status) &
        query = Q(urlssourcesearch__id_source__in=selected_source) & \
            Q(urlssourcesearch__id_search__in=selected_search) & \
            Q(status__in=selected_status) & \
            Q(ts_fetch__gte=now() - timedelta(days=float(selected_days)))
        ).distinct()  # .order_by('-ts_fetch')
        if selected_language:
            query &= Q(urlcontent__language__in=selected_language)
        urls = Urls.objects.filter(query).distinct()  # .order_by('-ts_fetch')
    # Custom replace search type
    for s in searches:
@@ -327,22 +261,31 @@ def filtered_urls(request):
    searches_map = {
        url.id: list(Search.objects.filter(urlssourcesearch__id_url=url).distinct()) for url in page_obj.object_list
    }
    url_content_map = {
        url.id: UrlContent.objects.filter(pk=url).first() for url in page_obj.object_list
    }
    context = {
        'urls': page_obj,  # Paginated URLs
        'per_page': per_page,  # per_page value for dynamic pagination
        'statuses': statuses,
        'searches': searches,
        'sources': sources,
        'searches': sorted(searches, key=lambda x: (x.type, x.search)),
        'sources': sorted(sources, key=lambda x: x.source),
        'languages': sorted(languages, key=lambda x: (x is None, x)),
        # Selection
        'selected_status': selected_status,
        'selected_search': selected_search,
        'selected_source': selected_source,
        'selected_language': selected_language,
        "selected_days": selected_days,
        # Maps
        "sources_map": sources_map,
        "searches_map": searches_map,
        "url_content_map": url_content_map,
        # "charts": charts,
        # "list_per_page": [15, 100, 500],
        # "list_days_text": ([0.25, 1, 7, 30, 365], ["Last 6 hours", "Last 24 hours", "Last 7 days", "Last 30 days", "Last 365 days"])
    }
    return render(request, 'filtered_urls.html', context)
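The TODO at the top of filtered_urls mentions caching the language list. A minimal sketch with Django's cache framework (the cache key and timeout are assumptions):

from django.core.cache import cache

def get_languages():
    # Recompute the distinct-language list at most once every 10 minutes.
    return cache.get_or_set(
        "urlcontent_languages",
        lambda: list(UrlContent.objects.distinct('language').values_list('language', flat=True)),
        timeout=600,
    )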
####################################################################################################