Add a task to process all missing-kids URLs, clean up the URL views, and begin adding a language filter (WIP)
This commit is contained in:
@@ -1,108 +1,53 @@
|
||||
# import django_rq
|
||||
from .tasks import background_task
|
||||
from django.http import JsonResponse
|
||||
from django.core.paginator import Paginator
|
||||
from django.shortcuts import render, get_object_or_404
|
||||
from django.http import StreamingHttpResponse, JsonResponse, HttpResponse
|
||||
import ollama
|
||||
from .models import Urls, Source, Search, UrlContent, UrlsSourceSearch
|
||||
import os
|
||||
|
||||
####################################################################################################
|
||||
def trigger_task(request, task):
    """Enqueue the named background task and confirm via JSON.

    The task name is handed straight to the worker queue; no validation
    is performed here.
    """
    # Celery-style enqueue on the default queue.
    background_task.delay(task)

    # Previously enqueued through django_rq:
    #   queue = django_rq.get_queue('default')
    #   job = queue.enqueue(background_task, task, job_timeout="30m")
    #   return JsonResponse({"message": "Task has been enqueued!", "job_id": job.id})
    return JsonResponse({"message": "Task has been enqueued!", "task": task})
|
||||
|
||||
####################################################################################################
|
||||
def link_list(request):
    """Return a JSON index of useful application links.

    Covers the DB admin UI, the Django admin, log/URL/chart views, and
    the task-trigger endpoints under ``prefix``.
    """
    # FIX: the original assigned prefix/links twice, with the first pair
    # immediately overwritten (dead code); only the effective values remain.
    prefix = "http://localhost:8000/task"
    links = [
        "fetch_feeds",
        "fetch_parser",
        "fetch_search",
        "process_raw_urls_50",
        "process_error_urls_50",
        "process_missing_kids_urls_50",
        "process_missing_kids_urls_all",
    ]

    list_links = [
        # DB
        "http://localhost:8080/?pgsql=matitos_db&username=supermatitos&db=matitos&ns=public&select=urls&order%5B0%5D=id&limit=500",
        # Admin panel
        "http://localhost:8000/admin",
        # URLs
        "http://localhost:8000/api/url",
        # Charts
        "http://localhost:8000/api/charts",
        # Logs
        "http://localhost:8000/api/logs_debug",
        "http://localhost:8000/api/logs_info",
        "http://localhost:8000/api/logs_error",
        "http://localhost:8000/logs_debug",
        "http://localhost:8000/logs_info",
        "http://localhost:8000/logs_error",
        # URLs
        "http://localhost:8000/urls",
        # Charts
        "http://localhost:8000/charts",
        # API tasks.
        # FIX: join URLs with "/" instead of os.path.join, which is a
        # filesystem-path API (and would emit backslashes on Windows).
    ] + [f"{prefix}/{l}" for l in links]

    return JsonResponse({"links": list_links})
|
||||
|
||||
|
||||
from django.http import StreamingHttpResponse, JsonResponse
|
||||
from django.shortcuts import render, get_object_or_404
|
||||
from django.core.paginator import Paginator
|
||||
import ollama
|
||||
|
||||
from .models import Urls, Source, Search, UrlContent, UrlsSourceSearch
|
||||
|
||||
# Create your views here.
|
||||
def urls(request):
    """List URLs with optional status/source/search filters and pagination.

    Query parameters (all optional):
      page     -- page number (default 1)
      items    -- URLs per page (default 15)
      sources  -- comma-separated source ids, "all", or "none"
      searches -- comma-separated search ids, "all", or "none"
      status   -- comma-separated statuses, "all", or "none"

    Returns the full page, or a JSON-wrapped rendered partial for AJAX
    (X-Requested-With) requests.
    """
    urls = Urls.objects.all()
    sources = Source.objects.all()
    searches = Search.objects.all()

    # Parameters
    page_number = request.GET.get("page", 1)
    # FIX: coerce to int — request.GET returns strings, and Paginator
    # arithmetic (count / per_page) breaks on a str per_page.
    num_items = int(request.GET.get("items", 15))
    source_ids = request.GET.get("sources", ','.join(str(s.id) for s in sources))
    search_ids = request.GET.get("searches", ','.join(str(s.id) for s in searches))
    status_filters = request.GET.get("status", None)

    # Filters: absent/"all" means unrestricted, "none" selects nothing.
    # FIX: use an empty queryset rather than [] so later chained
    # .filter() calls don't raise AttributeError on a plain list.
    if status_filters and status_filters != "all":
        if status_filters == "none":
            urls = Urls.objects.none()
        else:
            urls = urls.filter(status__in=status_filters.split(","))
    if source_ids and source_ids != "all":
        if source_ids == "none":
            urls = Urls.objects.none()
        else:
            urls = urls.filter(urlssourcesearch__id_source__in=source_ids.split(","))  # .distinct()
    if search_ids and search_ids != "all":
        if search_ids == "none":
            urls = Urls.objects.none()
        else:
            urls = urls.filter(urlssourcesearch__id_search__in=search_ids.split(","))  # .distinct()

    # Pagination
    paginator = Paginator(urls, num_items)
    page_obj = paginator.get_page(page_number)

    # Map URL ids to their sources & searches, only for the current page.
    sources_map = {
        url.id: list(Source.objects.filter(urlssourcesearch__id_url=url).distinct())
        for url in page_obj.object_list
    }
    searches_map = {
        url.id: list(Search.objects.filter(urlssourcesearch__id_url=url).distinct())
        for url in page_obj.object_list
    }

    context = {
        "page_obj": page_obj,
        "sources": sources,
        "searches": searches,
        "sources_map": sources_map,
        "searches_map": searches_map,
        "list_status": Urls.STATUS_ENUM.values,
        "list_urls_per_page": [15, 100, 500],
    }

    # If request is AJAX, return the rendered partial wrapped in JSON.
    if request.headers.get("X-Requested-With") == "XMLHttpRequest":
        return JsonResponse({'urls': render(request, 'urls_partial.html', context).content.decode('utf-8')})

    return render(request, "urls.html", context)
|
||||
####################################################################################################
|
||||
def _serve_log(env_var, default_path):
    """Read the log file named by *env_var* (falling back to *default_path*)
    and return its contents as a plain-text response."""
    with open(os.getenv(env_var, default_path), "r") as f:
        return HttpResponse(f.read(), content_type="text/plain")


# FIX: the three log views were copy-pasted; the shared logic now lives
# in _serve_log, with each view supplying only its env var and fallback.
def logs_error(request):
    """Serve the error-level application log."""
    return _serve_log("PATH_LOGS_ERROR", "logs/log_app_fetcher_error.log")


def logs_info(request):
    """Serve the info-level application log."""
    return _serve_log("PATH_LOGS_INFO", "logs/log_app_fetcher_info.log")


def logs_debug(request):
    """Serve the debug-level application log."""
    return _serve_log("PATH_LOGS_DEBUG", "logs/log_app_fetcher_debug.log")
|
||||
|
||||
####################################################################################################
|
||||
class OllamaClient():
|
||||
@@ -128,31 +73,6 @@ class OllamaClient():
|
||||
# return "Imagine you are a journalist, TLDR in a paragraph. Only answer with the summary:"
|
||||
#return "Below you will find the whole content of a news article:\n{}\nProvide a concise summary of one paragraph maximum of the content.".format(content)
|
||||
|
||||
|
||||
def url_detail_view(request, id):
    """Detail page for a single URL: its sources, searches, stored content,
    and the available Ollama models and summary prompt."""
    url_item = get_object_or_404(Urls, id=id)

    # Related rows, deduplicated through the join table.
    url_sources = list(Source.objects.filter(urlssourcesearch__id_url=url_item).distinct())
    url_searches = list(Search.objects.filter(urlssourcesearch__id_url=url_item).distinct())
    # url_source_search = UrlsSourceSearch.objects.filter(id_url=url_item)

    # Stored page content may not exist yet; fall back to an empty mapping.
    url_content = UrlContent.objects.filter(pk=id).first() or {}

    # TODO: https://github.com/ollama/ollama-python?tab=readme-ov-file#async-client
    ollama = OllamaClient()

    return render(request, 'url_detail.html', {
        'url_item': url_item,
        'sources': url_sources,
        'searches': url_searches,
        'models': ollama.get_models(),
        'prompt': ollama.get_prompt(),
        'url_content': url_content,
    })
|
||||
|
||||
# TODO: move to ollamajs...
|
||||
def fetch_details(request, id):
|
||||
url_item = get_object_or_404(Urls, id=id)
|
||||
@@ -178,6 +98,30 @@ def fetch_details(request, id):
|
||||
return StreamingHttpResponse(stream_response(), content_type="text/plain")
|
||||
|
||||
|
||||
def url_detail_view(request, id):
    """Render the detail template for one URL (404 if the id is unknown),
    including related sources/searches, content, and LLM options."""
    url_item = get_object_or_404(Urls, id=id)
    sources_for_url = Source.objects.filter(urlssourcesearch__id_url=url_item).distinct()
    searches_for_url = Search.objects.filter(urlssourcesearch__id_url=url_item).distinct()
    # url_source_search = UrlsSourceSearch.objects.filter(id_url=url_item)

    try:
        url_content = UrlContent.objects.get(pk=id)
    except UrlContent.DoesNotExist:
        # No fetched content for this URL yet.
        url_content = {}

    # TODO: https://github.com/ollama/ollama-python?tab=readme-ov-file#async-client
    llm = OllamaClient()

    context = {
        'url_item': url_item,
        'sources': list(sources_for_url),
        'searches': list(searches_for_url),
        'models': llm.get_models(),
        'prompt': llm.get_prompt(),
        'url_content': url_content,
    }
    return render(request, 'url_detail.html', context)
|
||||
|
||||
####################################################################################################
|
||||
from django.shortcuts import render
|
||||
from django.http import JsonResponse
|
||||
@@ -256,23 +200,7 @@ def urls_per_search(request):
|
||||
|
||||
return JsonResponse(data)
|
||||
|
||||
####################################################################################################
|
||||
from django.http import HttpResponse
|
||||
|
||||
def logs_error(request):
    """Return the error log file as a plain-text response."""
    path = os.getenv("PATH_LOGS_ERROR", "logs/log_app_fetcher_error.log")
    with open(path, "r") as f:
        content = f.read()
    return HttpResponse(content, content_type="text/plain")


def logs_info(request):
    """Return the info log file as a plain-text response."""
    path = os.getenv("PATH_LOGS_INFO", "logs/log_app_fetcher_info.log")
    with open(path, "r") as f:
        content = f.read()
    return HttpResponse(content, content_type="text/plain")


def logs_debug(request):
    """Return the debug log file as a plain-text response."""
    path = os.getenv("PATH_LOGS_DEBUG", "logs/log_app_fetcher_debug.log")
    with open(path, "r") as f:
        content = f.read()
    return HttpResponse(content, content_type="text/plain")
|
||||
|
||||
####################################################################################################
|
||||
from django.shortcuts import render
|
||||
@@ -284,33 +212,39 @@ def filtered_urls(request):
|
||||
statuses = Urls.STATUS_ENUM.choices
|
||||
searches = Search.objects.all()
|
||||
sources = Source.objects.all()
|
||||
# TODO: Cache languages, update once every N
|
||||
languages = UrlContent.objects.distinct('language').values_list('language', flat=True)
|
||||
# languages = [l for l in languages if l is not None]
|
||||
|
||||
# Get selected parameters
|
||||
selected_status = request.GET.getlist('status')
|
||||
selected_search = request.GET.getlist('search')
|
||||
selected_source = request.GET.getlist('source')
|
||||
selected_language = request.GET.getlist('language')
|
||||
selected_days = request.GET.get("days", 30)
|
||||
per_page = request.GET.get('per_page', 100) # Default is X URLs per page
|
||||
page_number = request.GET.get('page') # Get the current page number
|
||||
|
||||
# charts = request.GET.get('charts', False)
|
||||
|
||||
# "Home" -> No parameters -> Override filter with default values
|
||||
if ( len(request.GET.keys()) == 0 ):
|
||||
# Override with default filters? [Case: no params update on URL] -> Only on "Home" click, or "Next page"
|
||||
if (len(request.GET.keys()) == 0) or ((len(request.GET.keys()) == 1) and ("page" in request.GET.keys())):
|
||||
selected_status = [str(status[0]) for status in statuses]
|
||||
selected_search = [str(search.id) for search in searches]
|
||||
selected_source = [str(source.id) for source in sources]
|
||||
selected_language = languages
|
||||
|
||||
# Filter URLs based on selected filters
|
||||
if ('' in selected_status) or ('' in selected_search) or ('' in selected_source):
|
||||
urls = []
|
||||
else:
|
||||
urls = Urls.objects.filter(
|
||||
Q(urlssourcesearch__id_source__in=selected_source) &
|
||||
Q(urlssourcesearch__id_search__in=selected_search) &
|
||||
Q(status__in=selected_status) &
|
||||
query = Q(urlssourcesearch__id_source__in=selected_source) & \
|
||||
Q(urlssourcesearch__id_search__in=selected_search) & \
|
||||
Q(status__in=selected_status) & \
|
||||
Q(ts_fetch__gte=now() - timedelta(days=float(selected_days)))
|
||||
).distinct() # .order_by('-ts_fetch')
|
||||
|
||||
if selected_language:
|
||||
query &= Q(urlcontent__language__in=selected_language)
|
||||
|
||||
urls = Urls.objects.filter(query).distinct() # .order_by('-ts_fetch')
|
||||
|
||||
# Custom replace search type
|
||||
for s in searches:
|
||||
@@ -327,22 +261,31 @@ def filtered_urls(request):
|
||||
searches_map = {
|
||||
url.id: list(Search.objects.filter(urlssourcesearch__id_url=url).distinct()) for url in page_obj.object_list
|
||||
}
|
||||
url_content_map = {
|
||||
url.id: UrlContent.objects.filter(pk=url).first() for url in page_obj.object_list
|
||||
}
|
||||
|
||||
context = {
|
||||
'urls': page_obj, # Pass the paginated URLs
|
||||
'per_page': per_page, # Send per_page value for dynamic pagination
|
||||
'statuses': statuses,
|
||||
'searches': searches,
|
||||
'sources': sources,
|
||||
'searches': sorted(searches, key=lambda x: (x.type, x.search)),
|
||||
'sources': sorted(sources, key=lambda x: x.source),
|
||||
'languages': sorted(languages, key=lambda x: (x is None, x)),
|
||||
# Selection
|
||||
'selected_status': selected_status,
|
||||
'selected_search': selected_search,
|
||||
'selected_source': selected_source,
|
||||
'selected_language': selected_language,
|
||||
"selected_days": selected_days,
|
||||
# Map
|
||||
"sources_map": sources_map,
|
||||
"searches_map": searches_map,
|
||||
"url_content_map": url_content_map,
|
||||
# "charts": charts,
|
||||
# "list_per_page": [15, 100, 500],
|
||||
# "list_days_text": ([0.25, 1, 7, 30, 365], ["Last 6 hours", "Last 24 hours", "Last 7 days", "Last 30 days", "Last 365 days"])
|
||||
}
|
||||
|
||||
return render(request, 'filtered_urls.html', context)
|
||||
|
||||
####################################################################################################
|
||||
Reference in New Issue
Block a user