General search fix, status pattern match regex, find feeds on startup

This commit is contained in:
Luciano Gervasoni
2025-04-09 15:52:35 +02:00
parent 296a8fe8a8
commit f369b23d81
22 changed files with 538 additions and 356 deletions

View File

@@ -1,44 +1,16 @@
from .tasks import background_task
from .views_base import link_list, logs, log_db, trigger_task
from django.core.paginator import Paginator
from django.shortcuts import render, get_object_or_404
from django.http import StreamingHttpResponse, JsonResponse, HttpResponse
from django.contrib.auth.decorators import login_required
import ollama
from django.http import StreamingHttpResponse, JsonResponse
from django.db.models import Q, Count
from django.utils import timezone
from django.utils.timezone import now, timedelta
from .models import Urls, Source, Search, UrlContent, UrlsSourceSearch, UrlsDuplicate
import ollama
import os
from .src.logger import get_logger
logger = get_logger()
#from datetime import timedelta
####################################################################################################
def trigger_task(request, task):
    """Enqueue ``task`` as a background job and acknowledge it as JSON.

    Args:
        request: Incoming Django request (not inspected).
        task: Task name forwarded unchanged to ``background_task.delay``.

    Returns:
        JsonResponse containing a confirmation message and the task name.
    """
    # Hand the task over to the worker ("default" queue)
    background_task.delay(task)
    payload = {"message": "Task has been enqueued!", "task": task}
    return JsonResponse(payload)
####################################################################################################
def link_list(request):
    """Return a JSON index of the admin, URL, log and task endpoints.

    Every link is built by appending a relative path to the absolute URI
    of the current request.

    Args:
        request: Incoming Django request; only ``build_absolute_uri()`` is used.

    Returns:
        JsonResponse of the form ``{"links": [<url>, ...]}``.
    """
    # URLs always use "/" as separator; os.path.join would emit the OS
    # separator instead (a backslash on Windows), producing broken links.
    import posixpath

    # Base URL path
    app_url = request.build_absolute_uri()
    # Tasks
    links_fetch = ["fetch_feeds", "fetch_parser", "fetch_search", "fetch_missingkids_5", "fetch_missingkids_all"]
    links_process = ["process_raw_urls_50", "process_error_urls_50", "process_missing_kids_urls_50", "process_missing_kids_urls_all"]
    log_types = ["debug", "info", "warning"]
    # List of links
    list_links = [posixpath.join(app_url, "admin"), posixpath.join(app_url, "urls")]
    list_links += [posixpath.join(app_url, "logs", log_type) for log_type in log_types]
    list_links += [posixpath.join(app_url, "task", task_name) for task_name in links_fetch + links_process]
    # Json
    return JsonResponse({"links": list_links})
####################################################################################################
def logs(request, log_type):
    """Return the contents of ``<log_type>.log`` as a plain-text response.

    The file is looked up in the directory named by the
    ``PATH_LOGS_DIRECTORY`` environment variable, falling back to ``logs``.

    Args:
        request: Incoming Django request (not inspected).
        log_type: Base name of the log file, without the ``.log`` suffix.

    Returns:
        HttpResponse with content type ``text/plain`` holding the file
        contents, or a short error message when the file cannot be read.
    """
    log_path = os.path.join(os.getenv("PATH_LOGS_DIRECTORY", "logs"), "{}.log".format(log_type))
    try:
        with open(log_path, "r") as f:
            file_content = f.read()
    except (OSError, ValueError):
        # OSError: file missing or unreadable.
        # ValueError: undecodable bytes (UnicodeDecodeError) or an invalid path.
        file_content = "Error reading logs for log type :{}".format(log_type)
    return HttpResponse(file_content, content_type="text/plain")
####################################################################################################
class OllamaClient():
@@ -57,13 +29,6 @@ class OllamaClient():
def get_prompt(self):
    """Return the instruction text used to request a one-paragraph summary."""
    # Adjacent literals are concatenated at compile time into one string.
    prompt = (
        "Rewrite the text below into a clear and concise summary, presenting the key points as if they are newly written insights. "
        "Do not mention or reference the original text, its source, or any phrases like 'According to' or 'The text states'. "
        "Instead, write in a natural, standalone format that feels like an original explanation. "
        "Keep it brief, engaging, informative, in the style of a news article, and no longer than a paragraph:"
    )
    return prompt
    # Other prompt drafts kept for reference (unused):
    #return "Provide a summary of the content below, avoid mentioning the source of information, and only answer with the summary. The summary needs to be brief and compact, consisting of one paragraph."
    #return "Explain in a single and compact paragraph the what, why, when, where, who, and how of the content below. Also provide a single paragraph summary of the content:"
    #return "Provide in one paragraph the what, why, when, where, who, and how of the content below. Also provide a one paragraph summary of the content:"
    #return "Provide two summaries of the content below, and avoid mentioning the source of information. First, provide a very brief and compact paragraph summary. Second, provide a larger and more detailed summary, which describe the what, why, when, where, who, and how of the content:"
    # return "Imagine you are a journalist, TLDR in a paragraph. Only answer with the summary:"
    #return "Below you will find the whole content of a news article:\n{}\nProvide a concise summary of one paragraph maximum of the content.".format(content)
def fetch_details(request, id):
url_item = get_object_or_404(Urls, id=id)
@@ -83,7 +48,6 @@ def fetch_details(request, id):
return StreamingHttpResponse(stream_response(), content_type="text/plain")
def url_detail_view(request, id):
url_item = get_object_or_404(Urls, id=id)
url_sources = list(Source.objects.filter(urlssourcesearch__id_url=url_item).distinct())
@@ -114,13 +78,6 @@ def url_detail_view(request, id):
return render(request, 'url_detail.html', context)
####################################################################################################
from django.shortcuts import render
from django.http import JsonResponse
from django.db.models import Count
from datetime import timedelta
from django.utils import timezone
from .models import Urls, UrlsSourceSearch
def charts(request):
    """Render the ``charts.html`` template for the given request."""
    template_name = "charts.html"
    return render(request, template_name)
@@ -202,14 +159,7 @@ def urls_per_search(request):
return JsonResponse(data)
####################################################################################################
from django.shortcuts import render
from .models import Urls, Search, Source
from django.db.models import Q
from django.utils.timezone import now, timedelta
def filtered_urls(request):
statuses = Urls.STATUS_ENUM.choices
@@ -342,4 +292,16 @@ def filtered_urls(request):
}
return render(request, 'filtered_urls.html', context)
####################################################################################################
def content_generation(request):
    # Reference URL kept from the original draft (its role is not stated here):
    # https://fetcher.matitos.org/urls/?per_page=100&days=1&valid_content=True&min_sources=1&search=13&status=all&language=all&source=all
    '''
    Placeholder view for content generation; no logic is implemented yet.

    The body consists of this docstring only, so the function returns None.
    NOTE(review): Django expects a view to return an HttpResponse - confirm
    this view is not routed until it is implemented.

    Draft of the intended input handling:

    # Get list of URLs ID
    selected_urls = request.GET.getlist('urls', [])
    # Sample URLs
    selected_urls = [13460, 13455, 13454, 13452, 13210]
    '''
####################################################################################################