General search fix, status pattern match regex, find feeds on startup

2025-04-09 15:52:35 +02:00
parent 296a8fe8a8
commit f369b23d81
22 changed files with 538 additions and 356 deletions
--- a/app_urls/fetcher/views.py
+++ b/app_urls/fetcher/views.py
@@ -1,44 +1,16 @@
-from .tasks import background_task
+from .views_base import link_list, logs, log_db, trigger_task
+
 from django.core.paginator import Paginator
 from django.shortcuts import render, get_object_or_404
-from django.http import StreamingHttpResponse, JsonResponse, HttpResponse
-from django.contrib.auth.decorators import login_required
-import ollama
+from django.http import StreamingHttpResponse, JsonResponse
+from django.db.models import Q, Count
+from django.utils import timezone
+from django.utils.timezone import now, timedelta
 from .models import Urls, Source, Search, UrlContent, UrlsSourceSearch, UrlsDuplicate
+import ollama
 import os
-from .src.logger import get_logger
-logger = get_logger()
+#from datetime import timedelta

-####################################################################################################
-def trigger_task(request, task):
-    # Enqueue function in "default" queue
-    background_task.delay(task)  
-    return JsonResponse({"message": "Task has been enqueued!", "task": task})
-
-####################################################################################################
-def link_list(request):
-    # Base URL path
-    app_url = request.build_absolute_uri()
-    # Tasks
-    links_fetch = ["fetch_feeds", "fetch_parser", "fetch_search", "fetch_missingkids_5", "fetch_missingkids_all"]
-    links_process = ["process_raw_urls_50", "process_error_urls_50", "process_missing_kids_urls_50", "process_missing_kids_urls_all"]
-    # List of links
-    list_links = \
-        [ os.path.join(app_url, "admin"), os.path.join(app_url, "urls") ] + \
-        [ os.path.join(app_url, "logs", log_type) for log_type in ["debug", "info", "warning"] ] + \
-        [ os.path.join(app_url, "task", l) for l in links_fetch + links_process ]
-    # Json
-    return JsonResponse({"links": list_links })
-
-####################################################################################################
-def logs(request, log_type):
-    # Capture output: python manage.py rqstats
-    try:
-        with open( os.path.join( os.getenv("PATH_LOGS_DIRECTORY", "logs"), "{}.log".format(log_type) ), "r") as f:
-            file_content = f.read()
-    except Exception as e:
-        file_content = "Error reading logs for log type :{}".format(log_type)
-    return HttpResponse(file_content, content_type="text/plain")

 ####################################################################################################
 class OllamaClient():
@@ -57,13 +29,6 @@ class OllamaClient():
    
    def get_prompt(self):
        return "Rewrite the text below into a clear and concise summary, presenting the key points as if they are newly written insights. Do not mention or reference the original text, its source, or any phrases like 'According to' or 'The text states'. Instead, write in a natural, standalone format that feels like an original explanation. Keep it brief, engaging, informative, in the style of a news article, and no longer than a paragraph:"
-        #return "Provide a summary of the content below, avoid mentioning the source of information, and only answer with the summary. The summary needs to be brief and compact, consisting of one paragraph."
-        #return "Explain in a single and compact paragraph the what, why, when, where, who, and how of the content below. Also provide a single paragraph summary of the content:"
-        #return "Provide in one paragraph the what, why, when, where, who, and how of the content below. Also provide a one paragraph summary of the content:"
-        #return "Provide two summaries of the content below, and avoid mentioning the source of information. First, provide a very brief and compact paragraph summary. Second, provide a larger and more detailed summary, which describe the what, why, when, where, who, and how of the content:"
-        # return "Imagine you are a journalist, TLDR in a paragraph. Only answer with the summary:"
-        #return "Below you will find the whole content of a news article:\n{}\nProvide a concise summary of one paragraph maximum of the content.".format(content)
-

 def fetch_details(request, id):
    url_item = get_object_or_404(Urls, id=id)
@@ -83,7 +48,6 @@ def fetch_details(request, id):
    
    return StreamingHttpResponse(stream_response(), content_type="text/plain")

-
 def url_detail_view(request, id):
    url_item = get_object_or_404(Urls, id=id)
    url_sources = list(Source.objects.filter(urlssourcesearch__id_url=url_item).distinct())
@@ -114,13 +78,6 @@ def url_detail_view(request, id):
    return render(request, 'url_detail.html', context)

 ####################################################################################################
-from django.shortcuts import render
-from django.http import JsonResponse
-from django.db.models import Count
-from datetime import timedelta
-from django.utils import timezone
-from .models import Urls, UrlsSourceSearch
-
 def charts(request):
    return render(request, 'charts.html')

@@ -202,14 +159,7 @@ def urls_per_search(request):
    
    return JsonResponse(data)

-
-
 ####################################################################################################
-from django.shortcuts import render
-from .models import Urls, Search, Source
-from django.db.models import Q
-from django.utils.timezone import now, timedelta
-

 def filtered_urls(request):
    statuses = Urls.STATUS_ENUM.choices
@@ -342,4 +292,16 @@ def filtered_urls(request):
    }

    return render(request, 'filtered_urls.html', context)
+####################################################################################################
+
+def content_generation(request):
+    # Reference query (presumably the URL listing this view will draw from; confirm): https://fetcher.matitos.org/urls/?per_page=100&days=1&valid_content=True&min_sources=1&search=13&status=all&language=all&source=all
+    '''Placeholder: nothing is implemented yet, so this function returns None.
+
+    Draft kept for reference (not executed):
+        selected_urls = request.GET.getlist('urls', [])       # list of URL ids
+        selected_urls = [13460, 13455, 13454, 13452, 13210]  # sample URLs
+    TODO: return an HttpResponse before routing this; Django rejects views that return None.
+    '''
+
 ####################################################################################################