Refactor searches, move fetcher config to env vars, update the URLs webpage

Luciano Gervasoni
2025-04-02 18:45:43 +02:00
parent 077219fcb6
commit 84da104dc8
22 changed files with 676 additions and 1521 deletions


@@ -23,9 +23,9 @@ def link_list(request):
        # Admin panel
        "http://localhost:8000/admin",
        # Logs
        "http://localhost:8000/logs_debug",
        "http://localhost:8000/logs_info",
        "http://localhost:8000/logs_error",
        "http://localhost:8000/logs/debug",
        "http://localhost:8000/logs/info",
        "http://localhost:8000/logs/error",
        # URLs
        "http://localhost:8000/urls",
        # Charts
@@ -36,17 +36,13 @@ def link_list(request):
    return JsonResponse({"links": list_links})
####################################################################################################
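A quick smoke test for the renamed log routes; a minimal sketch assuming a dev server on localhost:8000 and the requests package (neither is part of this diff):

# Smoke-test the renamed /logs/<type> routes (sketch)
import requests

for log_type in ("debug", "info", "error"):
    r = requests.get("http://localhost:8000/logs/{}".format(log_type))
    print(log_type, r.status_code)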
def logs_error(request):
    with open(os.getenv("PATH_LOGS_ERROR", "logs/log_app_fetcher_error.log"), "r") as f:
        file_content = f.read()
    return HttpResponse(file_content, content_type="text/plain")
def logs_info(request):
    with open(os.getenv("PATH_LOGS_INFO", "logs/log_app_fetcher_info.log"), "r") as f:
        file_content = f.read()
    return HttpResponse(file_content, content_type="text/plain")
def logs_debug(request):
    with open(os.getenv("PATH_LOGS_DEBUG", "logs/log_app_fetcher_debug.log"), "r") as f:
        file_content = f.read()
def logs(request, log_type):
    # Capture output: python manage.py rqstats
    try:
        # Per-type env var override: PATH_LOGS_ERROR / PATH_LOGS_INFO / PATH_LOGS_DEBUG
        with open(os.getenv("PATH_LOGS_{}".format(log_type.upper()), "logs/log_app_fetcher_{}.log".format(log_type)), "r") as f:
            file_content = f.read()
    except Exception as e:
        file_content = "Error reading logs for log type: {}".format(log_type)
    return HttpResponse(file_content, content_type="text/plain")
####################################################################################################
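The consolidated view only works if the URLconf passes log_type through; urls.py is not shown in this diff, so here is a minimal sketch of the route it implies (module layout and route name are assumptions):

# urls.py (sketch; app module and route name are hypothetical)
from django.urls import path
from . import views

urlpatterns = [
    path("logs/<str:log_type>", views.logs, name="logs"),
]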
@@ -208,48 +204,77 @@ from .models import Urls, Search, Source
from django.db.models import Q
from django.utils.timezone import now, timedelta
def filtered_urls(request):
    statuses = Urls.STATUS_ENUM.choices
    searches = Search.objects.all()
    sources = Source.objects.all()
    # TODO: Cache languages, update once every N
    languages = UrlContent.objects.distinct('language').values_list('language', flat=True)
    # languages = [l for l in languages if l is not None]
    languages = list(UrlContent.objects.distinct('language').values_list('language', flat=True))
    # Null for visualization
    languages = ["Null"] + [l for l in languages if l is not None]
    # Get selected parameters
    selected_status = request.GET.getlist('status')
    selected_search = request.GET.getlist('search')
    selected_source = request.GET.getlist('source')
    selected_language = request.GET.getlist('language')
    selected_status = request.GET.getlist('status', ["null"])
    selected_search = request.GET.getlist('search', ["null"])
    selected_source = request.GET.getlist('source', ["null"])
    selected_language = request.GET.getlist('language', ["null"])
    selected_days = request.GET.get("days", 30)
    per_page = request.GET.get('per_page', 100) # Default: 100 URLs per page
    page_number = request.GET.get('page') # Get the current page number
    all_status = [str(status[0]) for status in statuses]
    all_search = [str(search.id) for search in searches]
    all_source = [str(source.id) for source in sources]
    all_languages = languages
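The ["null"] defaults only kick in when a key is missing from the query string entirely, which is what makes the sentinel reliable; a small illustration (run in a Django shell):

# QueryDict.getlist falls back to the default only when the key is absent
from django.http import QueryDict

print(QueryDict("").getlist("status", ["null"]))         # ['null'] -> key absent
print(QueryDict("status=0&status=2").getlist("status"))  # ['0', '2'] -> values arrive as strings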
    # Override with default filters? [Case: no params update on URL] -> Only on "Home" click, or "Next page"
    if (len(request.GET.keys()) == 0) or ((len(request.GET.keys()) == 1) and ("page" in request.GET.keys())):
        selected_status = [str(status[0]) for status in statuses]
        selected_search = [str(search.id) for search in searches]
        selected_source = [str(source.id) for source in sources]
        selected_language = languages
        selected_status = ["all"]
        selected_search = ["all"]
        selected_source = ["all"]
        selected_language = ["all"]
    # print(set(selected_status), set(all_status))
    """
    # List of TODO remove...
    if (set(selected_status) == set(all_status)):
        selected_status = ["all"]
    if (set(selected_search) == set(all_search)):
        selected_search = ["all"]
    if (set(selected_source) == set(all_source)):
        selected_source = ["all"]
    if (set(selected_language) == set(languages)):
        selected_language = ["all"]
    """
    # Filter URLs based on selected filters
    if ('' in selected_status) or ('' in selected_search) or ('' in selected_source):
    if ('null' in selected_status) or ('null' in selected_search) or ('null' in selected_source) or ('null' in selected_language):
        urls = []
    else:
        query = Q(urlssourcesearch__id_source__in=selected_source) & \
                Q(urlssourcesearch__id_search__in=selected_search) & \
                Q(status__in=selected_status) & \
                Q(ts_fetch__gte=now() - timedelta(days=float(selected_days)))
        if selected_language:
            query &= Q(urlcontent__language__in=selected_language)
        # Filter by date
        query = Q(ts_fetch__gte=now() - timedelta(days=float(selected_days)))
        # Additional filters
        if ("all" not in selected_status):
            query &= Q(status__in=selected_status)
        if ("all" not in selected_source):
            query &= Q(urlssourcesearch__id_source__in=selected_source)
        if ("all" not in selected_search):
            query &= Q(urlssourcesearch__id_search__in=selected_search)
        if ("all" not in selected_language):
            # URLs with selected languages
            subquery = Q(urlcontent__language__in=selected_language)
            if ("Null" in selected_language):
                # URLs with NULL language
                subquery |= Q(urlcontent__language__isnull=True)
                # URLs with no UrlContent record at all (similar to URLs with NULL language)
                subquery |= Q(urlcontent__id_url__isnull=True)
            # Update query
            query &= (subquery)
        urls = Urls.objects.filter(query).distinct() # .order_by('-ts_fetch')
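Because the query joins through urlssourcesearch and urlcontent, a single URL can match several joined rows; .distinct() collapses those duplicates. A sketch of the composed filter for one concrete selection (the values are hypothetical):

# Composed filter: last 30 days, status 0 or 2, language "en" or "Null" (sketch)
from django.db.models import Q
from django.utils.timezone import now, timedelta

query = Q(ts_fetch__gte=now() - timedelta(days=30.0))
query &= Q(status__in=["0", "2"])
query &= (Q(urlcontent__language__in=["en", "Null"])
          | Q(urlcontent__language__isnull=True)
          | Q(urlcontent__id_url__isnull=True))
urls = Urls.objects.filter(query).distinct()  # joins can duplicate rows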
    # Custom replace search type
    for s in searches:
        s.type = s.type.replace("rss_feed", "rss").replace("url_host", "url").replace("keyword_search", "keyword")
    # Pagination
    paginator = Paginator(urls, per_page) # Paginate the filtered URLs
    page_obj = paginator.get_page(page_number) # Get the current page object
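Paginator.get_page is deliberately forgiving, which is why the raw request.GET.get('page') value can be passed straight through; an illustration (Django shell):

# get_page clamps missing, non-integer, and out-of-range page numbers
from django.core.paginator import Paginator

p = Paginator(list(range(250)), 100)  # 3 pages
print(p.get_page(None).number)   # 1 -> missing parameter
print(p.get_page("abc").number)  # 1 -> not an integer
print(p.get_page(99).number)     # 3 -> clamped to the last page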
@@ -264,6 +289,9 @@ def filtered_urls(request):
    url_content_map = {
        url.id: UrlContent.objects.filter(pk=url).first() for url in page_obj.object_list
    }
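The comprehension above runs one query per row on the page; a single-query alternative, assuming UrlContent's primary key is its id_url foreign key (which the pk=url lookup suggests):

# One query instead of N (sketch; assumes UrlContent.pk == id_url)
url_content_map = UrlContent.objects.in_bulk([url.id for url in page_obj.object_list])
# Returns {url_id: UrlContent}; ids with no UrlContent row are simply absent.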
    # Custom replace search type text
    for s in searches:
        s.type = s.type.replace("rss_feed", "rss").replace("url_host", "url").replace("keyword_search", "keyword")
    context = {
        'urls': page_obj, # Pass the paginated URLs