from .views_base import link_list, logs, log_db, notify_status  # , trigger_task,

from datetime import timedelta

from django.core.paginator import Paginator
from django.shortcuts import render, get_object_or_404
from django.http import StreamingHttpResponse, JsonResponse
from django.db.models import Q, Count
from django.utils import timezone
from django.utils.timezone import now

from .models import Urls, Source, Search, UrlContent, UrlsSourceSearch, UrlsDuplicate
from .src.llm import OllamaClient

import json

####################################################################################################


def llm(request):
    if request.method == 'POST':
        try:
            body_data = json.loads(request.body)
            message = body_data.get('message')
            model = body_data.get('model')

            if message is None:
                return JsonResponse({'error': 'No message found in request'}, status=400)

            return StreamingHttpResponse(OllamaClient().generate_stream(model, message), content_type="text/plain")

        except json.JSONDecodeError:
            return JsonResponse({'error': 'Invalid JSON'}, status=400)

    return JsonResponse({'error': 'Only POST method allowed'}, status=405)
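
# A minimal client-side sketch of consuming this streaming endpoint (illustrative,
# not part of the original module): it assumes the view is routed at /llm, and the
# model name is a placeholder for one returned by OllamaClient().get_models().
#
#   import requests
#   with requests.post("http://localhost:8000/llm",
#                      json={"model": "<model-name>", "message": "Hello"},
#                      stream=True) as resp:
#       for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
#           print(chunk, end="")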


def url_detail_view(request, id):
    url_item = get_object_or_404(Urls, id=id)
    url_sources = list(Source.objects.filter(urlssourcesearch__id_url=url_item).distinct())
    url_searches = list(Search.objects.filter(urlssourcesearch__id_url=url_item).distinct())

    if (url_item.status == Urls.STATUS_ENUM.DUPLICATE):
        url_canonical = UrlsDuplicate.objects.get(id_url_duplicated=url_item).id_url_canonical
    else:
        url_canonical = None

    try:
        url_content = UrlContent.objects.get(pk=id)
    except UrlContent.DoesNotExist:
        url_content = None  # No content record: falsy in the template, empty prompt below

    ollama = OllamaClient()
    # prompt_content = "{}\n{}\n{}".format(url_content.title, url_content.description, url_content.content)
    prompt_content = (url_content.content or "") if url_content else ""

    context = {
        'url_item': url_item,
        'sources': url_sources,
        'searches': url_searches,
        'models': ollama.get_models(),
        'prompt': ollama.get_prompt(prompt_content),
        'url_content': url_content,
        'url_canonical': url_canonical,
    }
    return render(request, 'url_detail.html', context)
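
# The OllamaClient imported from .src.llm is not shown in this file; from its usage
# above and in llm() it is assumed to expose roughly this interface:
#   get_models()                    -> list of available model names (template dropdown)
#   get_prompt(content)             -> prompt string built around the page content
#   generate_stream(model, message) -> iterator of text chunks, fed to StreamingHttpResponse
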
####################################################################################################


def charts(request):
    return render(request, 'charts.html')


def urls_by_fetch_date(request):
    # Get the filtering date parameter
    days = float(request.GET.get('days', 30))  # Default is 30 days
    start_date = timezone.now() - timedelta(days=days)

    # Count the number of URLs grouped by fetch date
    urls_data = Urls.objects.filter(ts_fetch__gte=start_date) \
        .values('ts_fetch__date') \
        .annotate(count=Count('id')) \
        .order_by('ts_fetch__date')

    # Format data to return as JSON
    data = {
        'labels': [item['ts_fetch__date'] for item in urls_data],
        'values': [item['count'] for item in urls_data],
    }

    return JsonResponse(data)
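
# This endpoint and the three siblings below all return the same {labels, values}
# shape consumed by the charts page; an illustrative payload (values invented):
#   {"labels": ["2025-01-01", "2025-01-02"], "values": [12, 7]}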


def urls_per_status(request):
    # Get the filtering date parameter
    days = float(request.GET.get('days', 30))  # Default is 30 days
    start_date = timezone.now() - timedelta(days=days)

    # Count the number of URLs grouped by status within the date range
    urls_data = Urls.objects.filter(ts_fetch__gte=start_date) \
        .values('status') \
        .annotate(count=Count('id')) \
        .order_by('status')

    # Format data for JSON
    data = {
        'labels': [item['status'] for item in urls_data],
        'values': [item['count'] for item in urls_data],
    }

    return JsonResponse(data)


def urls_per_source(request):
    # Get the filtering date parameter
    days = float(request.GET.get('days', 30))  # Default is 30 days
    start_date = timezone.now() - timedelta(days=days)

    # Count the number of URLs grouped by source
    urls_data = UrlsSourceSearch.objects \
        .filter(id_url__ts_fetch__gte=start_date) \
        .values('id_source__source') \
        .annotate(count=Count('id_url')) \
        .order_by('id_source__source')

    # Format data for JSON
    data = {
        'labels': [item['id_source__source'] for item in urls_data],
        'values': [item['count'] for item in urls_data],
    }

    return JsonResponse(data)


def urls_per_search(request):
    # Get the filtering date parameter
    days = float(request.GET.get('days', 30))  # Default is 30 days
    start_date = timezone.now() - timedelta(days=days)

    # Count the number of URLs grouped by search
    urls_data = UrlsSourceSearch.objects \
        .filter(id_url__ts_fetch__gte=start_date) \
        .values('id_search__search') \
        .annotate(count=Count('id_url')) \
        .order_by('id_search__search')

    # Format data for JSON
    data = {
        'labels': [item['id_search__search'] for item in urls_data],
        'values': [item['count'] for item in urls_data],
    }

    return JsonResponse(data)
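
# The four chart endpoints above repeat the same "days" parsing; a small helper
# along these lines (hypothetical, not part of the original module) could factor
# that out:
#
#   def _start_date_from_request(request, default_days=30):
#       """Cutoff datetime implied by the request's 'days' GET parameter."""
#       days = float(request.GET.get('days', default_days))
#       return timezone.now() - timedelta(days=days)
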
####################################################################################################


def filtered_urls(request):
    statuses = Urls.STATUS_ENUM.choices
    searches = Search.objects.all()
    sources = Source.objects.all()
    languages = list(UrlContent.objects.distinct('language').values_list('language', flat=True))  # TODO: Cache languages (NOTE: distinct(field) is PostgreSQL-only)
    languages = ["Unknown"] + [l for l in languages if l is not None]
    valid_contents = ["True", "False", "Unknown"]

    # Get selected parameters
    selected_status = request.GET.getlist('status', ["null"])
    selected_search = request.GET.getlist('search', ["null"])
    selected_source = request.GET.getlist('source', ["null"])
    selected_language = request.GET.getlist('language', ["null"])
    selected_valid_contents = request.GET.getlist('valid_content', ["null"])
    selected_min_sources = int(request.GET.get('min_sources', 1))
    selected_days = request.GET.get("days", 30)
    per_page = request.GET.get('per_page', 100)  # Default is 100 URLs per page
    page_number = request.GET.get('page')  # Get the current page number

    # Override with default filters? [Case: no params on URL] -> Only on "Home" click, or "Next page"
    if (len(request.GET.keys()) == 0) or ((len(request.GET.keys()) == 1) and ("page" in request.GET.keys())):
        selected_status = ["all"]
        selected_search = ["all"]
        selected_source = ["all"]
        selected_language = ["all"]
        selected_valid_contents = ["all"]
    else:
        # All elements
        all_status = [str(status[0]) for status in statuses]
        all_search = [str(search.id) for search in searches]
        all_source = [str(source.id) for source in sources]
        all_languages = languages
        all_valid_contents = valid_contents

        # Non-default parameters: if a list contains every element, replace it with "all" to avoid a heavy query
        selected_status = ["all"] if (set(selected_status) == set(all_status)) else selected_status
        selected_search = ["all"] if (set(selected_search) == set(all_search)) else selected_search
        selected_source = ["all"] if (set(selected_source) == set(all_source)) else selected_source
        selected_language = ["all"] if (set(selected_language) == set(all_languages)) else selected_language
        selected_valid_contents = ["all"] if (set(selected_valid_contents) == set(all_valid_contents)) else selected_valid_contents

    # Filter URLs based on selected filters
    if any('null' in l for l in [selected_status, selected_search, selected_source, selected_language, selected_valid_contents]):
        urls = []
    else:
        # Filter by date
        query = Q(ts_fetch__gte=now() - timedelta(days=float(selected_days)))
        # Additional filters
        if ("all" not in selected_status):
            query &= Q(status__in=selected_status)
        if ("all" not in selected_source):
            query &= Q(urlssourcesearch__id_source__in=selected_source)
        if ("all" not in selected_search):
            query &= Q(urlssourcesearch__id_search__in=selected_search)
        if ("all" not in selected_language):
            # URLs with selected languages
            subquery = Q(urlcontent__language__in=selected_language)
            if ("Unknown" in selected_language):
                # URLs with NULL language
                subquery |= Q(urlcontent__language__isnull=True)
                # URLs with no UrlContent record at all (treated like a NULL language)
                subquery |= Q(urlcontent__id_url__isnull=True)
            # Update query
            query &= subquery
        if ("all" not in selected_valid_contents):
            # Map the selected string flags onto boolean values
            bool_array = []
            if ('True' in selected_valid_contents):
                bool_array.append(True)
            if ('False' in selected_valid_contents):
                bool_array.append(False)
            # URLs with selected valid_contents
            subquery = Q(urlcontent__valid_content__in=bool_array)
            if ("Unknown" in selected_valid_contents):
                # URLs with NULL valid_content
                subquery |= Q(urlcontent__valid_content__isnull=True)
                # URLs with no UrlContent record at all (treated like a NULL valid_content)
                subquery |= Q(urlcontent__id_url__isnull=True)
            # Update query
            query &= subquery

        # Keep only URLs linked to at least `selected_min_sources` distinct sources
        if (selected_min_sources > 1):
            query &= Q(pk__in=UrlsSourceSearch.objects
                       .values('id_url')
                       .annotate(source_count=Count('id_source', distinct=True))
                       .filter(source_count__gte=selected_min_sources)
                       .values('id_url'))

        # Run query
        urls = Urls.objects.filter(query).distinct()  # .order_by('-ts_fetch')
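
        # At this point `query` is one Q expression ANDing the date cutoff with every
        # active filter; e.g. ?days=7&status=1&status=2 (illustrative values) yields
        # roughly:
        #   Q(ts_fetch__gte=now() - timedelta(days=7.0)) & Q(status__in=['1', '2'])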

    # Pagination
    paginator = Paginator(urls, per_page)  # Paginate the filtered URLs
    page_obj = paginator.get_page(page_number)  # Get the current page object

    # Map URL IDs to their sources & searches, only for the subset of URLs on the requested page
    sources_map = {
        url.id: list(Source.objects.filter(urlssourcesearch__id_url=url).distinct()) for url in page_obj.object_list
    }
    searches_map = {
        url.id: list(Search.objects.filter(urlssourcesearch__id_url=url).distinct()) for url in page_obj.object_list
    }
    url_content_map = {
        url.id: UrlContent.objects.filter(pk=url.id).first() for url in page_obj.object_list
    }

    # Custom replacement of the search type text for display
    for s in searches:
        s.type = s.type.replace("rss_feed", "rss").replace("url_host", "url").replace("keyword_search", "keyword")

    context = {
        'urls': page_obj,  # Pass the paginated URLs
        'per_page': per_page,  # Send per_page value for dynamic pagination
        'statuses': statuses,
        'searches': sorted(searches, key=lambda x: (x.type, x.search)),
        'sources': sorted(sources, key=lambda x: x.source),
        'languages': sorted(languages),  # None values were already filtered out above
        'valid_contents': valid_contents,
        # Selection
        'selected_status': selected_status,
        'selected_search': selected_search,
        'selected_source': selected_source,
        'selected_language': selected_language,
        'selected_valid_contents': selected_valid_contents,
        'selected_min_sources': selected_min_sources,
        'selected_days': selected_days,
        # Map
        'sources_map': sources_map,
        'searches_map': searches_map,
        'url_content_map': url_content_map,
        # "charts": charts,
        # "list_per_page": [15, 100, 500],
        # "list_days_text": ([0.25, 1, 7, 30, 365], ["Last 6 hours", "Last 24 hours", "Last 7 days", "Last 30 days", "Last 365 days"])
    }

    return render(request, 'filtered_urls.html', context)
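
# An illustrative request against this view (assuming it is routed at /urls; the
# actual URL pattern lives elsewhere), combining several of the filters above:
#   /urls?days=7&status=1&language=en&valid_content=True&min_sources=2&per_page=100&page=1
# Parameter values here are invented; real ones come from the template's filter form.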


####################################################################################################