Process all missing kids task, urls views cleaner, adding language filter WIP
This commit is contained in:
@@ -61,9 +61,9 @@ class DB_Handler():
|
||||
# URL
|
||||
obj_url, created = Urls.objects.get_or_create(url=url)
|
||||
if (created):
|
||||
logger.info("CREATED: {}".format(obj_url.url))
|
||||
logger.debug("Inserted: {}".format(obj_url.url))
|
||||
else:
|
||||
logger.info("NOT CREATED: {}".format(obj_url.url))
|
||||
logger.debug("Not inserted: {}".format(obj_url.url))
|
||||
# (URL, source, search)
|
||||
UrlsSourceSearch.objects.get_or_create(id_url=obj_url, id_source=obj_source, id_search=obj_search)
|
||||
except Exception as e:
|
||||
@@ -76,7 +76,7 @@ class DB_Handler():
|
||||
cache.set("insert_{}".format(url), True, timeout=self._cache_timeout_insert_url)
|
||||
cache.set("insert_{}{}{}".format(url, obj_source.source, obj_search.search), True, timeout=self._cache_timeout_insert_url)
|
||||
|
||||
logger.info("Inserted #{} raw URLs".format(len(urls_to_insert)))
|
||||
logger.info("Inserted #{} raw URLs, Source-Search {}-{}".format(len(urls_to_insert), obj_source.source, obj_search.search))
|
||||
|
||||
except Exception as e:
|
||||
logger.warning("Exception inserting raw URLs: {}\n{}".format(e, traceback.format_exc()))
|
||||
@@ -243,15 +243,19 @@ class DB_Handler():
|
||||
except Exception as e:
|
||||
logger.warning("Exception processing error URLs: {}\n{}".format(e, traceback.format_exc()))
|
||||
|
||||
def process_missing_kids_urls(self, batch_size):
|
||||
def process_missing_kids_urls(self, batch_size=None):
|
||||
try:
|
||||
logger.debug("Processing MissingKids URLs")
|
||||
logger.debug("Processing MissingKids URLs - batch_size={}".format(batch_size))
|
||||
# Get batch of URLs, %missingkids.org/poster% AND (status='valid' OR status='invalid')
|
||||
missingkids_urls = Urls.objects.order_by("-ts_fetch").filter(
|
||||
(Q(url__contains="missingkids.org/poster") | Q(url__contains="missingkids.org/new-poster"))
|
||||
&
|
||||
(Q(status=Urls.STATUS_ENUM.VALID) | Q(status=Urls.STATUS_ENUM.INVALID) | Q(status=Urls.STATUS_ENUM.ERROR))
|
||||
)[:batch_size]
|
||||
)
|
||||
|
||||
# Get batch size
|
||||
if (batch_size is not None):
|
||||
missingkids_urls = missingkids_urls[:batch_size]
|
||||
|
||||
# Per URL
|
||||
for obj_url in missingkids_urls:
|
||||
|
||||
@@ -64,7 +64,7 @@ def process_url(url):
|
||||
if ("Website protected with PerimeterX" in str(e.args)):
|
||||
logger.debug("TODO: process_url Implement bypass PerimeterX")
|
||||
|
||||
logger.warning("ArticleException for input URL {}\n{}".format(url, str(e.args)))
|
||||
logger.debug("ArticleException for input URL {}\n{}".format(url, str(e.args)))
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.warning("Exception for input URL {}\n{}".format(url, str(e)))
|
||||
|
||||
@@ -56,6 +56,12 @@ def process_missing_kids_urls(batch_size=50):
|
||||
DB_Handler().process_missing_kids_urls(batch_size=batch_size)
|
||||
logger.info("Task completed: {}".format(task))
|
||||
|
||||
@job('default')
def process_missing_kids_urls_all(batch_size=None):
    """Run the MissingKids URL processing task, by default with no batch size.

    Args:
        batch_size: Forwarded unchanged to
            ``DB_Handler.process_missing_kids_urls``. Defaults to ``None``.
    """
    task_name = "Process Missing Kids URLs ALL"
    logger.info("Task triggered: {}".format(task_name))

    handler = DB_Handler()
    handler.process_missing_kids_urls(batch_size=batch_size)

    logger.info("Task completed: {}".format(task_name))
|
||||
|
||||
|
||||
|
||||
@@ -76,7 +82,10 @@ def background_task(process_type: str):
|
||||
# FetchMissingKids().run()
|
||||
elif ("process_" in process_type):
|
||||
# Batch size encoded in URL
|
||||
batch_size = int(process_type.split("_")[-1])
|
||||
try:
|
||||
batch_size = int(process_type.split("_")[-1])
|
||||
except Exception as e:
|
||||
batch_size = None
|
||||
# Task type
|
||||
if ("process_raw_urls" in process_type):
|
||||
DB_Handler().process_raw_urls(batch_size=batch_size)
|
||||
@@ -87,7 +96,6 @@ def background_task(process_type: str):
|
||||
else:
|
||||
logger.info("Task unknown!: {}".format(process_type))
|
||||
|
||||
|
||||
'''
|
||||
# Selenium based
|
||||
elif (process_type == "fetch_missing_kids_reduced"):
|
||||
|
||||
@@ -60,6 +60,7 @@
|
||||
<div class="filter-container">
|
||||
<label for="daysFilter">Select Number of Days:</label>
|
||||
<select id="daysFilter">
|
||||
<option value="0.25">Last 6 Hours</option>
|
||||
<option value="1">Last 24 Hours</option>
|
||||
<option value="3">Last 3 Days</option>
|
||||
<option value="7" selected>Last 7 Days</option>
|
||||
@@ -102,22 +103,22 @@
|
||||
|
||||
function fetchDataAndRenderCharts(days) {
|
||||
// Fetch and render the URL Fetch Date chart
|
||||
$.getJSON(`/api/urls-by-fetch-date/?days=${days}`, function (data) {
|
||||
$.getJSON(`/urls-by-fetch-date/?days=${days}`, function (data) {
|
||||
renderUrlFetchDateChart(data);
|
||||
});
|
||||
|
||||
// Fetch and render the URL Status chart (with dynamic date filtering)
|
||||
$.getJSON(`/api/urls-per-status/?days=${days}`, function (data) {
|
||||
$.getJSON(`/urls-per-status/?days=${days}`, function (data) {
|
||||
renderUrlStatusChart(data);
|
||||
});
|
||||
|
||||
// Fetch and render the URLs per Source chart
|
||||
$.getJSON(`/api/urls-per-source/?days=${days}`, function (data) {
|
||||
$.getJSON(`/urls-per-source/?days=${days}`, function (data) {
|
||||
renderUrlsPerSourceChart(data);
|
||||
});
|
||||
|
||||
// Fetch and render the URLs per Search chart
|
||||
$.getJSON(`/api/urls-per-search/?days=${days}`, function (data) {
|
||||
$.getJSON(`/urls-per-search/?days=${days}`, function (data) {
|
||||
renderUrlsPerSearchChart(data);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -48,15 +48,17 @@ a:hover {
|
||||
|
||||
/* Sidebar */
|
||||
.sidebar {
|
||||
width: 250px;
|
||||
padding: 10px;
|
||||
min-width: 110px; /* Minimum width */
|
||||
max-width: 200px; /* Maximum width */
|
||||
width: 100%; /* Make it take full width within the defined min and max */
|
||||
padding: 5px;
|
||||
box-sizing: border-box; /* Ensure padding doesn't increase the overall width */
|
||||
transition: width 0.3s ease-in-out; /* Smooth transition for resizing */
|
||||
background-color: #f4f4f4;
|
||||
margin-right: 20px;
|
||||
overflow-x: hidden;
|
||||
white-space: normal;
|
||||
word-wrap: break-word;
|
||||
word-break: break-word;
|
||||
transition: background 0.1s ease, color 0.1s ease;
|
||||
box-sizing: border-box;
|
||||
word-wrap: break-word; /* Allow wrapping of long words */
|
||||
overflow-wrap: break-word; /* Ensures wrapping across browsers */
|
||||
white-space: normal; /* Ensure normal word wrapping */
|
||||
}
|
||||
|
||||
.dark-mode .sidebar {
|
||||
@@ -65,7 +67,8 @@ a:hover {
|
||||
|
||||
/* Sidebar Headers */
|
||||
.sidebar h3 {
|
||||
margin-top: 5px;
|
||||
margin-top: 15px;
|
||||
margin-bottom: 2px;
|
||||
font-size: 16px;
|
||||
}
|
||||
|
||||
@@ -141,6 +144,41 @@ input[type="checkbox"] {
|
||||
|
||||
|
||||
|
||||
/* PAGINATION */
|
||||
.pagination-container {
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
gap: 10px;
|
||||
font-family: Arial, sans-serif;
|
||||
}
|
||||
.pagination-link {
|
||||
padding: 8px 15px;
|
||||
background-color: #007bff;
|
||||
color: white;
|
||||
text-decoration: none;
|
||||
border-radius: 25px;
|
||||
font-size: 14px;
|
||||
display: inline-block;
|
||||
transition: background-color 0.3s ease, transform 0.2s ease;
|
||||
}
|
||||
.pagination-link:hover {
|
||||
background-color: #0056b3;
|
||||
transform: scale(1.1);
|
||||
}
|
||||
.pagination-link:active {
|
||||
background-color: #003366;
|
||||
transform: scale(0.95);
|
||||
}
|
||||
.first-page, .last-page {
|
||||
font-weight: bold;
|
||||
}
|
||||
.prev-page, .next-page {
|
||||
font-weight: normal;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* ROUNDED SWITCH*/
|
||||
/* Hide the default checkbox */
|
||||
.checkbox-slider {
|
||||
@@ -232,7 +270,7 @@ input[type="checkbox"] {
|
||||
<option value="100" {% if per_page|stringformat:"s" == '100' %}selected{% endif %}>100</option>
|
||||
<option value="500" {% if per_page|stringformat:"s" == '500' %}selected{% endif %}>500</option>
|
||||
</select>
|
||||
<br><br>
|
||||
<br>
|
||||
|
||||
<!-- Filter by Time Range -->
|
||||
<h3>Fetch Date</h3>
|
||||
@@ -249,10 +287,23 @@ input[type="checkbox"] {
|
||||
<option value="90" {% if selected_days|stringformat:"s" == '90' %}selected{% endif %}>Last 90 days</option>
|
||||
<option value="365" {% if selected_days|stringformat:"s" == '365' %}selected{% endif %}>Last 365 days</option>
|
||||
</select>
|
||||
<br><br>
|
||||
<br>
|
||||
|
||||
<!-- Filter by Status -->
|
||||
<h3>Status</h3>
|
||||
<!--
|
||||
<label for="toggle-all-checkbox">
|
||||
<input type="checkbox" id="toggle-all-checkbox" class="toggle-all-checkbox"> Toggle All
|
||||
</label><br>
|
||||
{% for status in statuses %}
|
||||
<label>
|
||||
<input type="checkbox" name="status" value="{{ status.0 }}"
|
||||
{% if status.0 in selected_status %}checked{% endif %}
|
||||
class="status-checkbox">
|
||||
{{ status.1 }}
|
||||
</label><br>
|
||||
{% endfor %}
|
||||
-->
|
||||
<button type="button" class="toggle-all-btn" data-toggle="status">Toggle All</button><br>
|
||||
{% for status in statuses %}
|
||||
<label>
|
||||
@@ -261,7 +312,7 @@ input[type="checkbox"] {
|
||||
{{ status.1 }}
|
||||
</label><br>
|
||||
{% endfor %}
|
||||
<br><br>
|
||||
|
||||
|
||||
<!-- Filter by Search -->
|
||||
<h3>Search</h3>
|
||||
@@ -270,10 +321,10 @@ input[type="checkbox"] {
|
||||
<label>
|
||||
<input type="checkbox" name="search" value="{{ search.id }}"
|
||||
{% if search.id|stringformat:"s" in selected_search %}checked{% endif %}>
|
||||
[{{ search.type }}] {{ search.search|truncatechars:70 }}
|
||||
[{{ search.type }}] {{ search.search|truncatechars:50 }}
|
||||
</label><br>
|
||||
{% endfor %}
|
||||
<br><br>
|
||||
|
||||
|
||||
<!-- Filter by Source -->
|
||||
<h3>Source</h3>
|
||||
@@ -282,10 +333,21 @@ input[type="checkbox"] {
|
||||
<label>
|
||||
<input type="checkbox" name="source" value="{{ source.id }}"
|
||||
{% if source.id|stringformat:"s" in selected_source %}checked{% endif %}>
|
||||
{{ source.source|truncatechars:70 }}
|
||||
{{ source.source|truncatechars:50 }}
|
||||
</label><br>
|
||||
{% endfor %}
|
||||
<br><br>
|
||||
|
||||
<!-- Filter by language -->
|
||||
<h3>Language</h3>
|
||||
<button type="button" class="toggle-all-btn" data-toggle="language">Toggle All</button><br>
|
||||
{% for lang in languages %}
|
||||
<label>
|
||||
<input type="checkbox" name="language" value="{{ lang }}"
|
||||
{% if lang|stringformat:"s" in selected_lang %}checked{% endif %}>
|
||||
{{ lang|truncatechars:50 }}
|
||||
</label><br>
|
||||
{% endfor %}
|
||||
|
||||
</form>
|
||||
</div>
|
||||
|
||||
@@ -300,6 +362,8 @@ input[type="checkbox"] {
|
||||
<th>Fetch Date</th>
|
||||
<th>Search</th>
|
||||
<th>Source</th>
|
||||
<th>Valid content?</th>
|
||||
<th>Language</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
@@ -349,7 +413,18 @@ input[type="checkbox"] {
|
||||
{% endif %}
|
||||
{% endwith %}
|
||||
</td>
|
||||
<td>
|
||||
{% with url_content_map|dict_get:url.id as content %}
|
||||
{{ content.valid_content }}
|
||||
{% endwith %}
|
||||
</td>
|
||||
<td>
|
||||
{% with url_content_map|dict_get:url.id as content %}
|
||||
{{ content.language }}
|
||||
{% endwith %}
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
{% empty %}
|
||||
<tr>
|
||||
<td colspan="5">No URLs found for the selected filters.</td>
|
||||
@@ -360,7 +435,8 @@ input[type="checkbox"] {
|
||||
|
||||
<!-- Pagination Controls -->
|
||||
<div class="pagination">
|
||||
<div class="pagination-controls">
|
||||
<!-- <div class="pagination-controls"> -->
|
||||
<div class="pagination-container" style="margin-top: 20px;margin-bottom: 20px;">
|
||||
{% if urls.has_previous %}
|
||||
<a href="#" class="pagination-link" data-page="1">« First</a>
|
||||
<a href="#" class="pagination-link" data-page="{{ urls.previous_page_number }}">Previous</a>
|
||||
|
||||
@@ -11,6 +11,8 @@
|
||||
<script src="https://code.jquery.com/jquery-3.6.4.min.js"></script>
|
||||
<!-- Markdown -->
|
||||
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
||||
<!-- Bootstrap JS -->
|
||||
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
|
||||
|
||||
<!-- Custom Styles -->
|
||||
<style>
|
||||
@@ -34,10 +36,63 @@
|
||||
word-break: break-word;
|
||||
}
|
||||
|
||||
</style>
|
||||
.table {
|
||||
table-layout: auto;
|
||||
width: 100%;
|
||||
}
|
||||
th {
|
||||
white-space: nowrap;
|
||||
}
|
||||
td {
|
||||
word-wrap: break-word;
|
||||
overflow-wrap: break-word;
|
||||
|
||||
}
|
||||
|
||||
/* Sidebar */
|
||||
.sidebar {
|
||||
min-width: 110px; /* Minimum width */
|
||||
max-width: 200px; /* Maximum width */
|
||||
width: 100%; /* Make it take full width within the defined min and max */
|
||||
padding: 5px;
|
||||
box-sizing: border-box; /* Ensure padding doesn't increase the overall width */
|
||||
transition: width 0.3s ease-in-out; /* Smooth transition for resizing */
|
||||
background-color: #f4f4f4;
|
||||
box-sizing: border-box;
|
||||
word-wrap: break-word; /* Allow wrapping of long words */
|
||||
overflow-wrap: break-word; /* Ensures wrapping across browsers */
|
||||
white-space: normal; /* Ensure normal word wrapping */
|
||||
}
|
||||
|
||||
.dark-mode .sidebar {
|
||||
background-color: #1e1e1e;
|
||||
}
|
||||
|
||||
|
||||
</style>
|
||||
</head>
|
||||
<script>
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
document.addEventListener("DOMContentLoaded", function () {
|
||||
//////////////////////////////////////////////
|
||||
// Timestamp to local timezone
|
||||
document.querySelectorAll(".ts-fetch").forEach(element => {
|
||||
let utcDate = element.getAttribute("data-ts"); // Get timestamp from data attribute
|
||||
let options = { year: 'numeric', month: 'numeric', day: 'numeric', hour: '2-digit', minute: '2-digit', second: '2-digit', hour12:false};
|
||||
if (utcDate) {
|
||||
let localDate = new Date(utcDate).toLocaleString("en-GB", options); // Convert to local timezone
|
||||
element.textContent = localDate; // Update the text content
|
||||
}
|
||||
});
|
||||
document.querySelectorAll(".ts-publish").forEach(element => {
|
||||
let utcDate = element.getAttribute("data-ts"); // Get timestamp from data attribute
|
||||
let options = { year: 'numeric', month: 'numeric', day: 'numeric', hour: '2-digit', minute: '2-digit', second: '2-digit', hour12:false};
|
||||
if (utcDate) {
|
||||
let localDate = new Date(utcDate).toLocaleString("en-GB", options); // Convert to local timezone
|
||||
element.textContent = localDate; // Update the text content
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
function fetchDetails(urlId, url) {
|
||||
// Show the loading spinner
|
||||
@@ -54,15 +109,13 @@
|
||||
}
|
||||
|
||||
// Fetch URL
|
||||
let fetchUrl = `/api/url/${urlId}/fetch/?url=${encodeURIComponent(url)}&model=${encodeURIComponent(selectedModel)}&text=${encodeURIComponent(inputText)}`;
|
||||
let fetchUrl = `/urls/${urlId}/fetch/?url=${encodeURIComponent(url)}&model=${encodeURIComponent(selectedModel)}&text=${encodeURIComponent(inputText)}`;
|
||||
|
||||
let resultContainer = $("#chat-output");
|
||||
resultContainer.html(""); // Clear previous content before fetching
|
||||
|
||||
let fetchButton = $("button[onclick^='fetchDetails']"); // Select the button
|
||||
fetchButton.prop("disabled", true); // Disable button
|
||||
|
||||
|
||||
fetch(fetchUrl/*, {
|
||||
method: "POST",
|
||||
body: JSON.stringify({
|
||||
@@ -89,7 +142,6 @@
|
||||
fetchButton.prop("disabled", false); // Re-enable button when done
|
||||
return;
|
||||
}
|
||||
|
||||
// Decode the streamed chunk
|
||||
let chunk = decoder.decode(value);
|
||||
// Append to the accumulated text
|
||||
@@ -111,14 +163,21 @@
|
||||
// Hide the loading spinner after request is complete
|
||||
document.getElementById("loading-spinner").style.display = "none";
|
||||
});
|
||||
;
|
||||
}
|
||||
</script>
|
||||
<body>
|
||||
|
||||
<div class="sidebar">
|
||||
<div class="button-container">
|
||||
<button id="homeButton" class="home-button">🏠</button>
|
||||
<button id="themeToggle" class="theme-button">🌙</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
<!-- Main Content -->
|
||||
<div class="container mt-4">
|
||||
<h2>URL Details</h2>
|
||||
<!-- <h2>URL Details</h2> -->
|
||||
<table class="table table-bordered">
|
||||
<tr>
|
||||
<th>URL</th>
|
||||
@@ -126,7 +185,7 @@
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Fetch Date</th>
|
||||
<td>{{ url_item.ts_fetch }} UTC</td>
|
||||
<td> <span class="ts-fetch" data-ts="{{ url_item.ts_fetch|date:'c' }}"></span> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Source</th>
|
||||
@@ -142,59 +201,59 @@
|
||||
</tr>
|
||||
<tr>
|
||||
<th>URL host</th>
|
||||
<td><a href="{{ url_content.url_host|safe }}" target="_blank">{{ url_content.url_host }}</a></td>
|
||||
<td> <a href="{{ url_content.url_host|safe }}" target="_blank">{{ url_content.url_host }}</a> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Site name</th>
|
||||
<td>{{ url_content.site_name }}</td>
|
||||
<td>{{ url_content.site_name|default:"" }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Published Date</th>
|
||||
<td>{{ url_content.date_published }} UTC</td>
|
||||
<td> <span class="ts-publish" data-ts="{{ url_content.date_published|date:'c' }}"></span> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Valid news article content?</th>
|
||||
<th>Valid news content?</th>
|
||||
<td>{{ url_content.valid_content }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Tags</th>
|
||||
<td>{{ url_content.tags }}</td>
|
||||
<td>{{ url_content.tags|default:"" }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Authors</th>
|
||||
<td>{{ url_content.authors }}</td>
|
||||
<td>{{ url_content.authors|default:"" }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Keywords</th>
|
||||
<td>{{ url_content.keywords }}</td>
|
||||
<td>{{ url_content.keywords|default:"" }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Language</th>
|
||||
<td>{{ url_content.language }}</td>
|
||||
<td>{{ url_content.language|default:"" }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Main image</th>
|
||||
<td><a href="{{ url_content.image_main_url|safe }}" target="_blank">{{ url_content.image_main_url }}</a></td>
|
||||
<td><a href="{{ url_content.image_main_url|safe }}" target="_blank">{{ url_content.image_main_url|default:"" }}</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Image URLs</th>
|
||||
<td>{{ url_content.image_urls }}</td>
|
||||
<td>{{ url_content.image_urls|default:"" }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Video URLs</th>
|
||||
<td>{{ url_content.videos_url }}</td>
|
||||
<td>{{ url_content.videos_url|default:"" }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Title</th>
|
||||
<td>{{ url_content.title }}</td>
|
||||
<td>{{ url_content.title|default:"" }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Description</th>
|
||||
<td>{{ url_content.description }}</td>
|
||||
<td>{{ url_content.description|default:"" }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Content</th>
|
||||
<td>{{ url_content.content }}</td>
|
||||
<td>{{ url_content.content|default:"" }}</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
@@ -232,9 +291,6 @@
|
||||
|
||||
</div>
|
||||
|
||||
<!-- Bootstrap JS -->
|
||||
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
|
||||
|
||||
{% block extra_js %}{% endblock %}
|
||||
</body>
|
||||
</html>
|
||||
|
||||
@@ -18,9 +18,9 @@ urlpatterns = [
|
||||
path('urls/<int:id>/', views.url_detail_view, name='url_detail'),
|
||||
path('urls/<int:id>/fetch/', views.fetch_details, name='fetch_details'),
|
||||
#
|
||||
path('url/', views.urls, name='url_detail'),
|
||||
path('url/<int:id>/', views.url_detail_view, name='url_detail'),
|
||||
path('url/<int:id>/fetch/', views.fetch_details, name='fetch_details'),
|
||||
#path('url/', views.urls, name='url_detail'),
|
||||
#path('url/<int:id>/', views.url_detail_view, name='url_detail'),
|
||||
#path('url/<int:id>/fetch/', views.fetch_details, name='fetch_details'),
|
||||
#
|
||||
path('task/<str:task>', views.trigger_task, name='trigger_task'),
|
||||
]
|
||||
|
||||
@@ -1,108 +1,53 @@
|
||||
# import django_rq
|
||||
from .tasks import background_task
|
||||
from django.http import JsonResponse
|
||||
from django.core.paginator import Paginator
|
||||
from django.shortcuts import render, get_object_or_404
|
||||
from django.http import StreamingHttpResponse, JsonResponse, HttpResponse
|
||||
import ollama
|
||||
from .models import Urls, Source, Search, UrlContent, UrlsSourceSearch
|
||||
import os
|
||||
|
||||
####################################################################################################
|
||||
def trigger_task(request, task):
    """View that enqueues the background task named by ``task``.

    Args:
        request: Incoming HTTP request (not inspected).
        task: Task identifier taken from the URL and handed to
            ``background_task``.

    Returns:
        JsonResponse acknowledging the enqueue and echoing ``task``.
    """
    # Enqueue function in "default" queue
    background_task.delay(task)

    payload = {"message": "Task has been enqueued!", "task": task}
    return JsonResponse(payload)
|
||||
|
||||
# queue = django_rq.get_queue('default') # Get the default queue
|
||||
# job = queue.enqueue(background_task, task, job_timeout="30m")
|
||||
# return JsonResponse({"message": "Task has been enqueued!", "job_id": job.id})
|
||||
|
||||
####################################################################################################
|
||||
def link_list(request):
    """Return a JSON index of handy local links (DB, admin, logs, views, tasks).

    Args:
        request: Incoming HTTP request (not inspected).

    Returns:
        JsonResponse of the form ``{"links": [<url>, ...]}``.
    """
    prefix = "http://localhost:8000/task"
    links = [
        "fetch_feeds",
        "fetch_parser",
        "fetch_search",
        "process_raw_urls_50",
        "process_error_urls_50",
        "process_missing_kids_urls_50",
        "process_missing_kids_urls_all",
    ]

    list_links = [
        # DB
        "http://localhost:8080/?pgsql=matitos_db&username=supermatitos&db=matitos&ns=public&select=urls&order%5B0%5D=id&limit=500",
        # Admin panel
        "http://localhost:8000/admin",
        # Logs
        "http://localhost:8000/logs_debug",
        "http://localhost:8000/logs_info",
        "http://localhost:8000/logs_error",
        # URLs
        "http://localhost:8000/urls",
        # Charts
        "http://localhost:8000/charts",
    ]

    # API tasks. URLs always use "/" as separator, so join explicitly instead
    # of via os.path.join, which would insert "\\" on Windows.
    list_links += ["{}/{}".format(prefix, link) for link in links]

    # Json
    return JsonResponse({"links": list_links})
|
||||
|
||||
|
||||
from django.http import StreamingHttpResponse, JsonResponse
|
||||
from django.shortcuts import render, get_object_or_404
|
||||
from django.core.paginator import Paginator
|
||||
import ollama
|
||||
|
||||
from .models import Urls, Source, Search, UrlContent, UrlsSourceSearch
|
||||
|
||||
# Create your views here.
|
||||
def urls(request):
    """Paginated URL listing filtered by status, source and search.

    Query parameters (all optional):
        page: Page number (default 1).
        items: URLs per page (default 15).
        sources / searches: Comma-separated ids, or "all" / "none".
            Default to every known source / search id.
        status: Comma-separated status values, or "all" / "none".

    Returns:
        For requests carrying ``X-Requested-With: XMLHttpRequest``, a
        JsonResponse whose ``urls`` key holds the rendered
        ``urls_partial.html``; otherwise the rendered ``urls.html`` page.
    """
    url_qs = Urls.objects.all()
    sources = Source.objects.all()
    searches = Search.objects.all()

    # Parameters
    params = request.GET
    page_number = params.get("page", 1)
    num_items = params.get("items", 15)
    source_ids = params.get("sources", ','.join([str(s.id) for s in sources]))
    search_ids = params.get("searches", ','.join([str(s.id) for s in searches]))
    status_filters = params.get("status", None)

    # Filters. "none" must yield an empty *queryset* (not a plain list):
    # later filters still call .filter() on the result, which a list does
    # not support.
    if status_filters and status_filters != "all":
        if status_filters == "none":
            url_qs = url_qs.none()
        else:
            url_qs = url_qs.filter(status__in=status_filters.split(","))

    if source_ids and source_ids != "all":
        if source_ids == "none":
            url_qs = url_qs.none()
        else:
            url_qs = url_qs.filter(urlssourcesearch__id_source__in=source_ids.split(","))  # .distinct()

    if search_ids and search_ids != "all":
        if search_ids == "none":
            url_qs = url_qs.none()
        else:
            url_qs = url_qs.filter(urlssourcesearch__id_search__in=search_ids.split(","))  # .distinct()

    # Pagination
    paginator = Paginator(url_qs, num_items)
    page_obj = paginator.get_page(page_number)

    # Map URL IDs to their sources & searches, only for subset of URLs (page of interest)
    sources_map = {
        url.id: list(Source.objects.filter(urlssourcesearch__id_url=url).distinct())
        for url in page_obj.object_list
    }
    searches_map = {
        url.id: list(Search.objects.filter(urlssourcesearch__id_url=url).distinct())
        for url in page_obj.object_list
    }

    context = {
        "page_obj": page_obj,
        "sources": sources,
        "searches": searches,
        "sources_map": sources_map,
        "searches_map": searches_map,
        "list_status": Urls.STATUS_ENUM.values,
        "list_urls_per_page": [15, 100, 500],
    }

    # If request is AJAX, return JSON response
    if request.headers.get("X-Requested-With") == "XMLHttpRequest":
        partial_html = render(request, 'urls_partial.html', context).content.decode('utf-8')
        return JsonResponse({'urls': partial_html})

    return render(request, "urls.html", context)
|
||||
####################################################################################################
|
||||
def _serve_log_file(env_var, default_path):
    """Return the log file at ``$env_var`` (or ``default_path``) as plain text.

    Responds with 404 instead of raising when the file does not exist, and
    replaces undecodable bytes so a stray byte in a log cannot break the view.
    """
    log_path = os.getenv(env_var, default_path)
    try:
        with open(log_path, "r", errors="replace") as f:
            file_content = f.read()
    except FileNotFoundError:
        return HttpResponse("Log file not found", content_type="text/plain", status=404)
    return HttpResponse(file_content, content_type="text/plain")


def logs_error(request):
    """Serve the error log (path from ``PATH_LOGS_ERROR``) as text/plain."""
    return _serve_log_file("PATH_LOGS_ERROR", "logs/log_app_fetcher_error.log")


def logs_info(request):
    """Serve the info log (path from ``PATH_LOGS_INFO``) as text/plain."""
    return _serve_log_file("PATH_LOGS_INFO", "logs/log_app_fetcher_info.log")


def logs_debug(request):
    """Serve the debug log (path from ``PATH_LOGS_DEBUG``) as text/plain."""
    return _serve_log_file("PATH_LOGS_DEBUG", "logs/log_app_fetcher_debug.log")
|
||||
|
||||
####################################################################################################
|
||||
class OllamaClient():
|
||||
@@ -128,31 +73,6 @@ class OllamaClient():
|
||||
# return "Imagine you are a journalist, TLDR in a paragraph. Only answer with the summary:"
|
||||
#return "Below you will find the whole content of a news article:\n{}\nProvide a concise summary of one paragraph maximum of the content.".format(content)
|
||||
|
||||
|
||||
def url_detail_view(request, id):
|
||||
url_item = get_object_or_404(Urls, id=id)
|
||||
url_sources = list(Source.objects.filter(urlssourcesearch__id_url=url_item).distinct())
|
||||
url_searches = list(Search.objects.filter(urlssourcesearch__id_url=url_item).distinct())
|
||||
# url_source_search = UrlsSourceSearch.objects.filter(id_url=url_item)
|
||||
|
||||
try:
|
||||
url_content = UrlContent.objects.get(pk=id)
|
||||
except UrlContent.DoesNotExist:
|
||||
url_content = {}
|
||||
|
||||
# TODO: https://github.com/ollama/ollama-python?tab=readme-ov-file#async-client
|
||||
ollama = OllamaClient()
|
||||
|
||||
context = {
|
||||
'url_item': url_item,
|
||||
'sources': url_sources,
|
||||
'searches': url_searches,
|
||||
'models': ollama.get_models(),
|
||||
'prompt': ollama.get_prompt(),
|
||||
'url_content': url_content,
|
||||
}
|
||||
return render(request, 'url_detail.html', context)
|
||||
|
||||
# TODO: move to ollamajs...
|
||||
def fetch_details(request, id):
|
||||
url_item = get_object_or_404(Urls, id=id)
|
||||
@@ -178,6 +98,30 @@ def fetch_details(request, id):
|
||||
return StreamingHttpResponse(stream_response(), content_type="text/plain")
|
||||
|
||||
|
||||
def url_detail_view(request, id):
    """Render the detail page for the URL with primary key ``id``.

    Responds with 404 (via ``get_object_or_404``) when no such URL exists.
    When the URL has no ``UrlContent`` row, the template receives an empty
    dict as ``url_content``.
    """
    url_item = get_object_or_404(Urls, id=id)

    # Distinct sources / searches linked to this URL
    linked_sources = list(Source.objects.filter(urlssourcesearch__id_url=url_item).distinct())
    linked_searches = list(Search.objects.filter(urlssourcesearch__id_url=url_item).distinct())

    try:
        url_content = UrlContent.objects.get(pk=id)
    except UrlContent.DoesNotExist:
        url_content = {}

    # TODO: https://github.com/ollama/ollama-python?tab=readme-ov-file#async-client
    llm_client = OllamaClient()

    return render(
        request,
        'url_detail.html',
        {
            'url_item': url_item,
            'sources': linked_sources,
            'searches': linked_searches,
            'models': llm_client.get_models(),
            'prompt': llm_client.get_prompt(),
            'url_content': url_content,
        },
    )
|
||||
|
||||
####################################################################################################
|
||||
from django.shortcuts import render
|
||||
from django.http import JsonResponse
|
||||
@@ -256,23 +200,7 @@ def urls_per_search(request):
|
||||
|
||||
return JsonResponse(data)
|
||||
|
||||
####################################################################################################
|
||||
from django.http import HttpResponse
|
||||
|
||||
def logs_error(request):
|
||||
with open(os.getenv("PATH_LOGS_ERROR", "logs/log_app_fetcher_error.log"), "r") as f:
|
||||
file_content = f.read()
|
||||
return HttpResponse(file_content, content_type="text/plain")
|
||||
|
||||
def logs_info(request):
|
||||
with open(os.getenv("PATH_LOGS_INFO", "logs/log_app_fetcher_info.log"), "r") as f:
|
||||
file_content = f.read()
|
||||
return HttpResponse(file_content, content_type="text/plain")
|
||||
|
||||
def logs_debug(request):
|
||||
with open(os.getenv("PATH_LOGS_DEBUG", "logs/log_app_fetcher_debug.log"), "r") as f:
|
||||
file_content = f.read()
|
||||
return HttpResponse(file_content, content_type="text/plain")
|
||||
|
||||
####################################################################################################
|
||||
from django.shortcuts import render
|
||||
@@ -284,33 +212,39 @@ def filtered_urls(request):
|
||||
statuses = Urls.STATUS_ENUM.choices
|
||||
searches = Search.objects.all()
|
||||
sources = Source.objects.all()
|
||||
# TODO: Cache languages, update once every N
|
||||
languages = UrlContent.objects.distinct('language').values_list('language', flat=True)
|
||||
# languages = [l for l in languages if l is not None]
|
||||
|
||||
# Get selected parameters
|
||||
selected_status = request.GET.getlist('status')
|
||||
selected_search = request.GET.getlist('search')
|
||||
selected_source = request.GET.getlist('source')
|
||||
selected_language = request.GET.getlist('language')
|
||||
selected_days = request.GET.get("days", 30)
|
||||
per_page = request.GET.get('per_page', 100) # Default is X URLs per page
|
||||
page_number = request.GET.get('page') # Get the current page number
|
||||
|
||||
# charts = request.GET.get('charts', False)
|
||||
|
||||
# "Home" -> No parameters -> Override filter with default values
|
||||
if ( len(request.GET.keys()) == 0 ):
|
||||
# Override with default filters? [Case: no params update on URL] -> Only on "Home" click, or "Next page"
|
||||
if (len(request.GET.keys()) == 0) or ((len(request.GET.keys()) == 1) and ("page" in request.GET.keys())):
|
||||
selected_status = [str(status[0]) for status in statuses]
|
||||
selected_search = [str(search.id) for search in searches]
|
||||
selected_source = [str(source.id) for source in sources]
|
||||
selected_language = languages
|
||||
|
||||
# Filter URLs based on selected filters
|
||||
if ('' in selected_status) or ('' in selected_search) or ('' in selected_source):
|
||||
urls = []
|
||||
else:
|
||||
urls = Urls.objects.filter(
|
||||
Q(urlssourcesearch__id_source__in=selected_source) &
|
||||
Q(urlssourcesearch__id_search__in=selected_search) &
|
||||
Q(status__in=selected_status) &
|
||||
query = Q(urlssourcesearch__id_source__in=selected_source) & \
|
||||
Q(urlssourcesearch__id_search__in=selected_search) & \
|
||||
Q(status__in=selected_status) & \
|
||||
Q(ts_fetch__gte=now() - timedelta(days=float(selected_days)))
|
||||
).distinct() # .order_by('-ts_fetch')
|
||||
|
||||
if selected_language:
|
||||
query &= Q(urlcontent__language__in=selected_language)
|
||||
|
||||
urls = Urls.objects.filter(query).distinct() # .order_by('-ts_fetch')
|
||||
|
||||
# Custom replace search type
|
||||
for s in searches:
|
||||
@@ -327,22 +261,31 @@ def filtered_urls(request):
|
||||
searches_map = {
|
||||
url.id: list(Search.objects.filter(urlssourcesearch__id_url=url).distinct()) for url in page_obj.object_list
|
||||
}
|
||||
url_content_map = {
|
||||
url.id: UrlContent.objects.filter(pk=url).first() for url in page_obj.object_list
|
||||
}
|
||||
|
||||
context = {
|
||||
'urls': page_obj, # Pass the paginated URLs
|
||||
'per_page': per_page, # Send per_page value for dynamic pagination
|
||||
'statuses': statuses,
|
||||
'searches': searches,
|
||||
'sources': sources,
|
||||
'searches': sorted(searches, key=lambda x: (x.type, x.search)),
|
||||
'sources': sorted(sources, key=lambda x: x.source),
|
||||
'languages': sorted(languages, key=lambda x: (x is None, x)),
|
||||
# Selection
|
||||
'selected_status': selected_status,
|
||||
'selected_search': selected_search,
|
||||
'selected_source': selected_source,
|
||||
'selected_language': selected_language,
|
||||
"selected_days": selected_days,
|
||||
# Map
|
||||
"sources_map": sources_map,
|
||||
"searches_map": searches_map,
|
||||
"url_content_map": url_content_map,
|
||||
# "charts": charts,
|
||||
# "list_per_page": [15, 100, 500],
|
||||
# "list_days_text": ([0.25, 1, 7, 30, 365], ["Last 6 hours", "Last 24 hours", "Last 7 days", "Last 30 days", "Last 365 days"])
|
||||
}
|
||||
|
||||
return render(request, 'filtered_urls.html', context)
|
||||
|
||||
####################################################################################################
|
||||
@@ -19,6 +19,6 @@ from django.urls import path, include
|
||||
|
||||
urlpatterns = [
|
||||
path('admin/', admin.site.urls),
|
||||
path('api/', include('api.urls')),
|
||||
path('scheduler/', include('scheduler.urls')),
|
||||
path('', include('api.urls')),
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user