Dependencies for other languages, scheduled tasks import, logs per type, home button filter urls

This commit is contained in:
Luciano Gervasoni
2025-03-27 15:56:02 +01:00
parent a6b25fe915
commit e34284abbe
6 changed files with 382 additions and 143 deletions

View File

@@ -5,7 +5,7 @@ conda activate matitos_urls
# Core
pip install django psycopg[binary] django-redis django-tasks-scheduler
# Fetcher
pip install feedparser python-dateutil newspaper4k lxml[html_clean] googlenewsdecoder gnews duckduckgo_search GoogleNews
pip install feedparser python-dateutil newspaper4k[all] lxml[html_clean] googlenewsdecoder gnews duckduckgo_search GoogleNews
# News visualization
pip install ollama
```
@@ -110,6 +110,8 @@ http://localhost:8080/?pgsql=matitos_db&username=supermatitos&db=matitos&ns=publ
* Scheduled tasks
```
# 1) Modify the scheduled tasks on the admin panel:
Names: Fetch Feeds, Fetch Parser, Fetch Search
Callable: api.tasks.fetch_feeds, api.tasks.fetch_parser, api.tasks.fetch_search
Task type: Repeatable task (or cron...)
@@ -122,6 +124,12 @@ Task type: Repetable task (or cron...)
Queue: Low, Low, Default
Interval: 1h, 4h, 2h
# 2) Export
# python manage.py export > scheduled_tasks.json
# Or simply import saved definitions
python manage.py import --filename scheduled_tasks.json
```
* Utils

View File

@@ -3,7 +3,9 @@ import os
''' TODO: PATH LOGS
PATH_LOGS_ERROR=logs/log_app_fetcher_error.log
PATH_LOGS=logs/log_app_fetcher.log
PATH_LOGS_INFO=logs/log_app_fetcher_info.log
PATH_LOGS_DEBUG=logs/log_app_fetcher_debug.log
# PATH_LOGS=logs/log_app_fetcher.log
'''
os.makedirs("logs", exist_ok=True)
@@ -11,12 +13,18 @@ logging.basicConfig(format='%(filename)s | %(levelname)s | %(asctime)s | %(messa
logger = logging.getLogger("news_fetcher")
logger.setLevel(logging.DEBUG)
# To file log: INFO / WARNING / ERROR
fh = logging.handlers.RotatingFileHandler(filename="logs/log_app_fetcher.log", mode="a", maxBytes=10000000, backupCount=4)
# To file log: DEBUG / INFO / WARNING / ERROR / CRITICAL
fh = logging.handlers.RotatingFileHandler(filename="logs/log_app_fetcher_debug.log", mode="a", maxBytes=10000000, backupCount=4)
fh.setFormatter(logging.Formatter('%(levelname)s | %(asctime)s | %(message)s'))
logger.addHandler(fh)
# To file log: WARNING / ERROR
# To file log: INFO / WARNING / ERROR / CRITICAL
fh_ = logging.handlers.RotatingFileHandler(filename="logs/log_app_fetcher_info.log", mode="a", maxBytes=10000000, backupCount=2)
fh_.setFormatter(logging.Formatter('%(levelname)s | %(asctime)s | %(message)s'))
fh_.setLevel(logging.INFO)
logger.addHandler(fh_)
# To file log: WARNING / ERROR / CRITICAL
fh_ = logging.handlers.RotatingFileHandler(filename="logs/log_app_fetcher_error.log", mode="a", maxBytes=10000000, backupCount=1)
fh_.setFormatter(logging.Formatter('%(levelname)s | %(asctime)s | %(message)s'))
fh_.setLevel(logging.WARNING)

View File

@@ -110,7 +110,7 @@ input[type="checkbox"] {
}
/* Themed Toggle Button */
.theme-button {
.theme-button, .home-button {
background-color: var(--sidebar);
border: 1px solid var(--sidebar);
border-radius: 50%;
@@ -123,13 +123,70 @@ input[type="checkbox"] {
transition: background-color 0.1s, color 0.1s, transform 0.1s;
cursor: pointer;
}
.theme-button:hover {
.theme-button:hover, .home-button:hover {
transform: rotate(20deg);
}
.theme-button:active {
.theme-button:active, .home-button:active {
transform: scale(0.95);
}
.button-container {
display: flex;
align-items: center;
gap: 10px; /* Space between buttons */
}
/* ROUNDED SWITCH*/
/* Hide the default checkbox */
.checkbox-slider {
display: none;
}
/* Container for the toggle switch */
.slider-container {
display: inline-block;
width: 60px;
height: 30px;
position: relative;
}
/* Label for the slider */
.slider-container label {
display: block;
position: absolute;
top: 0;
left: 0;
width: 100%;
height: 100%;
background-color: #ccc;
border-radius: 30px;
cursor: pointer;
transition: background-color 0.3s ease;
}
/* The toggle circle */
.slider-container label::before {
content: '';
position: absolute;
top: 3px;
left: 3px;
width: 24px;
height: 24px;
background-color: white;
border-radius: 50%;
transition: transform 0.3s ease;
}
/* When the checkbox is checked */
.checkbox-slider:checked + .slider-container label {
background-color: #0940b8;
}
/* When the checkbox is checked, move the circle */
.checkbox-slider:checked + .slider-container label::before {
transform: translateX(30px);
}
</style>
</head>
<body>
@@ -137,22 +194,54 @@ input[type="checkbox"] {
<div class="container">
<div class="sidebar">
<button id="themeToggle" class="theme-button">🌙</button>
<div class="button-container">
<button id="homeButton" class="home-button">🏠</button>
<button id="themeToggle" class="theme-button">🌙</button>
</div>
<form method="GET" action="" id="filterForm">
<!-- Switch: Table / Charts
<form>
<label>
<input type="radio" name="view" value="table" checked id="tableRadio"> Table
</label>
<label>
<input type="radio" name="view" value="chart" id="chartRadio"> Charts
</label>
</form>
-->
<!-- Rounded switch
<input type="checkbox" id="toggle" class="checkbox-slider">
<div class="slider-container">
<label for="toggle"></label>
<span class="slider-text">
<span id="onText" class="on-text">ON</span>
<span id="offText" class="off-text">OFF</span>
</span>
</div>
-->
<!-- Pages Per Page Dropdown -->
<h3>Pages Per Page</h3>
<select id="perPageSelect" name="per_page">
<option value="25" {% if per_page == '25' %}selected{% endif %}>25</option>
<option value="100" {% if per_page == '100' %}selected{% endif %}>100</option>
<option value="500" {% if per_page == '500' %}selected{% endif %}>500</option>
<option value="25" {% if per_page|stringformat:"s" == '25' %}selected{% endif %}>25</option>
<option value="100" {% if per_page|stringformat:"s" == '100' %}selected{% endif %}>100</option>
<option value="500" {% if per_page|stringformat:"s" == '500' %}selected{% endif %}>500</option>
</select>
<br><br>
<!-- Filter by Time Range -->
<h3>Fetch Date</h3>
<select id="timeFilterSelect" name="days">
<!--
{% for form_days in form_days_list %}
<option value=form_days.1|stringformat:"s" {% if selected_days|stringformat:"s" == form_days.1|stringformat:"s" %}selected{% endif %}>form_days.2</option>
{% endfor %}
-->
<option value="0.25" {% if selected_days|stringformat:"s" == '0.25' %}selected{% endif %}>Last 6 hours</option>
<option value="1" {% if selected_days|stringformat:"s" == '1' %}selected{% endif %}>Last 24 hours</option>
<option value="7" {% if selected_days|stringformat:"s" == '7' %}selected{% endif %}>Last 7 days</option>
@@ -197,115 +286,104 @@ input[type="checkbox"] {
</label><br>
{% endfor %}
<br><br>
</form>
</div>
<!-- Table URLs data -->
<div class="table-container">
<table>
<thead>
<tr>
<th>ID</th>
<th>URL</th>
<th>Status</th>
<th>Fetch Date</th>
<th>Search</th>
<th>Source</th>
</tr>
</thead>
<tbody>
{% for url in urls %}
<!-- Table URLs data -->
<div class="table-container">
<table>
<thead>
<tr>
<td><a href="./{{ url.id }}" class="btn btn-primary btn-sm" target="_blank">{{ url.id }}</a></td>
<td><a href="{{ url.url }}/" target="_blank">{{ url.url }}</a></td>
<td>
{% if url.status == 'raw' %}
<span class="badge bg-secondary">{{ url.status|capfirst }}</span>
{% elif url.status == 'error' %}
<span class="badge bg-danger">{{ url.status|capfirst }}</span>
{% elif url.status == 'valid' %}
<span class="badge bg-success">{{ url.status|capfirst }}</span>
{% elif url.status == 'unknown' %}
<span class="badge bg-warning">{{ url.status|capfirst }}</span>
{% elif url.status == 'invalid' %}
<span class="badge bg-danger">{{ url.status|capfirst }}</span>
{% elif url.status == 'duplicate' %}
<span class="badge bg-info">{{ url.status|capfirst }}</span>
{% else %}
<span class="badge bg-light">Unknown</span>
{% endif %}
</td>
<td>
<span class="ts-fetch" data-ts="{{ url.ts_fetch|date:'c' }}"></span>
</td>
<td>
{% with sources_map|dict_get:url.id as sources %}
{% if sources %}
{% for source in sources %}
<span class="badge bg-secondary">{{ source }}</span>
{% endfor %}
{% else %}
<span class="text-muted">No sources</span>
{% endif %}
{% endwith %}
</td>
<td>
{% with searches_map|dict_get:url.id as searches %}
{% if searches %}
{% for search in searches %}
<span class="badge bg-secondary">{{ search }}</span>
{% endfor %}
{% else %}
<span class="text-muted">No searches</span>
{% endif %}
{% endwith %}
</td>
<!-- Header order must match the cell order of each row below: the sources cell is rendered before the searches cell -->
<th>ID</th>
<th>URL</th>
<th>Status</th>
<th>Fetch Date</th>
<th>Source</th>
<th>Search</th>
</tr>
{% empty %}
<tr>
<td colspan="5">No URLs found for the selected filters.</td>
</tr>
{% endfor %}
</tbody>
</table>
</thead>
<tbody>
{% for url in urls %}
<tr>
<td><a href="./{{ url.id }}" class="btn btn-primary btn-sm" target="_blank">{{ url.id }}</a></td>
<td><a href="{{ url.url }}/" target="_blank">{{ url.url }}</a></td>
<td>
{% if url.status == 'raw' %}
<span class="badge bg-secondary">{{ url.status|capfirst }}</span>
{% elif url.status == 'error' %}
<span class="badge bg-danger">{{ url.status|capfirst }}</span>
{% elif url.status == 'valid' %}
<span class="badge bg-success">{{ url.status|capfirst }}</span>
{% elif url.status == 'unknown' %}
<span class="badge bg-warning">{{ url.status|capfirst }}</span>
{% elif url.status == 'invalid' %}
<span class="badge bg-danger">{{ url.status|capfirst }}</span>
{% elif url.status == 'duplicate' %}
<span class="badge bg-info">{{ url.status|capfirst }}</span>
{% else %}
<span class="badge bg-light">Unknown</span>
{% endif %}
</td>
<td>
<span class="ts-fetch" data-ts="{{ url.ts_fetch|date:'c' }}"></span>
</td>
<td>
{% with sources_map|dict_get:url.id as sources %}
{% if sources %}
{% for source in sources %}
<span class="badge bg-secondary">{{ source }}</span>
{% endfor %}
{% else %}
<span class="text-muted">No sources</span>
{% endif %}
{% endwith %}
</td>
<td>
{% with searches_map|dict_get:url.id as searches %}
{% if searches %}
{% for search in searches %}
<span class="badge bg-secondary">{{ search }}</span>
{% endfor %}
{% else %}
<span class="text-muted">No searches</span>
{% endif %}
{% endwith %}
</td>
</tr>
{% empty %}
<tr>
<!-- Span all six columns of the table (ID, URL, Status, Fetch Date, and the two source/search columns) -->
<td colspan="6">No URLs found for the selected filters.</td>
</tr>
{% endfor %}
</tbody>
</table>
<!-- Pagination Controls -->
<div class="pagination">
<div class="pagination-controls">
{% if urls.has_previous %}
<a href="#" class="pagination-link" data-page="1">« First</a>
<a href="#" class="pagination-link" data-page="{{ urls.previous_page_number }}">Previous</a>
{% endif %}
<!-- Pagination Controls -->
<div class="pagination">
<div class="pagination-controls">
{% if urls.has_previous %}
<a href="#" class="pagination-link" data-page="1">« First</a>
<a href="#" class="pagination-link" data-page="{{ urls.previous_page_number }}">Previous</a>
{% endif %}
<span>Page {{ urls.number }} of {{ urls.paginator.num_pages }}</span>
<span>Page {{ urls.number }} of {{ urls.paginator.num_pages }}</span>
{% if urls.has_next %}
<a href="#" class="pagination-link" data-page="{{ urls.next_page_number }}">Next</a>
<a href="#" class="pagination-link" data-page="{{ urls.paginator.num_pages }}">Last »</a>
{% endif %}
{% if urls.has_next %}
<a href="#" class="pagination-link" data-page="{{ urls.next_page_number }}">Next</a>
<a href="#" class="pagination-link" data-page="{{ urls.paginator.num_pages }}">Last »</a>
{% endif %}
</div>
</div>
</div>
</div>
<!-- Passing the selected filters as JavaScript variables -->
<script type="text/javascript">
// Make sure these variables are accessible in your JavaScript
var selectedStatus = {{ selected_status|safe }};
var selectedSearch = {{ selected_search|safe }};
var selectedSource = {{ selected_source|safe }};
var perPage = {{ per_page|default:"25" }};
</script>
<script>
//////////////////////////////////////////////////////////////////////
document.addEventListener("DOMContentLoaded", function () {
//////////////////////////////////////////////
// Theme
// Theme & Home
const themeToggle = document.getElementById("themeToggle");
const body = document.body;
// Load theme from localStorage
@@ -325,7 +403,10 @@ input[type="checkbox"] {
themeToggle.textContent = "🌞";
}
});
//////////////////////////////////////////////
// Home
document.getElementById("homeButton").addEventListener("click", function () {
window.location.href = "./"; // Change this to your homepage URL if different
});
//////////////////////////////////////////////
// Timestamp to local timezone
@@ -362,7 +443,6 @@ input[type="checkbox"] {
function toggleCheckboxes(section) {
const checkboxes = document.querySelectorAll(`[name='${section}']`);
const allChecked = Array.from(checkboxes).every(checkbox => checkbox.checked);
checkboxes.forEach(checkbox => {
checkbox.checked = !allChecked;
});
@@ -383,34 +463,30 @@ input[type="checkbox"] {
// Automatically submit the form when any checkbox changes
document.querySelectorAll('input[type="checkbox"]').forEach(function(checkbox) {
checkbox.addEventListener('change', function() {
// Automatically submit the form when a checkbox is toggled
document.getElementById('filterForm').submit();
//const currentUrl = new URL(window.location.href);
//currentUrl.searchParams.set('page', 1); // Reset page number to 1 when any checkbox changes
//window.location.href = currentUrl.toString(); // Redirect to the updated URL with the new filter values
});
});
//////////////////////////////////////////////////////////////////////
// Automatically submit the form when per_page dropdown changes
document.getElementById('perPageSelect').addEventListener('change', function() {
const currentUrl = new URL(window.location.href);
currentUrl.searchParams.set('per_page', this.value); // Update per_page value
currentUrl.searchParams.set('page', 1); // Reset page number to 1 when any checkbox changes
window.location.href = currentUrl.toString(); // Redirect to the updated URL with new per_page value
document.getElementById('filterForm').submit();
});
document.getElementById('timeFilterSelect').addEventListener('change', function() {
const currentUrl = new URL(window.location.href);
currentUrl.searchParams.set('days', this.value); // Update days value
currentUrl.searchParams.set('page', 1); // Reset page number to 1 when any checkbox changes
window.location.href = currentUrl.toString(); // Redirect to the updated URL with new days value
//document.getElementById('filterForm').submit(); // Submits the form instead of manually changing the URL
document.getElementById('filterForm').submit();
});
/*
document.getElementById('tableRadio').addEventListener('change', function() {
document.getElementById('tableViewContent').classList.remove('d-none');
document.getElementById('chartViewContent').classList.add('d-none');
document.getElementById('filterForm').submit();
});
document.getElementById('chartRadio').addEventListener('change', function() {
document.getElementById('chartViewContent').classList.remove('d-none');
document.getElementById('tableViewContent').classList.add('d-none');
document.getElementById('filterForm').submit();
});
*/
</script>

View File

@@ -4,7 +4,8 @@ from . import views
urlpatterns = [
path('', views.link_list, name='link_list'),
#
path('logs', views.logs, name='logs'),
path('logs_debug', views.logs_debug, name='logs_debug'),
path('logs_info', views.logs_info, name='logs_info'),
path('logs_error', views.logs_error, name='logs_error'),
#
path('charts/', views.charts, name='charts'),

View File

@@ -28,7 +28,8 @@ def link_list(request):
# Charts
"http://localhost:8000/api/charts",
# Logs
"http://localhost:8000/api/logs",
"http://localhost:8000/api/logs_debug",
"http://localhost:8000/api/logs_info",
"http://localhost:8000/api/logs_error",
# API tasks
] + [os.path.join(prefix, l) for l in links]
@@ -263,8 +264,13 @@ def logs_error(request):
file_content = f.read()
return HttpResponse(file_content, content_type="text/plain")
def logs(request):
with open(os.getenv("PATH_LOGS", "logs/log_app_fetcher.log"), "r") as f:
def logs_info(request):
    """Return the contents of the fetcher's info log file as plain text.

    The file path is taken from the ``PATH_LOGS_INFO`` environment variable
    and falls back to ``logs/log_app_fetcher_info.log`` when it is unset.
    ``request`` is accepted to satisfy the view signature but is not read.
    """
    log_path = os.getenv("PATH_LOGS_INFO", "logs/log_app_fetcher_info.log")
    with open(log_path, "r") as log_file:
        # The whole file is read into memory and returned in one response.
        body = log_file.read()
    return HttpResponse(body, content_type="text/plain")
def logs_debug(request):
    """Return the contents of the fetcher's debug log file as plain text.

    The file path is taken from the ``PATH_LOGS_DEBUG`` environment variable
    and falls back to ``logs/log_app_fetcher_debug.log`` when it is unset.
    ``request`` is accepted to satisfy the view signature but is not read.
    """
    log_path = os.getenv("PATH_LOGS_DEBUG", "logs/log_app_fetcher_debug.log")
    with open(log_path, "r") as log_file:
        # The whole file is read into memory and returned in one response.
        body = log_file.read()
    return HttpResponse(body, content_type="text/plain")
@@ -278,29 +284,40 @@ def filtered_urls(request):
statuses = Urls.STATUS_ENUM.choices
searches = Search.objects.all()
sources = Source.objects.all()
# Check if filters are applied; if not, select all by default
selected_status = request.GET.getlist('status', [str(status[0]) for status in statuses])
selected_search = request.GET.getlist('search', [str(search.id) for search in searches])
selected_source = request.GET.getlist('source', [str(source.id) for source in sources])
# Get selected parameters
selected_status = request.GET.getlist('status')
selected_search = request.GET.getlist('search')
selected_source = request.GET.getlist('source')
selected_days = request.GET.get("days", 30)
per_page = request.GET.get('per_page', 100) # Default is X URLs per page
page_number = request.GET.get('page') # Get the current page number
# charts = request.GET.get('charts', False)
# "Home" -> No parameters -> Override filter with default values
if ( len(request.GET.keys()) == 0 ):
selected_status = [str(status[0]) for status in statuses]
selected_search = [str(search.id) for search in searches]
selected_source = [str(source.id) for source in sources]
# Filter URLs based on selected filters
urls = Urls.objects.filter(
Q(urlssourcesearch__id_source__in=selected_source) &
Q(urlssourcesearch__id_search__in=selected_search) &
Q(status__in=selected_status) &
Q(ts_fetch__gte=now() - timedelta(days=float(selected_days)))
).distinct() # .order_by('-ts_fetch')
if ('' in selected_status) or ('' in selected_search) or ('' in selected_source):
urls = []
else:
urls = Urls.objects.filter(
Q(urlssourcesearch__id_source__in=selected_source) &
Q(urlssourcesearch__id_search__in=selected_search) &
Q(status__in=selected_status) &
Q(ts_fetch__gte=now() - timedelta(days=float(selected_days)))
).distinct() # .order_by('-ts_fetch')
# Custom replace search type
for s in searches:
s.type = s.type.replace("rss_feed", "rss").replace("url_host", "url").replace("keyword_search", "keyword")
# Pagination
per_page = request.GET.get('per_page', 25) # Default is 50 URLs per page
paginator = Paginator(urls, per_page) # Paginate the filtered URLs
page_number = request.GET.get('page') # Get the current page number
page_obj = paginator.get_page(page_number) # Get the current page object
# Map URL IDs to their sources & searches, only for subset of URLs (page of interest)
@@ -323,6 +340,7 @@ def filtered_urls(request):
"selected_days": selected_days,
"sources_map": sources_map,
"searches_map": searches_map,
# "charts": charts,
}
return render(request, 'filtered_urls.html', context)

View File

@@ -0,0 +1,128 @@
[
{
"model": "RepeatableTaskType",
"name": "Fetch Feeds",
"callable": "api.tasks.fetch_feeds",
"callable_args": [],
"callable_kwargs": [],
"enabled": true,
"queue": "default",
"repeat": null,
"at_front": false,
"timeout": null,
"result_ttl": 86400,
"cron_string": null,
"scheduled_time": "2025-03-27T14:33:56+00:00",
"interval": 15,
"interval_unit": "minutes",
"successful_runs": 215,
"failed_runs": 0,
"last_successful_run": "2025-03-27 14:18:58.028684+00:00",
"last_failed_run": null
},
{
"model": "RepeatableTaskType",
"name": "Process raw URLs",
"callable": "api.tasks.process_raw_urls",
"callable_args": [],
"callable_kwargs": [],
"enabled": true,
"queue": "low",
"repeat": null,
"at_front": false,
"timeout": null,
"result_ttl": 86400,
"cron_string": null,
"scheduled_time": "2025-03-27T14:35:08+00:00",
"interval": 1,
"interval_unit": "hours",
"successful_runs": 41,
"failed_runs": 0,
"last_successful_run": "2025-03-27 13:35:48.534489+00:00",
"last_failed_run": null
},
{
"model": "RepeatableTaskType",
"name": "Process error URLs",
"callable": "api.tasks.process_error_urls",
"callable_args": [],
"callable_kwargs": [],
"enabled": true,
"queue": "low",
"repeat": null,
"at_front": false,
"timeout": null,
"result_ttl": 86400,
"cron_string": null,
"scheduled_time": "2025-03-27T16:36:21+00:00",
"interval": 4,
"interval_unit": "hours",
"successful_runs": 10,
"failed_runs": 0,
"last_successful_run": "2025-03-27 12:37:28.301866+00:00",
"last_failed_run": null
},
{
"model": "RepeatableTaskType",
"name": "Fetch Parser",
"callable": "api.tasks.fetch_parser",
"callable_args": [],
"callable_kwargs": [],
"enabled": true,
"queue": "default",
"repeat": null,
"at_front": false,
"timeout": null,
"result_ttl": 86400,
"cron_string": null,
"scheduled_time": "2025-03-27T14:25:42+00:00",
"interval": 1,
"interval_unit": "hours",
"successful_runs": 44,
"failed_runs": 0,
"last_successful_run": "2025-03-27 13:25:46.205433+00:00",
"last_failed_run": null
},
{
"model": "RepeatableTaskType",
"name": "Fetch Search",
"callable": "api.tasks.fetch_search",
"callable_args": [],
"callable_kwargs": [],
"enabled": true,
"queue": "default",
"repeat": null,
"at_front": false,
"timeout": null,
"result_ttl": 86400,
"cron_string": null,
"scheduled_time": "2025-03-27T14:29:33+00:00",
"interval": 1,
"interval_unit": "hours",
"successful_runs": 46,
"failed_runs": 0,
"last_successful_run": "2025-03-27 13:33:00.628827+00:00",
"last_failed_run": null
},
{
"model": "RepeatableTaskType",
"name": "Process MissingKids URLs",
"callable": "api.tasks.process_missing_kids_urls",
"callable_args": [],
"callable_kwargs": [],
"enabled": true,
"queue": "default",
"repeat": null,
"at_front": false,
"timeout": null,
"result_ttl": 86400,
"cron_string": null,
"scheduled_time": "2025-03-27T14:37:50+00:00",
"interval": 2,
"interval_unit": "hours",
"successful_runs": 20,
"failed_runs": 0,
"last_successful_run": "2025-03-27 12:38:42.545373+00:00",
"last_failed_run": null
}
]