Handle the "days" filter in the URLs visualization; improve exception handling around url_processor

This commit is contained in:
Luciano Gervasoni
2025-03-27 12:32:18 +01:00
parent 8dce5206af
commit a6b25fe915
4 changed files with 33 additions and 26 deletions

View File

@@ -101,20 +101,22 @@ class DB_Handler():
try: try:
# Get data # Get data
dict_url_data = process_url(obj_url.url) dict_url_data = process_url(obj_url.url)
# Not none or handle as exception
assert(dict_url_data is not None)
except Exception as e: except Exception as e:
if (raise_exception_on_error): if (raise_exception_on_error):
# Simply raise exception # Simply raise exception, handled in a different way
raise Exception("Error processing URL") raise Exception("Error processing URL, raising exception as expected")
else: else:
logger.debug("Error processing URL: {}\n{}\n{}".format(obj_url.url, str(e), traceback.format_exc()))
# Set status to error # Set status to error
logger.debug("Error processing URL: {}\n{}\n".format(obj_url.url, str(e), traceback.format_exc())) dict_url_data = None
# Update status
set_status(obj_url, Urls.STATUS_ENUM.ERROR) # (dict_url_data is None) or (Exception while processing URL) ? -> Error status
# Next URL if (dict_url_data is None):
return # Update status
set_status(obj_url, Urls.STATUS_ENUM.ERROR)
# Next URL
return
# Invalid? e.g. binary data # Invalid? e.g. binary data
if (dict_url_data.get("override_status") == "invalid"): if (dict_url_data.get("override_status") == "invalid"):
# Update status # Update status

View File

@@ -52,17 +52,17 @@ def process_url(url):
# Too many requests? Cool down... # Too many requests? Cool down...
if ("Status code 429" in str(e.args)): if ("Status code 429" in str(e.args)):
# TODO: cool down and retry once?, proxy/VPN, ... # TODO: cool down and retry once?, proxy/VPN, ...
logger.debug("TODO: Implement code 429") logger.debug("TODO: process_url Implement code 429")
# Unavailable for legal reasons # Unavailable for legal reasons
if ("Status code 451" in str(e.args)): if ("Status code 451" in str(e.args)):
# TODO: Bypass with VPN # TODO: Bypass with VPN
logger.debug("TODO: Implement code 451") logger.debug("TODO: process_url Implement code 451")
# CloudFlare protection? # CloudFlare protection?
if ("Website protected with Cloudflare" in str(e.args)): if ("Website protected with Cloudflare" in str(e.args)):
logger.debug("TODO: Implement bypass CloudFlare") logger.debug("TODO: process_url Implement bypass CloudFlare")
# PerimeterX protection? # PerimeterX protection?
if ("Website protected with PerimeterX" in str(e.args)): if ("Website protected with PerimeterX" in str(e.args)):
logger.debug("TODO: Implement bypass PerimeterX") logger.debug("TODO: process_url Implement bypass PerimeterX")
logger.warning("ArticleException for input URL {}\n{}".format(url, str(e.args))) logger.warning("ArticleException for input URL {}\n{}".format(url, str(e.args)))
return None return None

View File

@@ -152,11 +152,13 @@ input[type="checkbox"] {
<!-- Filter by Time Range --> <!-- Filter by Time Range -->
<h3>Fetch Date</h3> <h3>Fetch Date</h3>
<select id="timeFilterSelect" name="selected_days"> <select id="timeFilterSelect" name="days">
<option value="0.25" {% if selected_days|stringformat:"s" == '0.25' %}selected{% endif %}>Last 6 hours</option>
<option value="1" {% if selected_days|stringformat:"s" == '1' %}selected{% endif %}>Last 24 hours</option> <option value="1" {% if selected_days|stringformat:"s" == '1' %}selected{% endif %}>Last 24 hours</option>
<option value="7" {% if selected_days|stringformat:"s" == '7' %}selected{% endif %}>Last 7 days</option> <option value="7" {% if selected_days|stringformat:"s" == '7' %}selected{% endif %}>Last 7 days</option>
<option value="30" {% if selected_days|stringformat:"s" == '30' %}selected{% endif %}>Last 30 days</option> <option value="30" {% if selected_days|stringformat:"s" == '30' %}selected{% endif %}>Last 30 days</option>
<option value="90" {% if selected_days|stringformat:"s" == '90' %}selected{% endif %}>Last 90 days</option> <option value="90" {% if selected_days|stringformat:"s" == '90' %}selected{% endif %}>Last 90 days</option>
<option value="365" {% if selected_days|stringformat:"s" == '365' %}selected{% endif %}>Last 365 days</option>
</select> </select>
<br><br> <br><br>
@@ -200,6 +202,7 @@ input[type="checkbox"] {
</form> </form>
</div> </div>
<!-- Table URLs data -->
<div class="table-container"> <div class="table-container">
<table> <table>
<thead> <thead>
@@ -296,21 +299,20 @@ input[type="checkbox"] {
var selectedSearch = {{ selected_search|safe }}; var selectedSearch = {{ selected_search|safe }};
var selectedSource = {{ selected_source|safe }}; var selectedSource = {{ selected_source|safe }};
var perPage = {{ per_page|default:"25" }}; var perPage = {{ per_page|default:"25" }};
//var selectedDays = {{ selected_days|default:"30" }};
</script> </script>
<script> <script>
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
document.addEventListener("DOMContentLoaded", function () { document.addEventListener("DOMContentLoaded", function () {
//////////////////////////////////////////////
// Theme
const themeToggle = document.getElementById("themeToggle"); const themeToggle = document.getElementById("themeToggle");
const body = document.body; const body = document.body;
// Load theme from localStorage // Load theme from localStorage
if (localStorage.getItem("theme") === "dark") { if (localStorage.getItem("theme") === "dark") {
body.classList.add("dark-mode"); body.classList.add("dark-mode");
themeToggle.textContent = "🌞"; themeToggle.textContent = "🌞";
} }
// Toggle theme on button click // Toggle theme on button click
themeToggle.addEventListener("click", function () { themeToggle.addEventListener("click", function () {
if (body.classList.contains("dark-mode")) { if (body.classList.contains("dark-mode")) {
@@ -323,7 +325,10 @@ input[type="checkbox"] {
themeToggle.textContent = "🌞"; themeToggle.textContent = "🌞";
} }
}); });
//////////////////////////////////////////////
//////////////////////////////////////////////
// Timestamp to local timezone
document.querySelectorAll(".ts-fetch").forEach(element => { document.querySelectorAll(".ts-fetch").forEach(element => {
let utcDate = element.getAttribute("data-ts"); // Get timestamp from data attribute let utcDate = element.getAttribute("data-ts"); // Get timestamp from data attribute
let options = { year: 'numeric', month: 'numeric', day: 'numeric', hour: '2-digit', minute: '2-digit', second: '2-digit', hour12:false}; let options = { year: 'numeric', month: 'numeric', day: 'numeric', hour: '2-digit', minute: '2-digit', second: '2-digit', hour12:false};
@@ -332,6 +337,7 @@ input[type="checkbox"] {
element.textContent = localDate; // Update the text content element.textContent = localDate; // Update the text content
} }
}); });
//////////////////////////////////////////////
}); });
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
@@ -342,7 +348,6 @@ input[type="checkbox"] {
currentUrl.searchParams.set('page', pageNumber); // Update page parameter currentUrl.searchParams.set('page', pageNumber); // Update page parameter
window.location.href = currentUrl.toString(); // Redirect to the updated URL window.location.href = currentUrl.toString(); // Redirect to the updated URL
} }
// Attach event listeners to pagination links // Attach event listeners to pagination links
document.querySelectorAll('.pagination-link').forEach(link => { document.querySelectorAll('.pagination-link').forEach(link => {
link.addEventListener('click', function(e) { link.addEventListener('click', function(e) {
@@ -398,9 +403,11 @@ input[type="checkbox"] {
document.getElementById('timeFilterSelect').addEventListener('change', function() { document.getElementById('timeFilterSelect').addEventListener('change', function() {
const currentUrl = new URL(window.location.href); const currentUrl = new URL(window.location.href);
currentUrl.searchParams.set('selected_days', this.value); // Update per_page value currentUrl.searchParams.set('days', this.value); // Update days value
currentUrl.searchParams.set('page', 1); // Reset page number to 1 when any checkbox changes currentUrl.searchParams.set('page', 1); // Reset page number to 1 when any checkbox changes
window.location.href = currentUrl.toString(); // Redirect to the updated URL with new per_page value window.location.href = currentUrl.toString(); // Redirect to the updated URL with new days value
//document.getElementById('filterForm').submit(); // Submits the form instead of manually changing the URL
}); });

View File

@@ -208,7 +208,7 @@ def urls_by_fetch_date(request):
def urls_per_status(request): def urls_per_status(request):
# Get the filtering date parameter # Get the filtering date parameter
days = int(request.GET.get('days', 30)) # Default is 30 days days = float(request.GET.get('days', 30)) # Default is 30 days
start_date = timezone.now() - timedelta(days=days) start_date = timezone.now() - timedelta(days=days)
# Count the number of URLs grouped by status within the date range # Count the number of URLs grouped by status within the date range
@@ -283,16 +283,14 @@ def filtered_urls(request):
selected_status = request.GET.getlist('status', [str(status[0]) for status in statuses]) selected_status = request.GET.getlist('status', [str(status[0]) for status in statuses])
selected_search = request.GET.getlist('search', [str(search.id) for search in searches]) selected_search = request.GET.getlist('search', [str(search.id) for search in searches])
selected_source = request.GET.getlist('source', [str(source.id) for source in sources]) selected_source = request.GET.getlist('source', [str(source.id) for source in sources])
selected_days = int(request.GET.get("selected_days", 30)) selected_days = request.GET.get("days", 30)
print(selected_days)
# Filter URLs based on selected filters # Filter URLs based on selected filters
urls = Urls.objects.filter( urls = Urls.objects.filter(
Q(urlssourcesearch__id_source__in=selected_source) & Q(urlssourcesearch__id_source__in=selected_source) &
Q(urlssourcesearch__id_search__in=selected_search) & Q(urlssourcesearch__id_search__in=selected_search) &
Q(status__in=selected_status) & Q(status__in=selected_status) &
Q(ts_fetch__gte=now() - timedelta(days=selected_days)) Q(ts_fetch__gte=now() - timedelta(days=float(selected_days)))
).distinct() # .order_by('-ts_fetch') ).distinct() # .order_by('-ts_fetch')
# Custom replace search type # Custom replace search type