Handle the days filter in the URLs visualization; improve exception handling in url_processor
This commit is contained in:
@@ -101,20 +101,22 @@ class DB_Handler():
|
||||
try:
|
||||
# Get data
|
||||
dict_url_data = process_url(obj_url.url)
|
||||
# Not none or handle as exception
|
||||
assert(dict_url_data is not None)
|
||||
except Exception as e:
|
||||
if (raise_exception_on_error):
|
||||
# Simply raise exception
|
||||
raise Exception("Error processing URL")
|
||||
# Simply raise exception, handled in a different way
|
||||
raise Exception("Error processing URL, raising exception as expected")
|
||||
else:
|
||||
logger.debug("Error processing URL: {}\n{}\n{}".format(obj_url.url, str(e), traceback.format_exc()))
|
||||
# Set status to error
|
||||
logger.debug("Error processing URL: {}\n{}\n".format(obj_url.url, str(e), traceback.format_exc()))
|
||||
# Update status
|
||||
set_status(obj_url, Urls.STATUS_ENUM.ERROR)
|
||||
# Next URL
|
||||
return
|
||||
|
||||
dict_url_data = None
|
||||
|
||||
# (dict_url_data is None) or (Exception while processing URL) ? -> Error status
|
||||
if (dict_url_data is None):
|
||||
# Update status
|
||||
set_status(obj_url, Urls.STATUS_ENUM.ERROR)
|
||||
# Next URL
|
||||
return
|
||||
|
||||
# Invalid? e.g. binary data
|
||||
if (dict_url_data.get("override_status") == "invalid"):
|
||||
# Update status
|
||||
|
||||
@@ -52,17 +52,17 @@ def process_url(url):
|
||||
# Too many requests? Cool down...
|
||||
if ("Status code 429" in str(e.args)):
|
||||
# TODO: cool down and retry once?, proxy/VPN, ...
|
||||
logger.debug("TODO: Implement code 429")
|
||||
logger.debug("TODO: process_url Implement code 429")
|
||||
# Unavailable for legal reasons
|
||||
if ("Status code 451" in str(e.args)):
|
||||
# TODO: Bypass with VPN
|
||||
logger.debug("TODO: Implement code 451")
|
||||
logger.debug("TODO: process_url Implement code 451")
|
||||
# CloudFlare protection?
|
||||
if ("Website protected with Cloudflare" in str(e.args)):
|
||||
logger.debug("TODO: Implement bypass CloudFlare")
|
||||
logger.debug("TODO: process_url Implement bypass CloudFlare")
|
||||
# PerimeterX protection?
|
||||
if ("Website protected with PerimeterX" in str(e.args)):
|
||||
logger.debug("TODO: Implement bypass PerimeterX")
|
||||
logger.debug("TODO: process_url Implement bypass PerimeterX")
|
||||
|
||||
logger.warning("ArticleException for input URL {}\n{}".format(url, str(e.args)))
|
||||
return None
|
||||
|
||||
@@ -152,11 +152,13 @@ input[type="checkbox"] {
|
||||
|
||||
<!-- Filter by Time Range -->
|
||||
<h3>Fetch Date</h3>
|
||||
<select id="timeFilterSelect" name="selected_days">
|
||||
<select id="timeFilterSelect" name="days">
|
||||
<option value="0.25" {% if selected_days|stringformat:"s" == '0.25' %}selected{% endif %}>Last 6 hours</option>
|
||||
<option value="1" {% if selected_days|stringformat:"s" == '1' %}selected{% endif %}>Last 24 hours</option>
|
||||
<option value="7" {% if selected_days|stringformat:"s" == '7' %}selected{% endif %}>Last 7 days</option>
|
||||
<option value="30" {% if selected_days|stringformat:"s" == '30' %}selected{% endif %}>Last 30 days</option>
|
||||
<option value="90" {% if selected_days|stringformat:"s" == '90' %}selected{% endif %}>Last 90 days</option>
|
||||
<option value="365" {% if selected_days|stringformat:"s" == '365' %}selected{% endif %}>Last 365 days</option>
|
||||
</select>
|
||||
<br><br>
|
||||
|
||||
@@ -200,6 +202,7 @@ input[type="checkbox"] {
|
||||
</form>
|
||||
</div>
|
||||
|
||||
<!-- Table URLs data -->
|
||||
<div class="table-container">
|
||||
<table>
|
||||
<thead>
|
||||
@@ -296,21 +299,20 @@ input[type="checkbox"] {
|
||||
var selectedSearch = {{ selected_search|safe }};
|
||||
var selectedSource = {{ selected_source|safe }};
|
||||
var perPage = {{ per_page|default:"25" }};
|
||||
//var selectedDays = {{ selected_days|default:"30" }};
|
||||
</script>
|
||||
|
||||
<script>
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
document.addEventListener("DOMContentLoaded", function () {
|
||||
//////////////////////////////////////////////
|
||||
// Theme
|
||||
const themeToggle = document.getElementById("themeToggle");
|
||||
const body = document.body;
|
||||
|
||||
// Load theme from localStorage
|
||||
if (localStorage.getItem("theme") === "dark") {
|
||||
body.classList.add("dark-mode");
|
||||
themeToggle.textContent = "🌞";
|
||||
}
|
||||
|
||||
// Toggle theme on button click
|
||||
themeToggle.addEventListener("click", function () {
|
||||
if (body.classList.contains("dark-mode")) {
|
||||
@@ -323,7 +325,10 @@ input[type="checkbox"] {
|
||||
themeToggle.textContent = "🌞";
|
||||
}
|
||||
});
|
||||
//////////////////////////////////////////////
|
||||
|
||||
//////////////////////////////////////////////
|
||||
// Timestamp to local timezone
|
||||
document.querySelectorAll(".ts-fetch").forEach(element => {
|
||||
let utcDate = element.getAttribute("data-ts"); // Get timestamp from data attribute
|
||||
let options = { year: 'numeric', month: 'numeric', day: 'numeric', hour: '2-digit', minute: '2-digit', second: '2-digit', hour12:false};
|
||||
@@ -332,6 +337,7 @@ input[type="checkbox"] {
|
||||
element.textContent = localDate; // Update the text content
|
||||
}
|
||||
});
|
||||
//////////////////////////////////////////////
|
||||
});
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
@@ -342,7 +348,6 @@ input[type="checkbox"] {
|
||||
currentUrl.searchParams.set('page', pageNumber); // Update page parameter
|
||||
window.location.href = currentUrl.toString(); // Redirect to the updated URL
|
||||
}
|
||||
|
||||
// Attach event listeners to pagination links
|
||||
document.querySelectorAll('.pagination-link').forEach(link => {
|
||||
link.addEventListener('click', function(e) {
|
||||
@@ -398,9 +403,11 @@ input[type="checkbox"] {
|
||||
|
||||
document.getElementById('timeFilterSelect').addEventListener('change', function() {
|
||||
const currentUrl = new URL(window.location.href);
|
||||
currentUrl.searchParams.set('selected_days', this.value); // Update per_page value
|
||||
currentUrl.searchParams.set('days', this.value); // Update days value
|
||||
currentUrl.searchParams.set('page', 1); // Reset page number to 1 when the time filter changes
|
||||
window.location.href = currentUrl.toString(); // Redirect to the updated URL with new per_page value
|
||||
window.location.href = currentUrl.toString(); // Redirect to the updated URL with new days value
|
||||
|
||||
//document.getElementById('filterForm').submit(); // Submits the form instead of manually changing the URL
|
||||
});
|
||||
|
||||
|
||||
|
||||
@@ -208,7 +208,7 @@ def urls_by_fetch_date(request):
|
||||
|
||||
def urls_per_status(request):
|
||||
# Get the filtering date parameter
|
||||
days = int(request.GET.get('days', 30)) # Default is 30 days
|
||||
days = float(request.GET.get('days', 30)) # Default is 30 days
|
||||
start_date = timezone.now() - timedelta(days=days)
|
||||
|
||||
# Count the number of URLs grouped by status within the date range
|
||||
@@ -283,16 +283,14 @@ def filtered_urls(request):
|
||||
selected_status = request.GET.getlist('status', [str(status[0]) for status in statuses])
|
||||
selected_search = request.GET.getlist('search', [str(search.id) for search in searches])
|
||||
selected_source = request.GET.getlist('source', [str(source.id) for source in sources])
|
||||
selected_days = int(request.GET.get("selected_days", 30))
|
||||
|
||||
print(selected_days)
|
||||
selected_days = request.GET.get("days", 30)
|
||||
|
||||
# Filter URLs based on selected filters
|
||||
urls = Urls.objects.filter(
|
||||
Q(urlssourcesearch__id_source__in=selected_source) &
|
||||
Q(urlssourcesearch__id_search__in=selected_search) &
|
||||
Q(status__in=selected_status) &
|
||||
Q(ts_fetch__gte=now() - timedelta(days=selected_days))
|
||||
Q(ts_fetch__gte=now() - timedelta(days=float(selected_days)))
|
||||
).distinct() # .order_by('-ts_fetch')
|
||||
|
||||
# Custom replace search type
|
||||
|
||||
Reference in New Issue
Block a user