# Reconstructed from the git patch "Notifications, info and warning" for
# app_urls/fetcher/src/notifier.py. Only the definitions the patch adds in
# full are reproduced here. notify_telegram_info(last_hours, channel="INFO")
# is renamed/edited by the same patch but is only partially visible in it, so
# it is not repeated. Urls, UrlsSourceSearch, timezone, timedelta, Count,
# requests and os are expected to be imported at the top of the module.


def _format_counts(title, rows, label_key):
    """Return a "<title>:" section with one "<label>: <count>" line per row."""
    lines = ["\n{}:\n".format(title)]
    lines.extend(
        " {}: {}\n".format(row.get(label_key), row.get("count")) for row in rows
    )
    return "".join(lines)


def _send_telegram_message(message, channel, timeout=10):
    """POST *message* to the Telegram chat configured for *channel*.

    Credentials are read from the TELEGRAM_<channel>_BOT_TOKEN and
    TELEGRAM_<channel>_CHAT_ID environment variables.

    Returns:
        True when the request was issued, False when it was skipped because
        the credentials are not configured.
    """
    import logging

    logger = logging.getLogger(__name__)

    bot_token = os.environ.get("TELEGRAM_{}_BOT_TOKEN".format(channel), "")
    chat_id = os.environ.get("TELEGRAM_{}_CHAT_ID".format(channel), "")
    if not bot_token or not chat_id:
        # Without credentials the request could only fail remotely; say so
        # locally instead of posting to a malformed bot URL.
        logger.warning(
            "Telegram %s channel is not configured; notification skipped", channel
        )
        return False

    url = "https://api.telegram.org/bot{}/sendMessage".format(bot_token)
    params = {"chat_id": chat_id, "text": message}
    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.post(url, params=params, timeout=timeout)
    if not response.ok:
        logger.warning(
            "Telegram %s notification failed with HTTP %s",
            channel,
            response.status_code,
        )
    return True


def notify_telegram_warning(
    last_hours,
    channel="WARNING",
    *,
    min_urls=10,
    min_raw_ratio=0.5,
    min_sources=2,
):
    """Send a Telegram warning when recent fetch activity looks unhealthy.

    Looks at the URLs whose ``ts_fetch`` falls within the last *last_hours*
    hours and appends a warning section for each check that fails:

    * fewer than *min_urls* URLs in total;
    * the share of URLs with status ``"raw"`` is below *min_raw_ratio*
      (only evaluated when at least one URL exists);
    * fewer than *min_sources* distinct sources produced URLs.

    Nothing is sent when every check passes.

    Args:
        last_hours: Size of the look-back window, in hours.
        channel: Name used to pick the TELEGRAM_<channel>_* credentials.
        min_urls: Minimum total number of URLs expected in the window.
        min_raw_ratio: Minimum accepted raw/total ratio.
        min_sources: Minimum number of distinct sources expected.
    """
    start_date = timezone.now() - timedelta(hours=last_hours)

    # Number of URLs per status within the window. Materialised once so the
    # rows can be reused for both the totals and the listings.
    status_rows = list(
        Urls.objects.filter(ts_fetch__gte=start_date)
        .values("status")
        .annotate(count=Count("id"))
        .order_by("status")
    )
    counts_by_status = {row.get("status"): row.get("count") for row in status_rows}
    # Kept outside the dict so it cannot collide with a real status name, and
    # so an empty window yields 0 rather than a missing key.
    total = sum(counts_by_status.values())
    raw = counts_by_status.get("raw", 0)

    sections = []

    # Minimum amount of URLs (also fires when nothing at all was fetched).
    if total < min_urls:
        sections.append(
            "WARNING - Total #URLS during the last {} hours: {}\n".format(
                last_hours, total
            )
        )
        sections.append(_format_counts("URLs per status", status_rows, "status"))

    # Minimum ratio of raw URLs; skipped for an empty window to avoid 0/0.
    if total > 0:
        raw_ratio = raw / total
        if raw_ratio < min_raw_ratio:
            sections.append(
                "WARNING - Small ratio of processed raw URLs during the last {} hours: {}\n".format(
                    last_hours, "{}/{} ({:.0%})".format(raw, total, raw_ratio)
                )
            )
            sections.append(_format_counts("URLs per status", status_rows, "status"))

    # Number of URLs per source within the window.
    source_rows = list(
        UrlsSourceSearch.objects.filter(id_url__ts_fetch__gte=start_date)
        .values("id_source__source")
        .annotate(count=Count("id_url"))
        .order_by("id_source__source")
    )
    if len(source_rows) < min_sources:
        sections.append(
            "WARNING - Very few sources found URLs during the last {} hours".format(
                last_hours
            )
        )
        sections.append(
            _format_counts("URLs per source", source_rows, "id_source__source")
        )

    # TODO: URLs per search (grouped by id_search__search); a key should be
    # present for cnbc.com, foxnews.com, zerohedge.com, breitbart.com,
    # child abuse, child neglect.

    message = "".join(sections)
    if message:
        _send_telegram_message(message, channel)


def notify_telegram(last_hours=12):
    """Send the INFO summary, then run the WARNING checks, for *last_hours*."""
    # INFO
    notify_telegram_info(last_hours, channel="INFO")
    # WARNING
    notify_telegram_warning(last_hours, channel="WARNING")