From f44b784715a1ac98899c1443ad520533fdc26f00 Mon Sep 17 00:00:00 2001
From: Luciano Gervasoni
Date: Tue, 9 Sep 2025 22:06:23 +0200
Subject: [PATCH] Notifier: info and warning notifications, wrap sends in
 try/except

---
 app_urls/fetcher/src/notifier.py | 228 ++++++++++++++++---------------
 1 file changed, 118 insertions(+), 110 deletions(-)

diff --git a/app_urls/fetcher/src/notifier.py b/app_urls/fetcher/src/notifier.py
index d1a4913..86d245a 100644
--- a/app_urls/fetcher/src/notifier.py
+++ b/app_urls/fetcher/src/notifier.py
@@ -4,129 +4,50 @@ from ..models import Urls, Source, Search, UrlContent, UrlsSourceSearch, UrlsDup
 from django.db.models import Count
 import requests
 import os
+from .logger import get_logger
 
+logger = get_logger()
 
 def notify_telegram_info(last_hours, channel="INFO"):
-    start_date = timezone.now() - timedelta(hours=last_hours)
-
-    # Count the number of URLs grouped by status within the date range
-    urls_data_status = Urls.objects.filter(ts_fetch__gte=start_date) \
-        .values('status') \
-        .annotate(count=Count('id')) \
-        .order_by('status')
-
-    # Count the number of URLs grouped by source
-    urls_data_source = UrlsSourceSearch.objects \
-        .filter(id_url__ts_fetch__gte=start_date) \
-        .values('id_source__source') \
-        .annotate(count=Count('id_url')) \
-        .order_by('id_source__source')
-
-    # Count the number of URLs grouped by search
-    urls_data_search = UrlsSourceSearch.objects \
-        .filter(id_url__ts_fetch__gte=start_date) \
-        .values('id_search__search') \
-        .annotate(count=Count('id_url')) \
-        .order_by('id_search__search')
+    try:
+        start_date = timezone.now() - timedelta(hours=last_hours)
+
+        # Count the number of URLs grouped by status within the date range
+        urls_data_status = Urls.objects.filter(ts_fetch__gte=start_date) \
+            .values('status') \
+            .annotate(count=Count('id')) \
+            .order_by('status')
+
+        # Count the number of URLs grouped by source
+        urls_data_source = UrlsSourceSearch.objects \
+            .filter(id_url__ts_fetch__gte=start_date) \
+            .values('id_source__source') \
+            .annotate(count=Count('id_url')) \
+            .order_by('id_source__source')
+
+        # Count the number of URLs grouped by search
+        urls_data_search = UrlsSourceSearch.objects \
+            .filter(id_url__ts_fetch__gte=start_date) \
+            .values('id_search__search') \
+            .annotate(count=Count('id_url')) \
+            .order_by('id_search__search')
 
-    bot_token = os.environ.get("TELEGRAM_{}_BOT_TOKEN".format(channel), "")
-    chat_id = os.environ.get("TELEGRAM_{}_CHAT_ID".format(channel), "")
+        bot_token = os.environ.get("TELEGRAM_{}_BOT_TOKEN".format(channel), "")
+        chat_id = os.environ.get("TELEGRAM_{}_CHAT_ID".format(channel), "")
 
-    message = "During the last {} hours:\n".format(last_hours)
-
-    message += "\nURLs per status:\n"
-    for o in urls_data_status:
-        message += "  {}: {}\n".format(o.get("status"), o.get("count"))
-    message += "\nURLs per source:\n"
-    for o in urls_data_source:
-        message += "  {}: {}\n".format(o.get("id_source__source"), o.get("count"))
-    message += "\nURLs per search:\n"
-    for o in urls_data_search:
-        message += "  {}: {}\n".format(o.get("id_search__search"), o.get("count"))
-
-
-    url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
-    params = {
-        "chat_id": chat_id,
-        "text": message
-    }
-
-    # POST
-    response = requests.post(url, params=params)
-
-
-def notify_telegram_warning(last_hours, channel="WARNING"):
-    # Message appending logic
-    message = ""
-
-    start_date = timezone.now() - timedelta(hours=last_hours)
-
-    # Count the number of URLs grouped by status within the date range
-    urls_data_status = Urls.objects.filter(ts_fetch__gte=start_date) \
-        .values('status') \
-        .annotate(count=Count('id')) \
-        .order_by('status')
-
-    # Build dictionary
-    urls_data_status_dict = {}
-    for o in urls_data_status:
-        # #STATUS
-        urls_data_status_dict[o.get("status")] = o.get("count")
-        # #TOTAL
-        urls_data_status_dict["total"] = urls_data_status_dict.get("total", 0) + o.get("count")
-
-    MINIMUM_URLS_THRESHOLD = 10
-    MINIMUM_PROCESSED_URLS_RATIO = 0.5
-
-    # Minimum amount of URLs
-    if (urls_data_status_dict.get("total") < MINIMUM_URLS_THRESHOLD):
-        message += "WARNING - Total #URLS during the last {} hours: {}\n".format(last_hours, urls_data_status_dict.get("total"))
+        message = "During the last {} hours:\n".format(last_hours)
+
+        message += "\nURLs per status:\n"
         for o in urls_data_status:
             message += "  {}: {}\n".format(o.get("status"), o.get("count"))
-
-    # Minimum ratio of processed raw urls
-    if (urls_data_status_dict.get("total") > 0):
-        if (urls_data_status_dict.get("raw") / urls_data_status_dict.get("total") < MINIMUM_PROCESSED_URLS_RATIO):
-            message += "WARNING - Small ratio of processed raw URLs during the last {} hours: {}\n".format(last_hours, urls_data_status_dict.get("total"))
-            message += "\nURLs per status:\n"
-            for o in urls_data_status:
-                message += "  {}: {}\n".format(o.get("status"), o.get("count"))
-
-
-    # Count the number of URLs grouped by source
-    urls_data_source = UrlsSourceSearch.objects \
-        .filter(id_url__ts_fetch__gte=start_date) \
-        .values('id_source__source') \
-        .annotate(count=Count('id_url')) \
-        .order_by('id_source__source')
-
-    MINIMUM_SOURCES = 2
-    if (len(urls_data_source) < MINIMUM_SOURCES):
-        message += "WARNING - Very few sources found URLs during the last {} hours".format(last_hours)
         message += "\nURLs per source:\n"
         for o in urls_data_source:
             message += "  {}: {}\n".format(o.get("id_source__source"), o.get("count"))
+        message += "\nURLs per search:\n"
+        for o in urls_data_search:
+            message += "  {}: {}\n".format(o.get("id_search__search"), o.get("count"))
 
-    """
-    # TODO: URLs per search, key should be present for cnbc.com, foxnews.com, zerohedge.com, breitbart.com, child abuse, child neglect
-    # Count the number of URLs grouped by search
-    urls_data_search = UrlsSourceSearch.objects \
-        .filter(id_url__ts_fetch__gte=start_date) \
-        .values('id_search__search') \
-        .annotate(count=Count('id_url')) \
-        .order_by('id_search__search')
-
-    message += "\nURLs per search:\n"
-    for o in urls_data_search:
-        message += "  {}: {}\n".format(o.get("id_search__search"), o.get("count"))
-    """
-
-    # Valid message body?
-    if (message != ""):
-        bot_token = os.environ.get("TELEGRAM_{}_BOT_TOKEN".format(channel), "")
-        chat_id = os.environ.get("TELEGRAM_{}_CHAT_ID".format(channel), "")
-
         url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
         params = {
             "chat_id": chat_id,
@@ -135,6 +56,93 @@ def notify_telegram_warning(last_hours, channel="WARNING"):
 
         # POST
         response = requests.post(url, params=params)
+    except Exception as e:
+        logger.error("Exception while sending the info notification: {}".format(str(e)))
+
+
+def notify_telegram_warning(last_hours, channel="WARNING"):
+    try:
+        # Message appending logic
+        message = ""
+
+        start_date = timezone.now() - timedelta(hours=last_hours)
+
+        # Count the number of URLs grouped by status within the date range
+        urls_data_status = Urls.objects.filter(ts_fetch__gte=start_date) \
+            .values('status') \
+            .annotate(count=Count('id')) \
+            .order_by('status')
+
+        # Build dictionary
+        urls_data_status_dict = {}
+        for o in urls_data_status:
+            # #STATUS
+            urls_data_status_dict[o.get("status")] = o.get("count")
+            # #TOTAL
+            urls_data_status_dict["total"] = urls_data_status_dict.get("total", 0) + o.get("count")
+
+        MINIMUM_URLS_THRESHOLD = 10
+        MINIMUM_PROCESSED_URLS_RATIO = 0.5
+
+        # Minimum amount of URLs (default to 0 so an empty period cannot compare None)
+        if (urls_data_status_dict.get("total", 0) < MINIMUM_URLS_THRESHOLD):
+            message += "WARNING - Total #URLs during the last {} hours: {}\n".format(last_hours, urls_data_status_dict.get("total", 0))
+            message += "\nURLs per status:\n"
+            for o in urls_data_status:
+                message += "  {}: {}\n".format(o.get("status"), o.get("count"))
+
+        # Minimum ratio of raw URLs among all fetched URLs
+        if (urls_data_status_dict.get("total", 0) > 0):
+            if (urls_data_status_dict.get("raw", 0) / urls_data_status_dict.get("total") < MINIMUM_PROCESSED_URLS_RATIO):
+                message += "WARNING - Small ratio of raw URLs during the last {} hours: {} raw out of {} total\n".format(last_hours, urls_data_status_dict.get("raw", 0), urls_data_status_dict.get("total"))
+                message += "\nURLs per status:\n"
+                for o in urls_data_status:
+                    message += "  {}: {}\n".format(o.get("status"), o.get("count"))
+
+        # Count the number of URLs grouped by source
+        urls_data_source = UrlsSourceSearch.objects \
+            .filter(id_url__ts_fetch__gte=start_date) \
+            .values('id_source__source') \
+            .annotate(count=Count('id_url')) \
+            .order_by('id_source__source')
+
+        MINIMUM_SOURCES = 2
+        if (len(urls_data_source) < MINIMUM_SOURCES):
+            message += "WARNING - Very few sources found URLs during the last {} hours\n".format(last_hours)
+            message += "\nURLs per source:\n"
+            for o in urls_data_source:
+                message += "  {}: {}\n".format(o.get("id_source__source"), o.get("count"))
+
+        """
+        # TODO: URLs per search, key should be present for cnbc.com, foxnews.com, zerohedge.com, breitbart.com, child abuse, child neglect
+        # Count the number of URLs grouped by search
+        urls_data_search = UrlsSourceSearch.objects \
+            .filter(id_url__ts_fetch__gte=start_date) \
+            .values('id_search__search') \
+            .annotate(count=Count('id_url')) \
+            .order_by('id_search__search')
+
+        message += "\nURLs per search:\n"
+        for o in urls_data_search:
+            message += "  {}: {}\n".format(o.get("id_search__search"), o.get("count"))
+        """
+
+        # Valid message body?
+        if (message != ""):
+            bot_token = os.environ.get("TELEGRAM_{}_BOT_TOKEN".format(channel), "")
+            chat_id = os.environ.get("TELEGRAM_{}_CHAT_ID".format(channel), "")
+
+            url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
+            params = {
+                "chat_id": chat_id,
+                "text": message
+            }
+
+            # POST
+            response = requests.post(url, params=params)
+    except Exception as e:
+        logger.error("Exception while sending the warning notification: {}".format(str(e)))
 
 
 def notify_telegram(last_hours=12):
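
Note on the grouped counts: `values('status').annotate(count=Count('id'))` yields one dict per distinct status, which is exactly the shape the warning thresholds consume. A minimal runnable sketch of that logic, with made-up rows (the status names and counts are illustrative, not taken from the app's models); it also shows why `"raw"` needs a default of 0:

```python
# Rows mimic what .values('status').annotate(count=Count('id')) returns;
# the statuses and counts below are illustrative only.
urls_data_status = [
    {"status": "raw", "count": 3},
    {"status": "processed", "count": 9},
]

# Build the same status -> count dictionary as the patch, plus a running total.
urls_data_status_dict = {}
for o in urls_data_status:
    urls_data_status_dict[o["status"]] = o["count"]
    urls_data_status_dict["total"] = urls_data_status_dict.get("total", 0) + o["count"]

MINIMUM_URLS_THRESHOLD = 10
MINIMUM_PROCESSED_URLS_RATIO = 0.5

total = urls_data_status_dict.get("total", 0)
# Defaulting "raw" to 0 avoids a TypeError when no raw URLs were fetched.
raw = urls_data_status_dict.get("raw", 0)

if total < MINIMUM_URLS_THRESHOLD:
    print("warn: low total:", total)
if total > 0 and raw / total < MINIMUM_PROCESSED_URLS_RATIO:
    print("warn: low raw ratio:", raw / total)  # 3 / 12 = 0.25 here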
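The send path is identical in both functions, so it could be factored into one helper. A minimal sketch, assuming only the environment variable names already used in the patch (`TELEGRAM_INFO_BOT_TOKEN` / `TELEGRAM_INFO_CHAT_ID` and the `WARNING` equivalents); the helper name `send_telegram_message` and the timeout value are illustrative choices, not part of the patch:

```python
import os
import requests

def send_telegram_message(message, channel="INFO"):
    """Hypothetical helper: POST a message to the channel's Telegram chat."""
    bot_token = os.environ.get("TELEGRAM_{}_BOT_TOKEN".format(channel), "")
    chat_id = os.environ.get("TELEGRAM_{}_CHAT_ID".format(channel), "")
    if not bot_token or not chat_id:
        # Surfacing missing configuration beats silently posting to "".
        raise RuntimeError("Telegram credentials missing for channel {}".format(channel))

    url = "https://api.telegram.org/bot{}/sendMessage".format(bot_token)
    # data= sends the fields in the POST body; the timeout keeps the
    # notifier from hanging indefinitely if api.telegram.org stalls.
    response = requests.post(url, data={"chat_id": chat_id, "text": message}, timeout=10)
    response.raise_for_status()  # turn HTTP errors into exceptions the caller can log
    return response
```

With the try/except blocks already in place, anything raised here would land in the same `logger.error` call, so a failed send becomes visible instead of silently returning an ignored response.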
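On the exception handlers: if `get_logger()` returns a standard `logging.Logger` (an assumption; the patch's `.logger` module is not shown), `logger.exception` inside an `except` block logs at ERROR level and appends the full traceback, which is more useful for debugging than `str(e)` alone. A small self-contained sketch:

```python
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("fetcher.notifier")  # stands in for the app's get_logger()

def risky():
    raise ValueError("boom")

try:
    risky()
except Exception:
    # Logs "Exception while sending the notification" at ERROR level,
    # followed by the traceback of the ValueError above.
    logger.exception("Exception while sending the notification")
```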