import os

# BUGFIX: timedelta is part of datetime's public API, not django.utils.timezone's
# (importing it from there only worked incidentally).
from datetime import timedelta

import requests
from django.db.models import Count
from django.utils import timezone
from django.utils.timezone import now

from ..models import Urls, Source, Search, UrlContent, UrlsSourceSearch, UrlsDuplicate


def _send_telegram(channel, message):
    """POST *message* to the Telegram bot/chat configured for *channel*.

    Credentials come from the TELEGRAM_<channel>_BOT_TOKEN and
    TELEGRAM_<channel>_CHAT_ID environment variables; if unset, the request is
    still issued with empty credentials (Telegram rejects it server-side).
    """
    bot_token = os.environ.get("TELEGRAM_{}_BOT_TOKEN".format(channel), "")
    chat_id = os.environ.get("TELEGRAM_{}_CHAT_ID".format(channel), "")
    url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
    params = {
        "chat_id": chat_id,
        "text": message
    }
    # Telegram's Bot API accepts sendMessage arguments as query parameters.
    requests.post(url, params=params)


def _format_counts(rows, key):
    """Render annotated queryset rows (dicts with *key* and 'count') as ' k: v' lines."""
    return "".join(" {}: {}\n".format(o.get(key), o.get("count")) for o in rows)


def notify_telegram_info(last_hours, channel="INFO"):
    """Send an INFO summary of URL activity during the last *last_hours* hours.

    Reports counts of URLs grouped by status, by source, and by search.
    """
    start_date = timezone.now() - timedelta(hours=last_hours)

    # Count the number of URLs grouped by status within the date range.
    urls_data_status = Urls.objects.filter(ts_fetch__gte=start_date) \
        .values('status') \
        .annotate(count=Count('id')) \
        .order_by('status')

    # Count the number of URLs grouped by source.
    urls_data_source = UrlsSourceSearch.objects \
        .filter(id_url__ts_fetch__gte=start_date) \
        .values('id_source__source') \
        .annotate(count=Count('id_url')) \
        .order_by('id_source__source')

    # Count the number of URLs grouped by search.
    urls_data_search = UrlsSourceSearch.objects \
        .filter(id_url__ts_fetch__gte=start_date) \
        .values('id_search__search') \
        .annotate(count=Count('id_url')) \
        .order_by('id_search__search')

    message = "During the last {} hours:\n".format(last_hours)
    message += "\nURLs per status:\n"
    message += _format_counts(urls_data_status, "status")
    message += "\nURLs per source:\n"
    message += _format_counts(urls_data_source, "id_source__source")
    message += "\nURLs per search:\n"
    message += _format_counts(urls_data_search, "id_search__search")

    _send_telegram(channel, message)


def notify_telegram_warning(last_hours, channel="WARNING"):
    """Send WARNING alerts when URL activity over the window looks unhealthy.

    Checks: total URL count below a threshold, ratio of raw URLs below a
    threshold, and too few distinct sources. Sends a message only if at
    least one check fired.
    """
    message = ""
    start_date = timezone.now() - timedelta(hours=last_hours)

    # Count the number of URLs grouped by status within the date range.
    urls_data_status = Urls.objects.filter(ts_fetch__gte=start_date) \
        .values('status') \
        .annotate(count=Count('id')) \
        .order_by('status')

    # Build status -> count dictionary, plus a running "total" across statuses.
    urls_data_status_dict = {}
    for o in urls_data_status:
        urls_data_status_dict[o.get("status")] = o.get("count")
        urls_data_status_dict["total"] = urls_data_status_dict.get("total", 0) + o.get("count")

    MINIMUM_URLS_THRESHOLD = 10
    MINIMUM_PROCESSED_URLS_RATIO = 0.5

    # BUGFIX: default to 0 — when no URLs fell in the window the dict was
    # empty, .get("total") returned None, and the comparison below raised
    # TypeError instead of producing the (most severe) warning.
    total = urls_data_status_dict.get("total", 0)

    # Minimum amount of URLs.
    if total < MINIMUM_URLS_THRESHOLD:
        message += "WARNING - Total #URLS during the last {} hours: {}\n".format(last_hours, total)
        message += "\nURLs per status:\n"
        message += _format_counts(urls_data_status, "status")

    # Minimum ratio of processed raw urls.
    # BUGFIX: default "raw" to 0 — a window with no raw-status rows made
    # .get("raw") return None and the division raise TypeError.
    # NOTE(review): this fires when raw/total is SMALL, but the message text
    # says "Small ratio of processed raw URLs" — confirm the intended
    # direction of the comparison with the author.
    if total > 0:
        if urls_data_status_dict.get("raw", 0) / total < MINIMUM_PROCESSED_URLS_RATIO:
            message += "WARNING - Small ratio of processed raw URLs during the last {} hours: {}\n".format(last_hours, total)
            message += "\nURLs per status:\n"
            message += _format_counts(urls_data_status, "status")

    # Count the number of URLs grouped by source.
    urls_data_source = UrlsSourceSearch.objects \
        .filter(id_url__ts_fetch__gte=start_date) \
        .values('id_source__source') \
        .annotate(count=Count('id_url')) \
        .order_by('id_source__source')

    MINIMUM_SOURCES = 2
    if len(urls_data_source) < MINIMUM_SOURCES:
        message += "WARNING - Very few sources found URLs during the last {} hours".format(last_hours)
        message += "\nURLs per source:\n"
        message += _format_counts(urls_data_source, "id_source__source")

    # TODO: warn on URLs per search — expected keys should be present for
    # cnbc.com, foxnews.com, zerohedge.com, breitbart.com, child abuse,
    # child neglect (group UrlsSourceSearch by 'id_search__search').

    # Only send when at least one warning fired.
    if message != "":
        _send_telegram(channel, message)


def notify_telegram(last_hours=12):
    """Send both the INFO summary and any WARNING alerts for the last *last_hours* hours."""
    # INFO
    notify_telegram_info(last_hours, channel="INFO")
    # WARNING
    notify_telegram_warning(last_hours, channel="WARNING")