from datetime import timedelta

import os
import traceback

import requests
from django.db.models import Count
from django.utils import timezone

from ..models import Urls, UrlsSourceSearch
from .logger import get_logger

logger = get_logger()


def notify_telegram_info(last_hours, channel="INFO"):
    """Send a digest of URL activity over the last `last_hours` hours to the INFO Telegram channel."""
    try:
        start_date = timezone.now() - timedelta(hours=last_hours)

        # Count the number of URLs grouped by status within the date range
        urls_data_status = Urls.objects.filter(ts_fetch__gte=start_date) \
            .values('status') \
            .annotate(count=Count('id')) \
            .order_by('status')

        # Count the number of URLs grouped by source
        urls_data_source = UrlsSourceSearch.objects \
            .filter(id_url__ts_fetch__gte=start_date) \
            .values('id_source__source') \
            .annotate(count=Count('id_url')) \
            .order_by('id_source__source')

        # Count the number of URLs grouped by search
        urls_data_search = UrlsSourceSearch.objects \
            .filter(id_url__ts_fetch__gte=start_date) \
            .values('id_search__search') \
            .annotate(count=Count('id_url')) \
            .order_by('id_search__search')

        bot_token = os.environ.get("TELEGRAM_{}_BOT_TOKEN".format(channel), "")
        chat_id = os.environ.get("TELEGRAM_{}_CHAT_ID".format(channel), "")

        message = "During the last {} hours:\n".format(last_hours)

        message += "\nURLs per status:\n"
        for o in urls_data_status:
            message += " {}: {}\n".format(o.get("status"), o.get("count"))

        message += "\nURLs per source:\n"
        for o in urls_data_source:
            message += " {}: {}\n".format(o.get("id_source__source"), o.get("count"))

        message += "\nURLs per search:\n"
        for o in urls_data_search:
            message += " {}: {}\n".format(o.get("id_search__search"), o.get("count"))

        url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
        params = {
            "chat_id": chat_id,
            "text": message
        }

        # POST; raise_for_status() surfaces HTTP errors so they land in the log below
        response = requests.post(url, params=params)
        response.raise_for_status()
    except Exception as e:
        logger.info("Exception while notifying status: {}\n{}".format(str(e), traceback.format_exc()))


def notify_telegram_warning(last_hours, channel="WARNING"):
    """Check URL activity over the last `last_hours` hours and alert the WARNING Telegram channel if it looks abnormal."""
    try:
        # Each failed check appends to the message; it stays empty when all checks pass
        message = ""

        start_date = timezone.now() - timedelta(hours=last_hours)

        # Count the number of URLs grouped by status within the date range
        urls_data_status = Urls.objects.filter(ts_fetch__gte=start_date) \
            .values('status') \
            .annotate(count=Count('id')) \
            .order_by('status')

        # Build a {status: count} dictionary, accumulating the overall total
        urls_data_status_dict = {}
        for o in urls_data_status:
            urls_data_status_dict[o.get("status")] = o.get("count")
            urls_data_status_dict["total"] = urls_data_status_dict.get("total", 0) + o.get("count")

        MINIMUM_URLS_THRESHOLD = 10
        MINIMUM_PROCESSED_URLS_RATIO = 0.5

        # Minimum amount of URLs (default to 0 so the check also fires when nothing was fetched)
        if urls_data_status_dict.get("total", 0) < MINIMUM_URLS_THRESHOLD:
            message += "WARNING - Total #URLS during the last {} hours: {}\n".format(last_hours, urls_data_status_dict.get("total", 0))
            message += "\nURLs per status:\n"
            for o in urls_data_status:
                message += " {}: {}\n".format(o.get("status"), o.get("count"))

        # Minimum ratio of processed raw URLs; report the ratio itself, not the total
        if urls_data_status_dict.get("total", 0) > 0:
            raw_ratio = urls_data_status_dict.get("raw", 0) / urls_data_status_dict["total"]
            if raw_ratio < MINIMUM_PROCESSED_URLS_RATIO:
                message += "WARNING - Small ratio of processed raw URLs during the last {} hours: {:.2f}\n".format(last_hours, raw_ratio)
                message += "\nURLs per status:\n"
                for o in urls_data_status:
                    message += " {}: {}\n".format(o.get("status"), o.get("count"))
        # Count the number of URLs grouped by source
        urls_data_source = UrlsSourceSearch.objects \
            .filter(id_url__ts_fetch__gte=start_date) \
            .values('id_source__source') \
            .annotate(count=Count('id_url')) \
            .order_by('id_source__source')

        MINIMUM_SOURCES = 2
        if len(urls_data_source) < MINIMUM_SOURCES:
            message += "WARNING - Very few sources found URLs during the last {} hours\n".format(last_hours)
            message += "\nURLs per source:\n"
            for o in urls_data_source:
                message += " {}: {}\n".format(o.get("id_source__source"), o.get("count"))

        """
        # TODO: URLs per search; a key should be present for cnbc.com, foxnews.com,
        # zerohedge.com, breitbart.com, child abuse, child neglect

        # Count the number of URLs grouped by search
        urls_data_search = UrlsSourceSearch.objects \
            .filter(id_url__ts_fetch__gte=start_date) \
            .values('id_search__search') \
            .annotate(count=Count('id_url')) \
            .order_by('id_search__search')

        message += "\nURLs per search:\n"
        for o in urls_data_search:
            message += " {}: {}\n".format(o.get("id_search__search"), o.get("count"))
        """

        # Only send when at least one warning was appended
        if message != "":
            bot_token = os.environ.get("TELEGRAM_{}_BOT_TOKEN".format(channel), "")
            chat_id = os.environ.get("TELEGRAM_{}_CHAT_ID".format(channel), "")

            url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
            params = {
                "chat_id": chat_id,
                "text": message
            }

            # POST; raise_for_status() surfaces HTTP errors so they land in the log below
            response = requests.post(url, params=params)
            response.raise_for_status()
    except Exception as e:
        logger.info("Exception while notifying status: {}\n{}".format(str(e), traceback.format_exc()))


def notify_telegram(last_hours=12):
    """Run both the INFO digest and the WARNING checks."""
    # INFO
    notify_telegram_info(last_hours, channel="INFO")
    # WARNING
    notify_telegram_warning(last_hours, channel="WARNING")
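# Usage sketch (assumption): these notifications are meant to run on a schedule.
# One way to wire that up is a hypothetical Django management command, e.g.
# management/commands/notify_telegram.py inside the app, driven by cron or a
# task scheduler. The import path below is illustrative; adjust it to wherever
# this module actually lives in the project.
#
#     from django.core.management.base import BaseCommand
#
#     from myapp.utils.notify import notify_telegram  # hypothetical path
#
#     class Command(BaseCommand):
#         help = "Send INFO/WARNING Telegram digests for recently fetched URLs"
#
#         def add_arguments(self, parser):
#             parser.add_argument("--last-hours", type=int, default=12)
#
#         def handle(self, *args, **options):
#             notify_telegram(last_hours=options["last_hours"])
#
# The TELEGRAM_INFO_BOT_TOKEN / TELEGRAM_INFO_CHAT_ID and the corresponding
# TELEGRAM_WARNING_* environment variables must be set before the command runs.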