from datetime import timedelta
import os

import requests

from django.db.models import Count
from django.utils import timezone

from ..models import Urls, Source, Search, UrlContent, UrlsSourceSearch, UrlsDuplicate
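# Per-channel credentials are read from the environment using the
# "TELEGRAM_{CHANNEL}_..." naming scheme used below, i.e. for the two channels
# wired up in this module:
#   TELEGRAM_INFO_BOT_TOKEN,    TELEGRAM_INFO_CHAT_ID
#   TELEGRAM_WARNING_BOT_TOKEN, TELEGRAM_WARNING_CHAT_ID
# A missing variable falls back to an empty string, in which case the
# sendMessage request cannot succeed (the response is never checked).
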
def notify_telegram_info(last_hours, channel="INFO"):
    start_date = timezone.now() - timedelta(hours=last_hours)

    # Count the number of URLs grouped by status within the date range
    urls_data_status = Urls.objects.filter(ts_fetch__gte=start_date) \
        .values('status') \
        .annotate(count=Count('id')) \
        .order_by('status')
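
    # Note: values('status').annotate(count=Count('id')) yields one dict per
    # distinct status, e.g. {'status': 'raw', 'count': 42}, which is why the
    # message-building loops below read fields with o.get(...). The same
    # pattern is used for the per-source and per-search aggregations.
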
    # Count the number of URLs grouped by source
    urls_data_source = UrlsSourceSearch.objects \
        .filter(id_url__ts_fetch__gte=start_date) \
        .values('id_source__source') \
        .annotate(count=Count('id_url')) \
        .order_by('id_source__source')

    # Count the number of URLs grouped by search
    urls_data_search = UrlsSourceSearch.objects \
        .filter(id_url__ts_fetch__gte=start_date) \
        .values('id_search__search') \
        .annotate(count=Count('id_url')) \
        .order_by('id_search__search')

    bot_token = os.environ.get("TELEGRAM_{}_BOT_TOKEN".format(channel), "")
    chat_id = os.environ.get("TELEGRAM_{}_CHAT_ID".format(channel), "")

    # Build the summary message
    message = "During the last {} hours:\n".format(last_hours)

    message += "\nURLs per status:\n"
    for o in urls_data_status:
        message += "    {}: {}\n".format(o.get("status"), o.get("count"))

    message += "\nURLs per source:\n"
    for o in urls_data_source:
        message += "    {}: {}\n".format(o.get("id_source__source"), o.get("count"))

    message += "\nURLs per search:\n"
    for o in urls_data_search:
        message += "    {}: {}\n".format(o.get("id_search__search"), o.get("count"))

    # POST the message to the Telegram Bot API
    url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
    params = {
        "chat_id": chat_id,
        "text": message
    }
    response = requests.post(url, params=params)
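
# notify_telegram_warning() below only posts a message when at least one of its
# checks fires: too few URLs fetched in the window, a suspicious share of "raw"
# URLs relative to the total, or too few distinct sources contributing URLs.
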
def notify_telegram_warning(last_hours, channel="WARNING"):
    # Warnings are appended to this message; it stays empty if all checks pass
    message = ""

    start_date = timezone.now() - timedelta(hours=last_hours)

    # Count the number of URLs grouped by status within the date range
    urls_data_status = Urls.objects.filter(ts_fetch__gte=start_date) \
        .values('status') \
        .annotate(count=Count('id')) \
        .order_by('status')

    # Build a {status: count} dictionary, plus a running total
    urls_data_status_dict = {}
    for o in urls_data_status:
        # #STATUS
        urls_data_status_dict[o.get("status")] = o.get("count")
        # #TOTAL
        urls_data_status_dict["total"] = urls_data_status_dict.get("total", 0) + o.get("count")

    MINIMUM_URLS_THRESHOLD = 10
    MINIMUM_PROCESSED_URLS_RATIO = 0.5

    # Minimum amount of URLs
    if urls_data_status_dict.get("total", 0) < MINIMUM_URLS_THRESHOLD:
        message += "WARNING - Total #URLS during the last {} hours: {}\n".format(last_hours, urls_data_status_dict.get("total", 0))
        message += "\nURLs per status:\n"
        for o in urls_data_status:
            message += "    {}: {}\n".format(o.get("status"), o.get("count"))

    # Minimum ratio of processed raw URLs
    if urls_data_status_dict.get("total", 0) > 0:
        if urls_data_status_dict.get("raw", 0) / urls_data_status_dict.get("total") < MINIMUM_PROCESSED_URLS_RATIO:
            message += "WARNING - Small ratio of processed raw URLs during the last {} hours: {}\n".format(last_hours, urls_data_status_dict.get("total"))
            message += "\nURLs per status:\n"
            for o in urls_data_status:
                message += "    {}: {}\n".format(o.get("status"), o.get("count"))

    # Count the number of URLs grouped by source
    urls_data_source = UrlsSourceSearch.objects \
        .filter(id_url__ts_fetch__gte=start_date) \
        .values('id_source__source') \
        .annotate(count=Count('id_url')) \
        .order_by('id_source__source')

    MINIMUM_SOURCES = 2
    if len(urls_data_source) < MINIMUM_SOURCES:
        message += "WARNING - Very few sources found URLs during the last {} hours".format(last_hours)
        message += "\nURLs per source:\n"
        for o in urls_data_source:
            message += "    {}: {}\n".format(o.get("id_source__source"), o.get("count"))

    """
    # TODO: URLs per search, key should be present for cnbc.com, foxnews.com, zerohedge.com, breitbart.com, child abuse, child neglect
    # Count the number of URLs grouped by search
    urls_data_search = UrlsSourceSearch.objects \
        .filter(id_url__ts_fetch__gte=start_date) \
        .values('id_search__search') \
        .annotate(count=Count('id_url')) \
        .order_by('id_search__search')

    message += "\nURLs per search:\n"
    for o in urls_data_search:
        message += "    {}: {}\n".format(o.get("id_search__search"), o.get("count"))
    """

    # Only send if at least one warning was appended
    if message != "":
        bot_token = os.environ.get("TELEGRAM_{}_BOT_TOKEN".format(channel), "")
        chat_id = os.environ.get("TELEGRAM_{}_CHAT_ID".format(channel), "")

        # POST the message to the Telegram Bot API
        url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
        params = {
            "chat_id": chat_id,
            "text": message
        }
        response = requests.post(url, params=params)
def notify_telegram(last_hours=12):
    # INFO
    notify_telegram_info(last_hours, channel="INFO")
    # WARNING
    notify_telegram_warning(last_hours, channel="WARNING")
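
# Example usage (a sketch, not part of the original module): notify_telegram()
# is presumably invoked on a schedule (cron, a management command, Celery beat,
# or similar). Assuming the module can be imported as shown (the import path is
# hypothetical), a job or a Django shell session could run:
#
#     from .notify_telegram import notify_telegram   # hypothetical path
#     notify_telegram(last_hours=12)
#
# The INFO channel always receives a summary; the WARNING channel only receives
# a message when one of the threshold checks in notify_telegram_warning() fires.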