matitos_news/app_urls/fetcher/src/notifier.py
Luciano Gervasoni 2f035a4222 Notifier fix
2025-10-14 12:23:05 +02:00
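"""Telegram notifier for the URL fetcher.

Builds a digest (INFO) and, when recent fetch statistics look abnormal, an alert
(WARNING) message from the URLs fetched during the last N hours, and posts them via
the Telegram Bot API. Credentials are read from the TELEGRAM_<CHANNEL>_BOT_TOKEN and
TELEGRAM_<CHANNEL>_CHAT_ID environment variables.
"""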

from django.utils import timezone
from datetime import timedelta
from ..models import Urls, Source, Search, UrlContent, UrlsSourceSearch, UrlsDuplicate
from django.db.models import Count
import requests
import os
import traceback

from .logger import get_logger

logger = get_logger()

def notify_telegram_info(last_hours, channel="INFO"):
try:
start_date = timezone.now() - timedelta(hours=last_hours)
# Count the number of URLs grouped by status within the date range
urls_data_status = Urls.objects.filter(ts_fetch__gte=start_date) \
.values('status') \
.annotate(count=Count('id')) \
.order_by('status')
# Count the number of URLs grouped by source
urls_data_source = UrlsSourceSearch.objects \
.filter(id_url__ts_fetch__gte=start_date) \
.values('id_source__source') \
.annotate(count=Count('id_url')) \
.order_by('id_source__source')
# Count the number of URLs grouped by search
urls_data_search = UrlsSourceSearch.objects \
.filter(id_url__ts_fetch__gte=start_date) \
.values('id_search__search') \
.annotate(count=Count('id_url')) \
.order_by('id_search__search')
bot_token = os.environ.get("TELEGRAM_{}_BOT_TOKEN".format(channel), "")
chat_id = os.environ.get("TELEGRAM_{}_CHAT_ID".format(channel), "")
message = "During the last {} hours:\n".format(last_hours)
message += "\nURLs per status:\n"
for o in urls_data_status:
message += " {}: {}\n".format(o.get("status"), o.get("count"))
message += "\nURLs per source:\n"
for o in urls_data_source:
message += " {}: {}\n".format(o.get("id_source__source"), o.get("count"))
message += "\nURLs per search:\n"
for o in urls_data_search:
message += " {}: {}\n".format(o.get("id_search__search"), o.get("count"))
url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
params = {
"chat_id": chat_id,
"text": message
}
# POST
response = requests.post(url, params=params)
except Exception as e:
logger.info("Exception while notifying status: {}\n{}".format(str(e), traceback.format_exc()))
def notify_telegram_warning(last_hours, channel="WARNING"):
try:
# Message appending logic
message = ""
start_date = timezone.now() - timedelta(hours=last_hours)
# Count the number of URLs grouped by status within the date range
urls_data_status = Urls.objects.filter(ts_fetch__gte=start_date) \
.values('status') \
.annotate(count=Count('id')) \
.order_by('status')
# Build dictionary
urls_data_status_dict = {}
for o in urls_data_status:
# #STATUS
urls_data_status_dict[o.get("status")] = o.get("count")
# #TOTAL
urls_data_status_dict["total"] = urls_data_status_dict.get("total", 0) + o.get("count")
MINIMUM_URLS_THRESHOLD = 10
MINIMUM_PROCESSED_URLS_RATIO = 0.5
# Minimum amount of URLs
if (urls_data_status_dict.get("total") < MINIMUM_URLS_THRESHOLD):
message += "WARNING - Total #URLS during the last {} hours: {}\n".format(last_hours, urls_data_status_dict.get("total"))
message += "\nURLs per status:\n"
for o in urls_data_status:
message += " {}: {}\n".format(o.get("status"), o.get("count"))
# Minimum ratio of processed raw urls
if (urls_data_status_dict.get("total") > 0):
if (urls_data_status_dict.get("raw", 0) / urls_data_status_dict.get("total") < MINIMUM_PROCESSED_URLS_RATIO):
message += "WARNING - Small ratio of processed raw URLs during the last {} hours: {}\n".format(last_hours, urls_data_status_dict.get("total"))
message += "\nURLs per status:\n"
for o in urls_data_status:
message += " {}: {}\n".format(o.get("status"), o.get("count"))
        # Count the number of URLs grouped by source
        urls_data_source = UrlsSourceSearch.objects \
            .filter(id_url__ts_fetch__gte=start_date) \
            .values('id_source__source') \
            .annotate(count=Count('id_url')) \
            .order_by('id_source__source')
        MINIMUM_SOURCES = 2
        if (len(urls_data_source) < MINIMUM_SOURCES):
            message += "WARNING - Very few sources found URLs during the last {} hours".format(last_hours)
            message += "\nURLs per source:\n"
            for o in urls_data_source:
                message += " {}: {}\n".format(o.get("id_source__source"), o.get("count"))
        """
        # TODO: URLs per search, key should be present for cnbc.com, foxnews.com, zerohedge.com, breitbart.com, child abuse, child neglect
        # Count the number of URLs grouped by search
        urls_data_search = UrlsSourceSearch.objects \
            .filter(id_url__ts_fetch__gte=start_date) \
            .values('id_search__search') \
            .annotate(count=Count('id_url')) \
            .order_by('id_search__search')
        message += "\nURLs per search:\n"
        for o in urls_data_search:
            message += " {}: {}\n".format(o.get("id_search__search"), o.get("count"))
        """

        # Valid message body?
        if (message != ""):
            bot_token = os.environ.get("TELEGRAM_{}_BOT_TOKEN".format(channel), "")
            chat_id = os.environ.get("TELEGRAM_{}_CHAT_ID".format(channel), "")
            url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
            params = {
                "chat_id": chat_id,
                "text": message
            }
            # POST
            response = requests.post(url, params=params)
    except Exception as e:
        logger.info("Exception while notifying status: {}\n{}".format(str(e), traceback.format_exc()))

def notify_telegram(last_hours=12):
    # INFO
    notify_telegram_info(last_hours, channel="INFO")
    # WARNING
    notify_telegram_warning(last_hours, channel="WARNING")