from datetime import timedelta
import os

import requests

from django.db.models import Count
from django.utils import timezone

from ..models import Urls, Source, Search, UrlContent, UrlsSourceSearch, UrlsDuplicate
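# Per-channel credentials are read from the environment using the
# "TELEGRAM_{CHANNEL}_..." naming scheme used below, i.e. for the two channels
# wired up in this module:
#   TELEGRAM_INFO_BOT_TOKEN,    TELEGRAM_INFO_CHAT_ID
#   TELEGRAM_WARNING_BOT_TOKEN, TELEGRAM_WARNING_CHAT_ID
# A missing variable falls back to an empty string, in which case the
# sendMessage request cannot succeed (the response is never checked).
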
def notify_telegram_info(last_hours, channel="INFO"):
    start_date = timezone.now() - timedelta(hours=last_hours)

    # Count the number of URLs grouped by status within the date range
    urls_data_status = Urls.objects.filter(ts_fetch__gte=start_date) \
        .values('status') \
        .annotate(count=Count('id')) \
        .order_by('status')
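
    # Note: values('status').annotate(count=Count('id')) yields one dict per
    # distinct status, e.g. {'status': 'raw', 'count': 42}, which is why the
    # message-building loops below read fields with o.get(...). The same
    # pattern is used for the per-source and per-search aggregations.
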
    # Count the number of URLs grouped by source
    urls_data_source = UrlsSourceSearch.objects \
        .filter(id_url__ts_fetch__gte=start_date) \
        .values('id_source__source') \
        .annotate(count=Count('id_url')) \
        .order_by('id_source__source')

    # Count the number of URLs grouped by search
    urls_data_search = UrlsSourceSearch.objects \
        .filter(id_url__ts_fetch__gte=start_date) \
        .values('id_search__search') \
        .annotate(count=Count('id_url')) \
        .order_by('id_search__search')

    bot_token = os.environ.get("TELEGRAM_{}_BOT_TOKEN".format(channel), "")
    chat_id = os.environ.get("TELEGRAM_{}_CHAT_ID".format(channel), "")

    # Build the summary message
    message = "During the last {} hours:\n".format(last_hours)

    message += "\nURLs per status:\n"
    for o in urls_data_status:
        message += "    {}: {}\n".format(o.get("status"), o.get("count"))

    message += "\nURLs per source:\n"
    for o in urls_data_source:
        message += "    {}: {}\n".format(o.get("id_source__source"), o.get("count"))

    message += "\nURLs per search:\n"
    for o in urls_data_search:
        message += "    {}: {}\n".format(o.get("id_search__search"), o.get("count"))

    # POST the message to the Telegram Bot API
    url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
    params = {
        "chat_id": chat_id,
        "text": message
    }
    response = requests.post(url, params=params)
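
# notify_telegram_warning() below only posts a message when at least one of its
# checks fires: too few URLs fetched in the window, a suspicious share of "raw"
# URLs relative to the total, or too few distinct sources contributing URLs.
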
def notify_telegram_warning(last_hours, channel="WARNING"):
    # Warnings are appended to this message; it stays empty if all checks pass
    message = ""

    start_date = timezone.now() - timedelta(hours=last_hours)

    # Count the number of URLs grouped by status within the date range
    urls_data_status = Urls.objects.filter(ts_fetch__gte=start_date) \
        .values('status') \
        .annotate(count=Count('id')) \
        .order_by('status')

    # Build a {status: count} dictionary, plus a running total
    urls_data_status_dict = {}
    for o in urls_data_status:
        # #STATUS
        urls_data_status_dict[o.get("status")] = o.get("count")
        # #TOTAL
        urls_data_status_dict["total"] = urls_data_status_dict.get("total", 0) + o.get("count")

    MINIMUM_URLS_THRESHOLD = 10
    MINIMUM_PROCESSED_URLS_RATIO = 0.5

    # Minimum amount of URLs
    if urls_data_status_dict.get("total", 0) < MINIMUM_URLS_THRESHOLD:
        message += "WARNING - Total #URLS during the last {} hours: {}\n".format(last_hours, urls_data_status_dict.get("total", 0))
        message += "\nURLs per status:\n"
        for o in urls_data_status:
            message += "    {}: {}\n".format(o.get("status"), o.get("count"))

    # Minimum ratio of processed raw URLs
    if urls_data_status_dict.get("total", 0) > 0:
        if urls_data_status_dict.get("raw", 0) / urls_data_status_dict.get("total") < MINIMUM_PROCESSED_URLS_RATIO:
            message += "WARNING - Small ratio of processed raw URLs during the last {} hours: {}\n".format(last_hours, urls_data_status_dict.get("total"))
            message += "\nURLs per status:\n"
            for o in urls_data_status:
                message += "    {}: {}\n".format(o.get("status"), o.get("count"))

    # Count the number of URLs grouped by source
    urls_data_source = UrlsSourceSearch.objects \
        .filter(id_url__ts_fetch__gte=start_date) \
        .values('id_source__source') \
        .annotate(count=Count('id_url')) \
        .order_by('id_source__source')

    MINIMUM_SOURCES = 2
    if len(urls_data_source) < MINIMUM_SOURCES:
        message += "WARNING - Very few sources found URLs during the last {} hours".format(last_hours)
        message += "\nURLs per source:\n"
        for o in urls_data_source:
            message += "    {}: {}\n".format(o.get("id_source__source"), o.get("count"))

    """
    # TODO: URLs per search, key should be present for cnbc.com, foxnews.com, zerohedge.com, breitbart.com, child abuse, child neglect
    # Count the number of URLs grouped by search
    urls_data_search = UrlsSourceSearch.objects \
        .filter(id_url__ts_fetch__gte=start_date) \
        .values('id_search__search') \
        .annotate(count=Count('id_url')) \
        .order_by('id_search__search')

    message += "\nURLs per search:\n"
    for o in urls_data_search:
        message += "    {}: {}\n".format(o.get("id_search__search"), o.get("count"))
    """

    # Only send if at least one warning was appended
    if message != "":
        bot_token = os.environ.get("TELEGRAM_{}_BOT_TOKEN".format(channel), "")
        chat_id = os.environ.get("TELEGRAM_{}_CHAT_ID".format(channel), "")

        # POST the message to the Telegram Bot API
        url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
        params = {
            "chat_id": chat_id,
            "text": message
        }
        response = requests.post(url, params=params)
def notify_telegram(last_hours=12):
    # INFO
    notify_telegram_info(last_hours, channel="INFO")
    # WARNING
    notify_telegram_warning(last_hours, channel="WARNING")
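
# Example usage (a sketch, not part of the original module): notify_telegram()
# is presumably invoked on a schedule (cron, a management command, Celery beat,
# or similar). Assuming the module can be imported as shown (the import path is
# hypothetical), a job or a Django shell session could run:
#
#     from .notify_telegram import notify_telegram   # hypothetical path
#     notify_telegram(last_hours=12)
#
# The INFO channel always receives a summary; the WARNING channel only receives
# a message when one of the threshold checks in notify_telegram_warning() fires.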