Notify status task

This commit is contained in:
Luciano Gervasoni
2025-08-14 15:06:53 +02:00
parent ffb0f85475
commit 4ccff2bc02
6 changed files with 93 additions and 150 deletions

View File

@@ -0,0 +1,57 @@
from django.utils import timezone
from django.utils.timezone import now, timedelta
from .models import Urls, Source, Search, UrlContent, UrlsSourceSearch, UrlsDuplicate
from django.db.models import Q, Count
import requests
import os
def notify_telegram(last_hours=24):
start_date = timezone.now() - timedelta(hours=last_hours)
# Count the number of URLs grouped by status within the date range
urls_data_status = Urls.objects.filter(ts_fetch__gte=start_date) \
.values('status') \
.annotate(count=Count('id')) \
.order_by('status')
# Count the number of URLs grouped by source
urls_data_source = UrlsSourceSearch.objects \
.filter(id_url__ts_fetch__gte=start_date) \
.values('id_source__source') \
.annotate(count=Count('id_url')) \
.order_by('id_source__source')
# Count the number of URLs grouped by search
urls_data_search = UrlsSourceSearch.objects \
.filter(id_url__ts_fetch__gte=start_date) \
.values('id_search__search') \
.annotate(count=Count('id_url')) \
.order_by('id_search__search')
bot_token = os.environ.get("TELEGRAM_BOT_TOKEN", "")
chat_id = os.environ.get("TELEGRAM_CHAT_ID", "")
message = "During the last {} hours:\n".format(last_hours)
message += "\nURLs per status:\n"
for o in urls_data_status:
message += " {}: {}\n".format(o.get("status"), o.get("count"))
message += "\nURLs per source:\n"
for o in urls_data_source:
message += " {}: {}\n".format(o.get("id_source__source"), o.get("count"))
message += "\nURLs per search:\n"
for o in urls_data_search:
message += " {}: {}\n".format(o.get("id_search__search"), o.get("count"))
url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
params = {
"chat_id": chat_id,
"text": message
}
# POST
response = requests.post(url, params=params)

View File

@@ -7,6 +7,7 @@ from .src.fetch_missing_kids import FetchMissingKids
from .src.fetch_selenium import FetchSeleniumSourceSearch
from .src.db_utils import DB_Handler
from .src.publisher import Publisher
from .src.notifications import notify_telegram
from .src.logger import get_logger
logger = get_logger()
@@ -75,74 +76,9 @@ def clean_old_url_content(older_than_days=14):
DB_Handler().clean_old_url_content(older_than_days=older_than_days)
logger.info("Task completed: {}".format(task))
'''
@job('default')
def background_task(process_type: str):
logger.info("Task triggered: {}".format(process_type))
try:
if (process_type == "fetch_feeds"):
FetchFeeds().run()
elif (process_type == "fetch_parser"):
FetchParser().run()
elif (process_type == "fetch_search"):
FetchSearcher().run()
elif (process_type == "fetch_selenium_search"):
FetchSeleniumSourceSearch().run()
elif (process_type == "fetch_missingkids_all"):
FetchMissingKids().run(number_pages=-1)
elif ("fetch_missingkids" in process_type):
# number_pages encoded in URL
try:
number_pages = int(process_type.split("_")[-1])
except Exception as e:
number_pages = -1
FetchMissingKids().run(number_pages=number_pages)
elif ("process_" in process_type):
# Batch size encoded in URL
try:
batch_size = int(process_type.split("_")[-1])
except Exception as e:
batch_size = None
# Task type
if ("process_raw_urls" in process_type):
DB_Handler().process_raw_urls(batch_size=batch_size)
elif ("process_error_urls" in process_type):
DB_Handler().process_error_urls(batch_size=batch_size)
elif ("process_missing_kids_urls" in process_type):
if ("process_missing_kids_urls_valid" in process_type):
DB_Handler().process_missing_kids_urls(batch_size=batch_size, process_status_only="valid")
elif ("process_missing_kids_urls_invalid" in process_type):
DB_Handler().process_missing_kids_urls(batch_size=batch_size, process_status_only="invalid")
elif ("process_missing_kids_urls_unknown" in process_type):
DB_Handler().process_missing_kids_urls(batch_size=batch_size, process_status_only="unknown")
else:
DB_Handler().process_missing_kids_urls(batch_size=batch_size)
elif ("clean_old_url_content" in process_type ):
# Older than X days encoded in URL
try:
older_than_days = float(process_type.split("_")[-1])
except Exception as e:
older_than_days = None
DB_Handler().clean_old_url_content(older_than_days=older_than_days)
elif ("publish" in process_type):
# Extract URL ID
url_id = process_type.split("_")[-1]
# Publish
Publisher().publish(url_id)
else:
logger.info("Task unknown!: {}".format(process_type))
logger.info("Task completed: {}".format(process_type))
except Exception as e:
logger.error(e)
'''
@shared_task(queue='default')
def notify_status():
task = "Notify status"
logger.info("Task triggered: {}".format(task))
notify_telegram()
logger.info("Task completed: {}".format(task))

View File

@@ -4,7 +4,6 @@ from . import views
urlpatterns = [
path('', views.link_list, name='link_list'),
#
path('notify_status', views.notify_status, name='notify_status'),
path('logs/database', views.log_db, name='log_db'),
path('logs/<str:log_type>', views.logs, name='logs'),
#

View File

@@ -1,4 +1,4 @@
from .views_base import link_list, logs, log_db, notify_status #, trigger_task,
from .views_base import link_list, logs, log_db #, trigger_task,
from django.core.paginator import Paginator
from django.shortcuts import render, get_object_or_404

View File

@@ -1,16 +1,7 @@
import os
from django.http import JsonResponse, HttpResponse
from django.db import connection
import requests
import os
from django.utils import timezone
from django.utils.timezone import now, timedelta
from .models import Urls, Source, Search, UrlContent, UrlsSourceSearch, UrlsDuplicate
from django.db.models import Q, Count
from .src.logger import get_logger
logger = get_logger()
####################################################################################################
"""
@@ -82,71 +73,3 @@ def log_db(request):
return HttpResponse( "\n".join([str(e) for e in r]) )
####################################################################################################
def notify_status(request):
last_hours = 24
start_date = timezone.now() - timedelta(hours=last_hours)
# Count the number of URLs grouped by status within the date range
urls_data_status = Urls.objects.filter(ts_fetch__gte=start_date) \
.values('status') \
.annotate(count=Count('id')) \
.order_by('status')
# Count the number of URLs grouped by source
urls_data_source = UrlsSourceSearch.objects \
.filter(id_url__ts_fetch__gte=start_date) \
.values('id_source__source') \
.annotate(count=Count('id_url')) \
.order_by('id_source__source')
# Count the number of URLs grouped by search
urls_data_search = UrlsSourceSearch.objects \
.filter(id_url__ts_fetch__gte=start_date) \
.values('id_search__search') \
.annotate(count=Count('id_url')) \
.order_by('id_search__search')
bot_token = os.environ.get("TELEGRAM_BOT_TOKEN", "")
chat_id = os.environ.get("TELEGRAM_CHAT_ID", "")
message = "During the last {} hours:\n".format(last_hours)
message += "\nURLs per status:\n"
for o in urls_data_status:
message += " {}: {}\n".format(o.get("status"), o.get("count"))
message += "\nURLs per source:\n"
for o in urls_data_source:
message += " {}: {}\n".format(o.get("id_source__source"), o.get("count"))
message += "\nURLs per search:\n"
for o in urls_data_search:
message += " {}: {}\n".format(o.get("id_search__search"), o.get("count"))
url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
params = {
"chat_id": chat_id,
"text": message
}
logger.info("NOTIFY STATUS: {}".format(str(url)))
logger.info("NOTIFY STATUS: {}".format(str(params)))
# POST
# response = requests.post(url, params={"chat_id": chat_id, "text": "Hola!"})
response = requests.post(url, params=params)
# print(response.json()) # Check the response
"""
import json
from django.forms.models import model_to_dict
# readable_ = [model_to_dict(obj) for obj in urls_data_status]
response = requests.post(url, params={"chat_id": chat_id, "text": str(readable)})
response = requests.post(url, params={"chat_id": chat_id, "text": str(readable_)})
"""
return HttpResponse( "\n".join([str(e) for e in message]) )

View File

@@ -391,6 +391,34 @@
"description": ""
}
},
{
"model": "django_celery_beat.periodictask",
"pk": 4,
"fields": {
"name": "Notify status",
"task": "fetcher.tasks.notify_status",
"interval": 3,
"crontab": null,
"solar": null,
"clocked": null,
"args": "[]",
"kwargs": "{}",
"queue": null,
"exchange": null,
"routing_key": null,
"headers": "{}",
"priority": null,
"expires": null,
"expire_seconds": null,
"one_off": false,
"start_time": null,
"enabled": true,
"last_run_at": null,
"total_run_count": 0,
"date_changed": "2025-07-17T16:12:44.533Z",
"description": ""
}
},
{
"model": "django_celery_beat.intervalschedule",
"pk": 1,