From 4ccff2bc024d1e3370d73db720cb2e1d081d0fa8 Mon Sep 17 00:00:00 2001 From: Luciano Gervasoni Date: Thu, 14 Aug 2025 15:06:53 +0200 Subject: [PATCH] Notify status task --- app_urls/fetcher/src/notifier.py | 57 +++++++++++++++++++++++ app_urls/fetcher/tasks.py | 78 +++----------------------------- app_urls/fetcher/urls.py | 1 - app_urls/fetcher/views.py | 2 +- app_urls/fetcher/views_base.py | 77 ------------------------------- app_urls/scheduled_tasks.json | 28 ++++++++++++ 6 files changed, 93 insertions(+), 150 deletions(-) create mode 100644 app_urls/fetcher/src/notifier.py diff --git a/app_urls/fetcher/src/notifier.py b/app_urls/fetcher/src/notifier.py new file mode 100644 index 0000000..4ec3532 --- /dev/null +++ b/app_urls/fetcher/src/notifier.py @@ -0,0 +1,57 @@ +from django.utils import timezone +from django.utils.timezone import now, timedelta +from ..models import Urls, Source, Search, UrlContent, UrlsSourceSearch, UrlsDuplicate +from django.db.models import Q, Count +import requests +import os + + +def notify_telegram(last_hours=24): + start_date = timezone.now() - timedelta(hours=last_hours) + + # Count the number of URLs grouped by status within the date range + urls_data_status = Urls.objects.filter(ts_fetch__gte=start_date) \ + .values('status') \ + .annotate(count=Count('id')) \ + .order_by('status') + + # Count the number of URLs grouped by source + urls_data_source = UrlsSourceSearch.objects \ + .filter(id_url__ts_fetch__gte=start_date) \ + .values('id_source__source') \ + .annotate(count=Count('id_url')) \ + .order_by('id_source__source') + + # Count the number of URLs grouped by search + urls_data_search = UrlsSourceSearch.objects \ + .filter(id_url__ts_fetch__gte=start_date) \ + .values('id_search__search') \ + .annotate(count=Count('id_url')) \ + .order_by('id_search__search') + + + bot_token = os.environ.get("TELEGRAM_BOT_TOKEN", "") + chat_id = os.environ.get("TELEGRAM_CHAT_ID", "") + + + message = "During the last {} hours:\n".format(last_hours) + + message += "\nURLs per status:\n" + for o in urls_data_status: + message += " {}: {}\n".format(o.get("status"), o.get("count")) + message += "\nURLs per source:\n" + for o in urls_data_source: + message += " {}: {}\n".format(o.get("id_source__source"), o.get("count")) + message += "\nURLs per search:\n" + for o in urls_data_search: + message += " {}: {}\n".format(o.get("id_search__search"), o.get("count")) + + + url = f"https://api.telegram.org/bot{bot_token}/sendMessage" + params = { + "chat_id": chat_id, + "text": message + } + + # POST + response = requests.post(url, params=params) diff --git a/app_urls/fetcher/tasks.py b/app_urls/fetcher/tasks.py index 05f994f..8b34e17 100644 --- a/app_urls/fetcher/tasks.py +++ b/app_urls/fetcher/tasks.py @@ -7,6 +7,7 @@ from .src.fetch_missing_kids import FetchMissingKids from .src.fetch_selenium import FetchSeleniumSourceSearch from .src.db_utils import DB_Handler from .src.publisher import Publisher +from .src.notifier import notify_telegram from .src.logger import get_logger logger = get_logger() @@ -75,74 +76,9 @@ def clean_old_url_content(older_than_days=14): DB_Handler().clean_old_url_content(older_than_days=older_than_days) logger.info("Task completed: {}".format(task)) - -''' -@job('default') -def background_task(process_type: str): - logger.info("Task triggered: {}".format(process_type)) - - try: - if (process_type == "fetch_feeds"): - FetchFeeds().run() - elif (process_type == "fetch_parser"): - FetchParser().run() - elif (process_type == "fetch_search"): - FetchSearcher().run() - elif (process_type == "fetch_selenium_search"): - FetchSeleniumSourceSearch().run() - elif (process_type == "fetch_missingkids_all"): - FetchMissingKids().run(number_pages=-1) - - elif ("fetch_missingkids" in process_type): - # number_pages encoded in URL - try: - number_pages = int(process_type.split("_")[-1]) - except Exception as e: - number_pages = -1 - 
FetchMissingKids().run(number_pages=number_pages) - - elif ("process_" in process_type): - # Batch size encoded in URL - try: - batch_size = int(process_type.split("_")[-1]) - except Exception as e: - batch_size = None - - # Task type - if ("process_raw_urls" in process_type): - DB_Handler().process_raw_urls(batch_size=batch_size) - elif ("process_error_urls" in process_type): - DB_Handler().process_error_urls(batch_size=batch_size) - elif ("process_missing_kids_urls" in process_type): - if ("process_missing_kids_urls_valid" in process_type): - DB_Handler().process_missing_kids_urls(batch_size=batch_size, process_status_only="valid") - elif ("process_missing_kids_urls_invalid" in process_type): - DB_Handler().process_missing_kids_urls(batch_size=batch_size, process_status_only="invalid") - elif ("process_missing_kids_urls_unknown" in process_type): - DB_Handler().process_missing_kids_urls(batch_size=batch_size, process_status_only="unknown") - else: - DB_Handler().process_missing_kids_urls(batch_size=batch_size) - - elif ("clean_old_url_content" in process_type ): - # Older than X days encoded in URL - try: - older_than_days = float(process_type.split("_")[-1]) - except Exception as e: - older_than_days = None - - DB_Handler().clean_old_url_content(older_than_days=older_than_days) - - elif ("publish" in process_type): - # Extract URL ID - url_id = process_type.split("_")[-1] - # Publish - Publisher().publish(url_id) - - else: - logger.info("Task unknown!: {}".format(process_type)) - - logger.info("Task completed: {}".format(process_type)) - except Exception as e: - logger.error(e) -''' \ No newline at end of file +@shared_task(queue='default') +def notify_status(): + task = "Notify status" + logger.info("Task triggered: {}".format(task)) + notify_telegram() + logger.info("Task completed: {}".format(task)) diff --git a/app_urls/fetcher/urls.py b/app_urls/fetcher/urls.py index ef207e0..69605fb 100644 --- a/app_urls/fetcher/urls.py +++ b/app_urls/fetcher/urls.py @@ 
-4,7 +4,6 @@ from . import views urlpatterns = [ path('', views.link_list, name='link_list'), # - path('notify_status', views.notify_status, name='notify_status'), path('logs/database', views.log_db, name='log_db'), path('logs/', views.logs, name='logs'), # diff --git a/app_urls/fetcher/views.py b/app_urls/fetcher/views.py index afd803e..be15d61 100644 --- a/app_urls/fetcher/views.py +++ b/app_urls/fetcher/views.py @@ -1,4 +1,4 @@ -from .views_base import link_list, logs, log_db, notify_status #, trigger_task, +from .views_base import link_list, logs, log_db #, trigger_task, from django.core.paginator import Paginator from django.shortcuts import render, get_object_or_404 diff --git a/app_urls/fetcher/views_base.py b/app_urls/fetcher/views_base.py index 69f6120..d04d0cd 100644 --- a/app_urls/fetcher/views_base.py +++ b/app_urls/fetcher/views_base.py @@ -1,16 +1,7 @@ import os from django.http import JsonResponse, HttpResponse from django.db import connection -import requests import os -from django.utils import timezone -from django.utils.timezone import now, timedelta -from .models import Urls, Source, Search, UrlContent, UrlsSourceSearch, UrlsDuplicate -from django.db.models import Q, Count - - -from .src.logger import get_logger -logger = get_logger() #################################################################################################### """ @@ -82,71 +73,3 @@ def log_db(request): return HttpResponse( "\n".join([str(e) for e in r]) ) #################################################################################################### - -def notify_status(request): - last_hours = 24 - start_date = timezone.now() - timedelta(hours=last_hours) - - # Count the number of URLs grouped by status within the date range - urls_data_status = Urls.objects.filter(ts_fetch__gte=start_date) \ - .values('status') \ - .annotate(count=Count('id')) \ - .order_by('status') - - # Count the number of URLs grouped by source - urls_data_source = UrlsSourceSearch.objects \ - 
.filter(id_url__ts_fetch__gte=start_date) \ - .values('id_source__source') \ - .annotate(count=Count('id_url')) \ - .order_by('id_source__source') - - # Count the number of URLs grouped by search - urls_data_search = UrlsSourceSearch.objects \ - .filter(id_url__ts_fetch__gte=start_date) \ - .values('id_search__search') \ - .annotate(count=Count('id_url')) \ - .order_by('id_search__search') - - - bot_token = os.environ.get("TELEGRAM_BOT_TOKEN", "") - chat_id = os.environ.get("TELEGRAM_CHAT_ID", "") - - - message = "During the last {} hours:\n".format(last_hours) - - message += "\nURLs per status:\n" - for o in urls_data_status: - message += " {}: {}\n".format(o.get("status"), o.get("count")) - message += "\nURLs per source:\n" - for o in urls_data_source: - message += " {}: {}\n".format(o.get("id_source__source"), o.get("count")) - message += "\nURLs per search:\n" - for o in urls_data_search: - message += " {}: {}\n".format(o.get("id_search__search"), o.get("count")) - - - url = f"https://api.telegram.org/bot{bot_token}/sendMessage" - params = { - "chat_id": chat_id, - "text": message - } - - logger.info("NOTIFY STATUS: {}".format(str(url))) - logger.info("NOTIFY STATUS: {}".format(str(params))) - - - # POST - # response = requests.post(url, params={"chat_id": chat_id, "text": "Hola!"}) - - response = requests.post(url, params=params) - # print(response.json()) # Check the response - - """ - import json - from django.forms.models import model_to_dict - - # readable_ = [model_to_dict(obj) for obj in urls_data_status] - response = requests.post(url, params={"chat_id": chat_id, "text": str(readable)}) - response = requests.post(url, params={"chat_id": chat_id, "text": str(readable_)}) - """ - return HttpResponse( "\n".join([str(e) for e in message]) ) \ No newline at end of file diff --git a/app_urls/scheduled_tasks.json b/app_urls/scheduled_tasks.json index ff31f07..3712ab0 100644 --- a/app_urls/scheduled_tasks.json +++ b/app_urls/scheduled_tasks.json @@ -391,6 
+391,34 @@ "description": "" } }, + { + "model": "django_celery_beat.periodictask", + "pk": 4, + "fields": { + "name": "Notify status", + "task": "fetcher.tasks.notify_status", + "interval": 3, + "crontab": null, + "solar": null, + "clocked": null, + "args": "[]", + "kwargs": "{}", + "queue": null, + "exchange": null, + "routing_key": null, + "headers": "{}", + "priority": null, + "expires": null, + "expire_seconds": null, + "one_off": false, + "start_time": null, + "enabled": true, + "last_run_at": null, + "total_run_count": 0, + "date_changed": "2025-07-17T16:12:44.533Z", + "description": "" + } + }, { "model": "django_celery_beat.intervalschedule", "pk": 1,