151 lines
5.8 KiB
Python
151 lines
5.8 KiB
Python
import os
|
|
from django.http import JsonResponse, HttpResponse
|
|
from django.db import connection
|
|
import requests
|
|
import os
|
|
from django.utils import timezone
|
|
from django.utils.timezone import now, timedelta
|
|
from .models import Urls, Source, Search, UrlContent, UrlsSourceSearch, UrlsDuplicate
|
|
from django.db.models import Q, Count
|
|
|
|
|
|
from .src.logger import get_logger
|
|
# Module-level logger obtained from the project's logger helper; the views in
# this module log through it (e.g. notify_status).
logger = get_logger()
|
|
|
|
####################################################################################################
|
|
"""
|
|
### from .tasks import background_task
|
|
|
|
def trigger_task(request, task):
|
|
# Enqueue function in "default" queue
|
|
background_task.delay(task)
|
|
return JsonResponse({"message": "Task has been enqueued!", "task": task})
|
|
"""
|
|
|
|
def link_list(request):
    """Render a minimal HTML index page linking to the app's main endpoints.

    Links are built by appending path segments to the absolute URI of the
    current request. They cover the admin site, the URL listing, the status
    notification endpoint and one log viewer per known log type.

    Args:
        request: The incoming Django ``HttpRequest``.

    Returns:
        HttpResponse: An HTML page containing one ``<a>`` element per link.
    """
    # Function-scope imports keep this view self-contained.
    # ``posixpath.join`` always joins with "/", whereas ``os.path.join`` would
    # insert "\\" on Windows and corrupt the generated URLs.
    import posixpath
    from html import escape

    # Base URL path
    app_url = request.build_absolute_uri()

    endpoints = ["admin", "urls", "notify_status"]
    log_types = [
        "database",
        "debug",
        "info",
        "warning",
        "server",
        "beat",
        "worker_default",
        "worker_low",
    ]
    # NOTE: links to the "task/<name>" endpoints (fetch_* / process_* tasks)
    # are not listed on this page.

    # List of links
    list_links = [posixpath.join(app_url, endpoint) for endpoint in endpoints]
    list_links += [posixpath.join(app_url, "logs", log_type) for log_type in log_types]

    # HTML: the URL is used both as link target and as link text. It derives
    # from the request, so escape it before embedding it in markup.
    items = "".join(
        f'<li><a href="{escaped}" target="_blank">{escaped}</a></li>'
        for escaped in (escape(url, quote=True) for url in list_links)
    )
    page = (
        "<html><head><title>Links</title></head><body><h1>Links</h1><ul>"
        + items
        + "</ul></body></html>"
    )

    return HttpResponse(page)
|
|
|
|
|
|
####################################################################################################
|
|
def logs(request, log_type):
    """Return the contents of one application log file as plain text.

    The file read is ``<log_type>.log`` inside the directory named by the
    ``PATH_LOGS_DIRECTORY`` environment variable (default: ``logs``).

    Args:
        request: The incoming Django ``HttpRequest`` (not used).
        log_type: Log name without the ``.log`` extension.

    Returns:
        HttpResponse: ``text/plain`` response holding the file contents, or a
        short error message when the log cannot be read.
    """
    error_message = "Error reading logs for log type :{}".format(log_type)

    # log_type is supplied by the caller of the view; only accept a bare file
    # name so the resulting path cannot leave the logs directory.
    if "/" in log_type or "\\" in log_type:
        return HttpResponse(error_message, content_type="text/plain")

    log_path = os.path.join(
        os.getenv("PATH_LOGS_DIRECTORY", "logs"), "{}.log".format(log_type)
    )

    try:
        with open(log_path, "r") as f:
            file_content = f.read()
    except (OSError, ValueError) as exc:
        # OSError: missing/unreadable file. ValueError: undecodable content
        # (UnicodeDecodeError) or an invalid path such as an embedded NUL.
        logger.warning("Could not read log file {}: {}".format(log_path, exc))
        file_content = error_message

    return HttpResponse(file_content, content_type="text/plain")
|
|
|
|
####################################################################################################
|
|
def log_db(request):
    """Report the total size of the largest database relations.

    Queries the PostgreSQL catalogs ``pg_class`` / ``pg_namespace`` for up to
    100 non-index relations outside ``pg_catalog``, ``information_schema`` and
    the ``pg_toast*`` schemas, ordered by total relation size (largest first).

    Args:
        request: The incoming Django ``HttpRequest`` (not used).

    Returns:
        HttpResponse: ``text/plain`` response with one ``(relation, total_size)``
        row per line.
    """
    query = """
        SELECT
            relname AS "relation",
            pg_size_pretty (
                pg_total_relation_size (C .oid)
            ) AS "total_size"
        FROM
            pg_class C
        LEFT JOIN pg_namespace N ON (N.oid = C .relnamespace)
        WHERE
            nspname NOT IN (
                'pg_catalog',
                'information_schema'
            )
        AND C .relkind <> 'i'
        AND nspname !~ '^pg_toast'
        ORDER BY
            pg_total_relation_size (C .oid) DESC
        LIMIT 100;
    """

    with connection.cursor() as cursor:
        # The return value of execute() is driver-specific (DB-API leaves it
        # undefined; psycopg2 returns None), so fetch from the cursor itself
        # instead of chaining .fetchall() onto execute().
        cursor.execute(query)
        rows = cursor.fetchall()

    # Rows are newline-separated, so serve them as plain text (same as logs()).
    return HttpResponse("\n".join(str(row) for row in rows), content_type="text/plain")
|
|
|
|
####################################################################################################
|
|
|
|
def notify_status(request):
    """Send a Telegram summary of the URLs fetched during the last 24 hours.

    Counts ``Urls`` rows with ``ts_fetch`` inside the window grouped by status,
    and ``UrlsSourceSearch`` rows grouped by source and by search. The three
    groupings are combined into one text message, which is posted to the
    Telegram Bot API ``sendMessage`` endpoint using the ``TELEGRAM_BOT_TOKEN``
    and ``TELEGRAM_CHAT_ID`` environment variables.

    Args:
        request: The incoming Django ``HttpRequest`` (not used).

    Returns:
        HttpResponse: ``text/plain`` response containing the message text. It
        is returned even when the notification could not be delivered; delivery
        problems are logged instead of raised.
    """
    last_hours = 24
    start_date = timezone.now() - timedelta(hours=last_hours)

    # Count the number of URLs grouped by status within the date range
    urls_data_status = (
        Urls.objects.filter(ts_fetch__gte=start_date)
        .values("status")
        .annotate(count=Count("id"))
        .order_by("status")
    )

    # Count the number of URLs grouped by source
    urls_data_source = (
        UrlsSourceSearch.objects.filter(id_url__ts_fetch__gte=start_date)
        .values("id_source__source")
        .annotate(count=Count("id_url"))
        .order_by("id_source__source")
    )

    # Count the number of URLs grouped by search
    urls_data_search = (
        UrlsSourceSearch.objects.filter(id_url__ts_fetch__gte=start_date)
        .values("id_search__search")
        .annotate(count=Count("id_url"))
        .order_by("id_search__search")
    )

    # Build the message by accumulating every section, one row per line.
    lines = ["During the last {} hours:".format(last_hours)]
    sections = (
        ("URLs per status:", urls_data_status),
        ("URLs per source:", urls_data_source),
        ("URLs per search:", urls_data_search),
    )
    for title, rows in sections:
        lines.append(title)
        lines.extend(" {}".format(row) for row in rows)
    message = "\n".join(lines) + "\n"

    bot_token = os.environ.get("TELEGRAM_BOT_TOKEN", "")
    chat_id = os.environ.get("TELEGRAM_CHAT_ID", "")

    if not bot_token or not chat_id:
        logger.warning("NOTIFY STATUS: Telegram credentials not configured, message not sent")
        return HttpResponse(message, content_type="text/plain")

    url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
    payload = {"chat_id": chat_id, "text": message}

    # The URL embeds the bot token, so only the payload is logged.
    logger.info("NOTIFY STATUS: {}".format(str(payload)))

    try:
        # Send the text in the request body rather than the query string, and
        # bound the wait so a stalled connection cannot hang the view.
        response = requests.post(url, data=payload, timeout=10)
    except requests.RequestException as exc:
        # requests' exception messages can contain the full URL (and therefore
        # the token); log the exception type only.
        logger.warning("NOTIFY STATUS: request failed ({})".format(type(exc).__name__))
    else:
        if not response.ok:
            logger.warning(
                "NOTIFY STATUS: Telegram API returned HTTP {}".format(response.status_code)
            )

    return HttpResponse(message, content_type="text/plain")