From 80507739067e301055b8567cde58b0ba257ba196 Mon Sep 17 00:00:00 2001 From: Luciano Gervasoni Date: Fri, 21 Mar 2025 12:28:23 +0100 Subject: [PATCH] UrlSourceSearch model update, admin panel registration of objects --- 1-DB.ipynb | 102 ++++++++++++++++++++++++++++++++++++----- app_urls/api/admin.py | 9 ++++ app_urls/api/models.py | 16 ++++++- 3 files changed, 115 insertions(+), 12 deletions(-) diff --git a/1-DB.ipynb b/1-DB.ipynb index cc5bf8b..65e0993 100644 --- a/1-DB.ipynb +++ b/1-DB.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -11,16 +11,46 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "db_postgres\n", + "db_redis\n", + "\u001b[1A\u001b[1B\u001b[0G\u001b[?25l[+] Running 1/0\n", + " ⠿ Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.1s \u001b[0m\n", + " ⠿ Container db_postgres \u001b[39mStarting\u001b[0m \u001b[34m0.1s \u001b[0m\n", + " \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n", + "\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/3\n", + " ⠿ Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.2s \u001b[0m\n", + " ⠿ Container db_postgres \u001b[39mStarting\u001b[0m \u001b[34m0.2s \u001b[0m\n", + " \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n", + "\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/3\n", + " ⠿ Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.3s \u001b[0m\n", + " ⠿ Container db_postgres \u001b[39mStarting\u001b[0m \u001b[34m0.3s \u001b[0m\n", + " \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n", + "\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] 
Running 1/3\n", + " ⠿ Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.4s \u001b[0m\n", + " ⠿ Container db_postgres \u001b[39mStarting\u001b[0m \u001b[34m0.4s \u001b[0m\n", + " \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n", + "\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l\u001b[34m[+] Running 3/3\u001b[0m\n", + " \u001b[32m✔\u001b[0m Container db_redis \u001b[32mStarted\u001b[0m \u001b[34m0.4s \u001b[0m\n", + " \u001b[32m✔\u001b[0m Container db_postgres \u001b[32mStarted\u001b[0m \u001b[34m0.4s \u001b[0m\n", + " \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n", + "\u001b[?25h" + ] + } + ], "source": [ "!docker rm -f db_postgres db_redis; docker compose -f docker/docker-compose.yml up -d ; sleep 5" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -137,7 +167,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -189,9 +219,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\t urls\n", + "[]\n", + "\t urls_duplicate\n", + "[]\n", + "\t urls_source_search\n", + "[]\n", + "\t source\n", + "[]\n", + "\t search\n", + "[(1,\n", + " 'https://api.missingkids.org/missingkids/servlet/XmlServlet?act=rss&LanguageCountry=en_US&orgPrefix=NCMC',\n", + " 'rss_feed'),\n", + " (2, 'www.breitbart.com', 'url_host'),\n", + " (3, 'child abuse', 'keyword_search')]\n", + "\t status_pattern_matching\n", + "[('.*youtube\\\\.com/.*', 50, 'invalid'),\n", + " ('.*tiktok\\\\.com/.*', 50, 'invalid'),\n", + " ('.*twitter\\\\.com/.*', 50, 'invalid'),\n", + " ('.*reddit\\\\.com/.*', 50, 'invalid'),\n", + " ('.*libreddit\\\\.de/.*', 50, 'invalid'),\n", + " 
('.*radio\\\\.foxnews\\\\.com/.*', 50, 'invalid')]\n", + "\t url_content\n", + "[]\n" + ] + } + ], "source": [ "# Connect to an existing database\n", "with psycopg.connect(connection_info) as conn:\n", @@ -208,9 +268,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[(1,\n", + " 'https://api.missingkids.org/missingkids/servlet/XmlServlet?act=rss&LanguageCountry=en_US&orgPrefix=NCMC',\n", + " 'rss_feed'),\n", + " (2, 'www.breitbart.com', 'url_host'),\n", + " (3, 'child abuse', 'keyword_search')]\n" + ] + } + ], "source": [ "# Connect to an existing database\n", "with psycopg.connect(connection_info) as conn:\n", @@ -221,9 +293,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[]\n" + ] + } + ], "source": [ "# Connect to an existing database\n", "with psycopg.connect(connection_info) as conn:\n", diff --git a/app_urls/api/admin.py b/app_urls/api/admin.py index 8c38f3f..3f186c6 100644 --- a/app_urls/api/admin.py +++ b/app_urls/api/admin.py @@ -1,3 +1,12 @@ from django.contrib import admin # Register your models here. 
+from .models import Search, Source, StatusPatternMatching, UrlContent, Urls, UrlsDuplicate, UrlsSourceSearch
+
+admin.site.register(Search)
+admin.site.register(Source)
+admin.site.register(StatusPatternMatching)
+admin.site.register(UrlContent)
+admin.site.register(Urls)
+admin.site.register(UrlsDuplicate)
+admin.site.register(UrlsSourceSearch)
diff --git a/app_urls/api/models.py b/app_urls/api/models.py
index 8e9a048..1e0babc 100644
--- a/app_urls/api/models.py
+++ b/app_urls/api/models.py
@@ -16,6 +16,8 @@ class Search(models.Model):
         managed = False
         db_table = 'search'
 
+    def __str__(self):
+        return f"[{self.type}] {self.search}"
 
 class Source(models.Model):
     id = models.SmallAutoField(primary_key=True)
@@ -25,6 +27,8 @@ class Source(models.Model):
         managed = False
         db_table = 'source'
 
+    def __str__(self):
+        return self.source
 
 class StatusPatternMatching(models.Model):
     pattern = models.TextField(primary_key=True)
@@ -35,6 +39,8 @@ class StatusPatternMatching(models.Model):
         managed = False
         db_table = 'status_pattern_matching'
 
+    def __str__(self):
+        return f"{self.pattern} -> {self.status} [Priority: {self.priority}]"
 
 class UrlContent(models.Model):
     id_url = models.OneToOneField('Urls', models.DO_NOTHING, db_column='id_url', primary_key=True)
@@ -57,7 +63,6 @@ class UrlContent(models.Model):
         managed = False
         db_table = 'url_content'
 
-
 class Urls(models.Model):
     class STATUS_ENUM(models.TextChoices):
         RAW = "raw", "Raw"
@@ -76,6 +81,9 @@ class Urls(models.Model):
         db_table = 'urls'
         ordering = ["-ts_fetch"]
 
+    def __str__(self):
+        return f"{self.url} {self.ts_fetch} {self.status}"
+
 
 class UrlsDuplicate(models.Model):
     id_url_canonical = models.OneToOneField(Urls, models.DO_NOTHING, db_column='id_url_canonical', primary_key=True)  # The composite primary key (id_url_canonical, id_url_duplicated) found, that is not supported. The first column is selected.
@@ -86,6 +94,9 @@ class UrlsDuplicate(models.Model):
         db_table = 'urls_duplicate'
         unique_together = (('id_url_canonical', 'id_url_duplicated'),)
 
+    def __str__(self):  # Label for this row: "<duplicated> -> <canonical>".
+        return "{} -> {}".format(self.id_url_duplicated, self.id_url_canonical)  # __str__ must return a str; returning a tuple raises TypeError
+
 
 class UrlsSourceSearch(models.Model):
     id_url = models.OneToOneField(Urls, models.DO_NOTHING, db_column='id_url', primary_key=True)  # The composite primary key (id_url, id_source, id_search) found, that is not supported. The first column is selected.
@@ -96,3 +107,6 @@ class UrlsSourceSearch(models.Model):
         managed = False
         db_table = 'urls_source_search'
         unique_together = (('id_url', 'id_source', 'id_search'),)
+
+    def __str__(self):  # Label for this row: "<url> | <source> | <search>".
+        return "{} | {} | {}".format(self.id_url, self.id_source, self.id_search)  # __str__ must return a str; returning a tuple raises TypeError