UrlSourceSearch model update, admin panel registration of objects

This commit is contained in:
Luciano Gervasoni
2025-03-21 12:28:23 +01:00
parent f84c7729f8
commit 8050773906
3 changed files with 115 additions and 12 deletions

View File

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@@ -11,16 +11,46 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"db_postgres\n",
"db_redis\n",
"\u001b[1A\u001b[1B\u001b[0G\u001b[?25l[+] Running 1/0\n",
" ⠿ Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.1s \u001b[0m\n",
" ⠿ Container db_postgres \u001b[39mStarting\u001b[0m \u001b[34m0.1s \u001b[0m\n",
" \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/3\n",
" ⠿ Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.2s \u001b[0m\n",
" ⠿ Container db_postgres \u001b[39mStarting\u001b[0m \u001b[34m0.2s \u001b[0m\n",
" \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/3\n",
" ⠿ Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.3s \u001b[0m\n",
" ⠿ Container db_postgres \u001b[39mStarting\u001b[0m \u001b[34m0.3s \u001b[0m\n",
" \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/3\n",
" ⠿ Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.4s \u001b[0m\n",
" ⠿ Container db_postgres \u001b[39mStarting\u001b[0m \u001b[34m0.4s \u001b[0m\n",
" \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l\u001b[34m[+] Running 3/3\u001b[0m\n",
" \u001b[32m✔\u001b[0m Container db_redis \u001b[32mStarted\u001b[0m \u001b[34m0.4s \u001b[0m\n",
" \u001b[32m✔\u001b[0m Container db_postgres \u001b[32mStarted\u001b[0m \u001b[34m0.4s \u001b[0m\n",
" \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n",
"\u001b[?25h"
]
}
],
"source": [
"!docker rm -f db_postgres db_redis; docker compose -f docker/docker-compose.yml up -d ; sleep 5"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@@ -137,7 +167,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@@ -189,9 +219,39 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\t urls\n",
"[]\n",
"\t urls_duplicate\n",
"[]\n",
"\t urls_source_search\n",
"[]\n",
"\t source\n",
"[]\n",
"\t search\n",
"[(1,\n",
" 'https://api.missingkids.org/missingkids/servlet/XmlServlet?act=rss&LanguageCountry=en_US&orgPrefix=NCMC',\n",
" 'rss_feed'),\n",
" (2, 'www.breitbart.com', 'url_host'),\n",
" (3, 'child abuse', 'keyword_search')]\n",
"\t status_pattern_matching\n",
"[('.*youtube\\\\.com/.*', 50, 'invalid'),\n",
" ('.*tiktok\\\\.com/.*', 50, 'invalid'),\n",
" ('.*twitter\\\\.com/.*', 50, 'invalid'),\n",
" ('.*reddit\\\\.com/.*', 50, 'invalid'),\n",
" ('.*libreddit\\\\.de/.*', 50, 'invalid'),\n",
" ('.*radio\\\\.foxnews\\\\.com/.*', 50, 'invalid')]\n",
"\t url_content\n",
"[]\n"
]
}
],
"source": [
"# Connect to an existing database\n",
"with psycopg.connect(connection_info) as conn:\n",
@@ -208,9 +268,21 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[(1,\n",
" 'https://api.missingkids.org/missingkids/servlet/XmlServlet?act=rss&LanguageCountry=en_US&orgPrefix=NCMC',\n",
" 'rss_feed'),\n",
" (2, 'www.breitbart.com', 'url_host'),\n",
" (3, 'child abuse', 'keyword_search')]\n"
]
}
],
"source": [
"# Connect to an existing database\n",
"with psycopg.connect(connection_info) as conn:\n",
@@ -221,9 +293,17 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[]\n"
]
}
],
"source": [
"# Connect to an existing database\n",
"with psycopg.connect(connection_info) as conn:\n",