UrlSourceSearch model update, admin panel registration of objects
This commit is contained in:
102
1-DB.ipynb
102
1-DB.ipynb
@@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -11,16 +11,46 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"db_postgres\n",
|
||||
"db_redis\n",
|
||||
"\u001b[1A\u001b[1B\u001b[0G\u001b[?25l[+] Running 1/0\n",
|
||||
" ⠿ Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.1s \u001b[0m\n",
|
||||
" ⠿ Container db_postgres \u001b[39mStarting\u001b[0m \u001b[34m0.1s \u001b[0m\n",
|
||||
" \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n",
|
||||
"\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/3\n",
|
||||
" ⠿ Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.2s \u001b[0m\n",
|
||||
" ⠿ Container db_postgres \u001b[39mStarting\u001b[0m \u001b[34m0.2s \u001b[0m\n",
|
||||
" \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n",
|
||||
"\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/3\n",
|
||||
" ⠿ Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.3s \u001b[0m\n",
|
||||
" ⠿ Container db_postgres \u001b[39mStarting\u001b[0m \u001b[34m0.3s \u001b[0m\n",
|
||||
" \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n",
|
||||
"\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/3\n",
|
||||
" ⠿ Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.4s \u001b[0m\n",
|
||||
" ⠿ Container db_postgres \u001b[39mStarting\u001b[0m \u001b[34m0.4s \u001b[0m\n",
|
||||
" \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n",
|
||||
"\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l\u001b[34m[+] Running 3/3\u001b[0m\n",
|
||||
" \u001b[32m✔\u001b[0m Container db_redis \u001b[32mStarted\u001b[0m \u001b[34m0.4s \u001b[0m\n",
|
||||
" \u001b[32m✔\u001b[0m Container db_postgres \u001b[32mStarted\u001b[0m \u001b[34m0.4s \u001b[0m\n",
|
||||
" \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n",
|
||||
"\u001b[?25h"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!docker rm -f db_postgres db_redis; docker compose -f docker/docker-compose.yml up -d ; sleep 5"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -137,7 +167,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -189,9 +219,39 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\t urls\n",
|
||||
"[]\n",
|
||||
"\t urls_duplicate\n",
|
||||
"[]\n",
|
||||
"\t urls_source_search\n",
|
||||
"[]\n",
|
||||
"\t source\n",
|
||||
"[]\n",
|
||||
"\t search\n",
|
||||
"[(1,\n",
|
||||
" 'https://api.missingkids.org/missingkids/servlet/XmlServlet?act=rss&LanguageCountry=en_US&orgPrefix=NCMC',\n",
|
||||
" 'rss_feed'),\n",
|
||||
" (2, 'www.breitbart.com', 'url_host'),\n",
|
||||
" (3, 'child abuse', 'keyword_search')]\n",
|
||||
"\t status_pattern_matching\n",
|
||||
"[('.*youtube\\\\.com/.*', 50, 'invalid'),\n",
|
||||
" ('.*tiktok\\\\.com/.*', 50, 'invalid'),\n",
|
||||
" ('.*twitter\\\\.com/.*', 50, 'invalid'),\n",
|
||||
" ('.*reddit\\\\.com/.*', 50, 'invalid'),\n",
|
||||
" ('.*libreddit\\\\.de/.*', 50, 'invalid'),\n",
|
||||
" ('.*radio\\\\.foxnews\\\\.com/.*', 50, 'invalid')]\n",
|
||||
"\t url_content\n",
|
||||
"[]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Connect to an existing database\n",
|
||||
"with psycopg.connect(connection_info) as conn:\n",
|
||||
@@ -208,9 +268,21 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[(1,\n",
|
||||
" 'https://api.missingkids.org/missingkids/servlet/XmlServlet?act=rss&LanguageCountry=en_US&orgPrefix=NCMC',\n",
|
||||
" 'rss_feed'),\n",
|
||||
" (2, 'www.breitbart.com', 'url_host'),\n",
|
||||
" (3, 'child abuse', 'keyword_search')]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Connect to an existing database\n",
|
||||
"with psycopg.connect(connection_info) as conn:\n",
|
||||
@@ -221,9 +293,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Connect to an existing database\n",
|
||||
"with psycopg.connect(connection_info) as conn:\n",
|
||||
|
||||
@@ -1,3 +1,12 @@
|
||||
from django.contrib import admin
|
||||
|
||||
# Register your models here.
|
||||
from .models import Search, Source, StatusPatternMatching, UrlContent, Urls, UrlsDuplicate, UrlsSourceSearch
|
||||
|
||||
admin.site.register(Search)
|
||||
admin.site.register(Source)
|
||||
admin.site.register(StatusPatternMatching)
|
||||
admin.site.register(UrlContent)
|
||||
admin.site.register(Urls)
|
||||
admin.site.register(UrlsDuplicate)
|
||||
admin.site.register(UrlsSourceSearch)
|
||||
|
||||
@@ -16,6 +16,8 @@ class Search(models.Model):
|
||||
managed = False
|
||||
db_table = 'search'
|
||||
|
||||
def __str__(self):
|
||||
return "[{}] {}".format(self.type, self.search)
|
||||
|
||||
class Source(models.Model):
|
||||
id = models.SmallAutoField(primary_key=True)
|
||||
@@ -25,6 +27,8 @@ class Source(models.Model):
|
||||
managed = False
|
||||
db_table = 'source'
|
||||
|
||||
def __str__(self):
|
||||
return self.source
|
||||
|
||||
class StatusPatternMatching(models.Model):
|
||||
pattern = models.TextField(primary_key=True)
|
||||
@@ -35,6 +39,8 @@ class StatusPatternMatching(models.Model):
|
||||
managed = False
|
||||
db_table = 'status_pattern_matching'
|
||||
|
||||
def __str__(self):
|
||||
return "{} -> {} [Priority: {}]".format(self.pattern, self.status, self.priority)
|
||||
|
||||
class UrlContent(models.Model):
|
||||
id_url = models.OneToOneField('Urls', models.DO_NOTHING, db_column='id_url', primary_key=True)
|
||||
@@ -57,7 +63,6 @@ class UrlContent(models.Model):
|
||||
managed = False
|
||||
db_table = 'url_content'
|
||||
|
||||
|
||||
class Urls(models.Model):
|
||||
class STATUS_ENUM(models.TextChoices):
|
||||
RAW = "raw", "Raw"
|
||||
@@ -76,6 +81,9 @@ class Urls(models.Model):
|
||||
db_table = 'urls'
|
||||
ordering = ["-ts_fetch"]
|
||||
|
||||
def __str__(self):
|
||||
return "{} {} {}".format(self.url, self.ts_fetch, self.status)
|
||||
|
||||
|
||||
class UrlsDuplicate(models.Model):
|
||||
id_url_canonical = models.OneToOneField(Urls, models.DO_NOTHING, db_column='id_url_canonical', primary_key=True) # The composite primary key (id_url_canonical, id_url_duplicated) found, that is not supported. The first column is selected.
|
||||
@@ -86,6 +94,9 @@ class UrlsDuplicate(models.Model):
|
||||
db_table = 'urls_duplicate'
|
||||
unique_together = (('id_url_canonical', 'id_url_duplicated'),)
|
||||
|
||||
def __str__(self):
|
||||
return Urls(id=self.id_url_duplicated), Urls(id=self.id_url_canonical)
|
||||
|
||||
|
||||
class UrlsSourceSearch(models.Model):
|
||||
id_url = models.OneToOneField(Urls, models.DO_NOTHING, db_column='id_url', primary_key=True) # The composite primary key (id_url, id_source, id_search) found, that is not supported. The first column is selected.
|
||||
@@ -96,3 +107,6 @@ class UrlsSourceSearch(models.Model):
|
||||
managed = False
|
||||
db_table = 'urls_source_search'
|
||||
unique_together = (('id_url', 'id_source', 'id_search'),)
|
||||
|
||||
def __str__(self):
|
||||
return Urls(id=self.id_url), Source(id=self.id_source), Search(id=self.id_search)
|
||||
|
||||
Reference in New Issue
Block a user