Refactoring fetcher, working feeds and raw url writer

This commit is contained in:
Luciano Gervasoni
2025-03-12 17:56:40 +01:00
parent e124dbc21a
commit 61c31ee9aa
24 changed files with 2085 additions and 194 deletions

View File

@@ -118,14 +118,23 @@
" title TEXT,\n",
" description TEXT,\n",
" content TEXT,\n",
" valid_content BOOLEAN,\n",
" language CHAR(2), -- ISO 639-1 Code\n",
" keywords TEXT[],\n",
" tags TEXT[],\n",
" authors TEXT[],\n",
-" image_urls TEXT[]\n",
" image_main TEXT,\n",
" images_url TEXT[],\n",
" videos_url TEXT[],\n",
" url_host TEXT, -- www.breitbart.com\n",
" site_name TEXT -- Breitbart News\n",
" );\n",
" CREATE INDEX idx_tags ON URL_CONTENT USING GIN(tags);\n",
" CREATE INDEX idx_authors ON URL_CONTENT USING GIN(authors);\n",
" CREATE INDEX idx_date_published ON URL_CONTENT (date_published);\n",
" CREATE INDEX idx_valid_content ON URL_CONTENT (valid_content);\n",
" CREATE INDEX idx_language ON URL_CONTENT (language);\n",
" CREATE INDEX idx_url_host ON URL_CONTENT (url_host);\n",
" \"\"\")\n",
"\n",
" # Feeds\n",