Refactoring fetcher; working feeds and raw URL writer
This commit is contained in:
11
1-DB.ipynb
11
1-DB.ipynb
@@ -118,14 +118,23 @@
|
||||
" title TEXT,\n",
|
||||
" description TEXT,\n",
|
||||
" content TEXT,\n",
|
||||
" valid_content BOOLEAN,\n",
|
||||
" language CHAR(2), -- ISO 639-1 Code\n",
|
||||
" keywords TEXT[],\n",
|
||||
" tags TEXT[],\n",
|
||||
" authors TEXT[],\n",
|
||||
" image_urls TEXT[]\n",
|
||||
" image_main TEXT,\n",
|
||||
" images_url TEXT[],\n",
|
||||
" videos_url TEXT[],\n",
|
||||
" url_host TEXT, -- www.breitbart.com\n",
|
||||
" site_name TEXT -- Breitbart News\n",
|
||||
" );\n",
|
||||
" CREATE INDEX idx_tags ON URL_CONTENT USING GIN(tags);\n",
|
||||
" CREATE INDEX idx_authors ON URL_CONTENT USING GIN(authors);\n",
|
||||
" CREATE INDEX idx_date_published ON URL_CONTENT (date_published);\n",
|
||||
" CREATE INDEX idx_valid_content ON URL_CONTENT (valid_content);\n",
|
||||
" CREATE INDEX idx_language ON URL_CONTENT (language);\n",
|
||||
" CREATE INDEX idx_url_host ON URL_CONTENT (url_host);\n",
|
||||
" \"\"\")\n",
|
||||
"\n",
|
||||
" # Feeds\n",
|
||||
|
||||
Reference in New Issue
Block a user