From f84c7729f8db487633d6c4c8d053a178c652c666 Mon Sep 17 00:00:00 2001 From: Luciano Gervasoni Date: Thu, 20 Mar 2025 17:19:52 +0100 Subject: [PATCH] Urls source search, cleaning code --- 1-DB.ipynb | 104 +++-------------- A_Development.ipynb | 60 ++-------- app_urls/README.md | 12 +- ...0003_urlssourcesearch_delete_urlssource.py | 27 +++++ app_urls/api/models.py | 9 +- app_urls/api/src/db_utils.py | 110 +++++------------- app_urls/api/src/fetch_feed.py | 21 ++-- app_urls/api/src/fetch_parser.py | 28 ++--- app_urls/api/src/fetch_search.py | 44 ++++--- app_urls/api/src/fetch_search_utils.py | 72 ++++++++---- app_urls/api/src/url_processor.py | 32 +++++ app_urls/api/tasks.py | 5 + app_urls/core/settings.py | 17 ++- 13 files changed, 241 insertions(+), 300 deletions(-) create mode 100644 app_urls/api/migrations/0003_urlssourcesearch_delete_urlssource.py diff --git a/1-DB.ipynb b/1-DB.ipynb index 0430949..cc5bf8b 100644 --- a/1-DB.ipynb +++ b/1-DB.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -11,64 +11,18 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "db_postgres\n", - "db_redis\n", - "\u001b[1A\u001b[1B\u001b[0G\u001b[?25l[+] Running 0/0\n", - " ⠋ Container db_redis \u001b[39mCreating\u001b[0m \u001b[34m0.1s \u001b[0m\n", - " ⠋ Container db_postgres \u001b[39mCreating\u001b[0m \u001b[34m0.1s \u001b[0m\n", - "\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/2\n", - " ⠿ Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.2s \u001b[0m\n", - " ⠿ Container db_postgres \u001b[39mStarting\u001b[0m \u001b[34m0.2s \u001b[0m\n", - " \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n", - "\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/3\n", - " ⠿ Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.3s \u001b[0m\n", - " ⠿ Container db_postgres \u001b[39mStarting\u001b[0m \u001b[34m0.3s \u001b[0m\n", - " \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n", - "\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/3\n", - " ⠿ Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.4s \u001b[0m\n", - " ⠿ Container db_postgres \u001b[39mStarting\u001b[0m \u001b[34m0.4s \u001b[0m\n", - " \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n", - "\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/3\n", - " ⠿ Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.5s \u001b[0m\n", - " ⠿ Container db_postgres \u001b[39mStarting\u001b[0m \u001b[34m0.5s \u001b[0m\n", - " \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n", - "\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l\u001b[34m[+] Running 3/3\u001b[0m\n", - " \u001b[32m✔\u001b[0m Container db_redis \u001b[32mStarted\u001b[0m \u001b[34m0.5s \u001b[0m\n", - " \u001b[32m✔\u001b[0m Container db_postgres \u001b[32mStarted\u001b[0m \u001b[34m0.5s \u001b[0m\n", - " \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n", - "\u001b[?25h" - ] - } - ], + "outputs": [], "source": [ "!docker rm -f db_postgres db_redis; docker compose -f docker/docker-compose.yml up -d ; sleep 5" ] }, { 
"cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "ename": "UndefinedTable", - "evalue": "relation \"urls_source\" does not exist", - "output_type": "error", - "traceback": [ - "\u001b[31m---------------------------------------------------------------------------\u001b[39m", - "\u001b[31mUndefinedTable\u001b[39m Traceback (most recent call last)", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[3]\u001b[39m\u001b[32m, line 19\u001b[39m\n\u001b[32m 15\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m conn.cursor() \u001b[38;5;28;01mas\u001b[39;00m cur:\n\u001b[32m 16\u001b[39m \u001b[38;5;66;03m# Autocommit at end of transaction (Atomic insert of URLs and sources)\u001b[39;00m\n\u001b[32m 17\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m conn.transaction() \u001b[38;5;28;01mas\u001b[39;00m tx:\n\u001b[32m 18\u001b[39m \u001b[38;5;66;03m# Create URLs table\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m19\u001b[39m c = \u001b[43mcur\u001b[49m\u001b[43m.\u001b[49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\"\"\u001b[39;49m\n\u001b[32m 20\u001b[39m \u001b[33;43m CREATE TYPE URL_STATUS AS ENUM (\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mraw\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m, \u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43merror\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m, \u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mvalid\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m, \u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43munknown\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m, \u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43minvalid\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m, \u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mduplicate\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m);\u001b[39;49m\n\u001b[32m 21\u001b[39m \n\u001b[32m 22\u001b[39m \u001b[33;43m CREATE TABLE URLS (\u001b[39;49m\n\u001b[32m 23\u001b[39m \u001b[33;43m id SERIAL PRIMARY KEY,\u001b[39;49m\n\u001b[32m 24\u001b[39m \u001b[33;43m url TEXT NOT NULL UNIQUE,\u001b[39;49m\n\u001b[32m 25\u001b[39m \u001b[33;43m ts_fetch TIMESTAMPTZ NOT NULL DEFAULT NOW(),\u001b[39;49m\n\u001b[32m 26\u001b[39m \u001b[33;43m status URL_STATUS NOT NULL DEFAULT \u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mraw\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m -- ,\u001b[39;49m\n\u001b[32m 27\u001b[39m \u001b[33;43m -- status_wendy WENDY_STATUS DEFAULT NULL,\u001b[39;49m\n\u001b[32m 28\u001b[39m \u001b[33;43m -- ts_wendy TIMESTAMPTZ DEFAULT NULL\u001b[39;49m\n\u001b[32m 29\u001b[39m \u001b[33;43m );\u001b[39;49m\n\u001b[32m 30\u001b[39m \u001b[33;43m CREATE INDEX idx_urls_status ON urls(status);\u001b[39;49m\n\u001b[32m 31\u001b[39m \u001b[33;43m CREATE INDEX idx_urls_ts_fetch ON urls(ts_fetch);\u001b[39;49m\n\u001b[32m 32\u001b[39m \n\u001b[32m 33\u001b[39m \u001b[33;43m CREATE TABLE URLS_DUPLICATE (\u001b[39;49m\n\u001b[32m 34\u001b[39m \u001b[33;43m id_url_canonical INTEGER REFERENCES URLS(id),\u001b[39;49m\n\u001b[32m 35\u001b[39m \u001b[33;43m id_url_duplicated INTEGER REFERENCES URLS(id),\u001b[39;49m\n\u001b[32m 36\u001b[39m \u001b[33;43m PRIMARY KEY (id_url_canonical, id_url_duplicated)\u001b[39;49m\n\u001b[32m 37\u001b[39m \u001b[33;43m );\u001b[39;49m\n\u001b[32m 38\u001b[39m \u001b[33;43m \u001b[39;49m\n\u001b[32m 39\u001b[39m \u001b[33;43m CREATE TYPE SEARCH_TYPE AS ENUM 
(\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mrss_feed\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m, \u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mkeyword_search\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m, \u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43murl_host\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m);\u001b[39;49m\n\u001b[32m 40\u001b[39m \u001b[33;43m CREATE TABLE SEARCH (\u001b[39;49m\n\u001b[32m 41\u001b[39m \u001b[33;43m id SMALLSERIAL PRIMARY KEY,\u001b[39;49m\n\u001b[32m 42\u001b[39m \u001b[33;43m search TEXT NOT NULL UNIQUE,\u001b[39;49m\n\u001b[32m 43\u001b[39m \u001b[33;43m type SEARCH_TYPE NOT NULL\u001b[39;49m\n\u001b[32m 44\u001b[39m \u001b[33;43m );\u001b[39;49m\n\u001b[32m 45\u001b[39m \u001b[33;43m CREATE INDEX idx_search_type ON SEARCH(type);\u001b[39;49m\n\u001b[32m 46\u001b[39m \u001b[33;43m \u001b[39;49m\n\u001b[32m 47\u001b[39m \u001b[33;43m CREATE TABLE SOURCE (\u001b[39;49m\n\u001b[32m 48\u001b[39m \u001b[33;43m id SMALLSERIAL PRIMARY KEY,\u001b[39;49m\n\u001b[32m 49\u001b[39m \u001b[33;43m source TEXT NOT NULL UNIQUE\u001b[39;49m\n\u001b[32m 50\u001b[39m \u001b[33;43m );\u001b[39;49m\n\u001b[32m 51\u001b[39m \u001b[33;43m \u001b[39;49m\n\u001b[32m 52\u001b[39m \u001b[33;43m CREATE TABLE URLS_SOURCE_SEARCH (\u001b[39;49m\n\u001b[32m 53\u001b[39m \u001b[33;43m id_url INTEGER REFERENCES URLS(id),\u001b[39;49m\n\u001b[32m 54\u001b[39m \u001b[33;43m id_source SMALLINT REFERENCES SOURCE(id) ON UPDATE CASCADE ON DELETE RESTRICT,\u001b[39;49m\n\u001b[32m 55\u001b[39m \u001b[33;43m id_search SMALLINT REFERENCES SEARCH(id) ON UPDATE CASCADE ON DELETE RESTRICT,\u001b[39;49m\n\u001b[32m 56\u001b[39m \u001b[33;43m PRIMARY KEY(id_url, id_source)\u001b[39;49m\n\u001b[32m 57\u001b[39m \u001b[33;43m );\u001b[39;49m\n\u001b[32m 58\u001b[39m \u001b[33;43m CREATE INDEX idx_source ON urls_source(id_source);\u001b[39;49m\n\u001b[32m 59\u001b[39m \n\u001b[32m 60\u001b[39m \u001b[33;43m CREATE TABLE STATUS_PATTERN_MATCHING (\u001b[39;49m\n\u001b[32m 61\u001b[39m \u001b[33;43m pattern TEXT PRIMARY KEY,\u001b[39;49m\n\u001b[32m 62\u001b[39m \u001b[33;43m priority SMALLINT NOT NULL,\u001b[39;49m\n\u001b[32m 63\u001b[39m \u001b[33;43m status URL_STATUS NOT NULL\u001b[39;49m\n\u001b[32m 64\u001b[39m \u001b[33;43m );\u001b[39;49m\n\u001b[32m 65\u001b[39m \u001b[33;43m \u001b[39;49m\n\u001b[32m 66\u001b[39m \u001b[33;43m \u001b[39;49m\n\u001b[32m 67\u001b[39m \u001b[33;43m CREATE TABLE URL_CONTENT (\u001b[39;49m\n\u001b[32m 68\u001b[39m \u001b[33;43m id_url INTEGER PRIMARY KEY REFERENCES URLS(id),\u001b[39;49m\n\u001b[32m 69\u001b[39m \u001b[33;43m date_published TIMESTAMPTZ DEFAULT NOW(),\u001b[39;49m\n\u001b[32m 70\u001b[39m \u001b[33;43m title TEXT,\u001b[39;49m\n\u001b[32m 71\u001b[39m \u001b[33;43m description TEXT,\u001b[39;49m\n\u001b[32m 72\u001b[39m \u001b[33;43m content TEXT,\u001b[39;49m\n\u001b[32m 73\u001b[39m \u001b[33;43m valid_content BOOLEAN,\u001b[39;49m\n\u001b[32m 74\u001b[39m \u001b[33;43m language CHAR(2), -- ISO 639-1 Code\u001b[39;49m\n\u001b[32m 75\u001b[39m \u001b[33;43m keywords TEXT[],\u001b[39;49m\n\u001b[32m 76\u001b[39m \u001b[33;43m tags TEXT[],\u001b[39;49m\n\u001b[32m 77\u001b[39m \u001b[33;43m authors TEXT[],\u001b[39;49m\n\u001b[32m 78\u001b[39m \u001b[33;43m image_main_url TEXT,\u001b[39;49m\n\u001b[32m 79\u001b[39m \u001b[33;43m images_url TEXT[],\u001b[39;49m\n\u001b[32m 80\u001b[39m \u001b[33;43m videos_url TEXT[],\u001b[39;49m\n\u001b[32m 81\u001b[39m \u001b[33;43m url_host TEXT, -- 
www.breitbart.com\u001b[39;49m\n\u001b[32m 82\u001b[39m \u001b[33;43m site_name TEXT -- Breitbart News\u001b[39;49m\n\u001b[32m 83\u001b[39m \u001b[33;43m );\u001b[39;49m\n\u001b[32m 84\u001b[39m \u001b[33;43m CREATE INDEX idx_tags ON URL_CONTENT USING GIN(tags);\u001b[39;49m\n\u001b[32m 85\u001b[39m \u001b[33;43m CREATE INDEX idx_authors ON URL_CONTENT USING GIN(authors);\u001b[39;49m\n\u001b[32m 86\u001b[39m \u001b[33;43m CREATE INDEX idx_date_published ON URL_CONTENT (date_published);\u001b[39;49m\n\u001b[32m 87\u001b[39m \u001b[33;43m CREATE INDEX idx_valid_content ON URL_CONTENT (valid_content);\u001b[39;49m\n\u001b[32m 88\u001b[39m \u001b[33;43m CREATE INDEX idx_language ON URL_CONTENT (language);\u001b[39;49m\n\u001b[32m 89\u001b[39m \u001b[33;43m CREATE INDEX idx_url_host ON URL_CONTENT (url_host);\u001b[39;49m\n\u001b[32m 90\u001b[39m \u001b[33;43m \u001b[39;49m\u001b[33;43m\"\"\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[32m 92\u001b[39m \u001b[38;5;66;03m# Feeds\u001b[39;00m\n\u001b[32m 93\u001b[39m cur.execute( \u001b[33m\"\u001b[39m\u001b[33mINSERT INTO SEARCH (search, type) VALUES (\u001b[39m\u001b[33m'\u001b[39m\u001b[33mhttps://api.missingkids.org/missingkids/servlet/XmlServlet?act=rss&LanguageCountry=en_US&orgPrefix=NCMC\u001b[39m\u001b[33m'\u001b[39m\u001b[33m, \u001b[39m\u001b[33m'\u001b[39m\u001b[33mrss_feed\u001b[39m\u001b[33m'\u001b[39m\u001b[33m);\u001b[39m\u001b[33m\"\u001b[39m )\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/anaconda3/envs/matitos/lib/python3.12/site-packages/psycopg/cursor.py:97\u001b[39m, in \u001b[36mCursor.execute\u001b[39m\u001b[34m(self, query, params, prepare, binary)\u001b[39m\n\u001b[32m 93\u001b[39m \u001b[38;5;28mself\u001b[39m._conn.wait(\n\u001b[32m 94\u001b[39m \u001b[38;5;28mself\u001b[39m._execute_gen(query, params, prepare=prepare, binary=binary)\n\u001b[32m 95\u001b[39m )\n\u001b[32m 96\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m e._NO_TRACEBACK \u001b[38;5;28;01mas\u001b[39;00m ex:\n\u001b[32m---> \u001b[39m\u001b[32m97\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m ex.with_traceback(\u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[32m 98\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\n", - "\u001b[31mUndefinedTable\u001b[39m: relation \"urls_source\" does not exist" - ] - } - ], + "outputs": [], "source": [ "INSERT_TABLES = True\n", "INSERT_SAMPLE_DATA = False\n", @@ -125,9 +79,10 @@ " id_url INTEGER REFERENCES URLS(id),\n", " id_source SMALLINT REFERENCES SOURCE(id) ON UPDATE CASCADE ON DELETE RESTRICT,\n", " id_search SMALLINT REFERENCES SEARCH(id) ON UPDATE CASCADE ON DELETE RESTRICT,\n", - " PRIMARY KEY(id_url, id_source)\n", + " PRIMARY KEY(id_url, id_source, id_search)\n", " );\n", - " CREATE INDEX idx_source ON urls_source(id_source);\n", + " CREATE INDEX idx_source ON URLS_SOURCE_SEARCH(id_source);\n", + " CREATE INDEX idx_search ON URLS_SOURCE_SEARCH(id_search);\n", "\n", " CREATE TABLE STATUS_PATTERN_MATCHING (\n", " pattern TEXT PRIMARY KEY,\n", @@ -160,11 +115,12 @@ " CREATE INDEX idx_language ON URL_CONTENT (language);\n", " CREATE INDEX idx_url_host ON URL_CONTENT (url_host);\n", " \"\"\")\n", + "\n", + " ### Default insert values\n", " \n", " # Feeds\n", " cur.execute( \"INSERT INTO SEARCH (search, type) VALUES ('https://api.missingkids.org/missingkids/servlet/XmlServlet?act=rss&LanguageCountry=en_US&orgPrefix=NCMC', 'rss_feed');\" )\n", " # Websites of interest\n", - " cur.execute( \"INSERT INTO SEARCH (search, type) VALUES ('www.unicef.org', 'url_host');\" )\n", " cur.execute( 
\"INSERT INTO SEARCH (search, type) VALUES ('www.breitbart.com', 'url_host');\" )\n", " # Search keywords\n", " cur.execute( \"INSERT INTO SEARCH (search, type) VALUES ('child abuse', 'keyword_search');\" )\n", @@ -207,17 +163,7 @@ " cur.execute(\"INSERT INTO SOURCE (source) values ('news.google.com')\")\n", " cur.execute(\"INSERT INTO SOURCE (source) values ('qwant.com')\")\n", "\n", - " cur.execute(\"INSERT INTO URLS_SOURCE (id_url, id_source) values (1, 1)\")\n", - " cur.execute(\"INSERT INTO URLS_SOURCE (id_url, id_source) values (2, 1)\")\n", - " cur.execute(\"INSERT INTO URLS_SOURCE (id_url, id_source) values (3, 1)\")\n", - " cur.execute(\"INSERT INTO URLS_SOURCE (id_url, id_source) values (4, 1)\")\n", - " cur.execute(\"INSERT INTO URLS_SOURCE (id_url, id_source) values (5, 1)\")\n", - " cur.execute(\"INSERT INTO URLS_SOURCE (id_url, id_source) values (6, 1)\")\n", - " cur.execute(\"INSERT INTO URLS_SOURCE (id_url, id_source) values (7, 1)\")\n", - "\n", - " cur.execute(\"INSERT INTO URLS_SOURCE (id_url, id_source) values (1, 2)\")\n", - " cur.execute(\"INSERT INTO URLS_SOURCE (id_url, id_source) values (2, 2)\")\n", - " cur.execute(\"INSERT INTO URLS_SOURCE (id_url, id_source) values (3, 2)\")\n", + " cur.execute(\"INSERT INTO URLS_SOURCE (id_url, id_source, id_search) values (1, 1, 1)\")\n", "\n", " for j in range(5):\n", " import time\n", @@ -241,26 +187,6 @@ "outputs": [], "source": [] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Connect to an existing database\n", - "with psycopg.connect(connection_info) as conn:\n", - " # Open a cursor to perform database operations\n", - " with conn.cursor() as cur:\n", - " pprint( cur.execute(\"SELECT * FROM SEARCH;\").fetchall() )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "code", "execution_count": null, @@ -285,7 +211,13 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# Connect to an existing database\n", + "with psycopg.connect(connection_info) as conn:\n", + " # Open a cursor to perform database operations\n", + " with conn.cursor() as cur:\n", + " pprint( cur.execute(\"SELECT * FROM SEARCH;\").fetchall() )" + ] }, { "cell_type": "code", diff --git a/A_Development.ipynb b/A_Development.ipynb index fdf01d8..2f88e52 100644 --- a/A_Development.ipynb +++ b/A_Development.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -14,25 +14,16 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Searching Bing \n", - " \r" - ] - } - ], + "outputs": [], "source": [ "results = engine.search('news: \"child abuse\"', pages=2)" ] }, { "cell_type": "code", - "execution_count": 40, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -41,18 +32,9 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Searching Brave \n", - " \r" - ] - } - ], + "outputs": [], "source": [ "query = 'news: child abuse'\n", "r = engine.search(query, pages=2)" @@ -60,20 +42,9 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ 
- "{'_results': []}" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "r.__dict__" ] @@ -87,20 +58,9 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "newspaper.exceptions.ArticleBinaryDataException" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import newspaper\n", "newspaper.ArticleBinaryDataException" diff --git a/app_urls/README.md b/app_urls/README.md index 477ee31..9edfd16 100644 --- a/app_urls/README.md +++ b/app_urls/README.md @@ -2,7 +2,7 @@ ``` conda create -n matitos_urls python=3.12 conda activate matitos_urls -pip install django psycopg[binary] django-rq +pip install django psycopg[binary] django-redis django-rq pip install feedparser python-dateutil newspaper4k lxml[html_clean] googlenewsdecoder gnews duckduckgo_search GoogleNews ``` @@ -77,8 +77,10 @@ DB_PORT=${DB_NAME:-5432} REDIS_HOST=${REDIS_HOST:-localhost} REDIS_PORT=${REDIS_PORT:-6379} -# Default RQ queue timeout +# Default RQ job timeout RQ_DEFAULT_TIMEOUT=${REDIS_PORT:-900} +# Default RQ job queue TTL +RQ_DEFAULT_RESULT_TTL=${RQ_DEFAULT_RESULT_TTL:-3600} ``` * Django DB @@ -94,9 +96,9 @@ python manage.py makemigrations api; python manage.py migrate --fake-initial # Server python manage.py runserver -# Worker -python manage.py rqworker default -while true; do python manage.py rqworker default --burst -v 0; sleep 5; done +# Workers +# python manage.py rqworker high default low +python manage.py rqworker high default low # Visualize DB http://localhost:8080/?pgsql=matitos_db&username=supermatitos&db=matitos&ns=public&select=urls&order%5B0%5D=id diff --git a/app_urls/api/migrations/0003_urlssourcesearch_delete_urlssource.py b/app_urls/api/migrations/0003_urlssourcesearch_delete_urlssource.py new file mode 100644 index 0000000..94c990a --- /dev/null +++ b/app_urls/api/migrations/0003_urlssourcesearch_delete_urlssource.py @@ -0,0 +1,27 @@ +# Generated by Django 4.2.20 on 2025-03-20 16:12 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('api', '0002_delete_feed_delete_websiteofinterest_and_more'), + ] + + operations = [ + migrations.CreateModel( + name='UrlsSourceSearch', + fields=[ + ('id_url', models.OneToOneField(db_column='id_url', on_delete=django.db.models.deletion.DO_NOTHING, primary_key=True, serialize=False, to='api.urls')), + ], + options={ + 'db_table': 'urls_source_search', + 'managed': False, + }, + ), + migrations.DeleteModel( + name='UrlsSource', + ), + ] diff --git a/app_urls/api/models.py b/app_urls/api/models.py index 8459cbc..8e9a048 100644 --- a/app_urls/api/models.py +++ b/app_urls/api/models.py @@ -87,11 +87,12 @@ class UrlsDuplicate(models.Model): unique_together = (('id_url_canonical', 'id_url_duplicated'),) -class UrlsSource(models.Model): - id_url = models.OneToOneField(Urls, models.DO_NOTHING, db_column='id_url', primary_key=True) # The composite primary key (id_url, id_source) found, that is not supported. The first column is selected. +class UrlsSourceSearch(models.Model): + id_url = models.OneToOneField(Urls, models.DO_NOTHING, db_column='id_url', primary_key=True) # The composite primary key (id_url, id_source, id_search) found, that is not supported. The first column is selected. 
id_source = models.ForeignKey(Source, models.DO_NOTHING, db_column='id_source') + id_search = models.ForeignKey(Search, models.DO_NOTHING, db_column='id_search') class Meta: managed = False - db_table = 'urls_source' - unique_together = (('id_url', 'id_source'),) \ No newline at end of file + db_table = 'urls_source_search' + unique_together = (('id_url', 'id_source', 'id_search'),) diff --git a/app_urls/api/src/db_utils.py b/app_urls/api/src/db_utils.py index 634d9c1..6ea318e 100644 --- a/app_urls/api/src/db_utils.py +++ b/app_urls/api/src/db_utils.py @@ -1,11 +1,9 @@ -from ..models import Urls, UrlContent, UrlsSource, UrlsDuplicate, Source, StatusPatternMatching +from ..models import Urls, UrlContent, UrlsSourceSearch, UrlsDuplicate, StatusPatternMatching, Source, Search from django.db.models import Q -from .url_processor import process_url from django.core.cache import cache from django.db import IntegrityError -import hashlib +from .url_processor import process_url, get_with_protocol import re -import time import traceback from .logger import get_logger logger = get_logger() @@ -19,61 +17,32 @@ class DB_Handler(): # URL host slowdown self.url_host_slowdown_seconds = 5 - def _get_safe_cache_key(self, raw_key): - """Generate a safe cache key using an MD5 hash""" - return hashlib.md5(raw_key.encode()).hexdigest() - - def _cache_key(self, cache_key, hash_encode, cache_timeout): - if (hash_encode): - cache.set(self._get_safe_cache_key(cache_key), True, timeout=cache_timeout) - else: - cache.set(cache_key, True, timeout=cache_timeout) - - def _is_cached_key(self, cache_key, hash_encoded): - # Returns True if cached - if (hash_encoded): - return cache.get(self._get_safe_cache_key(cache_key)) is not None - else: - return cache.get(cache_key) is not None - - def _clean_protocol(self, url): - # http:// -> https:// - url = url.replace("http://", "https://") - # "" -> https:// - if not (url.startswith("https://")): - url = "https://" + url - return url - - def insert_raw_urls(self, urls, source): + def insert_raw_urls(self, urls, obj_source, obj_search): try: logger.debug("Inserting raw URLs") # Empty? if (len(urls) == 0): - logger.debug("Empty batch of urls (not writing to DB) for source: {}".format(source)) + logger.debug("Empty batch of urls (not writing to DB) for source-search: {} - {}".format(obj_source.source, obj_search.search)) return - # Default protocol https:// - urls_clean = [self._clean_protocol(url) for url in urls] - - # Get the source (create if not exists) - source_obj, created = Source.objects.get_or_create(source=source) + urls_clean = [get_with_protocol(url) for url in urls] urls_to_insert = [] # Per URL for url in urls_clean: ### Already processed URL? 
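+                # Two-level cache: "insert_{url}" marks a URL that was inserted recently at all,
+                # while "insert_{url}{source}{search}" marks this exact (URL, source, search)
+                # combination, so a known URL surfaced by a new source or search still gets its
+                # URLS_SOURCE_SEARCH link created below.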
- if (self._is_cached_key(url, hash_encoded=True)): + if (cache.get("insert_{}".format(url)) is not None): logger.debug("Already cached URL: {}".format(url)) - if (self._is_cached_key("{}{}".format(source, url), hash_encoded=True)): - logger.debug("Already cached (source, URL): {} {}".format(source, url)) + if (cache.get("insert_{}{}{}".format(url, obj_source.source, obj_search.search)) is not None): + logger.debug("Already cached (URL, source, search): {} {} {}".format(url, obj_source.source, obj_search.search)) else: - ### Insert (URL_id, source_id), since not cached + ### Insert (URL_id, source_id, search_id), since not cached # Get URL ID (should already be created) - url_obj, created = Urls.objects.get_or_create(url=url) + obj_url, created = Urls.objects.get_or_create(url=url) # Create (id_source, id_url) (shouldn't exist) - UrlsSource.objects.get_or_create(id_source=source_obj, id_url=url_obj) + UrlsSourceSearch.objects.get_or_create(id_url=obj_url, id_source=obj_source, id_search=obj_search) else: # Add object to insert # url_object_to_insert.append(Urls(url=url)) @@ -85,16 +54,20 @@ class DB_Handler(): # URLs (ignore_conflicts=False to return IDs) bulk_created_urls = Urls.objects.bulk_create([Urls(url=url) for url in urls_to_insert], ignore_conflicts=False) # (URL_id, source_id) - UrlsSource.objects.bulk_create([UrlsSource(id_source=source_obj, id_url=url_obj) for url_obj in bulk_created_urls], ignore_conflicts=True) + UrlsSourceSearch.objects.bulk_create([UrlsSourceSearch(id_url=obj_url, id_source=obj_source, id_search=obj_search) for obj_url in bulk_created_urls], ignore_conflicts=True) except IntegrityError as e: ### Fallback to one-by-one insert logger.debug("bulk_create exception while inserting raw URLs (fails if duplicated URL), falling back to non-bulk method") # One by one for url in urls_to_insert: # URL - url_obj, created = Urls.objects.get_or_create(url=url) - # (URL, source) - UrlsSource.objects.get_or_create(id_source=source_obj, id_url=url_obj) + obj_url, created = Urls.objects.get_or_create(url=url) + if (created): + logger.info("CREATED: {}".format(obj_url.url)) + else: + logger.info("NOT CREATED: {}".format(obj_url.url)) + # (URL, source, search) + UrlsSourceSearch.objects.get_or_create(id_url=obj_url, id_source=obj_source, id_search=obj_search) except Exception as e: logger.warning("bulk_create unknown exception while inserting raw URLs: {}\n{}".format(e, traceback.format_exc())) # Avoid caching due to error on insertion @@ -102,37 +75,14 @@ class DB_Handler(): # Insert or update cache for url in urls_clean: - # Hash encode URLs for special characters - self._cache_key(url, hash_encode=True, cache_timeout=self._cache_timeout_insert_url) - self._cache_key("{}{}".format(source, url), hash_encode=True, cache_timeout=self._cache_timeout_insert_url) + cache.set("insert_{}".format(url), True, timeout=self._cache_timeout_insert_url) + cache.set("insert_{}{}{}".format(url, obj_source.source, obj_search.search), True, timeout=self._cache_timeout_insert_url) logger.info("Inserted #{} raw URLs".format(len(urls_to_insert))) except Exception as e: logger.warning("Exception inserting raw URLs: {}\n{}".format(e, traceback.format_exc())) - def _get_url_host(self, url): - # URL no protocol, first substring before '/' - url_host = url.replace("https://", "").replace("http://", "").split("/")[0] - return url_host - - def _url_host_slowdown(self, url, url_host_slowdown_seconds): - ### Avoid (frequent) too many requests to the same URL host - # Get URL host - url_host = 
self._get_url_host(url) - # Recently processed URL host? -> Slow down required - last_cached_timestamp = cache.get("processed_{}".format(url_host), None) - if last_cached_timestamp: - # Get time since last processed URL host (in seconds) - time_since_last_processed = time.time() - last_cached_timestamp - # Amount of time required to sleep? - slowdown_required = max(0, url_host_slowdown_seconds - time_since_last_processed) - logger.debug("Slow down (sleeping {:.2f}) for URL host {}".format(slowdown_required, url_host)) - # Sleep - time.sleep(slowdown_required) - # About to process URL host, cache time - cache.set("processed_{}".format(url_host), time.time(), timeout=60*5) # Expire after 5 minutes - def _process_single_url(self, obj_url, status_pattern_match, raise_exception_on_error): def set_status(obj_url, status): @@ -158,8 +108,6 @@ class DB_Handler(): ##### Process URL try: - # Slow down if required to avoid too many requests error - self._url_host_slowdown(obj_url.url, self.url_host_slowdown_seconds) # Get data dict_url_data = process_url(obj_url.url) # Not none or handle as exception @@ -190,17 +138,17 @@ class DB_Handler(): # Get or create URL with canonical form obj_url_canonical, created = Urls.objects.get_or_create(url=dict_url_data.get("url_canonical")) - # Get the sources id associated to obj_url.id - url_sources = UrlsSource.objects.filter(id_url=obj_url) - for url_source_obj in url_sources: + # Get the source-search IDs associated to obj_url.id + list_url_source_search = UrlsSourceSearch.objects.fiter(id_url=obj_url) + for obj_url_source_search in list_url_source_search: # Associate same sources to url_canonical (it might already exist) - obj_urls_source, created = UrlsSource.objects.get_or_create(id_source=url_source_obj.id_source, id_url=obj_url_canonical) + UrlsSourceSearch.objects.get_or_create(id_url=obj_url_canonical, id_source=obj_url_source_search.id_source, id_search=obj_url_source_search.id_search) # URLs duplciate association - obj_urls_duplicate, created = UrlsDuplicate.objects.get_or_create(id_url_canonical=obj_url_canonical, id_url_duplicated=obj_url) + UrlsDuplicate.objects.get_or_create(id_url_canonical=obj_url_canonical, id_url_duplicated=obj_url) # TODO: return obj_url_canonical so as to directly process the recently inserted URL - # Whever this function is called, add: + # Wherever this function is called, add: # self._process_single_url(obj_url_canonical, status_pattern_match, raise_exception_on_error) # Next URL @@ -281,7 +229,7 @@ class DB_Handler(): # Per URL for obj_url in error_urls: # URL ID cached? 
-> Tried to process recently already, skip - if (self._is_cached_key("error_{}".format(obj_url.id), hash_encoded=False)): + if (cache.get("error_{}".format(obj_url.id)) is not None): logger.debug("Already cached URL ID: {}".format(obj_url.id)) num_urls_skipped += 1 continue @@ -292,7 +240,7 @@ class DB_Handler(): num_urls_processed += 1 except Exception as e: # Error, cache to avoid re-processing for X time - self._cache_key("error_{}".format(obj_url.id), hash_encode=False, cache_timeout=self._cache_timeout_error_url) + cache.set("error_{}".format(obj_url.id), True, timeout=self._cache_timeout_insert_url) num_urls_skipped += 1 # Get following batch of URLs, status='error' diff --git a/app_urls/api/src/fetch_feed.py b/app_urls/api/src/fetch_feed.py index 8d7389b..bc2f809 100644 --- a/app_urls/api/src/fetch_feed.py +++ b/app_urls/api/src/fetch_feed.py @@ -1,5 +1,5 @@ from .db_utils import DB_Handler -from ..models import Search +from ..models import Search, Source import feedparser import dateutil import traceback @@ -14,16 +14,19 @@ class FetchFeeds(): try: logger.debug("Starting FetchFeeds.run()") - # Get feeds - list_url_feeds = list(Search.objects.filter(type=Search.TYPE_ENUM.RSS_FEED).values_list('search', flat=True)) - logger.debug("Fetching from feeds: {}".format(list_url_feeds)) + # Get source object + obj_source, created = Source.objects.get_or_create(source="feeds") + + # Get feeds objects + list_obj_search_feeds = Search.objects.filter(type=Search.TYPE_ENUM.RSS_FEED) + logger.debug("Fetching from feeds: {}".format([e.search for e in list_obj_search_feeds])) # Process via RSS feeds - for url_feed in list_url_feeds: + for obj_search in list_obj_search_feeds: # Initialize urls_fetched, urls_publish_date = [], [] # Fetch feeds - feeds = feedparser.parse(url_feed) + feeds = feedparser.parse(obj_search.search) # Parse for f in feeds.get("entries", []): # Get URL @@ -41,10 +44,8 @@ class FetchFeeds(): urls_publish_date.append(publish_date_parsed) # URL urls_fetched.append(url) - - # URL fetching source - source = "feed {}".format(url_feed) + # Write to DB - DB_Handler().insert_raw_urls(urls_fetched, source) + DB_Handler().insert_raw_urls(urls_fetched, obj_source, obj_search) except Exception as e: logger.warning("Exception in FetchFeeds.run(): {}\n{}".format(e, traceback.format_exc())) diff --git a/app_urls/api/src/fetch_parser.py b/app_urls/api/src/fetch_parser.py index cea8580..04398e2 100644 --- a/app_urls/api/src/fetch_parser.py +++ b/app_urls/api/src/fetch_parser.py @@ -1,5 +1,6 @@ from .db_utils import DB_Handler -from ..models import Search +from ..models import Search, Source +from .url_processor import get_with_protocol, url_host_slowdown import newspaper import traceback from .logger import get_logger @@ -13,27 +14,26 @@ class FetchParser(): try: logger.debug("Starting FetchParser.run() for {}") + # Get source object + obj_source, created = Source.objects.get_or_create(source="newspaper4k") # Get URL hosts - list_url_host = list(Search.objects.filter(type=Search.TYPE_ENUM.URL_HOST).values_list('search', flat=True)) - logger.debug("Fetching news by parsing URL hosts: {}".format(list_url_host)) + list_url_host = Search.objects.filter(type=Search.TYPE_ENUM.URL_HOST) + logger.debug("Fetching news by parsing URL hosts: {}".format([e.search for e in list_url_host])) # Process newspaper4k build method - for url_host_feed in list_url_host: + for obj_search in list_url_host: # Protocol - if not (url_host_feed.startswith("http")): - url_host_feed_formatted = "https://" + url_host_feed - 
else: - url_host_feed_formatted = url_host_feed - - logger.debug("Fetching newspaper4k parsing based on URL: {}".format(url_host_feed_formatted)) + url_host_protocol = get_with_protocol(obj_search.search) + logger.debug("Fetching newspaper4k parsing based on URL: {}".format(url_host_protocol)) + + # Make sure no requests made for the last X seconds + url_host_slowdown(url_host_protocol, url_host_slowdown_seconds=5) # Source object - url_host_built = newspaper.build(url_host_feed_formatted) + url_host_built = newspaper.build(url_host_protocol) # Get articles URL list urls_fetched = url_host_built.article_urls() - # URL fetching source - source = "newspaper4k {}".format(url_host_feed) # Write to DB - DB_Handler().insert_raw_urls(urls_fetched, source) + DB_Handler().insert_raw_urls(urls_fetched, obj_source, obj_search) except Exception as e: logger.warning("Exception in FetchParser.run(): {}\n{}".format(e, traceback.format_exc())) diff --git a/app_urls/api/src/fetch_search.py b/app_urls/api/src/fetch_search.py index 2b9949f..1554c0d 100644 --- a/app_urls/api/src/fetch_search.py +++ b/app_urls/api/src/fetch_search.py @@ -1,5 +1,6 @@ from .db_utils import DB_Handler -from ..models import Search +from ..models import Search, Source +from django.db.models import Q import traceback import time from .fetch_search_utils import search_gnews, search_ddg, search_googlenews_general, search_googlenews_news @@ -10,54 +11,59 @@ class FetchSearcher(): def __init__(self) -> None: logger.debug("Initializing Fetcher Searcher") + def _get_source_object(self, source): + # TODO: Cache + # self.cached_sources = {} + # Get source object + obj_source, created = Source.objects.get_or_create(source=source) + return obj_source + def run(self): try: logger.debug("Starting FetchSearcher.run()") - - # Get keyword searches of interest - list_keyword_search = list(Search.objects.filter(type=Search.TYPE_ENUM.KEYWORD_SEARCH).values_list('search', flat=True)) - # Get URL host of interest - list_url_host = list(Search.objects.filter(type=Search.TYPE_ENUM.URL_HOST).values_list('search', flat=True)) - - # TODO: allintitle: "child abuse" - # TODO: intitle: "child abuse" - # list_keyword_search + ['allintitle: "{}"'.format(s) for s in list_keyword_search] + ['intitle: "{}"'.format(s) for s in list_keyword_search] - # Merge searches - list_search = list_keyword_search + ["site:{}".format(u) for u in list_url_host] - logger.debug("Fetching from keyword search: {}".format(list_search)) + + # Get search objects of interest + list_search_obj = Search.objects.filter(Q(type=Search.TYPE_ENUM.URL_HOST) | Q(type=Search.TYPE_ENUM.KEYWORD_SEARCH)) + logger.debug("Fetching from search: {}".format(["{} ({})".format(e.search, e.type) for e in list_search_obj])) # Search - for keyword_search in list_search: + for obj_search in list_search_obj: # TODO: language & country customization + # TODO: allintitle: "child abuse" + # TODO: intitle: "child abuse" + + # Search + keyword_search = "{}{}".format("site:" if obj_search.type is Search.TYPE_ENUM.URL_HOST else "", obj_search.search) # DDG News time.sleep(5) raw_urls, source = search_ddg(keyword_search, category="news", timelimit="d", max_results=None, region = "wt-wt") # Write to DB - DB_Handler().insert_raw_urls(raw_urls, source) + DB_Handler().insert_raw_urls(raw_urls, self._get_source_object(source), obj_search) # GNews time.sleep(5) raw_urls, source = search_gnews(keyword_search, language="en", country="US") # Write to DB - DB_Handler().insert_raw_urls(raw_urls, source) + 
DB_Handler().insert_raw_urls(raw_urls, self._get_source_object(source), obj_search) # DDG Text time.sleep(5) raw_urls, source = search_ddg(keyword_search, category="text", timelimit="d", max_results=None, region = "wt-wt") # Write to DB - DB_Handler().insert_raw_urls(raw_urls, source) + DB_Handler().insert_raw_urls(raw_urls, self._get_source_object(source), obj_search) # GoogleNews news time.sleep(5) raw_urls, source = search_googlenews_news(keyword_search, period="1d", language="en", country="US") # Write to DB - DB_Handler().insert_raw_urls(raw_urls, source) + DB_Handler().insert_raw_urls(raw_urls, self._get_source_object(source), obj_search) + # GoogleNews general time.sleep(5) raw_urls, source = search_googlenews_general(keyword_search, period="1d", language="en", country="US", max_pages=5) # Write to DB - DB_Handler().insert_raw_urls(raw_urls, source) + DB_Handler().insert_raw_urls(raw_urls, self._get_source_object(source), obj_search) # TODO: # SearxNG diff --git a/app_urls/api/src/fetch_search_utils.py b/app_urls/api/src/fetch_search_utils.py index 4bcc6af..e6cea03 100644 --- a/app_urls/api/src/fetch_search_utils.py +++ b/app_urls/api/src/fetch_search_utils.py @@ -1,3 +1,4 @@ +from django.core.cache import cache import traceback import random import time @@ -10,20 +11,31 @@ from duckduckgo_search import DDGS from GoogleNews import GoogleNews ########################################################################### -def decode_gnews_urls(encoded_urls): +def decode_gnews_urls(encoded_urls, interval=2): # DecodeURLs list_decoded_urls = [] for url in encoded_urls: - try: - # Decode URL, with interval time to avoid block - decoded_url = gnewsdecoder(url, interval=5) - # Ok? - if decoded_url.get("status"): - list_decoded_urls.append(decoded_url["decoded_url"]) - else: - logger.warning("Error decoding news.google.com, URL {}\nMessage: {}".format(url, decoded_url["message"])) - except Exception as e: - logger.warning("Error decoding news.google.com, URL: {}\n{}".format(url, traceback.format_exc())) + # Already cached? + decoded_url = cache.get("gnews_decode_{}".format(url)) + if (decoded_url is not None): + logger.debug("Already cached decoded URL: {} -> {}".format(url, decoded_url)) + # Append decoded URL + list_decoded_urls.append(decoded_url) + else: + try: + # Decode URL, with interval time to avoid block + decoded_url_dict = gnewsdecoder(url, interval=interval) + # Ok? 
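+                # gnewsdecoder() returns a dict: a truthy "status" means success and the real
+                # article URL sits under "decoded_url"; that value is cached for 12 hours below
+                # so repeated runs skip the slow decode step.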
+ if decoded_url_dict.get("status"): + # Append decoded URL + decoded_url = decoded_url_dict["decoded_url"] + list_decoded_urls.append(decoded_url) + # Cache decoded URL + cache.set("gnews_decode_{}".format(url), decoded_url, timeout=60*60*12) + else: + logger.warning("Error decoding news.google.com, URL {}\nMessage: {}".format(url, decoded_url["message"])) + except Exception as e: + logger.warning("Error decoding news.google.com, URL: {}\n{}".format(url, traceback.format_exc())) return list_decoded_urls ########################################################################### @@ -33,13 +45,18 @@ def search_gnews(keyword_search, period="1d", language="en", country="US", max_r source = "gnews {} {} {}-{} max_results={}".format("news", period, language, country, max_results).replace("None", "").strip() logger.debug("Searching: {} --- Source:{}".format(keyword_search, source)) - # Get news - results_gnews = GNews(language=language, country=country).get_news(keyword_search) - # Get list of encoded urls - encoded_urls = [e.get("url") for e in results_gnews] - # Decode - list_decoded_urls = decode_gnews_urls(encoded_urls) - return list_decoded_urls, source + try: + # Get news + results_gnews = GNews(language=language, country=country).get_news(keyword_search) + # Get list of encoded urls + encoded_urls = [e.get("url") for e in results_gnews] + # Decode + logger.debug("Decoding gnews URLs") + urls = decode_gnews_urls(encoded_urls) + except Exception as e: + logger.warning("Exception fetching {}: {}\n{}".format(source, str(e), traceback.format_exc())) + urls = [] + return urls, source ########################################################################### @@ -51,14 +68,18 @@ def search_ddg(keyword_search, category="news", timelimit="d", max_results=None, # region="{}-{}".format(langauge, country.lower()) # timelimit= # Options: d, w, m # max_results # max number of results. If None, returns results only from the first response. 
Defaults to None - - if (category == "news"): - news = DDGS().news(keyword_search, region=region, timelimit=timelimit, max_results=max_results) - urls = [e.get("url") for e in news] - if (category == "text"): - news = DDGS().text(keyword_search, region=region, timelimit=timelimit, max_results=max_results) - urls = [e.get("href") for e in news] + try: + if (category == "news"): + news = DDGS().news(keyword_search, region=region, timelimit=timelimit, max_results=max_results) + urls = [e.get("url") for e in news] + if (category == "text"): + news = DDGS().text(keyword_search, region=region, timelimit=timelimit, max_results=max_results) + urls = [e.get("href") for e in news] + except Exception as e: + logger.warning("Exception fetching {}: {}\n{}".format(source, str(e), traceback.format_exc())) + urls = [] + return urls, source ########################################################################### @@ -78,6 +99,7 @@ def search_googlenews_news(keyword_search, period="1d", language="en", country=" # Fetch encoded_urls = googlenews.get_links() # Decode + logger.debug("Decoding gnews URLs") urls = decode_gnews_urls(encoded_urls) except Exception as e: logger.warning("Exception fetching {}: {}\n{}".format(source, str(e), traceback.format_exc())) diff --git a/app_urls/api/src/url_processor.py b/app_urls/api/src/url_processor.py index 6f1d6c0..a925dde 100644 --- a/app_urls/api/src/url_processor.py +++ b/app_urls/api/src/url_processor.py @@ -2,14 +2,46 @@ from django.core.cache import cache from .logger import get_logger logger = get_logger() import newspaper +import time from urllib.parse import unquote # pip install langdetect #import langdetect #langdetect.DetectorFactory.seed = 0 +def get_with_protocol(url): + # http:// -> https:// + url = url.replace("http://", "https://") + # "" -> https:// + if not (url.startswith("https://")): + url = "https://" + url + return url + +def get_url_host(url): + # URL no protocol, first substring before '/' + url_host = url.replace("https://", "").replace("http://", "").split("/")[0] + return url_host + +def url_host_slowdown(url, url_host_slowdown_seconds): + ### Avoid (frequent) too many requests to the same URL host + # Get URL host + url_host = get_url_host(url) + # Recently processed URL host? -> Slow down required + last_cached_timestamp = cache.get("process_{}".format(url_host).encode("utf-8"), None) + if last_cached_timestamp: + # Get time since last processed URL host (in seconds) + time_since_last_processed = time.time() - last_cached_timestamp + # Amount of time required to sleep? 
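+        # Example (hypothetical numbers): with url_host_slowdown_seconds=5 and a host last
+        # hit 3.2 s ago, time_since_last_processed == 3.2, so we sleep max(0, 5 - 3.2) == 1.8 s.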
+ slowdown_required = max(0, url_host_slowdown_seconds - time_since_last_processed) + logger.debug("Slow down (sleeping {:.2f}) for URL host {}".format(slowdown_required, url_host)) + # Sleep + time.sleep(slowdown_required) + # About to process URL host, cache time + cache.set("process_{}".format(url_host).encode("utf-8"), time.time(), timeout=60*5) # Expire after 5 minutes def process_url(url): try: + # Slow down if required to avoid too many requests error + url_host_slowdown(url, url_host_slowdown_seconds=2) # Process article = newspaper.article(url) except newspaper.ArticleBinaryDataException: diff --git a/app_urls/api/tasks.py b/app_urls/api/tasks.py index 0ccea1c..3e4d163 100644 --- a/app_urls/api/tasks.py +++ b/app_urls/api/tasks.py @@ -13,6 +13,11 @@ from src.missing_kids_status import MissingKidsStatus from .src.logger import get_logger logger = get_logger() +@job +def fetch_feeds(): + logger.info("Task triggered: {}".format("FetchFeeds")) + FetchFeeds().run() + @job def background_task(process_type: str): logger.info("Task triggered: {}".format(process_type)) diff --git a/app_urls/core/settings.py b/app_urls/core/settings.py index b94ad48..25720d3 100644 --- a/app_urls/core/settings.py +++ b/app_urls/core/settings.py @@ -21,7 +21,7 @@ BASE_DIR = Path(__file__).resolve().parent.parent # See https://docs.djangoproject.com/en/5.1/howto/deployment/checklist/ # SECURITY WARNING: keep the secret key used in production secret! -SECRET_KEY = 'django-insecure-kc0jj#_=7i$_79p(n5)p3taxvhnq=w*ori-%%iu_a6wye@$(*n' +SECRET_KEY = 'django-insecure-54mqLbW5NlO8OlVDsT3fcbg3Vf6C8Fgcoj8H0hXv3Pr8bpgqvOuiaeqvGn34sGwt' # SECURITY WARNING: don't run with debug turned on in production! DEBUG = True @@ -38,7 +38,6 @@ INSTALLED_APPS = [ 'django.contrib.sessions', 'django.contrib.messages', 'django.contrib.staticfiles', - # 'rest_framework', 'django_rq', 'api', ] @@ -93,11 +92,16 @@ DATABASES = { CACHES = { "default": { - "BACKEND": "django.core.cache.backends.redis.RedisCache", + #"BACKEND": "django.core.cache.backends.redis.RedisCache", + "BACKEND": "django_redis.cache.RedisCache", "LOCATION": "redis://{}:{}".format( - os.environ.get("REDIS_HOST", "localhost"), - os.environ.get("REDIS_PORT", 6379) - ), + os.environ.get("REDIS_HOST", "localhost"), + os.environ.get("REDIS_PORT", 6379) + ), + "OPTIONS": { + "MEMCACHE_MAX_KEY_LENGTH": 2048, + "CLIENT_CLASS": "django_redis.client.DefaultClient", + }, } } @@ -107,6 +111,7 @@ RQ_QUEUES = { 'PORT': os.environ.get("REDIS_PORT", 6379), 'DB': os.environ.get("REDIS_DB", 0), 'DEFAULT_TIMEOUT': os.environ.get("RQ_DEFAULT_TIMEOUT", 900), + 'DEFAULT_RESULT_TTL': os.environ.get("RQ_DEFAULT_RESULT_TTL", 3600), } }
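
For reference, a minimal sketch of how the new `fetch_feeds` task could be enqueued onto the queues configured in `RQ_QUEUES` (run from `python manage.py shell`, with a worker started as in the README, e.g. `python manage.py rqworker high default low`; the `api.tasks` import path and the `default` queue name are assumptions based on this patch):

```
# Minimal sketch (assumptions: the Django settings above are loaded and a worker is running).
import django_rq
from api.tasks import fetch_feeds

# The django-rq @job decorator adds .delay(), which enqueues on its queue ("default" by default).
job_a = fetch_feeds.delay()

# Equivalent explicit form: pick a queue from RQ_QUEUES and enqueue with a per-job timeout.
queue = django_rq.get_queue("default")
job_b = queue.enqueue(fetch_feeds, job_timeout=900)

# Job results/metadata are kept for RQ_DEFAULT_RESULT_TTL seconds (3600 by default here).
print(job_a.id, job_b.id)
```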