Urls web visualization, cleaning obsolete code

This commit is contained in:
Luciano Gervasoni
2025-03-25 02:51:16 +01:00
parent 0c6b5f1ea4
commit 24b4614049
52 changed files with 371 additions and 3293 deletions

View File

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@@ -11,7 +11,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 2,
"metadata": {},
"outputs": [
{
@@ -20,17 +20,108 @@
"text": [
"db_postgres\n",
"db_redis\n",
"\u001b[1A\u001b[1B\u001b[0G\u001b[?25l[+] Running 1/0\n",
" ⠿ Container db_postgres \u001b[39mStarting\u001b[0m \u001b[34m0.1s \u001b[0m\n",
" ⠿ Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.1s \u001b[0m\n",
" \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n",
"\u001b[1A\u001b[1B\u001b[0G\u001b[?25l[+] Running 0/0\n",
" ⠙ matitos_dozzle Pulling \u001b[39m\u001b[0m \u001b[34m0.1s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 0/1\n",
" ⠹ matitos_dozzle Pulling \u001b[39m\u001b[0m \u001b[34m0.2s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 0/1\n",
" ⠸ matitos_dozzle Pulling \u001b[39m\u001b[0m \u001b[34m0.3s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 0/1\n",
" ⠼ matitos_dozzle Pulling \u001b[39m\u001b[0m \u001b[34m0.4s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 0/1\n",
" ⠴ matitos_dozzle Pulling \u001b[39m\u001b[0m \u001b[34m0.5s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 0/1\n",
" ⠦ matitos_dozzle Pulling \u001b[39m\u001b[0m \u001b[34m0.6s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 0/1\n",
" ⠧ matitos_dozzle Pulling \u001b[39m\u001b[0m \u001b[34m0.7s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 0/1\n",
" ⠇ matitos_dozzle Pulling \u001b[39m\u001b[0m \u001b[34m0.8s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 0/1\n",
" ⠏ matitos_dozzle Pulling \u001b[39m\u001b[0m \u001b[34m0.9s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 0/1\n",
" ⠋ matitos_dozzle Pulling \u001b[39m\u001b[0m \u001b[34m1.0s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 0/1\n",
" ⠙ matitos_dozzle Pulling \u001b[39m\u001b[0m \u001b[34m1.1s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 0/1\n",
" ⠹ matitos_dozzle Pulling \u001b[39m\u001b[0m \u001b[34m1.2s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 0/1\n",
" ⠸ matitos_dozzle Pulling \u001b[39m\u001b[0m \u001b[34m1.3s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 0/1\n",
" ⠼ matitos_dozzle Pulling \u001b[39m\u001b[0m \u001b[34m1.4s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 0/1\n",
" ⠴ matitos_dozzle Pulling \u001b[39m\u001b[0m \u001b[34m1.5s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 0/1\n",
" ⠦ matitos_dozzle Pulling \u001b[39m\u001b[0m \u001b[34m1.6s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 0/1\n",
" ⠧ matitos_dozzle Pulling \u001b[39m\u001b[0m \u001b[34m1.7s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 0/1\n",
" ⠇ matitos_dozzle \u001b[33m3 layers\u001b[0m [\u001b[32m\u001b[1m\u001b[0m] 0B/0B Pulling \u001b[39m\u001b[0m \u001b[34m1.8s \u001b[0m\n",
" ⠋ b5b68a794063 Pulling fs layer \u001b[39m\u001b[0m \u001b[34m0.0s \u001b[0m\n",
" ⠋ 764914624645 Pulling fs layer \u001b[39m\u001b[0m \u001b[34m0.0s \u001b[0m\n",
" ⠋ 82780b9b6d69 Pulling fs layer \u001b[39m\u001b[0m \u001b[34m0.0s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 0/4\n",
" ⠏ matitos_dozzle \u001b[33m3 layers\u001b[0m [\u001b[32m\u001b[1m\u001b[0m] 0B/0B Pulling \u001b[39m\u001b[0m \u001b[34m1.9s \u001b[0m\n",
" ⠙ b5b68a794063 Pulling fs layer \u001b[39m\u001b[0m \u001b[34m0.1s \u001b[0m\n",
" ⠙ 764914624645 Pulling fs layer \u001b[39m\u001b[0m \u001b[34m0.1s \u001b[0m\n",
" ⠙ 82780b9b6d69 Pulling fs layer \u001b[39m\u001b[0m \u001b[34m0.1s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 0/4\n",
" ⠋ matitos_dozzle \u001b[33m3 layers\u001b[0m [\u001b[32m\u001b[1m\u001b[0m] 0B/0B Pulling \u001b[39m\u001b[0m \u001b[34m2.0s \u001b[0m\n",
" ⠹ b5b68a794063 Pulling fs layer \u001b[39m\u001b[0m \u001b[34m0.2s \u001b[0m\n",
" ⠹ 764914624645 Pulling fs layer \u001b[39m\u001b[0m \u001b[34m0.2s \u001b[0m\n",
" ⠹ 82780b9b6d69 Pulling fs layer \u001b[39m\u001b[0m \u001b[34m0.2s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 0/4\n",
" ⠙ matitos_dozzle \u001b[33m3 layers\u001b[0m [\u001b[32m\u001b[1m\u001b[0m] 0B/0B Pulling \u001b[39m\u001b[0m \u001b[34m2.1s \u001b[0m\n",
" ⠸ b5b68a794063 Pulling fs layer \u001b[39m\u001b[0m \u001b[34m0.3s \u001b[0m\n",
" ⠸ 764914624645 Pulling fs layer \u001b[39m\u001b[0m \u001b[34m0.3s \u001b[0m\n",
" ⠸ 82780b9b6d69 Pulling fs layer \u001b[39m\u001b[0m \u001b[34m0.3s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/4\n",
" ⠹ matitos_dozzle \u001b[33m3 layers\u001b[0m [\u001b[32m\u001b[1m⣿\u001b[0m] 0B/0B Pulling \u001b[39m\u001b[0m \u001b[34m2.2s \u001b[0m\n",
" \u001b[32m✔\u001b[0m b5b68a794063 Pull complete \u001b[32m\u001b[0m \u001b[34m0.4s \u001b[0m\n",
" ⠼ 764914624645 Pulling fs layer \u001b[39m\u001b[0m \u001b[34m0.4s \u001b[0m\n",
" ⠼ 82780b9b6d69 Pulling fs layer \u001b[39m\u001b[0m \u001b[34m0.4s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 2/4\n",
" ⠸ matitos_dozzle \u001b[33m3 layers\u001b[0m [\u001b[32m\u001b[1m⣿⣿\u001b[0m] 166.8kB/16.38MB Pulling \u001b[39m\u001b[0m \u001b[34m2.3s \u001b[0m\n",
" \u001b[32m✔\u001b[0m b5b68a794063 Pull complete \u001b[32m\u001b[0m \u001b[34m0.4s \u001b[0m\n",
" \u001b[32m✔\u001b[0m 764914624645 Pull complete \u001b[32m\u001b[0m \u001b[34m0.4s \u001b[0m\n",
" ⠴ 82780b9b6d69 Downloading \u001b[39m 166.8kB/16.38MB\u001b[0m \u001b[34m0.5s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 2/4\n",
" ⠼ matitos_dozzle \u001b[33m3 layers\u001b[0m [\u001b[32m\u001b[1m⣿⣿⣤\u001b[0m] 9.833MB/16.38MB Pulling \u001b[39m\u001b[0m \u001b[34m2.4s \u001b[0m\n",
" \u001b[32m✔\u001b[0m b5b68a794063 Pull complete \u001b[32m\u001b[0m \u001b[34m0.4s \u001b[0m\n",
" \u001b[32m✔\u001b[0m 764914624645 Pull complete \u001b[32m\u001b[0m \u001b[34m0.4s \u001b[0m\n",
" ⠦ 82780b9b6d69 Downloading \u001b[39m 9.833MB/16.38MB\u001b[0m \u001b[34m0.6s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 2/4\n",
" ⠴ matitos_dozzle \u001b[33m3 layers\u001b[0m [\u001b[32m\u001b[1m⣿⣿\u001b[0m] 163.8kB/16.38MB Pulling \u001b[39m\u001b[0m \u001b[34m2.5s \u001b[0m\n",
" \u001b[32m✔\u001b[0m b5b68a794063 Pull complete \u001b[32m\u001b[0m \u001b[34m0.4s \u001b[0m\n",
" \u001b[32m✔\u001b[0m 764914624645 Pull complete \u001b[32m\u001b[0m \u001b[34m0.4s \u001b[0m\n",
" ⠿ 82780b9b6d69 Extracting \u001b[39m 163.8kB/16.38MB\u001b[0m \u001b[34m0.7s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 2/4\n",
" ⠦ matitos_dozzle \u001b[33m3 layers\u001b[0m [\u001b[32m\u001b[1m⣿⣿⣤\u001b[0m] 9.667MB/16.38MB Pulling \u001b[39m\u001b[0m \u001b[34m2.6s \u001b[0m\n",
" \u001b[32m✔\u001b[0m b5b68a794063 Pull complete \u001b[32m\u001b[0m \u001b[34m0.4s \u001b[0m\n",
" \u001b[32m✔\u001b[0m 764914624645 Pull complete \u001b[32m\u001b[0m \u001b[34m0.4s \u001b[0m\n",
" ⠿ 82780b9b6d69 Extracting \u001b[39m 9.667MB/16.38MB\u001b[0m \u001b[34m0.8s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l\u001b[34m[+] Running 4/4\u001b[0m\n",
" \u001b[32m✔\u001b[0m matitos_dozzle \u001b[33m3 layers\u001b[0m [\u001b[32m\u001b[1m⣿⣿⣿\u001b[0m] 0B/0B Pulled \u001b[32m\u001b[0m \u001b[34m2.7s \u001b[0m\n",
" \u001b[32m✔\u001b[0m b5b68a794063 Pull complete \u001b[32m\u001b[0m \u001b[34m0.4s \u001b[0m\n",
" \u001b[32m✔\u001b[0m 764914624645 Pull complete \u001b[32m\u001b[0m \u001b[34m0.4s \u001b[0m\n",
" \u001b[32m✔\u001b[0m 82780b9b6d69 Pull complete \u001b[32m\u001b[0m \u001b[34m0.9s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1B\u001b[0G\u001b[?25l[+] Running 0/0\n",
" ⠋ Container db_redis \u001b[39mCreating\u001b[0m \u001b[34m0.0s \u001b[0m\n",
" ⠋ Container db_postgres \u001b[39mCreating\u001b[0m \u001b[34m0.0s \u001b[0m\n",
" ⠋ Container dozzle \u001b[39mCreating\u001b[0m \u001b[34m0.0s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/3\n",
" ⠿ Container db_postgres \u001b[39mStarting\u001b[0m \u001b[34m0.2s \u001b[0m\n",
" ⠿ Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.2s \u001b[0m\n",
" ⠿ Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.1s \u001b[0m\n",
" ⠿ Container db_postgres \u001b[39mStarting\u001b[0m \u001b[34m0.1s \u001b[0m\n",
" ⠿ Container dozzle \u001b[39mStarting\u001b[0m \u001b[34m0.1s \u001b[0m\n",
" \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l\u001b[34m[+] Running 3/3\u001b[0m\n",
" \u001b[32m✔\u001b[0m Container db_postgres \u001b[32mStarted\u001b[0m \u001b[34m0.2s \u001b[0m\n",
" \u001b[32m✔\u001b[0m Container db_redis \u001b[32mStarted\u001b[0m \u001b[34m0.2s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/4\n",
" Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.2s \u001b[0m\n",
" Container db_postgres \u001b[39mStarting\u001b[0m \u001b[34m0.2s \u001b[0m\n",
" ⠿ Container dozzle \u001b[39mStarting\u001b[0m \u001b[34m0.2s \u001b[0m\n",
" \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l\u001b[34m[+] Running 4/4\u001b[0m\n",
" \u001b[32m✔\u001b[0m Container db_redis \u001b[32mStarted\u001b[0m \u001b[34m0.3s \u001b[0m\n",
" \u001b[32m✔\u001b[0m Container db_postgres \u001b[32mStarted\u001b[0m \u001b[34m0.3s \u001b[0m\n",
" \u001b[32m✔\u001b[0m Container dozzle \u001b[32mStarted\u001b[0m \u001b[34m0.3s \u001b[0m\n",
" \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n",
"\u001b[?25h"
]
@@ -42,7 +133,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@@ -143,6 +234,7 @@
" # Feeds\n",
" cur.execute( \"INSERT INTO SEARCH (search, type) VALUES ('https://api.missingkids.org/missingkids/servlet/XmlServlet?act=rss&LanguageCountry=en_US&orgPrefix=NCMC', 'rss_feed');\" )\n",
" # Websites of interest\n",
" cur.execute( \"INSERT INTO SEARCH (search, type) VALUES ('www.missingkids.org/poster', 'url_host');\" )\n",
" cur.execute( \"INSERT INTO SEARCH (search, type) VALUES ('www.breitbart.com', 'url_host');\" )\n",
" # Search keywords\n",
" cur.execute( \"INSERT INTO SEARCH (search, type) VALUES ('child abuse', 'keyword_search');\" )\n",
@@ -159,7 +251,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@@ -211,7 +303,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 5,
"metadata": {},
"outputs": [
{
@@ -260,7 +352,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -285,7 +377,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 7,
"metadata": {},
"outputs": [
{

View File

@@ -1,46 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"conda create -n matitos_fetcher python=3.12\n",
"conda activate matitos_fetcher\n",
"conda install -c conda-forge curl\n",
"pip install ipykernel \"psycopg[binary]\" git+https://github.com/ranahaani/GNews.git GoogleNews duckduckgo_search newspaper4k numpy beautifulsoup4 requests feedparser pytz redis fastapi uvicorn fastapi-utils lxml[html_clean]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!uvicorn app:app --host 0.0.0.0 --port 5000 --reload"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "matitos_fetcher",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -1,17 +0,0 @@
FROM continuumio/miniconda3:25.1.1-2
# App repository
COPY . /opt/app/
RUN conda install -c conda-forge curl
RUN pip install --no-cache-dir --upgrade "psycopg[binary]" git+https://github.com/ranahaani/GNews.git GoogleNews duckduckgo_search newspaper4k numpy beautifulsoup4 requests feedparser pytz redis fastapi uvicorn fastapi-utils lxml[html_clean]
RUN pip freeze
# GoogleNews-1.6.10 Pillow-10.1.0 PyYAML-6.0.1 aiofiles-23.2.1 anyio-3.7.1 beautifulsoup4-4.9.3 bs4-0.0.1 click-8.1.7 cssselect-1.2.0 dateparser-1.2.0 dnspython-1.16.0 duckduckgo_search-3.9.8 fastapi-0.104.1 fastapi-utils-0.2.1 feedfinder2-0.0.4 feedparser-6.0.10 filelock-3.13.1 gnews-0.3.6 greenlet-3.0.1 h11-0.14.0 h2-4.1.0 hpack-4.0.0 httpcore-1.0.2 httpx-0.25.2 hyperframe-6.0.1 jieba3k-0.35.1 joblib-1.3.2 lxml-4.9.3 newspaper3k-0.2.8 nltk-3.8.1 numpy-1.26.2 psycopg-3.1.13 psycopg-binary-3.1.13 pydantic-1.10.13 pymongo-3.12.3 python-dateutil-2.8.2 python-dotenv-0.19.2 pytz-2023.3.post1 redis-5.0.1 regex-2023.10.3 requests-2.26.0 requests-file-1.5.1 sgmllib3k-1.0.0 six-1.16.0 sniffio-1.3.0 socksio-1.0.0 soupsieve-2.5 sqlalchemy-1.4.50 starlette-0.27.0 tinysegmenter-0.3 tldextract-5.1.1 typing-extensions-4.8.0 tzlocal-5.2 uvicorn-0.24.0.post1
WORKDIR /opt/app
# https://www.uvicorn.org/settings/#resource-limits
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "80"]
# docker build -t fetch_app .
# docker run --rm --name container_fetch_app fetch_app

View File

@@ -1,20 +0,0 @@
# Fetcher
```
conda create -n matitos_fetcher python=3.12
conda activate matitos_fetcher
conda install -c conda-forge curl
pip install ipykernel "psycopg[binary]" git+https://github.com/ranahaani/GNews.git GoogleNews duckduckgo_search newspaper4k numpy beautifulsoup4 requests feedparser pytz redis fastapi uvicorn fastapi-utils lxml[html_clean]
```
* Fetcher app
  - Exposes several endpoints, each triggering a specific type of fetch task (see the example request below)
  - For details, check the `/{fetch_type}` endpoints in [app.py](app.py)
* Build and run
  - Important: deploy alongside the other micro-services via [docker-compose.yml](../docker-compose.yml)
```
docker build -t fetch_app .
docker run --rm --name container_fetch_app fetch_app
```
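
A minimal sketch of how one of these endpoints could be triggered from Python. The base URL and port are assumptions (adjust to your deployment); app.py exposes GET `/{process_type}` routes such as `fetch_feeds` or `search_reduced`.
```
import requests

# Hypothetical base URL; adjust to wherever the fetcher container is reachable.
FETCHER_BASE_URL = "http://localhost:5000"

def trigger(process_type):
    """Trigger one of the fetcher's background tasks, e.g. 'fetch_feeds' or 'search_reduced'."""
    response = requests.get("{}/{}".format(FETCHER_BASE_URL, process_type), timeout=30)
    response.raise_for_status()
    return response.json()  # e.g. {"message": "Started fetch_feeds: Ok"}

print(trigger("fetch_feeds"))
```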

View File

@@ -1,79 +0,0 @@
from src.fetch_feed import FetchFeed
from src.fetch_parser import FetchParser
from src.fetch_search import FetchSearch
from src.missing_kids_fetch import MissingKidsFetch
from src.missing_kids_status import MissingKidsStatus
from src.url_status import UpdateErrorURLs
from src.db_utils import DB_Handler
import src.credentials as cred
from logging_ import get_logger
from fastapi import FastAPI, BackgroundTasks
##################################################################################################
logger = get_logger()
logger.info("Environment: {}".format(cred.ENVIRONMENT))
db_handler = DB_Handler(cred.db_connect_info, cred.redis_connect_info)
app = FastAPI()
@app.get("/")
def hello_world():
return {"message": "Ok"}
@app.get("/{process_type}")
async def process(background_tasks: BackgroundTasks, process_type: str):
# Concurrent job running
logger.info("Triggered: {}".format(process_type))
if (process_type == "fetch_feeds"):
task_run = FetchFeed(db_handler).run
elif (process_type == "fetch_parser"):
task_run = FetchParser(db_handler).run
elif (process_type == "search") or (process_type == "search_full"):
task_run = FetchSearch(cred.db_connect_info, cred.redis_connect_info, full=True).run
elif (process_type == "search_reduced"):
task_run = FetchSearch(cred.db_connect_info, cred.redis_connect_info, full=False).run
# Selenium based
elif (process_type == "fetch_missing_kids_reduced"):
task_run = MissingKidsFetch(db_handler, num_pages=4).run
elif (process_type == "fetch_missing_kids_full"):
task_run = MissingKidsFetch(db_handler, num_pages=100000).run
elif (process_type == "update_missing_kids_status_reduced"):
task_run = MissingKidsStatus(cred.db_connect_info, cred.redis_connect_info, num_urls=50).update_missing_kids_status
elif (process_type == "update_missing_kids_status_full"):
task_run = MissingKidsStatus(cred.db_connect_info, cred.redis_connect_info, num_urls=None).update_missing_kids_status
elif (process_type == "update_error_urls"):
task_run = UpdateErrorURLs(cred.db_connect_info, cred.redis_connect_info, num_urls=100).update_error_urls_status
else:
return {"message": "ERROR. Unknown fetcher type!"}
# Run task
background_tasks.add_task(task_run)
# Return message
return {"message": "Started {}: Ok".format(process_type)}
"""
# TODO: Instead of background tasks!
import rq
import redis
# Redis connection
redis_conn = redis.Redis(host='localhost', port=6379, db=0)
queue = rq.Queue(connection=redis_conn)
# ...
# Queue the processing task
dict_args= {"db_handler": db_handler, }
queue.enqueue(task_run, **dict_args)
# https://python-rq.org/
"""

View File

@@ -1,502 +0,0 @@
import psycopg
import redis
import traceback
import random
import requests
import json
import os
from .url_utils import process_article
from .logger import get_logger
logger = get_logger()
# TODO: URL_DB_HANDLER, _get_search_list, _get_url_host, _get_url_host_list, ...
# The rest, elsewhere
class DB_Handler():
def __init__(self, db_connect_info, redis_connect_info):
logger.debug("Initializing URL DB writer")
self.db_connect_info = db_connect_info
self.redis_instance = redis.Redis(host=redis_connect_info.get("host"), port=redis_connect_info.get("port"))
self.redis_expiry_seconds = redis_connect_info.get("expiry_seconds", 172800) # Default: 48 hours
try:
self.redis_instance.ping()
logger.debug("Succesfully pinged Redis")
except Exception as e:
logger.warning("Error trying to ping Redis: {}".format(str(e)))
def get_urls_count(self, last_minutes_check):
#####################
### Get number of URLs within last X minutes
#####################
try:
# Update
with psycopg.connect(self.db_connect_info) as conn:
# Open cursor
cursor = conn.cursor()
num_urls = cursor.execute("SELECT COUNT(*) FROM URLS WHERE ts_fetch >= current_timestamp - interval '{} minutes';".format(last_minutes_check)).fetchone()[0]
except Exception as e:
logger.warning("Error updating URLs status: {}".format(str(e)))
num_urls = None
return num_urls
def _get_url_host_list(self):
try:
with psycopg.connect(self.db_connect_info) as conn:
# List of URL host
list_url_host = [l[0] for l in conn.execute("SELECT url_host FROM WEBSITE_OF_INTEREST;").fetchall()]
# Clean http / https from URLs
list_url_host = [l.replace("https://", "").replace("http://", "") for l in list_url_host]
# Clean last slash if exists
list_url_host = [ l if not l.endswith("/") else l[:-1] for l in list_url_host]
except Exception as e:
logger.warning("Exception fetching URL host list: " + str(e))
list_url_host = []
return list_url_host
def _get_search_list(self):
try:
with psycopg.connect(self.db_connect_info) as conn:
# List of keyword searches
list_search_text = [l[0] for l in conn.execute("SELECT keyword_search FROM SEARCH;").fetchall()]
except Exception as e:
logger.warning("Exception fetching searches list: " + str(e))
list_search_text = []
return list_search_text
def _get_feed_urls(self):
try:
with psycopg.connect(self.db_connect_info) as conn:
list_url_feeds = conn.execute("SELECT rss_feed FROM FEED;").fetchall()
# Decode (tuple with 1 element)
list_url_feeds = [l[0] for l in list_url_feeds]
except Exception as e:
logger.warning("Exception fetching RSS sites: " + str(e))
list_url_feeds = []
return list_url_feeds
def _get_url_hosts(self):
try:
with psycopg.connect(self.db_connect_info) as conn:
list_url_hosts = conn.execute("SELECT url_host FROM WEBSITE_OF_INTEREST;").fetchall()
# Decode (tuple with 1 element)
list_url_hosts = [l[0] for l in list_url_hosts]
except Exception as e:
logger.warning("Exception fetching RSS sites: " + str(e))
list_url_hosts = []
return list_url_hosts
def _format(self, values):
# Replace single quote ' with ''. Based on https://stackoverflow.com/a/12320729
# String -> 'string', Int -> '1' (string-based), None -> NULL (no quotes for pgSQL to interpret Null value)
if (type(values) == list) or (type(values) == tuple):
insert_args = "(" + ", ".join([ "NULL" if v is None else "'" + str(v).replace("'", "''") + "'" for v in values]) + ")"
elif (type(values) == str):
insert_args = "({})".format( "NULL" if values is None else "'" + values.replace("'", "''") + "'" )
else:
logger.warning("Error formatting input values: {}".format(values))
assert False
return insert_args
def _get_cached_canonical_url(self, url):
### Redis: URL processed recently? -> Avoid increasing SERIAL counter & efficiency of DB
try:
filter_url = self.redis_instance.get(url)
if (filter_url is not None):
filter_url = filter_url.decode("utf-8")
except Exception as e:
logger.warning("Exception querying Redis: {}".format(str(e)))
filter_url = None
return filter_url
def _update_urls_status(self, dict_status_ids):
#####################
### Update status to array of URL IDs
#####################
try:
# Update
with psycopg.connect(self.db_connect_info) as conn:
# Open cursor
cursor = conn.cursor()
# Autocommit at end of transaction (Atomic insert of URLs and sources)
with conn.transaction() as tx:
for key_status, value_ids in dict_status_ids.items():
cursor.execute("UPDATE URLS SET status='{}' WHERE id IN ({});".format(key_status, ",".join([str(v) for v in value_ids])))
except Exception as e:
logger.warning("Error updating URLs status: {}".format(str(e)))
def _get_missing_kids_urls(self, num_urls=None):
#####################
### Get list of Missing Kids URLs
#####################
try:
missing_kids_ids_and_urls = []
if (num_urls is None):
limit = 500
else:
limit = num_urls
offset = 0
with psycopg.connect(self.db_connect_info) as conn:
# Open cursor
cursor = conn.cursor()
while True:
# Query
missing_kids_ids_and_urls_query = cursor.execute("SELECT id, url, status FROM URLS WHERE url LIKE '%missingkids.org/poster%' ORDER BY ts_fetch DESC LIMIT {} OFFSET {};".format(limit, offset)).fetchall()
# Finished?
if (len(missing_kids_ids_and_urls_query) == 0):
break
# Extend
missing_kids_ids_and_urls = missing_kids_ids_and_urls + missing_kids_ids_and_urls_query
# Offset
offset += len(missing_kids_ids_and_urls_query)
# Stop?
if (num_urls is not None) and (len(missing_kids_ids_and_urls) >= num_urls):
break
except Exception as e:
logger.warning("Error getting Missing Kids URLs: {}".format(str(e)))
missing_kids_ids_and_urls = []
return missing_kids_ids_and_urls
def _get_error_urls(self, num_urls=None):
#####################
### Get list of error-status URLs
#####################
try:
error_urls = []
if (num_urls is None):
limit = 500
else:
limit = num_urls
offset = 0
with psycopg.connect(self.db_connect_info) as conn:
# Open cursor
cursor = conn.cursor()
while True:
# Query
error_urls_query = cursor.execute("SELECT id, url FROM URLS WHERE status='error' ORDER BY ts_fetch DESC LIMIT {} OFFSET {};".format(limit, offset)).fetchall()
# Finished?
if (len(error_urls_query) == 0):
break
# Extend
error_urls = error_urls + error_urls_query
# Offset
offset += len(error_urls_query)
# Stop?
if (num_urls is not None) and (len(error_urls) >= num_urls):
break
except Exception as e:
logger.warning("Error getting Error URLs: {}".format(str(e)))
error_urls = []
return error_urls
def _decode_urls(self, urls_fetched, list_domains_to_filter, list_pattern_status_tuple): # TODO: language for urls_fetched...
"""
# TODO: REFACTOR
For each input url
Already processed?
-> Update on Redis expire time
-> Associate to source
Not processed? Get main URL:
-> URL Canonical valid?
-> Rely on this as main URL
-> URL Canonical not valid?
-> Use input url, unless it's a news.google.com link
-> If news.google.com link, filter out. REDIS?
Main URL processing:
-> Update in REDIS, association url -> url_canonical
-> url != url_canonical: Add in duplicate table
If both != news.google.com
"""
# URLs to insert, URLs duplicated association, URL to Canonical form
list_insert_url_tuple_args, list_tuple_canonical_duplicate_urls, dict_full_urls_to_canonical = [], [], {}
# URL VS CANONICAL:
# News URL returned: https://news.google.com/articles/CBMifmh0dHBzOi8vd3d3LmJyZWl0YmFydC5jb20vMm5kLWFtZW5kbWVudC8yMDIzLzA0LzAzL2dvdi1kZXNhbnRpcy1zaWducy1iaWxsLW1ha2luZy1mbG9yaWRhLXRoZS0yNnRoLWNvbnN0aXR1dGlvbmFsLWNhcnJ5LXN0YXRlL9IBAA?hl=en-US&gl=US&ceid=US%3Aen
# Corresponds to canonical URL: https://www.breitbart.com/2nd-amendment/2023/04/03/gov-desantis-signs-bill-making-florida-the-26th-constitutional-carry-state/
for url in urls_fetched:
# Domain to filter? Input url
filter_due_to_domain = False
for domain_to_filter in list_domains_to_filter:
if (domain_to_filter in url):
logger.debug("Domain filter applied based on {} for input URL: {}".format(domain_to_filter, url))
filter_due_to_domain = True
if (filter_due_to_domain):
continue
# URL processed recently? -> Filter and avoid increasing SERIAL counter & efficiency of DB
cached_canonical_url = self._get_cached_canonical_url(url)
if (cached_canonical_url is not None):
# Even if url processed, need to add url_canonical to list_filtered_urls, so as to associate search source to canonical URL (canonical is the main URL entry)
dict_full_urls_to_canonical[url] = cached_canonical_url # X -> Y
# If url has been processed, so was its canonical form
logger.debug("Filtering out already inserted (processed) URL and its canonical form: {} {}".format(url, cached_canonical_url))
continue
# Process TODO: Add language...
url_canonical, article_elements, article_status = process_article(url, list_pattern_status_tuple)
# TODO: Store article_elements information to insert into OS after inserted into DB (and therefore having associated url_id)
# Could not retrieve redirection for news.google.com based URL? Continue (avoid inserting in DB)
if (url_canonical is None) and ("news.google.com" in url):
logger.debug("Filtering empty canonical link for base URL based on news.google.com: {}".format(url))
continue
# Canonical URL still news.google.com? Continue (avoid inserting in DB)
if (url_canonical is not None) and ("news.google.com" in url_canonical):
logger.debug("Filtering canonical news.google.com based URL: {}".format(url_canonical))
continue
# Domain to filter? Input canonical_url
filter_due_to_domain = False
for domain_to_filter in list_domains_to_filter:
if (url_canonical is not None) and (domain_to_filter in url_canonical):
filter_due_to_domain = True
if (filter_due_to_domain):
logger.info("Filtering due to domain input URL, Canonical_URL: {} {}".format(url, url_canonical))
continue
if (url_canonical is None) or (article_status == "error"):
logger.debug("Processing failed for URL: {}".format(url))
# Still insert URL with "error"? -> If processed later, might have inconsistent sources (url vs url_canonical). Only store if not news.google.com based
if ("news.google.com" in url) or ("consent.google.com" in url):
logging.debug("Not able to process Google News link, skipping: {}".format(url))
else:
dict_full_urls_to_canonical[url] = url # X -> X
list_insert_url_tuple_args.append( (url, article_status) )
continue
# URL was not processed (not sure canonical yet). Generate URL_CANONICAL <-> URL_ORIGINAL association if they're different
if (url_canonical != url):
list_tuple_canonical_duplicate_urls.append( (url_canonical, url) )
# Dict: url -> canonical (update association)
dict_full_urls_to_canonical[url] = url_canonical # X -> Y or X
# Canonical URL processed recently? -> Filter and avoid increasing SERIAL counter & efficiency of DB
if (self._get_cached_canonical_url(url_canonical) is not None):
# Canonical URL was already processed
logger.debug("Filtering out already inserted (processed) URL canonical: {}".format(url_canonical))
else:
# Insert url_canonical to DB formatted
list_insert_url_tuple_args.append( (url_canonical, article_status) )
# Canonical URL different? Process
if (url_canonical != url):
if ("news.google.com" in url) or ("consent.google.com" in url):
logging.debug("Not adding google.news.com based link, skipping: {}".format(url))
else:
# Fetched url -> duplicate (using canonical as main link)
article_status = "duplicate"
# Insert url (non-canonical) to DB formatted
list_insert_url_tuple_args.append( (url, article_status) )
return list_insert_url_tuple_args, list_tuple_canonical_duplicate_urls, dict_full_urls_to_canonical
def _insert_urls(self, cursor, list_insert_url_tuple_args):
#####################
### Insert URLs with status
#####################
if (len(list_insert_url_tuple_args) > 0):
insert_args = ', '.join( [ self._format(t) for t in list_insert_url_tuple_args] )
# Insert. (url_1, status_1), (url_2, status_2), ...
sql_code = "INSERT INTO URLS {} VALUES {} ON CONFLICT (url) DO NOTHING;".format("(url, status)", insert_args)
# logger.debug("SQL CODE: {}".format(sql_code))
c = cursor.execute(sql_code)
# NOTE: Not using "RETURNING id" since previously inserted URLs are not returned (ON CONFLICT)
# https://stackoverflow.com/questions/35949877/how-to-include-excluded-rows-in-returning-from-insert-on-conflict/35953488#35953488
def _insert_urls_duplicated(self, cursor, list_tuple_canonical_duplicate_urls):
#####################
### Insert duplicated URLs
#####################
if (len(list_tuple_canonical_duplicate_urls) > 0):
# Flatten, format, set to remove duplicates
args_duplicated_urls_set = "(" + ', '.join( set( [ "'" + str(y).replace("'", "''") + "'" for x in list_tuple_canonical_duplicate_urls for y in x] ) ) + ")"
# Dict: url -> id
dict_url_to_id = {}
# Get url -> id association to populate duplicated URLs
for (id_, url_) in cursor.execute("SELECT id, url FROM URLS WHERE url IN {};".format(args_duplicated_urls_set)).fetchall():
dict_url_to_id[url_] = id_
# Convert tuples (url_canonical, url) -> (id_url_canonical, id_url) to insert in DB
# ORIGINAL CODE. Issue, might not have found association to all urls
### list_tuple_canonical_duplicate_urls_ids = [ (dict_url_to_id[t[0]], dict_url_to_id[t[1]]) for t in list_tuple_canonical_duplicate_urls]
list_tuple_canonical_duplicate_urls_ids = []
for (url_1, url_2) in list_tuple_canonical_duplicate_urls:
id_url_1, id_url_2 = dict_url_to_id.get(url_1), dict_url_to_id.get(url_2)
if (id_url_1 is None) or (id_url_2 is None):
logger.debug("Skipping duplicate association due to no url -> id_url mapping available for tuple: {} {}".format(url_1, url_2))
else:
list_tuple_canonical_duplicate_urls_ids.append( (id_url_1, id_url_2) )
if (len(list_tuple_canonical_duplicate_urls_ids) > 0):
insert_args = ', '.join( [ self._format(t) for t in list_tuple_canonical_duplicate_urls_ids] )
# Insert. (id_url_canonical_1, id_url_1), ...
sql_code = "INSERT INTO URLS_DUPLICATE {} VALUES {} ON CONFLICT DO NOTHING;".format("(id_url_canonical, id_url_duplicated)", insert_args)
# logger.debug("SQL CODE: {}".format(sql_code))
c = cursor.execute(sql_code)
def _get_pattern_status_list(self):
#####################
### Get list of domains to filter
#####################
# TODO: Cache on redis and query once every N hours? ...
try:
with psycopg.connect(self.db_connect_info) as conn:
# Open cursor
cursor = conn.cursor()
# TODO: Cache on Redis
list_pattern_status = cursor.execute("SELECT pattern, priority, status FROM STATUS_PATTERN_MATCHING;").fetchall()
except Exception as e:
logger.warning("Error getting pattern status list: {}".format(str(e)))
list_pattern_status = []
return list_pattern_status
def _get_domains_to_filter(self):
#####################
### Get list of domains to filter
#####################
# TODO: Cache on redis and query once every N hours? ...
try:
with psycopg.connect(self.db_connect_info) as conn:
# Open cursor
cursor = conn.cursor()
# TODO: Cache on Redis
sites_to_filter = [e[0] for e in cursor.execute("SELECT url_host FROM WEBSITE_TO_FILTER;").fetchall() ]
except Exception as e:
logger.warning("Error getting domains to filter: {}".format(str(e)))
sites_to_filter = []
return sites_to_filter
def _get_cached_source_id(self, source):
### Redis: URL processed recently? -> Avoid increasing SERIAL counter & efficiency of DB
try:
source_id = self.redis_instance.get(source)
if (source_id is not None):
source_id = source_id.decode("utf-8")
except Exception as e:
logger.warning("Exception querying Redis: {}".format(str(e)))
source_id = None
return source_id
def _get_source_id(self, cursor, source):
#####################
### Get source corresponding id
#####################
# Cached?
id_source = self._get_cached_source_id(source)
if (id_source is None):
c = cursor.execute("SELECT id FROM SOURCE WHERE source='{}'".format(source.replace("'", "''"))).fetchone()
if (c is None) or (len(c) == 0):
# Source does not exist, insert and get id
c = cursor.execute("INSERT INTO SOURCE (source) VALUES ('{}') RETURNING id;".format(source.replace("'", "''"))).fetchone()
# Decode source id
id_source = c[0]
# Cache
print("*"*10, source, id_source)
self.redis_instance.set(source, id_source, ex=self.redis_expiry_seconds)
return id_source
def _get_urls_id(self, cursor, urls_full):
#####################
### Get id of inserted and filtered URLs
#####################
# TODO: Cache url -> url_id, url_canonical
if (len(urls_full) == 0):
return []
# Get inserted and filtered URL ids (unnested). Filtered URLs are also retrieved since they might have been fetched from a new source
in_inserted_filtered_urls = "(" + ', '.join(["'" + u.replace("'", "''") + "'" for u in urls_full]) + ")"
id_urls_related = [ i[0] for i in cursor.execute("SELECT id FROM URLS WHERE url IN {};".format(in_inserted_filtered_urls)).fetchall() ]
return id_urls_related
def _insert_urls_source(self, cursor, id_urls_related, id_source):
#####################
### Insert URL sources: (id_url_1, id_source), (id_url_2, id_source), ...
#####################
if (len(id_urls_related) == 0) or (id_source is None):
return
columns = "(id_url, id_source)"
insert_args = ', '.join( [ self._format([id_url, id_source]) for id_url in id_urls_related ] )
# Insert
sql_code = "INSERT INTO URLS_SOURCE {} VALUES {} ON CONFLICT DO NOTHING;".format(columns, insert_args)
# logger.debug("SQL CODE: {}".format(sql_code))
c = cursor.execute(sql_code)
def write_batch(self, urls_fetched, source):
# Chunks of 50 elements
n = 50
# Divide in small chunks
urls_fetched_chunks = [urls_fetched[i:i + n] for i in range(0, len(urls_fetched), n)]
# Process
for urls_fetched_chunk_i in urls_fetched_chunks:
self._write_small_batch(urls_fetched_chunk_i, source)
def _write_small_batch(self, urls_fetched, source):
try:
logger.info("Fetched #{} URLs, source: {}".format(len(urls_fetched), source))
if (len(urls_fetched) == 0):
logger.debug("Empty batch of urls (not writing to DB) for source: {}".format(source))
return
# Shuffle URLs to reduce continuous URLs of same URL host (minimize chance of being blocked for too many continuous requests)
random.shuffle(urls_fetched)
# Get list of domains to filter
list_domains_to_filter = self._get_domains_to_filter()
# Get list of (pattern, priority, status) tuples to override status if required
list_pattern_status_tuple = self._get_pattern_status_list()
# Sort pattern tuples by priority
list_pattern_status_tuple.sort(key=lambda tup: tup[1], reverse=True)
# Process URLs to update DB
list_insert_url_tuple_args, list_tuple_canonical_duplicate_urls, dict_full_urls_to_canonical = self._decode_urls(urls_fetched, list_domains_to_filter, list_pattern_status_tuple)
# Full set of URL and its canonical form (to associate them to a search), both to insert and filter
urls_full = set(dict_full_urls_to_canonical.keys()).union( set(dict_full_urls_to_canonical.values()) )
# Insert
with psycopg.connect(self.db_connect_info) as conn:
# Open cursor
cursor = conn.cursor()
# Autocommit at end of transaction (Atomic insert of URLs and sources)
with conn.transaction() as tx:
# Insert processed URLs
self._insert_urls(cursor, list_insert_url_tuple_args)
# Insert URLs duplicated (canonical != fetched url)
self._insert_urls_duplicated(cursor, list_tuple_canonical_duplicate_urls)
# Get source id in DB
id_source = self._get_source_id(cursor, source)
# Get IDs of all related URLs
id_urls_related = self._get_urls_id(cursor, urls_full)
# Insert search source associated to URLs
self._insert_urls_source(cursor, id_urls_related, id_source)
# Update Redis status of inserted and filtered URLs after writing to DB
for url, url_canonical in dict_full_urls_to_canonical.items():
try:
# Set with updated expiry time
self.redis_instance.set(url, url_canonical, ex=self.redis_expiry_seconds)
if (url != url_canonical):
self.redis_instance.set(url_canonical, url_canonical, ex=self.redis_expiry_seconds)
except Exception as e:
logger.warning("Exception running set in Redis: {}".format(str(e)))
if (len(list_insert_url_tuple_args) > 0):
try:
webhook_token = os.environ.get("CLIQ_WEBHOOK_TOKEN")
endpoint_message = "https://cliq.zoho.com/api/v2/channelsbyname/urlretrievalbot/message?zapikey={}".format(webhook_token)
payload = json.dumps({"text": "Fetched #{} new URLs, source: {}".format(len(list_insert_url_tuple_args), source) })
r = requests.post(endpoint_message, data=payload)
except Exception as e:
logger.warning("Webhook failed: {}".format(str(e)))
logger.debug("URL DB write finished")
except Exception as e:
logger.warning( "Exception writing to URL_DB:\n{}".format(traceback.format_exc()) )
logger.debug( "Exception --- List of URLs: {}".format(str(urls_fetched)) )

View File

@@ -1,48 +0,0 @@
from .db_utils import DB_Handler
import feedparser
import dateutil.parser
from .logger import get_logger
logger = get_logger()
class FetchFeed():
def __init__(self, db_handler: DB_Handler) -> None:
logger.debug("Initializing News feed")
self.db_handler = db_handler
def run(self):
try:
logger.debug("Starting NewsFeed.run()")
# Get feeds
list_url_feeds = self.db_handler._get_feed_urls()
logger.debug("Fetching news from feeds: {}".format(str(list_url_feeds)))
# Process via RSS feeds
for url_feed in list_url_feeds:
# Initialize
urls_fetched, urls_publish_date = [], []
# Fetch feeds
feeds = feedparser.parse(url_feed)
# Parse
for f in feeds.get("entries", []):
# Get URL
url = f.get("link", None)
# Process?
if (url is not None):
# Available publish date?
publish_date_parsed = f.get("published_parsed")
if (publish_date_parsed is None):
publish_date = f.get("published", None)
if (publish_date is not None):
publish_date_parsed = dateutil.parser.parse(publish_date)
# Published date
urls_publish_date.append(publish_date_parsed)
# URL
urls_fetched.append(url)
# URL fetching source
source = "feed {}".format(url_feed)
# Write to DB
self.db_handler.write_batch(urls_fetched, source)
except Exception as e:
logger.warning("Exception in NewsFeed.run(): {}".format(str(e)))

View File

@@ -1,45 +0,0 @@
from .db_utils import DB_Handler
import newspaper
from .logger import get_logger
logger = get_logger()
class FetchParser():
def __init__(self, db_handler: DB_Handler) -> None:
logger.debug("Initializing News SiteParsing newspaper4k")
self.db_handler = db_handler
# TODO: MOVE LOGIC ELSEWHERE!
def _postprocess(self, article_urls):
return [url.replace("#comment-stream", "") for url in article_urls]
def run(self):
try:
logger.debug("Starting NewsSiteParsing.run() for {}")
# Get URL hosts
list_url_hosts = self.db_handler._get_url_hosts()
logger.info("Fetching news by parsing URL hosts: {}".format(str(list_url_hosts)))
# Process newspaper4k build method
for url_host_feed in list_url_hosts:
# Protocol
if not (url_host_feed.startswith("http")):
url_host_feed_formatted = "https://" + url_host_feed
else:
url_host_feed_formatted = url_host_feed
logger.debug("Fetching newspaper4k parsing based on URL: {}".format(url_host_feed_formatted))
# Source object
url_host_built = newspaper.build(url_host_feed_formatted)
# Get articles URL list
urls_fetched = url_host_built.article_urls()
# TODO: MOVE!
# Post-processing
urls_fetched = self._postprocess(urls_fetched)
# URL fetching source
source = "newspaper4k {}".format(url_host_feed)
# Write to DB
self.db_handler.write_batch(urls_fetched, source)
except Exception as e:
logger.warning("Exception in NewsSiteParsing.run(): {}".format(str(e)))

View File

@@ -1,73 +0,0 @@
from .db_utils import DB_Handler
from .utils import get_searxng_instances
from .fetch_search_sources import FetcherDuckDuckGo, FetcherGNews, FetcherGoogleNews, FetcherSearxNews, FetcherPreSearch
from .logger import get_logger
logger = get_logger()
class FetchSearch():
def __init__(self, db_handler: DB_Handler, full=True) -> None:
logger.debug("Initializing News feed")
self.db_handler = db_handler
self.full_search = full
def _run_fetching(self, search_text):
logger.debug("Starting _run_fetching() for {}".format(search_text))
# Common parameters
lang, region = "en", "US"
### PreSearch
dict_params_news = {"search": search_text}
FetcherPreSearch(**dict_params_news).fetch_articles(self.db_handler)
### DuckDuckGo
period = "d"
dict_params_news = {"search": search_text, "lang": "wt", "region": "wt", "search_category": "news", "period": period}
FetcherDuckDuckGo(**dict_params_news).fetch_articles(self.db_handler)
dict_params_general = {"search": search_text, "lang": "wt", "region": "wt", "search_category": "general", "period": period}
FetcherDuckDuckGo(**dict_params_general).fetch_articles(self.db_handler)
if (self.full_search):
# Avoid site:{} search due to G-Bypass required time
if ("site:" not in search_text):
### GNews
dict_params = {"search": search_text, "lang": "wt", "region": "wt", "period": period}
FetcherGNews(**dict_params).fetch_articles(self.db_handler)
### GoogleNews
dict_params_news = {"search": search_text, "lang": lang, "region": region, "search_category": "news", "period": period}
FetcherGoogleNews(**dict_params_news).fetch_articles(self.db_handler)
# dict_params_general = {"search": search_text, "lang": lang, "region": region, "search_category": "general", "period": period}
if False:
### SearxNG
period = "day"
for searx_instance in get_searxng_instances():
dict_params_news = {"search": search_text, "searx_instance": searx_instance, "lang": lang, "region": region, "search_category": "news", "period": period}
dict_params_general = {"search": search_text, "searx_instance": searx_instance, "lang": lang, "region": region, "search_category": "general", "period": period}
# Append thread
FetcherSearxNews(**dict_params_news).fetch_articles(self.db_handler)
FetcherSearxNews(**dict_params_general).fetch_articles(self.db_handler)
logger.debug("Finished _run_fetching()")
def run(self):
try:
logger.info("Fetching text searches & URL hosts of interest")
# Get text searches of interest
list_search_text_of_interest = self.db_handler._get_search_list()
# Get URL host of interest
list_url_host = self.db_handler._get_url_host_list()
# Get text searches for URL hosts
list_search_text_url_host = ["site:{}".format(l) for l in list_url_host]
for search_text in list_search_text_of_interest + list_search_text_url_host:
logger.debug("Fetching news for search: {}".format(search_text))
self._run_fetching(search_text)
logger.info("Finished fetching text searches & URL hosts of interest")
except Exception as e:
logger.warning("Exception in NewsSearch.run(): {}".format(str(e)))

View File

@@ -1,384 +0,0 @@
from duckduckgo_search import DDGS
from gnews import GNews
from GoogleNews import GoogleNews
import requests
from bs4 import BeautifulSoup
import os
import time
import json
import numpy as np
import random
from .google_bypass import GoogleByPass
from abc import ABC, abstractmethod
from .logger import get_logger
logger = get_logger()
# Generic fetcher (fetches articles, writes to DB)
class FetcherAbstract(ABC):
@abstractmethod
def _fetch(self):
pass
def fetch_articles(self, db_writer):
logger.debug("Starting fetch() for {}".format(self.name))
# Fetch articles
list_news = self._fetch()
logger.info("Found #{} articles for search: {}".format(len(list_news), self.name))
# Write to DB
db_writer.write_batch(list_news, self.name)
# https://techblog.willshouse.com/2012/01/03/most-common-user-agents/
user_agents_list = [
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
"Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/111.0",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.3 Safari/605.1.15",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/111.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/112.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; rv:111.0) Gecko/20100101 Firefox/111.0",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/111.0",
"Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.4 Safari/605.1.15",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
"Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/112.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.44",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.54",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.62",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 OPR/96.0.0.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 OPR/97.0.0.0",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.48",
"Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/110.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.34",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.39",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.2 Safari/605.1.15",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; rv:112.0) Gecko/20100101 Firefox/112.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.51",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/109.0",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/112.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/112.0",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/110.0",
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/110.0",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0",
"Mozilla/5.0 (X11; CrOS x86_64 14541.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 YaBrowser/23.3.0.2246 Yowser/2.5 Safari/537.36",
"Mozilla/5.0 (X11; Linux x86_64; rv:108.0) Gecko/20100101 Firefox/108.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15",
"Mozilla/5.0 (Windows NT 6.1; rv:102.0) Gecko/20100101 Goanna/6.0 Firefox/102.0 PaleMoon/32.0.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.41",
"Mozilla/5.0 (Windows NT 10.0; rv:110.0) Gecko/20100101 Firefox/110.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 YaBrowser/23.1.5.708 Yowser/2.5 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/113.0",
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"
]
class FetcherPreSearch(FetcherAbstract):
def __init__(self, search):
"""
# period ->
- h = hours (eg: 12h)
- d = days (eg: 7d)
- m = months (eg: 6m)
- y = years (eg: 1y)
"""
self.search = search
self.period = "1d" # TODO Fixed for the moment
# self.lang = lang
# self.region = region
search_category = "news"
self.name = "presearch {} {} {}".format(search, search_category, self.period)
def _fetch(self):
try:
# PreSearch fetching endpoint, parameter search keyword
presearch_fetch_endpoint = "http://selenium_app:80/fetch_presearch/?search_keyword={}".format(self.search)
# Timeout: 15 minutes
r = requests.get(presearch_fetch_endpoint, timeout=900)
# Decode
list_news = json.loads(r.text).get("list_urls", [])
except Exception as e:
logger.warning("Timeout on request: {}. {}".format(presearch_fetch_endpoint, str(e)))
list_news = []
return list_news
class FetcherGNews(FetcherAbstract):
def __init__(self, search, period, lang="en", region="US"):
"""
# period ->
- h = hours (eg: 12h)
- d = days (eg: 7d)
- m = months (eg: 6m)
- y = years (eg: 1y)
"""
self.search = search
self.period = period
self.lang = lang
self.region = region
search_category = "news"
self.name = "gnews {} {} {} {}".format(search, search_category, period, "{}-{}".format(lang, region))
def _fetch(self):
try:
list_dict_news = GNews(self.lang, self.region, period=self.period).get_news(self.search)
# Decode
list_news = []
for l in list_dict_news:
list_news.append(l.get("url"))
except Exception as e:
logger.warning("Exception fetching {}: {}".format(self.name, str(e)))
list_news = []
# Bypass Google links
list_news_redirections = GoogleByPass().bypass_google_urls(list_news)
return list_news_redirections
class FetcherGoogleNews(FetcherAbstract):
def __init__(self, search, search_category="news", period="1d", lang="en", region="US"):
assert(search_category in ["news", "general"])
self.lang = lang
self.region = region
self.period = period
self.search_category = search_category
self.search = search
self.name = "googlenews {} {} {} {}".format(search, search_category, period, "{}-{}".format(lang, region))
def _fetch(self):
try:
# Initialize
g = GoogleNews(encode="utf-8", period=self.period, lang=self.lang, region=self.region)
g.enableException(True)
if (self.search_category == "general"):
set_links = set()
# Search
g.search(self.search)
# Iterate pages
MAX_ITER_PAGES = 15
for i in range(MAX_ITER_PAGES):
time.sleep(random.uniform(1, 1.5))
num_before = len(set_links)
# Get page
try:
links = g.page_at(i)
except Exception as e:
logger.warning("Exception fetching page in GoogleNews {}: {}".format(self.name, str(e)))
break
# Links
for l in links:
# '/url?esrc=s&q=&rct=j&sa=U&url=https://www.breitbart.com/news/scent-of-luxury-indias-jasmine-infuses-global-perfume/&ved=2ahUKEwjOybGSiN-AAxX1gv0HHfqSBpMQxfQBegQICBAC&usg=AOvVaw06GdoHyzPbIopUaEuUSQPQ'
url = l.get("link").split("url=")[-1]
set_links.add(url)
num_after = len(set_links)
# Finished?
if (num_before == num_after):
logger.debug("Iterated {} pages on GoogleNews general search".format(i))
break
# To list
list_news = list(set_links)
elif (self.search_category == "news"):
# Search
g.get_news(self.search)
# Fetch
list_news = g.get_links()
except Exception as e:
logger.warning("Exception fetching {}: {}".format(self.name, str(e)))
list_news = []
# Bypass Google links
list_news_redirections = GoogleByPass().bypass_google_urls(list_news)
return list_news_redirections
class FetcherDuckDuckGo(FetcherAbstract):
def __init__(self, search, search_category, period, lang="wt", region="wt"):
assert(search_category in ["news", "general"])
assert(period in ["d", "w", "m", "y"])
self.search = search
self.search_category = search_category
self.period = period
self.lang_region = "{}-{}".format(lang, region)
self.name = "duckduckgo {} {} {} {}".format(search, search_category, "1{}".format(period), region)
def _fetch(self):
try:
list_news = []
with DDGS(timeout=10) as ddgs:
if (self.search_category == "general"):
generator_links = ddgs.text(keywords=self.search, timelimit=self.period, region=self.lang_region)
elif (self.search_category == "news"):
generator_links = ddgs.news(keywords=self.search, timelimit=self.period, region=self.lang_region)
for l in generator_links:
list_news.append( l.get("url", l.get("href")) )
except Exception as e:
logger.warning("Exception fetching {}: {}".format(self.name, str(e)))
list_news = []
return list_news
class FetcherSearxNews(FetcherAbstract):
def __init__(self, search="child abuse", searx_instance="https://serx.ml/", lang="en", region="US", search_category="news", period="day"):
assert(search_category in ["news", "general"])
assert(period in [None, "day", "week", "month", "year"])
# Random header (minimize probability of web-scraping detection)
self.headers = {
'User-agent': str(np.random.choice(user_agents_list)),
'Accept-Encoding': 'gzip, deflate',
'Accept': '*/*',
'Connection': 'keep-alive',
}
""" # Optional header
self.headers = {
'User-agent': str(np.random.choice(user_agents_list)),
'Accept-Encoding': 'gzip, deflate, br',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1',
'TE': 'trailers',
'Sec-Fetch-Site': 'cross-site',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-Dest': 'document',
}
"""
self.search = search
self.searx_instance = searx_instance
self.lang_region = "{}-{}".format(lang, region)
self.search_category = search_category
self.period = period
self.t_sleep_lower, self.t_sleep_higher = 0.5, 1.5
self.request_timeout = 240
period_name_mapping = {
None: "no_date_range",
"day": "1d",
"week": "1w",
"month": "1m",
"year": "1y",
}
self.name = "searxng {} {} {} {} {}".format(searx_instance.replace("https://", "").replace("/", ""), search, search_category, period_name_mapping[period], self.lang_region)
logger.info("SearX - Initialized SearX fetcher: {}".format(self.name))
def _request_and_decode(self, url_search):
# Initial random time sleep (minimize chance of getting blocked)
time.sleep(random.uniform(self.t_sleep_lower, self.t_sleep_higher))
# Request
logger.debug("SearX - Searching: {}".format(url_search))
try:
r = requests.get(url_search, headers=self.headers, timeout=self.request_timeout)
except Exception as e:
logger.warning("SearX - Exception in request: {}".format(url_search), "\n", str(e))
return []
if (r.status_code == 200):
# Status code OK
logger.debug("SearX - Status code: {}".format(r.status_code))
elif (r.status_code == 429):
# TooManyRequests, "Rate limit exceeded"
logger.warning("SearX {} - Too many requests while running: {}. Request output: {}".format(self.name, r.url, r.text))
return []
else:
# Any other status code: log and give up on this search
logger.warning("SearX {} - Status code: {}. Request output: {}".format(self.name, r.status_code, r.text))
return []
# Decode request
soup = BeautifulSoup(r.text, 'html.parser')
page_url_set = set()
# h3 links
for elem in soup.find_all('h3'):
# Get url
url = elem.find('a').get('href')
page_url_set.add(url)
return page_url_set
def _get_news_list(self):
############################################################
# Domain & search parameter
search_domain = os.path.join(self.searx_instance, "search?q=")
# Search keywords
search_formatted = self.search.replace(" ", "+").replace(":", "%3A")
# Period formatted
period_formatted = "&time_range={}".format(self.period) if self.period is not None else ""
# Search parameters
search_parameters = "&category_{}=on&language={}{}".format(self.search_category, self.lang_region, period_formatted)
# Combined url search
url_search_nopage = "{}{}{}".format(search_domain, search_formatted, search_parameters)
############################################################
# Request and decode on page=1
url_set = self._request_and_decode(url_search_nopage)
# No results?
if (len(url_set) == 0):
logger.warning("SearX {} - Empty results on search: {}".format(self.name, url_search_nopage))
return []
# Iterate pages
search_numpage = 2
while True:
# Combine url search with page number
url_search_with_page = "{}&pageno={}".format(url_search_nopage, search_numpage)
# Request and decode on page=X
url_set_i = self._request_and_decode(url_search_with_page)
# Length before merging
length_current = len(url_set)
# Merge
url_set = url_set.union(url_set_i)
# Length after merging
length_merged = len(url_set)
# No new elements?
if (length_current == length_merged):
logger.debug("SearX {} - Finished processing search, #pages: {}".format(self.name, search_numpage))
break
# Next page
search_numpage += 1
return list(url_set)
def _fetch(self):
try:
# Fetch news
list_news = self._get_news_list()
except Exception as e:
logger.warning("Exception fetching {}: {}".format(self.name, str(e)))
list_news = []
return list_news
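For reference, a minimal sketch (not part of the original module) of the search URL that FetcherSearxNews._get_news_list() assembles; the instance and keywords are illustrative placeholders:
# Illustrative only: mirrors the string concatenation in _get_news_list()
searx_instance = "https://searx.be/"   # placeholder instance
search = "child abuse"                 # placeholder keywords
lang_region, period = "en-US", "day"
url_page_1 = "{}search?q={}&category_news=on&language={}&time_range={}".format(
    searx_instance, search.replace(" ", "+").replace(":", "%3A"), lang_region, period)
url_page_2 = "{}&pageno=2".format(url_page_1)
# -> https://searx.be/search?q=child+abuse&category_news=on&language=en-US&time_range=day&pageno=2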

View File

@@ -1,26 +0,0 @@
import requests
import json
from .logger import get_logger
logger = get_logger()
class GoogleByPass():
def __init__(self) -> None:
pass
def bypass_google_urls(self, list_urls):
if (len(list_urls) == 0):
return []
try:
# Endpoint
gbypass_endpoint = "http://selenium_app:80/get_redirection"
# Timeout: 20 minutes
timeout = 60*20
r = requests.post(gbypass_endpoint, json={"list_urls": list_urls}, timeout=timeout)
# Decode
list_urls_redirections = json.loads(r.text).get("list_urls_redirections", [])
except Exception as e:
logger.warning("Exception on request: {}. {}".format(gbypass_endpoint, str(e)))
list_urls_redirections = []
return list_urls_redirections
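A hedged usage sketch of the class above; the input URL is a placeholder, and the output depends entirely on what the selenium_app service returns:
# Hypothetical usage of GoogleByPass (endpoint and response key come from the code above)
resolver = GoogleByPass()
redirected = resolver.bypass_google_urls([
    "https://news.google.com/articles/<encoded-article-id>",  # placeholder Google News URL
])
# On success: a list of resolved publisher URLs; on any request/decoding error: []
print(redirected)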

View File

@@ -1,22 +0,0 @@
import logging
import logging.handlers
import os
os.makedirs("logs", exist_ok=True)
logging.basicConfig(format='%(filename)s | %(levelname)s | %(asctime)s | %(message)s')
logger = logging.getLogger("news_fetcher")
logger.setLevel(logging.INFO)
# To file log: INFO / WARNING / ERROR
fh = logging.handlers.RotatingFileHandler(filename="logs/log_app_fetcher.log", mode="a", maxBytes=10000000, backupCount=4)
fh.setFormatter(logging.Formatter('%(levelname)s | %(asctime)s | %(message)s'))
logger.addHandler(fh)
# To file log: WARNING / ERROR
fh_ = logging.handlers.RotatingFileHandler(filename="logs/log_app_fetcher_error.log", mode="a", maxBytes=10000000, backupCount=1)
fh_.setFormatter(logging.Formatter('%(levelname)s | %(asctime)s | %(message)s'))
fh_.setLevel(logging.WARNING)
logger.addHandler(fh_)
def get_logger():
return logger

View File

@@ -1,36 +0,0 @@
from .db_utils import DB_Handler
import requests
import json
from .logger import get_logger
logger = get_logger()
class MissingKidsFetch():
def __init__(self, db_handler: DB_Handler, num_pages) -> None:
logger.debug("Initializing News MissingKids")
self.db_handler = db_handler
self.num_pages = num_pages
self.missingkids_fetch_endpoint = "http://selenium_app:80/get_missing_kids/?pages={}"
def run(self):
try:
logger.debug("Starting NewsMissingKids.run()")
try:
# Timeout
if (self.num_pages > 15):
timeout = 60*90 # 1.5h
else:
timeout = 60*5 # 5 min
# Request
r = requests.get(self.missingkids_fetch_endpoint.format(self.num_pages), timeout=timeout)
# Decode
urls_fetched = json.loads(r.text).get("list_urls", [])
except Exception as e:
logger.warning("Timeout on request: {}. {}".format(missingkids_fetch_endpoint, str(e)))
urls_fetched = []
# URL fetching source
source = "missingkids fetcher"
# Write to DB
self.db_handler.write_batch(urls_fetched, source)
except Exception as e:
logger.warning("Exception in NewsMissingKids.run(): {}".format(str(e)))

View File

@@ -1,98 +0,0 @@
from .db_utils import URL_DB_Writer
from .logger import get_logger
logger = get_logger()
def get_missing_kid_status(url, return_canonical_url=False):
import time
import requests
# Sleep
time.sleep(0.75)
try:
# Request
r = requests.get(url, timeout=300)
# Decode
status_code = r.status_code
# Canonical URL removing parameters
url_canonical = r.url
except Exception as e:
logger.warning("Exception on get URL status request: {}. {}".format(url, str(e)))
status_code = None
url_canonical = url
if (status_code == 200):
status = "valid"
elif (status_code == 404):
status = "invalid"
else:
status = "unknown"
logger.debug("Missing Kid URL {} status: {}".format(url, status))
if (return_canonical_url):
return status, url_canonical
else:
return status
class MissingKidsStatus():
def __init__(self, db_connect_info, redis_connect_info, num_urls) -> None:
self.num_urls = num_urls
self.db_connect_info = db_connect_info
self.redis_connect_info = redis_connect_info
self.db_writer = URL_DB_Writer(db_connect_info, redis_connect_info)
def update_missing_kids_status(self):
try:
logger.info("Starting updating status to Missing Kids URLs, limit #URLs: {}".format(self.num_urls))
# List of URLs
list_ids_and_urls = self.db_writer._get_missing_kids_urls(self.num_urls)
# Dict: status -> IDs to update to new status
dict_status_ids, dict_status_urls = {}, {}
# Check URLs with invalid status?
skip_invalid_check = False
flush_every, flush_current = 20, 0
# Iterate URLs
for (id, url, current_status) in list_ids_and_urls:
# Skip duplicate URLs
if (current_status == "duplicate"):
continue
# Skip invalid URLs?
if (skip_invalid_check):
if (current_status == "invalid"):
continue
# Get status
new_status = get_missing_kid_status(url)
# Different? Update
if (current_status != new_status):
# Extend array
dict_status_ids[new_status] = dict_status_ids.get(new_status, []) + [id]
# Debugging dict
dict_status_urls[new_status] = dict_status_urls.get(new_status, []) + [url]
# +1 processed
flush_current += 1
# Flush batch?
if (flush_every == flush_current):
logger.info("Updating status to Missing Kids URLs: {}".format(dict_status_urls))
# Update DB
self.db_writer._update_urls_status(dict_status_ids)
# Reset
flush_current = 0
dict_status_ids, dict_status_urls = {}, {}
# Flush remaining batch
if (flush_current > 0):
logger.info("Updating status to Missing Kids URLs: {}".format(dict_status_urls))
# Update DB
self.db_writer._update_urls_status(dict_status_ids)
# Reset
flush_current = 0
dict_status_ids, dict_status_urls = {}, {}
logger.info("Finished updating status to Missing Kids URLs")
except Exception as e:
logger.warning("Exception in MissingKidsStatus.run(): {}".format(str(e)))

View File

@@ -1,62 +0,0 @@
from .db_utils import URL_DB_Writer
from .url_utils import process_article
from .logger import get_logger
logger = get_logger()
class UpdateErrorURLs():
def __init__(self, db_connect_info, redis_connect_info, num_urls) -> None:
self.num_urls = num_urls
self.db_connect_info = db_connect_info
self.redis_connect_info = redis_connect_info
self.db_writer = URL_DB_Writer(db_connect_info, redis_connect_info)
def update_error_urls_status(self):
try:
logger.info("Starting updating status to URLs with error, limit #URLs: {}".format(self.num_urls))
# List of URLs with status 'error'
list_ids_and_urls = self.db_writer._get_error_urls(self.num_urls)
# Current status
current_status = "error"
# Dict: status -> IDs to update to new status
dict_status_ids, dict_status_urls = {}, {}
# Get list of (pattern, priority, status) tuples to override status if required
list_pattern_status_tuple = self.db_writer._get_pattern_status_list()
# Sort pattern tuples by priority
list_pattern_status_tuple.sort(key=lambda tup: tup[1], reverse=True)
flush_every, flush_current = 20, 0
# Iterate URLs
for (id, url) in list_ids_and_urls:
# Get status
url_canonical, article_elements, new_status = process_article(url, list_pattern_status_tuple)
# Different? Update
if (current_status != new_status):
# Extend array
dict_status_ids[new_status] = dict_status_ids.get(new_status, []) + [id]
# Debugging dict
dict_status_urls[new_status] = dict_status_urls.get(new_status, []) + [url]
# +1 processed
flush_current += 1
# Flush batch?
if (flush_every == flush_current):
logger.info("Updating status to URLs with error: {}".format(dict_status_urls))
# Update DB
self.db_writer._update_urls_status(dict_status_ids)
# Reset
flush_current = 0
dict_status_ids, dict_status_urls = {}, {}
# Flush remaining batch
if (flush_current > 0):
logger.info("Updating status to URLs with error: {}".format(dict_status_urls))
# Update DB
self.db_writer._update_urls_status(dict_status_ids)
# Reset
flush_current = 0
dict_status_ids, dict_status_urls = {}, {}
logger.info("Finished updating status to URLs with error")
except Exception as e:
logger.warning("Exception in UpdateErrorURLs.run(): {}".format(str(e)))

View File

@@ -1,262 +0,0 @@
from gnews import GNews
import dateutil.parser
from datetime import datetime, timedelta
from .utils import remove_http_s
import time
import random
import traceback
import requests
import json
import re
from bs4 import BeautifulSoup
from .logger import get_logger
logger = get_logger()
def get_published_date(article):
try:
"""
# Already fetched publish date information?
if (publish_date_ is not None):
return publish_date_
"""
# List of potential publish dates
potential_dates = []
# Publish date is the best match
potential_dates.append(article.publish_date)
# Publish date metadata is the following best match
potential_dates.append(article.meta_data.get('article', {}).get("published_time", None))
# Iterate remaining keys
for key in article.meta_data.keys():
if ("date" in key):
potential_dates.append(article.meta_data[key])
def invalid_date(p_date):
# Today + 2 days, article from the future?
today_plus_two = datetime.utcnow() + timedelta(days=2)
# Article from the future?
return p_date.timestamp() > today_plus_two.timestamp()
for date_ in potential_dates:
# String date? parse
if (type(date_) == str):
try:
date_ = dateutil.parser.parse(date_)
except Exception as e:
logger.info("Invalid date found while parsing potential date: {} for URL: {}".format(date_, article.url))
date_ = None
# Valid?
if (date_ is not None) and (not invalid_date(date_)):
return date_
logger.debug("Article with no published date: {}".format(article.url))
return None
except Exception as e:
logger.info("Error while retrieving published date for URL: {}".format(article.url))
return None
def get_url_host(article_source_url, url):
# https://www.blabla.com/blabla -> www.blabla.com
if (article_source_url != ""):
# Article source URL already extracted, save path if any
return remove_http_s(article_source_url) # .split("/")[0]
else:
return remove_http_s(url).split("/")[0]
def get_status_pattern_matching(url, article_status, list_pattern_status_tuple):
# Regex pattern to update status on "valid", "invalid", and "unknown" status only
# Status "raw", "duplicated" and "error" should remain the way they are
# Assumption: List of patterns sorted by importance
if (article_status in ["valid", "invalid", "unknown"]):
# Regular expression pattern matching: https://regexr.com/
for regex_pattern, regex_priority, status_if_match in list_pattern_status_tuple:
# Matching?
matching = bool(re.match(regex_pattern, url))
# Update article status
if (matching):
if (status_if_match != article_status):
logger.debug("Regex pattern found, updating status from '{}' to '{}' for URL: {}".format(article_status, status_if_match, url))
return status_if_match
# Pattern matching not required or not found, original article status
return article_status
def bypass_google_link(article_url):
def bypass_google_consent(article_url):
# Sample URL: https://consent.google.com/m?continue=https://news.google.com/rss/articles/CBMiMGh0dHBzOi8vd3d3Lm1pc3NpbmdraWRzLm9yZy9wb3N0ZXIvbmNtYy84NjAxMTkvMdIBAA?oc%3D5&gl=NL&m=0&pc=n&cm=2&hl=en-US&src=1
article_url_no_consent = article_url.replace("https://consent.google.com/m?continue=", "")
# https://stackoverflow.com/questions/76063646/how-can-i-have-redirection-link-from-google-news-link-using-requests
headers = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36'
}
cookies = {'CONSENT': 'YES+cb.20220419-08-p0.cs+FX+111'}
try:
# Request
r = requests.get(article_url_no_consent, headers=headers, cookies=cookies, timeout=300)
# Decode
soup = BeautifulSoup(r.text, 'html.parser')
url_of_interest = soup.a['href']
except Exception as e:
logger.warning("Exception on request trying to G_bypass with headers: {}. {}".format(article_url_no_consent, str(e)))
url_of_interest = None
# Not able to bypass?
if (url_of_interest == "") or ("support.google.com" in url_of_interest) or ("news.google.com" in url_of_interest):
url_of_interest = None
return url_of_interest
def bypass_google_using_service(article_url):
try:
# e.g.: url = "https://news.google.com/articles/CBMiX2h0dHBzOi8vd3d3LmZveGJ1c2luZXNzLmNvbS9wb2xpdGljcy9kaXNuZXktc3Vlcy1mbG9yaWRhLWdvdi1yb24tZGVzYW50aXMtbG9zcy1zcGVjaWFsLWRpc3RyaWN00gEA?hl=en-US&gl=US&ceid=US%3Aen"
gbypass_endpoint = "http://selenium_app:80/get_redirection"
# Timeout: 5 minutes
r = requests.post(gbypass_endpoint, json={"url": article_url}, timeout=300)
# Decode
redirect_url = json.loads(r.text).get("redirect_url", "")
except Exception as e:
logger.warning("Exception on request: {}. {}".format(gbypass_endpoint, str(e)))
redirect_url = ""
return redirect_url
logger.debug("Starting gbypass_endpoint()")
article_url_bypassed = None
# Bypass using request
if ("consent.google.com" in article_url):
article_url_bypassed = bypass_google_consent(article_url)
# Not bypassed yet? Bypass using service
if (article_url_bypassed is None):
article_url_bypassed = bypass_google_using_service(article_url)
# if (article_url_bypassed is None) or (article_url_bypassed == "") or ("news.google.com" in article_url_bypassed):
if (article_url_bypassed == "") or (article_url_bypassed is None):
# Empty URL returned by Gbypass
logger.warning("Error while bypassing Gnews for URL: {}".format(article_url))
return None
else:
logger.debug("Correctly bypassed GNews to URL_redirect, from URL: {} {}".format(article_url_bypassed, article_url))
return article_url_bypassed
def process_article(article_url, list_pattern_status_tuple, language="en"):
# TODO:
"""
https://github.com/fhamborg/news-please
https://github.com/fhamborg/Giveme5W1H
https://github.com/santhoshse7en/news-fetch
"""
try:
logger.debug("Starting process_article()")
if ("news.google.com" in article_url) or ("consent.google.com" in article_url):
# Bypass to get redirection
article_url = bypass_google_link(article_url)
# Error?
if (article_url is None):
return None, {}, "error"
elif ("missingkids.org/poster" in article_url):
# Get status
article_status, url_canonical = get_missing_kid_status(article_url, return_canonical_url=True)
article_elements = {
"url_full": article_url,
"url_canonical": url_canonical
}
return url_canonical, article_elements, article_status
else:
# Avoid Too many requests (feeds, ...)
time.sleep(0.75)
logger.debug("Processing: {}".format(article_url))
# Default status unless something happens
article_status = "valid"
# Parse article
# TODO: :param proxy: The proxy parameter is a dictionary with a single key-value pair. self._proxy = {'http': proxy, 'https': proxy} if proxy else None
# TODO: Language per config
article = GNews(language).get_full_article(url=article_url)
# Article parsed?
if (article is None) or (not article.is_parsed):
logger.debug("Article not parsed: {}".format(article_url))
return article_url, {}, "error"
# Canonical link as main URL
url_canonical = article.canonical_link
# Empty canonical URL?
if (article.canonical_link is None) or (article.canonical_link == ""):
# URL with parameters? e.g. some zerohedge news fetched from newspaper3k end with #comment-stream -> Remove extra parameter in link
if ("?" in article.url) or (article.url.endswith("#comment-stream")) or (article.url.endswith("#disqus_thread")):
logger.debug("Article URL contains parameters, trying to clean URL: {}".format(article.url))
try:
# Remove text after parameter call
url = article.url.split("?")[0]
# Remove comment-stream
url = url.replace("#comment-stream", "").replace("#disqus_thread", "")
# Article
article_attempt = GNews(language).get_full_article(url=url)
# Retrieving same title? Update article based on clean URL
if (article_attempt is not None) and (article_attempt.title == article.title):
article = article_attempt
except Exception as e:
logger.info("Article parsing of URL without parameters failed: {}".format(article.url))
else: # Default behaviour
logger.debug("Article canonical link is empty, assuming URL=URL_CANONICAL: {}".format(article.url))
# By default, URL same as canonical
url_canonical = article.url
elif (article.url != article.canonical_link):
# If different, stick to canonical URL
logger.debug("Article URL and canonical link are different: {} {}".format(article.url, article.canonical_link))
else:
# If same, continue...
pass
# Update config to determine if content is valid
article.config.MIN_WORD_COUNT = 150
article.config.MIN_SENT_COUNT = 6
# Valid URL?
if (not article.is_valid_url()):
logger.debug("Not a valid news article: {}".format(url_canonical))
article_status = "invalid"
# Is the article's body text long enough to meet standard article requirements?
if (not article.is_valid_body()):
logger.debug("Article body not valid: {}".format(url_canonical))
article_status = "unknown"
if (article.images != article.imgs):
logger.debug("Article images and imgs are different: {} {}".format(article.images, article.imgs))
# article.keywords, article.meta_keywords, article.summary
# article.movies
# article.top_image
# Check if article status needs to be updated
article_status = get_status_pattern_matching(url_canonical, article_status, list_pattern_status_tuple)
article_elements = {
'url_full': article.url, # https://www.breitbart.com/tech/2022/10/03/report-election-integrity-project-worked-with-feds-to-censor-news-sites-in-2020/
'url_host': get_url_host(article.source_url, url_canonical), # www.breitbart.com
'title': article.title, # Report: Election Integrity Partnership Worked with Feds to Censor News Sites in 2020
'description': article.meta_description, # Coalition committed to respond in early 2022 but failed to do so, while Labor has not issued a full response since taking office
'text': article.text, # ${Article content}
'published_date': get_published_date(article), # python.datetime format, obtained from "YYYY-MM-DD" or '2022-10-03T20:54:17+00:00'
'authors': article.authors, # ['Christopher Knaus']
'language': article.meta_lang, # en
'tags': list(article.tags), # ['Wide Open Border', 'My Son Hunter Movie', ...]
'images': list(article.images), # [URL_IMAGE_1, URL_IMAGE_2, ...]
'url_canonical': url_canonical, # Canonical URL (redirection)
# 'html': article.html, # HTML article
}
logger.debug("Processing OK: {}".format(url_canonical))
return url_canonical, article_elements, article_status
except Exception as e:
logger.warning("Exception processing url: {}\n{}".format(article_url, traceback.format_exc()))
return None, {}, "error"

View File

@@ -1,33 +0,0 @@
def remove_http_s(url):
url = url.replace("https://", "") if url.startswith("https://") else url
url = url.replace("http://", "") if url.startswith("http://") else url
return url
def is_valid_url(url):
return url.startswith("https://")
def get_searxng_instances():
# SearxNG instances: https://searx.space/
searx_instances = set()
searx_instances.add("https://searx.work/")
searx_instances.add("https://search.ononoki.org/")
searx_instances.add("https://searxng.nicfab.eu/")
searx_instances.add("https://searx.be/")
# searx_instances.add("https://searx.fmac.xyz/")
# searx_instances.add("https://northboot.xyz/") # FIX
# searx_instances.add("https://serx.ml/") # Offline
# searx_instances.add("https://searx.ru/")
# searx_instances.add("https://searx.sp-codes.de/")
# searx_instances.add("https://searxng.nicfab.eu/")
# searx_instances.add("https://s.frlt.one/")
# searx_instances.add("https://search.sapti.me/")
# To list
list_searx_instances = list(searx_instances)
return list_searx_instances

app_selenium/README.md Normal file
View File

@@ -0,0 +1,3 @@
* Missing kids posters fetch (num_pages=X)
* ...

View File

@@ -17,7 +17,7 @@ class Search(models.Model):
db_table = 'search'
def __str__(self):
return "[{}]->{}".format(self.type, self.search)
return "[{}: {}]".format(self.type, self.search)
class Source(models.Model):
id = models.SmallAutoField(primary_key=True)

View File

@@ -130,7 +130,7 @@ class DB_Handler():
# Get or create URL with canonical form
obj_url_canonical, created = Urls.objects.get_or_create(url=dict_url_data.get("url_canonical"))
# Get the source-search IDs associated to obj_url.id
list_url_source_search = UrlsSourceSearch.objects.fiter(id_url=obj_url)
list_url_source_search = UrlsSourceSearch.objects.filter(id_url=obj_url)
for obj_url_source_search in list_url_source_search:
# Associate same sources to url_canonical (it might already exist)
UrlsSourceSearch.objects.get_or_create(id_url=obj_url_canonical, id_source=obj_url_source_search.id_source, id_search=obj_url_source_search.id_search)

View File

@@ -9,7 +9,7 @@
<script>
function getQueryString(pageNumber, itemsNumber, sources, statuses){
function getQueryString(pageNumber, itemsNumber, sources, searches, statuses){
// Query parameters. If input is null, get most recent value
let queryParams = new URLSearchParams(window.location.search);
// page
@@ -21,6 +21,9 @@
// sources
if (sources == null) sources = queryParams.get("sources") ?? "all";
queryParams.set("sources", sources);
// searches
if (searches == null) searches = queryParams.get("searches") ?? "all";
queryParams.set("searches", searches);
// status
if (statuses == null) statuses = queryParams.get("status") ?? "all";
queryParams.set("status", statuses);
@@ -33,11 +36,11 @@
return queryParamsString;
}
function loadPage(pageNumber, itemsNumber, sources, statuses) {
function loadPage(pageNumber, itemsNumber, sources, searches, statuses) {
$("#item-list").fadeTo(100, 0.5); // Smooth fade effect
$("#loading").show();
queryParamsString = getQueryString(pageNumber, itemsNumber, sources, statuses);
queryParamsString = getQueryString(pageNumber, itemsNumber, sources, searches, statuses);
$.ajax({
url: "?" + queryParamsString,
@@ -58,7 +61,7 @@
$(document).on("click", ".pagination a", function (event) {
event.preventDefault();
let page = $(this).attr("data-page");
loadPage(pageNumber=page, itemsNumber=null, sources=null, statuses=null);
loadPage(pageNumber=page, itemsNumber=null, sources=null, searches=null, statuses=null);
});
$(document).ready(function () {
@@ -68,25 +71,63 @@
////////////////////////////////////////////////////////////////////////////
const sourcesToggleAll = $("#toggle-all-sources");
const sourcesCheckboxes = $(".source-checkbox");
const searchesToggleAll = $("#toggle-all-searches");
const searchesCheckboxes = $(".search-checkbox");
const statusesToggleAll = $("#toggle-all-status");
const statusCheckboxes = $(".status-checkbox");
function updateFilters() {
// Get selected sources
let selectedSources = sourcesCheckboxes.filter(":checked").map(function () {
if (sourcesToggleAll.prop("checked")) {
selectedSources = "all";
}
else {
if (sourcesCheckboxes.filter(":checked").length > 0 ){
selectedSources = sourcesCheckboxes.filter(":checked").map(function () {
return $(this).val();
}).get().join(",");
}
else {
selectedSources = "none";
}
}
// Get selected searches
if (searchesToggleAll.prop("checked")) {
selectedSearches = "all";
}
else {
if (searchesCheckboxes.filter(":checked").length > 0 ){
selectedSearches = searchesCheckboxes.filter(":checked").map(function () {
return $(this).val();
}).get().join(",");
}
else {
selectedSearches = "none";
}
}
// Get selected URL statuses
let selectedStatuses = statusCheckboxes.filter(":checked").map(function () {
if (statusesToggleAll.prop("checked")) {
selectedStatuses = "all";
}
else {
if (statusCheckboxes.filter(":checked").length > 0 ){
selectedStatuses = statusCheckboxes.filter(":checked").map(function () {
return $(this).val();
}).get().join(",");
}
else {
selectedStatuses = "none";
}
}
// Get selected items per page
let selectedItems = $("input[name='items']:checked").val();
// Update pagination and reload data
loadPage(1, selectedItems, selectedSources, selectedStatuses);
loadPage(1, selectedItems, selectedSources, selectedSearches, selectedStatuses);
}
////////////////////////////////////////////////////////////////////////////
@@ -101,6 +142,15 @@
sourcesToggleAll.prop("checked", sourcesCheckboxes.length === sourcesCheckboxes.filter(":checked").length);
updateFilters();
});
// Searches
searchesToggleAll.on("change", function () {
searchesCheckboxes.prop("checked", searchesToggleAll.prop("checked"));
updateFilters();
});
searchesCheckboxes.on("change", function () {
searchesToggleAll.prop("checked", searchesCheckboxes.length === searchesCheckboxes.filter(":checked").length);
updateFilters();
});
// Status
statusesToggleAll.on("change", function () {
statusCheckboxes.prop("checked", statusesToggleAll.prop("checked"));
@@ -121,11 +171,15 @@
// Sources
sourcesCheckboxes.each(function () { $(this).prop("checked", true); });
sourcesToggleAll.prop("checked", true);
// Searches
searchesCheckboxes.each(function () { $(this).prop("checked", true); });
searchesToggleAll.prop("checked", true);
// Statuses
statusCheckboxes.each(function () { $(this).prop("checked", true); });
statusesToggleAll.prop("checked", true);
// Items
$("input[name='items'][value='" + 15 + "']").prop("checked", true);
// $("input[name='items'][value='" + 15 + "']").prop("checked", true);
// loadPage(pageNumber=page, itemsNumber=null, sources=null, searches=null, statuses=null);
});
////////////////////////////////////////////////////////////////////////////
@@ -148,6 +202,23 @@
let savedTheme = localStorage.getItem("theme") ||
(window.matchMedia("(prefers-color-scheme: dark)").matches ? "dark" : "light");
setTheme(savedTheme);
// Local browser timestamp aware for ts_fetch print
document.querySelectorAll(".timestamp").forEach(function (el) {
const ts = el.getAttribute("data-ts");
if (ts) {
const options = {
day: "2-digit",
month: "2-digit",
year: "numeric",
hour: "2-digit",
minute: "2-digit",
second: "2-digit",
hour12: false // Use 24-hour format
}; // "en-GB" for DD-MM-YYYY
const localDate = new Date(ts).toLocaleString("en-GB", options); // Adjust to browser's timezone
el.innerHTML = `${localDate}`;
}
});
});
////////////////////////////////////////////////////////////////////////////
</script>
@@ -174,6 +245,9 @@
box-shadow: 2px 0 5px rgba(0, 0, 0, 0.1);
padding: 15px;
transition: width 0.3s ease;
/* Enable scrolling */
overflow-y: auto;
max-height: 100vh;
}
#sidebar .nav-link {
@@ -313,10 +387,10 @@
}
th:nth-child(1), td:nth-child(1) { width: 50%; } /* URL column */
th:nth-child(2), td:nth-child(2) { width: 20%; } /* Fetch Date */
th:nth-child(3), td:nth-child(3) { width: 20%; } /* Sources */
th:nth-child(4), td:nth-child(4) { width: 5%; } /* Status */
th:nth-child(5), td:nth-child(5) { width: 5%; } /* Action */
th:nth-child(2), td:nth-child(2) { width: 27.5%; } /* Fetch Date */
th:nth-child(3), td:nth-child(3) { width: 10%; } /* Sources */
th:nth-child(4), td:nth-child(4) { width: 10%; } /* Searches */
th:nth-child(5), td:nth-child(5) { width: 2.5%; } /* Status */
/* ============================= */
/* Pagination Styling */
@@ -408,32 +482,22 @@
</button>
</div>
<!-- Sources -->
<!-- URLs per page -->
<div class="nav-item mt-3">
<strong>Select sources</strong>
<form id="source-filter-form">
<!-- Toggle All Checkbox -->
<div class="form-check">
<input class="form-check-input" type="checkbox" id="toggle-all-sources">
<label class="form-check-label fw-bold" for="toggle-all-sources">
Toggle all
</label>
</div>
<!-- Individual Source Checkboxes -->
{% for source in sources %}
<div class="form-check">
<input class="form-check-input source-checkbox" type="checkbox" value="{{ source.id }}" id="source-{{ source.id }}">
<label class="form-check-label" for="source-{{ source.id }}">
{{ source.source }}
</label>
<strong>URLs per page</strong>
<div class="card-body">
<!-- Individual Status Checkboxes -->
{% for url_per_page in list_urls_per_page %}
<div class="items-form-check">
<input class="form-check-input items" type="radio" name="items" id="value-{{ url_per_page }}" value="{{ url_per_page }}">
<label class="form-check-label" for="value-{{ url_per_page }}">{{ url_per_page }}</label>
</div>
{% empty %}
<tr>
<td colspan="2" class="text-center">No sources available.</td>
<td colspan="2" class="text-center">No options available.</td>
</tr>
{% endfor %}
</form>
</div>
</div>
<!-- Status -->
@@ -457,6 +521,33 @@
</label>
</div>
{% empty %}
<tr>
<td colspan="2" class="text-center">No statuses available.</td>
</tr>
{% endfor %}
</form>
</div>
<!-- Sources -->
<div class="nav-item mt-3">
<strong>Select sources</strong>
<form id="source-filter-form">
<!-- Toggle All Checkbox -->
<div class="form-check">
<input class="form-check-input" type="checkbox" id="toggle-all-sources">
<label class="form-check-label fw-bold" for="toggle-all-sources">
Toggle all
</label>
</div>
<!-- Individual Source Checkboxes -->
{% for source in sources %}
<div class="form-check">
<input class="form-check-input source-checkbox" type="checkbox" value="{{ source.id }}" id="source-{{ source.id }}">
<label class="form-check-label" for="source-{{ source.id }}">
{{ source.source }}
</label>
</div>
{% empty %}
<tr>
<td colspan="2" class="text-center">No sources available.</td>
</tr>
@@ -464,24 +555,32 @@
</form>
</div>
<!-- URLs per page -->
<!-- Searches -->
<div class="nav-item mt-3">
<strong>URLs per page</strong>
<div class="card-body">
<!-- Individual Status Checkboxes -->
{% for url_per_page in list_urls_per_page %}
<div class="items-form-check">
<input class="form-check-input items" type="radio" name="items" id="value-{{ url_per_page }}" value="{{ url_per_page }}">
<label class="form-check-label" for="value-{{ url_per_page }}">{{ url_per_page }}</label>
<strong>Select searches</strong>
<form id="search-filter-form">
<!-- Toggle All Checkbox -->
<div class="form-check">
<input class="form-check-input" type="checkbox" id="toggle-all-searches">
<label class="form-check-label fw-bold" for="toggle-all-searches">
Toggle all
</label>
</div>
<!-- Individual Search Checkboxes -->
{% for search in searches %}
<div class="form-check">
<input class="form-check-input search-checkbox" type="checkbox" value="{{ search.id }}" id="search-{{ search.id }}">
<label class="form-check-label" for="search-{{ search.id }}">
[{{ search.type }}] {{ search.search }}
</label>
</div>
{% empty %}
<tr>
<td colspan="2" class="text-center">No options available.</td>
<td colspan="2" class="text-center">No search available.</td>
</tr>
{% endfor %}
</form>
</div>
</div>
</ul>

View File

@@ -7,15 +7,18 @@
<th scope="col"><strong>URL</strong></th>
<th scope="col"><strong>Fetch date</strong></th>
<th scope="col"><strong>Sources</strong></th>
<th scope="col"><strong>Search</strong></th>
<th scope="col"><strong>Status</strong></th>
<th scope="col"><strong>Action</strong></th>
</tr>
</thead>
<tbody>
{% for item in page_obj %}
<tr>
<td><a href="{{ item.url }}/" target="_blank">{{ item.url }}</a></td>
<td>{{ item.ts_fetch }}</td>
<td>
<a href="./{{ item.id }}" class="btn btn-primary btn-sm" target="_blank"></a>
<a href="{{ item.url }}/" target="_blank">{{ item.url }}</a>
</td>
<td class="timestamp" data-ts="{{ item.ts_fetch|date:'c' }}">{{ item.ts_fetch }}</td>
<td>
{% with sources_map|dict_get:item.id as sources %}
{% if sources %}
@@ -27,6 +30,17 @@
{% endif %}
{% endwith %}
</td>
<td>
{% with searches_map|dict_get:item.id as searches %}
{% if searches %}
{% for search in searches %}
<span class="badge bg-secondary">{{ search }}</span>
{% endfor %}
{% else %}
<span class="text-muted">No searches</span>
{% endif %}
{% endwith %}
</td>
<td>
{% if item.status == 'raw' %}
<span class="badge bg-secondary">{{ item.status|capfirst }}</span>
@@ -44,10 +58,6 @@
<span class="badge bg-light">Unknown</span>
{% endif %}
</td>
<td>
<a href="url/{{ item.id }}" class="btn btn-primary btn-sm" target="_blank">Details</a>
</td>
</tr>
{% empty %}
<tr>

View File

@@ -54,7 +54,7 @@
}
// Fetch URL
let fetchUrl = `/news/url/${urlId}/fetch/?url=${encodeURIComponent(url)}&model=${encodeURIComponent(selectedModel)}&text=${encodeURIComponent(inputText)}`;
let fetchUrl = `/api/url/${urlId}/fetch/?url=${encodeURIComponent(url)}&model=${encodeURIComponent(selectedModel)}&text=${encodeURIComponent(inputText)}`;
let resultContainer = $("#chat-output");
resultContainer.html(""); // Clear previous content before fetching
@@ -100,12 +100,6 @@
messageContainer.html(marked.parse(accumulatedText));
//////////////////////////////////////
//////////////////////////////////////
// ORIGINAL:
//let text = decoder.decode(value).replace(/\n/g, "<br>");
//resultContainer.append(text); // Append streamed text
//////////////////////////////////////
resultContainer.scrollTop(resultContainer[0].scrollHeight); // Auto-scroll to bottom
return read();
});
@@ -135,12 +129,16 @@
</tr>
<tr>
<th>Fetch Date</th>
<td>{{ url_item.ts_fetch }}</td>
<td>{{ url_item.ts_fetch }} UTC</td>
</tr>
<tr>
<th>Sources</th>
<th>Source</th>
<td>{{ sources|join:", " }}</td>
</tr>
<tr>
<th>Search</th>
<td>{{ searches|join:", " }}</td>
</tr>
<tr>
<th>Status</th>
<td>{{ url_item.status }}</td>
@@ -175,7 +173,6 @@
<form onsubmit="fetchDetailsWithSelection(event, {{ url_item.id }}, '{{ url_item.url }}')">
<label for="options-{{ url_item.id }}">Model:</label>
<select id="options-{{ url_item.id }}" class="form-control mb-2">
<!-- <option value="">-- Select an option --</option> -->
{% for model in models %}
<option value="{{ model }}">{{ model }}</option>
{% endfor %}
@@ -186,21 +183,23 @@
<label for="custom-input-{{ url_item.id }}">Prompt:</label>
<textarea id="custom-input-{{ url_item.id }}" class="form-control mb-2" rows="3">{{ prompt }} {{ url_item.url }}</textarea>
<div class="d-flex align-items-center">
<!-- Fetch details button -->
<button class="btn btn-primary" onclick="fetchDetails({{ url_item.id }}, '{{ url_item.url }}')">
Fetch Details
</button>
<!-- Loading Spinner (Hidden by Default) -->
<div id="loading-spinner" class="spinner-border text-primary ms-2" role="status" style="display: none;">
<span class="visually-hidden">Loading...</span>
</div>
</div>
<!-- Chatbot-style response box -->
<div class="chat-box mt-3 p-3 border rounded">
<div id="chat-output"></div>
</div>
<!-- Loading Spinner (Hidden by Default) -->
<div id="loading-spinner" class="spinner-border text-primary mt-3" role="status" style="display: none;">
<span class="visually-hidden">Loading...</span>
</div>
</div>
<!-- Bootstrap JS -->

View File

@@ -3,7 +3,7 @@ from . import views
urlpatterns = [
path('', views.link_list, name='link_list'),
path('url/', views.news, name='url_detail'),
path('url/', views.urls, name='url_detail'),
path('url/<int:id>/', views.url_detail_view, name='url_detail'),
path('url/<int:id>/fetch/', views.fetch_details, name='fetch_details'),
path('task/<str:task>', views.trigger_task, name='trigger_task'),

View File

@@ -18,62 +18,78 @@ def link_list(request):
prefix = "http://localhost:8000/api/task"
links = ["fetch_feeds", "fetch_parser", "fetch_search", "process_raw_urls_50", "process_error_urls_50", "process_missing_kids_urls_50", "process_missing_kids_urls_500000"]
db_links = ["http://localhost:8080/?pgsql=matitos_db&username=supermatitos&db=matitos&ns=public&select=urls&order%5B0%5D=id&limit=500"]
return JsonResponse({"links": ["http://localhost:8000/api/url"] + db_links + [os.path.join(prefix, l) for l in links]})
list_links = [
# DB
"http://localhost:8080/?pgsql=matitos_db&username=supermatitos&db=matitos&ns=public&select=urls&order%5B0%5D=id&limit=500",
# Admin panel
"http://localhost:8000/admin",
# URLs
"http://localhost:8000/api/url",
# API tasks
] + [os.path.join(prefix, l) for l in links]
# Json
return JsonResponse({"links": list_links })
from django.http import StreamingHttpResponse, HttpResponse, JsonResponse
from django.http import StreamingHttpResponse, JsonResponse
from django.shortcuts import render, get_object_or_404
from django.core.paginator import Paginator
import requests
from django.http import StreamingHttpResponse
import json
import time
import ollama
from .models import Urls, Source, Search, UrlsSourceSearch, UrlContent
from .models import Urls, Source, Search, UrlContent, UrlsSourceSearch
# Create your views here.
def news(request):
def urls(request):
# URLs
urls = Urls.objects.all()
# Sources
sources = Source.objects.all()
seaerches = Search.objects.all()
searches = Search.objects.all()
# Parameters
page_number = request.GET.get("page", 1)
num_items = request.GET.get("items", 15)
source_ids = request.GET.get("sources", ','.join([str(s.id) for s in sources]))
search_ids = request.GET.get("searches", ','.join([str(s.id) for s in searches]))
status_filters = request.GET.get("status", None)
# Filters
if (status_filters) and (status_filters != "all"):
if (status_filters == "none"):
urls = []
else:
urls = urls.filter(status__in=status_filters.split(","))
if (source_ids) and (source_ids != "all"):
# TODO: Distinct needed?
# urls = urls.filter(urlssource__id_source__in=source_ids.split(",")).distinct()
pass
if (source_ids == "none"):
urls = []
else:
urls = urls.filter(urlssourcesearch__id_source__in=source_ids.split(",")) # .distinct()
if (search_ids) and (search_ids != "all"):
if (search_ids == "none"):
urls = []
else:
urls = urls.filter(urlssourcesearch__id_search__in=search_ids.split(",")) # .distinct()
# Pagination
paginator = Paginator(urls, num_items)
page_obj = paginator.get_page(page_number)
# Map URL IDs to their sources, only for subset of URLs (page of interest)
sources_map= {}
"""
# Map URL IDs to their sources & searches, only for subset of URLs (page of interest)
sources_map = {
url.id: list(Source.objects.filter(urlssource__id_url=url).values_list('source', flat=True))
for url in page_obj.object_list
url.id: list(Source.objects.filter(urlssourcesearch__id_url=url).distinct()) for url in page_obj.object_list
}
searches_map = {
url.id: list(Search.objects.filter(urlssourcesearch__id_url=url).distinct()) for url in page_obj.object_list
}
"""
context = {
"page_obj": page_obj,
"sources": sources,
"searches": searches,
"sources_map": sources_map,
"searches_map": searches_map,
"list_status": Urls.STATUS_ENUM.values,
"list_urls_per_page": [15, 50, 100],
"list_urls_per_page": [15, 100, 500],
}
# If request is AJAX, return JSON response
@@ -83,32 +99,54 @@ def news(request):
return render(request, "item_list.html", context)
class OllamaClient():
def __init__(self):
self.client = ollama.Client(host=os.getenv("ENDPOINT_OLLAMA", "https://ollamamodel.matitos.org"))
def _get_default_model(self):
return "gemma3:1b"
def get_models(self):
models = sorted([m.model for m in self.client.list().models])
if (self._get_default_model() in models):
return [self._get_default_model()] + [m for m in models if m != self._get_default_model()]
else:
return models
def get_prompt(self):
return "Provide a summary of the content below, avoid mentioning the source of information, and only answer with the summary. The summary needs to be brief and compact, consisting of one paragraph."
#return "Explain in a single and compact paragraph the what, why, when, where, who, and how of the content below. Also provide a single paragraph summary of the content:"
#return "Provide in one paragraph the what, why, when, where, who, and how of the content below. Also provide a one paragraph summary of the content:"
#return "Provide two summaries of the content below, and avoid mentioning the source of information. First, provide a very brief and compact paragraph summary. Second, provide a larger and more detailed summary, which describe the what, why, when, where, who, and how of the content:"
# return "Imagine you are a journalist, TLDR in a paragraph. Only answer with the summary:"
#return "Below you will find the whole content of a news article:\n{}\nProvide a concise summary of one paragraph maximum of the content.".format(content)
def url_detail_view(request, id):
url_item = get_object_or_404(Urls, id=id)
url_sources = list(Source.objects.filter(urlssource__id_url=url_item).values_list('source', flat=True))
url_sources = list(Source.objects.filter(urlssourcesearch__id_url=url_item).distinct())
url_searches = list(Search.objects.filter(urlssourcesearch__id_url=url_item).distinct())
# url_source_search = UrlsSourceSearch.objects.filter(id_url=url_item)
try:
url_content = UrlContent.objects.get(pk=id)
except UrlContent.DoesNotExist:
url_content = {}
# TODO: https://github.com/ollama/ollama-python?tab=readme-ov-file#async-client
# LLM models available
client = ollama.Client(host = 'https://ollamamodel.matitos.org')
models = sorted([m.model for m in client.list().models])
# default_model = "llama3.2:3b"
ollama = OllamaClient()
context = {
'url_item': url_item,
'sources': url_sources,
'models': models,
#'default_model': default_model,
'prompt': "Provide in one paragraph the what, why, when, where, who, and how of the content below. Also provide a one paragraph summary of the content:",
#"prompt": "Image you are a journalist, TLDR in a paragraph:",
#"prompt": "Below you will find the whole content of a news article:\n{}\nProvide a concise summary of one paragraph maximum of the content.".format(content)
'searches': url_searches,
'models': ollama.get_models(),
'prompt': ollama.get_prompt(),
'url_content': url_content,
}
return render(request, 'url_detail.html', context)
# TODO: move to ollamajs...
def fetch_details(request, id):
url_item = get_object_or_404(Urls, id=id)
url_param = request.GET.get("url", "") # Get URL
@@ -116,14 +154,14 @@ def fetch_details(request, id):
text = request.GET.get("text", "") # Get LLM prompt
# LLM
client = ollama.Client(host = 'https://ollamamodel.matitos.org')
ollama = OllamaClient()
def stream_response():
msg_content = {
"role": "user",
"content": text,
}
response = client.chat(model=model, messages=[msg_content], stream=True)
response = ollama.client.chat(model=model, messages=[msg_content], stream=True)
for chunk in response:
yield chunk["message"]["content"] # Stream each chunk of text

View File

@@ -124,9 +124,6 @@ SCHEDULER_QUEUES = {
'PORT': os.environ.get("REDIS_PORT", 6379),
'DB': os.environ.get("REDIS_DB", 0),
'DEFAULT_TIMEOUT': os.environ.get("RQ_DEFAULT_TIMEOUT", 60*15),
#'USERNAME': 'some-user',
#'PASSWORD': 'some-password',
#'DEFAULT_TIMEOUT': 360,
}
}
SCHEDULER_CONFIG = {

View File

@@ -20,6 +20,5 @@ from django.urls import path, include
urlpatterns = [
path('admin/', admin.site.urls),
path('api/', include('api.urls')),
#path('scheduler/', include('django_rq.urls')),
path('scheduler/', include('scheduler.urls')),
]

View File

@@ -1,22 +0,0 @@
#!/usr/bin/env python
"""Django's command-line utility for administrative tasks."""
import os
import sys
def main():
"""Run administrative tasks."""
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'mysite.settings')
try:
from django.core.management import execute_from_command_line
except ImportError as exc:
raise ImportError(
"Couldn't import Django. Are you sure it's installed and "
"available on your PYTHONPATH environment variable? Did you "
"forget to activate a virtual environment?"
) from exc
execute_from_command_line(sys.argv)
if __name__ == '__main__':
main()

View File

@@ -1,16 +0,0 @@
"""
ASGI config for mysite project.
It exposes the ASGI callable as a module-level variable named ``application``.
For more information on this file, see
https://docs.djangoproject.com/en/5.1/howto/deployment/asgi/
"""
import os
from django.core.asgi import get_asgi_application
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'mysite.settings')
application = get_asgi_application()

View File

@@ -1,132 +0,0 @@
"""
Django settings for mysite project.
Generated by 'django-admin startproject' using Django 5.1.6.
For more information on this file, see
https://docs.djangoproject.com/en/5.1/topics/settings/
For the full list of settings and their values, see
https://docs.djangoproject.com/en/5.1/ref/settings/
"""
import os
from pathlib import Path
# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent
# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/5.1/howto/deployment/checklist/
# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = 'django-insecure-0+jg0u+%s@sj759i7@jn*%-#jl)8&#=siclb5908pwe!7=*$qb'
# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True
ALLOWED_HOSTS = []
# Application definition
INSTALLED_APPS = [
'news.apps.NewsConfig',
'django.contrib.admin',
'django.contrib.auth',
'django.contrib.contenttypes',
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
]
MIDDLEWARE = [
'django.middleware.security.SecurityMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware',
]
ROOT_URLCONF = 'mysite.urls'
TEMPLATES = [
{
'BACKEND': 'django.template.backends.django.DjangoTemplates',
'DIRS': [],
'APP_DIRS': True,
'OPTIONS': {
'context_processors': [
'django.template.context_processors.debug',
'django.template.context_processors.request',
'django.contrib.auth.context_processors.auth',
'django.contrib.messages.context_processors.messages',
],
},
},
]
WSGI_APPLICATION = 'mysite.wsgi.application'
# Database
# https://docs.djangoproject.com/en/5.1/ref/settings/#databases
DATABASES = {
'default': {
'ENGINE': 'django.db.backends.postgresql',
'NAME': os.environ.get("DJANGO_DB_NAME", "matitos"),
'USER': os.environ.get("DJANGO_DB_USER", "supermatitos"),
'PASSWORD': os.environ.get("DJANGO_DB_PASSWORD", "supermatitos"),
'HOST': os.environ.get("DJANGO_DB_HOST", "localhost"),
'PORT': os.environ.get("DJANGO_DB_PORT", "5432"),
#'OPTIONS': {
# 'options': '-c default_transaction_read_only=on'
#}
}
}
# Password validation
# https://docs.djangoproject.com/en/5.1/ref/settings/#auth-password-validators
AUTH_PASSWORD_VALIDATORS = [
{
'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
},
]
# Internationalization
# https://docs.djangoproject.com/en/5.1/topics/i18n/
LANGUAGE_CODE = 'en-us'
TIME_ZONE = 'UTC'
USE_I18N = True
USE_TZ = True
# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/5.1/howto/static-files/
STATIC_URL = 'static/'
# Default primary key field type
# https://docs.djangoproject.com/en/5.1/ref/settings/#default-auto-field
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'

View File

@@ -1,26 +0,0 @@
"""
URL configuration for mysite project.
The `urlpatterns` list routes URLs to views. For more information please see:
https://docs.djangoproject.com/en/5.1/topics/http/urls/
Examples:
Function views
1. Add an import: from my_app import views
2. Add a URL to urlpatterns: path('', views.home, name='home')
Class-based views
1. Add an import: from other_app.views import Home
2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
Including another URLconf
1. Import the include() function: from django.urls import include, path
2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
"""
from django.contrib import admin
from django.urls import include, path
from django.views.generic.base import RedirectView
urlpatterns = [
path("", RedirectView.as_view(url='news/', permanent=False)),
path("news/", include("news.urls")),
path('admin/', admin.site.urls),
# path("facerecognition", include("facerecognition.urls")),
]

View File

@@ -1,16 +0,0 @@
"""
WSGI config for mysite project.
It exposes the WSGI callable as a module-level variable named ``application``.
For more information on this file, see
https://docs.djangoproject.com/en/5.1/howto/deployment/wsgi/
"""
import os
from django.core.wsgi import get_wsgi_application
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'mysite.settings')
application = get_wsgi_application()

View File

@@ -1,9 +0,0 @@
from django.contrib import admin
# Register your models here.
from .models import Urls, UrlsSource, Source
admin.site.register(Urls)
admin.site.register(UrlsSource)
admin.site.register(Source)

View File

@@ -1,6 +0,0 @@
from django.apps import AppConfig
class NewsConfig(AppConfig):
default_auto_field = 'django.db.models.BigAutoField'
name = 'news'

View File

@@ -1,38 +0,0 @@
# Generated by Django 5.1.6 on 2025-02-20 15:36
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
initial = True
dependencies = [
]
operations = [
migrations.CreateModel(
name='SOURCE',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('source', models.TextField()),
],
),
migrations.CreateModel(
name='URL',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('url', models.TextField()),
('pub_date', models.DateTimeField(verbose_name='date published')),
],
),
migrations.CreateModel(
name='URL_SOURCE',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('source', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, to='news.source')),
('url', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, to='news.url')),
],
),
]

View File

@@ -1,25 +0,0 @@
# Generated by Django 5.1.6 on 2025-02-20 16:11
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
('news', '0001_initial'),
]
operations = [
migrations.AlterModelTable(
name='source',
table='source',
),
migrations.AlterModelTable(
name='url',
table='urls',
),
migrations.AlterModelTable(
name='url_source',
table='urls_source',
),
]

View File

@@ -1,33 +0,0 @@
# Generated by Django 5.1.6 on 2025-02-20 16:18
import django.db.models.functions.datetime
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('news', '0002_alter_source_table_alter_url_table_and_more'),
]
operations = [
migrations.RemoveField(
model_name='url',
name='pub_date',
),
migrations.AddField(
model_name='url',
name='status',
field=models.CharField(choices=[('raw', 'Raw'), ('error', 'Error'), ('valid', 'Valid'), ('unknown', 'Unknown'), ('invalid', 'Invalid'), ('duplicate', 'Duplicate')], default='raw'),
),
migrations.AddField(
model_name='url',
name='ts_fetch',
field=models.DateTimeField(db_default=django.db.models.functions.datetime.Now(), verbose_name='Date fetched'),
),
migrations.AlterField(
model_name='url',
name='url',
field=models.TextField(verbose_name='URL'),
),
]

View File

@@ -1,17 +0,0 @@
# Generated by Django 5.1.6 on 2025-02-20 16:32
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
('news', '0003_remove_url_pub_date_url_status_url_ts_fetch_and_more'),
]
operations = [
migrations.AlterUniqueTogether(
name='url_source',
unique_together={('url', 'source')},
),
]

View File

@@ -1,59 +0,0 @@
# Generated by Django 5.1.6 on 2025-02-20 16:53
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('news', '0004_alter_url_source_unique_together'),
]
operations = [
migrations.CreateModel(
name='Urls',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('url', models.TextField(unique=True)),
('ts_fetch', models.DateTimeField()),
('status', models.TextField(choices=[('raw', 'Raw'), ('error', 'Error'), ('valid', 'Valid'), ('unknown', 'Unknown'), ('invalid', 'Invalid'), ('duplicate', 'Duplicate')], default='raw')),
],
options={
'db_table': 'urls',
'managed': False,
},
),
migrations.RemoveField(
model_name='url_source',
name='url',
),
migrations.AlterUniqueTogether(
name='url_source',
unique_together=None,
),
migrations.RemoveField(
model_name='url_source',
name='source',
),
migrations.AlterModelOptions(
name='source',
options={'managed': False},
),
migrations.CreateModel(
name='UrlsSource',
fields=[
('id_url', models.OneToOneField(db_column='id_url', on_delete=django.db.models.deletion.DO_NOTHING, primary_key=True, serialize=False, to='news.urls')),
],
options={
'db_table': 'urls_source',
'managed': False,
},
),
migrations.DeleteModel(
name='URL',
),
migrations.DeleteModel(
name='URL_SOURCE',
),
]

View File

@@ -1,17 +0,0 @@
# Generated by Django 5.1.6 on 2025-03-06 09:36
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
('news', '0005_urls_remove_url_source_url_and_more'),
]
operations = [
migrations.AlterModelOptions(
name='urls',
options={'managed': False, 'ordering': ['-ts_fetch']},
),
]

View File

@@ -1,61 +0,0 @@
from django.db import models
from django.contrib.postgres.fields import ArrayField
# Create your models here.
class Urls(models.Model):
class STATUS_ENUM(models.TextChoices):
RAW = "raw"
ERROR = "error"
VALID = "valid"
UNKNOWN = "unknown"
INVALID = "invalid"
DUPLICATE = "duplicate"
url = models.TextField(unique=True)
ts_fetch = models.DateTimeField()
status = models.TextField(choices=STATUS_ENUM, default=STATUS_ENUM.RAW) # This field type is a guess.
def __str__(self):
return self.url
class Meta:
managed = False
db_table = 'urls' # db_table = '{}_urls'.format(project_name)
ordering = ["-ts_fetch"]
class Source(models.Model):
id = models.SmallAutoField(primary_key=True)
source = models.TextField(unique=True)
def __str__(self):
return self.source
class Meta:
managed = False
db_table = 'source'
class UrlsSource(models.Model):
id_url = models.OneToOneField(Urls, models.DO_NOTHING, db_column='id_url', primary_key=True) # The composite primary key (id_url, id_source) found, that is not supported. The first column is selected.
id_source = models.ForeignKey(Source, models.DO_NOTHING, db_column='id_source')
def __str__(self):
return "Source: {}, URL: {}".format(self.id_source, self.id_url)
class Meta:
managed = False
db_table = 'urls_source'
unique_together = (('id_url', 'id_source'),)
class UrlContent(models.Model):
id_url = models.OneToOneField(Urls, models.DO_NOTHING, db_column='id_url', primary_key=True)
date_published = models.DateTimeField(blank=True, null=True)
title = models.TextField(blank=True, null=True)
description = models.TextField(blank=True, null=True)
content = models.TextField(blank=True, null=True)
tags = ArrayField(models.TextField(blank=True, null=True))
authors = ArrayField(models.TextField(blank=True, null=True))
image_urls = ArrayField(models.TextField(blank=True, null=True))
class Meta:
managed = False
db_table = 'url_content'

View File

@@ -1,508 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>News</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
<script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
<script>
function getQueryString(pageNumber, itemsNumber, sources, statuses){
// Query parameters. If input is null, get most recent value
let queryParams = new URLSearchParams(window.location.search);
// page
if (pageNumber == null) pageNumber = queryParams.get("page") ?? 1;
queryParams.set("page", pageNumber);
// items
if (itemsNumber == null) itemsNumber = queryParams.get("items") ?? 15;
queryParams.set("items", itemsNumber);
// sources
if (sources == null) sources = queryParams.get("sources") ?? "all";
queryParams.set("sources", sources);
// status
if (statuses == null) statuses = queryParams.get("status") ?? "all";
queryParams.set("status", statuses);
            // Keep commas readable in the query string: turn encoded %2C back into ","
            let queryParamsString = queryParams.toString().replace(/%2C/g, ",");
            return queryParamsString;
}
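        // loadPage: fetch the requested page via AJAX, swap the table content in place, and update the URL bar without a reload.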
function loadPage(pageNumber, itemsNumber, sources, statuses) {
$("#item-list").fadeTo(100, 0.5); // Smooth fade effect
$("#loading").show();
            const queryParamsString = getQueryString(pageNumber, itemsNumber, sources, statuses);
$.ajax({
url: "?" + queryParamsString,
type: "GET",
headers: { "X-Requested-With": "XMLHttpRequest" },
success: function (data) {
$("#item-list").fadeTo(0, 1).html(data.items_html); // Restore opacity smoothly
$("#loading").hide();
// Update URL without reloading
window.history.pushState({}, "", "?" + queryParamsString);
}
});
}
////////////////////////////////////////////////////////////////////////////
// Pagination
////////////////////////////////////////////////////////////////////////////
$(document).on("click", ".pagination a", function (event) {
event.preventDefault();
let page = $(this).attr("data-page");
            loadPage(page, null, null, null); // JavaScript has no named arguments; pass values positionally
});
$(document).ready(function () {
////////////////////////////////////////////////////////////////////////////
// Filter updates
////////////////////////////////////////////////////////////////////////////
const sourcesToggleAll = $("#toggle-all-sources");
const sourcesCheckboxes = $(".source-checkbox");
const statusesToggleAll = $("#toggle-all-status");
const statusCheckboxes = $(".status-checkbox");
function updateFilters() {
// Get selected sources
let selectedSources = sourcesCheckboxes.filter(":checked").map(function () {
return $(this).val();
}).get().join(",");
// Get selected URL statuses
let selectedStatuses = statusCheckboxes.filter(":checked").map(function () {
return $(this).val();
}).get().join(",");
// Get selected items per page
let selectedItems = $("input[name='items']:checked").val();
// Update pagination and reload data
loadPage(1, selectedItems, selectedSources, selectedStatuses);
}
////////////////////////////////////////////////////////////////////////////
// Change triggers
////////////////////////////////////////////////////////////////////////////
// Sources
sourcesToggleAll.on("change", function () {
sourcesCheckboxes.prop("checked", sourcesToggleAll.prop("checked"));
updateFilters();
});
sourcesCheckboxes.on("change", function () {
sourcesToggleAll.prop("checked", sourcesCheckboxes.length === sourcesCheckboxes.filter(":checked").length);
updateFilters();
});
// Status
statusesToggleAll.on("change", function () {
statusCheckboxes.prop("checked", statusesToggleAll.prop("checked"));
updateFilters();
});
statusCheckboxes.on("change", function () {
// If all checkboxes are checked, mark "Toggle All" as checked
statusesToggleAll.prop("checked", statusCheckboxes.length === statusCheckboxes.filter(":checked").length);
updateFilters();
});
// Items change trigger update
$(".items").on("change", updateFilters);
////////////////////////////////////////////////////////////////////////////
// Default values
////////////////////////////////////////////////////////////////////////////
// Sources
sourcesCheckboxes.each(function () { $(this).prop("checked", true); });
sourcesToggleAll.prop("checked", true);
// Statuses
statusCheckboxes.each(function () { $(this).prop("checked", true); });
statusesToggleAll.prop("checked", true);
// Items
$("input[name='items'][value='" + 15 + "']").prop("checked", true);
});
////////////////////////////////////////////////////////////////////////////
// Theme logic
////////////////////////////////////////////////////////////////////////////
function setTheme(mode) {
document.documentElement.setAttribute("data-theme", mode);
document.documentElement.setAttribute("data-bs-theme", mode);
localStorage.setItem("theme", mode);
document.getElementById("theme-icon").innerHTML = mode === "dark" ? "🌞" : "🌙";
document.body.classList.toggle("dark-mode", mode === "dark");
}
function toggleTheme() {
let currentTheme = document.documentElement.getAttribute("data-theme");
setTheme(currentTheme === "dark" ? "light" : "dark");
}
document.addEventListener("DOMContentLoaded", function () {
let savedTheme = localStorage.getItem("theme") ||
(window.matchMedia("(prefers-color-scheme: dark)").matches ? "dark" : "light");
setTheme(savedTheme);
});
////////////////////////////////////////////////////////////////////////////
</script>
<style>
/* Content Area */
#content {
margin-left: 170px; /* Match sidebar width */
min-width: calc(100vw - 170px); /* Ensure it doesn't shrink into the sidebar */
width: calc(100vw - 170px); /* Expands based on screen size */
padding: 20px;
overflow-x: auto; /* Prevent content from being squeezed */
transition: margin-left 0.3s ease;
}
/* Sidebar Styles */
#sidebar {
height: 100vh;
position: fixed;
top: 0;
left: 0;
width: 170px; /* Default width */
background-color: var(--bg-color);
box-shadow: 2px 0 5px rgba(0, 0, 0, 0.1);
padding: 15px;
transition: width 0.3s ease;
}
#sidebar .nav-link {
color: var(--text-color);
}
#sidebar .nav-link:hover {
background-color: var(--pagination-hover-bg);
}
/* ============================= */
/* Responsive Enhancements */
/* ============================= */
@media (min-width: 1200px) {
.table {
width: 95%; /* Allows table to take more space */
margin: 0 auto; /* Centers the table */
}
}
@media (max-width: 768px) {
#sidebar {
width: 70px; /* Collapse sidebar to smaller width */
/*padding: 10px;*/
}
#content {
margin-left: 70px; /* Adjust margin to match collapsed sidebar */
min-width: calc(100vw - 70px); /* Prevent overlap */
/*padding: 10px;*/
}
/* Adjust table for small screens */
.table-responsive {
overflow-x: auto;
}
.table th,
.table td {
white-space: nowrap; /* Prevent text wrapping in cells */
}
.table a {
word-break: break-word; /* Ensure long URLs break properly */
}
}
/* ============================= */
/* Global Styles */
/* ============================= */
body {
background-color: var(--bg-color);
color: var(--text-color);
transition: background-color 0.3s, color 0.3s;
}
/* ============================= */
/* Light & Dark Mode Variables */
/* ============================= */
:root {
--bg-color: #ffffff;
--text-color: #212529;
--table-bg: #ffffff;
--table-text: #000000;
--table-border: #dee2e6;
--link-color: #007bff;
--pagination-bg: #ffffff;
--pagination-border: #dee2e6;
--pagination-hover-bg: #f8f9fa;
--pagination-active-bg: #007bff;
--pagination-active-text: #ffffff;
--button-bg: #f8f9fa;
--button-border: #ced4da;
--button-text: #212529;
}
[data-theme="dark"] {
--bg-color: #121212;
--text-color: #e0e0e0;
--table-bg: #1e1e1e;
--table-text: #ffffff;
--table-border: #2c2c2c;
--link-color: #9ec5fe;
--pagination-bg: #1e1e1e;
--pagination-border: #444;
--pagination-hover-bg: #333;
--pagination-active-bg: #007bff;
--pagination-active-text: #ffffff;
--button-bg: #1e1e1e;
--button-border: #444;
--button-text: #e0e0e0;
}
/* ============================= */
/* Table Styling */
/* ============================= */
.table-responsive {
width: 100%; /* Ensure it spans the full width of its container */
max-width: 100%;
overflow-x: auto;
}
.table {
background-color: var(--table-bg);
color: var(--table-text);
border: 1px solid var(--table-border);
transition: background-color 0.3s, color 0.3s;
width: 100%; /* Ensures it takes full width of its container */
table-layout: auto; /* Allows columns to adjust dynamically */
/*white-space: nowrap;*/ /* Prevents text wrapping in cells */
}
.table th,
.table td {
border-color: var(--table-border);
}
.table thead {
background-color: var(--pagination-active-bg);
color: var(--pagination-active-text);
}
[data-theme="dark"] .table {
background-color: var(--table-bg);
color: var(--table-text);
}
[data-theme="dark"] .table th,
[data-theme="dark"] .table td {
border-color: var(--table-border);
}
[data-theme="dark"] .table thead {
background-color: #333;
color: #fff;
}
th:nth-child(1), td:nth-child(1) { width: 50%; } /* URL column */
th:nth-child(2), td:nth-child(2) { width: 20%; } /* Fetch Date */
th:nth-child(3), td:nth-child(3) { width: 20%; } /* Sources */
th:nth-child(4), td:nth-child(4) { width: 5%; } /* Status */
th:nth-child(5), td:nth-child(5) { width: 5%; } /* Action */
/* ============================= */
/* Pagination Styling */
/* ============================= */
.pagination {
display: flex;
justify-content: center;
padding: 10px 0;
}
.pagination .page-link {
background-color: var(--pagination-bg);
border-color: var(--pagination-border);
color: var(--text-color);
padding: 10px 14px;
margin: 0 5px;
border-radius: 8px;
transition: background-color 0.3s, color 0.3s, transform 0.2s;
}
.pagination .page-link:hover {
background-color: var(--pagination-hover-bg);
transform: scale(1.05);
}
.pagination .active .page-link {
background-color: var(--pagination-active-bg);
color: var(--pagination-active-text);
border-color: var(--pagination-active-bg);
}
/* ============================= */
/* Theme Toggle Button */
/* ============================= */
.theme-toggle-btn {
background-color: var(--button-bg);
border: 1px solid var(--button-border);
color: var(--button-text);
border-radius: 50%;
width: 40px;
height: 40px;
font-size: 20px;
display: flex;
align-items: center;
justify-content: center;
transition: background-color 0.3s, color 0.3s, transform 0.2s;
cursor: pointer;
}
.theme-toggle-btn:hover {
background-color: var(--pagination-hover-bg);
transform: rotate(20deg);
}
.theme-toggle-btn:active {
transform: scale(0.95);
}
/* ============================= */
/* Loading Spinner Styling */
/* ============================= */
#loading {
position: fixed;
left: 50%;
top: 50%;
transform: translate(-50%, -50%);
z-index: 1050;
display: none;
}
.spinner-border {
width: 4rem;
height: 4rem;
}
</style>
</head>
<body>
<!-- Left Sidebar -->
<div id="sidebar" class="d-flex flex-column">
<ul class="nav flex-column">
<!-- Theme Toggle Button -->
<div class="nav-item">
<button onclick="toggleTheme()" class="theme-toggle-btn">
<span id="theme-icon">🌙</span>
</button>
</div>
<!-- Sources -->
<div class="nav-item mt-3">
<strong>Select sources</strong>
<form id="source-filter-form">
<!-- Toggle All Checkbox -->
<div class="form-check">
<input class="form-check-input" type="checkbox" id="toggle-all-sources">
<label class="form-check-label fw-bold" for="toggle-all-sources">
Toggle all
</label>
</div>
<!-- Individual Source Checkboxes -->
{% for source in sources %}
<div class="form-check">
<input class="form-check-input source-checkbox" type="checkbox" value="{{ source.id }}" id="source-{{ source.id }}">
<label class="form-check-label" for="source-{{ source.id }}">
{{ source.source }}
</label>
</div>
                    {% empty %}
                        <div class="form-check text-muted">No sources available.</div>
                    {% endfor %}
</form>
</div>
<!-- Status -->
<div class="nav-item mt-3">
<strong>Select status</strong>
<form id="status-filter-form">
<!-- Toggle All Checkbox -->
<div class="status-form-check">
<input class="form-check-input" type="checkbox" id="toggle-all-status">
<label class="form-check-label fw-bold" for="toggle-all-status">
Toggle all
</label>
</div>
<!-- Individual Status Checkboxes -->
{% for status in list_status %}
<div class="status-form-check">
<input class="form-check-input status-checkbox" type="checkbox" value="{{ status }}" id="status-{{ status }}">
<label class="form-check-label" for="status-{{ status }}">
{{ status }}
</label>
</div>
                    {% empty %}
                        <div class="status-form-check text-muted">No statuses available.</div>
                    {% endfor %}
</form>
</div>
<!-- URLs per page -->
<div class="nav-item mt-3">
<strong>URLs per page</strong>
<div class="card-body">
                    <!-- Items-per-page radio buttons -->
{% for url_per_page in list_urls_per_page %}
<div class="items-form-check">
<input class="form-check-input items" type="radio" name="items" id="value-{{ url_per_page }}" value="{{ url_per_page }}">
<label class="form-check-label" for="value-{{ url_per_page }}">{{ url_per_page }}</label>
</div>
                    {% empty %}
                        <div class="items-form-check text-muted">No options available.</div>
                    {% endfor %}
</div>
</div>
</ul>
</div>
<!-- Main Content Area -->
<div id="content" class="main-content">
<div class="container mt-4">
<!-- Table -->
<div id="item-list">
{% include 'item_list_partial.html' %}
</div>
<!-- Loading... -->
<div id="loading" class="text-center mt-3" style="display:none;">
<div class="spinner-border text-primary" role="status">
<span class="visually-hidden">Loading...</span>
</div>
</div>
</div>
</div>
</body>
</html>

View File

@@ -1,87 +0,0 @@
{% load custom_filters %}
<div class="table-responsive">
<table class="table table-hover">
<thead>
<tr>
<th scope="col"><strong>URL</strong></th>
<th scope="col"><strong>Fetch date</strong></th>
<th scope="col"><strong>Sources</strong></th>
<th scope="col"><strong>Status</strong></th>
<th scope="col"><strong>Action</strong></th>
</tr>
</thead>
<tbody>
{% for item in page_obj %}
<tr>
<td><a href="{{ item.url }}/" target="_blank">{{ item.url }}</a></td>
<td>{{ item.ts_fetch }}</td>
<td>
{% with sources_map|dict_get:item.id as sources %}
{% if sources %}
{% for source in sources %}
<span class="badge bg-secondary">{{ source }}</span>
{% endfor %}
{% else %}
<span class="text-muted">No sources</span>
{% endif %}
{% endwith %}
</td>
<td>
{% if item.status == 'raw' %}
<span class="badge bg-secondary">{{ item.status|capfirst }}</span>
{% elif item.status == 'error' %}
<span class="badge bg-danger">{{ item.status|capfirst }}</span>
{% elif item.status == 'valid' %}
<span class="badge bg-success">{{ item.status|capfirst }}</span>
{% elif item.status == 'unknown' %}
<span class="badge bg-warning">{{ item.status|capfirst }}</span>
{% elif item.status == 'invalid' %}
<span class="badge bg-danger">{{ item.status|capfirst }}</span>
{% elif item.status == 'duplicate' %}
<span class="badge bg-info">{{ item.status|capfirst }}</span>
{% else %}
<span class="badge bg-light">Unknown</span>
{% endif %}
</td>
<td>
<a href="url/{{ item.id }}" class="btn btn-primary btn-sm" target="_blank">Details</a>
</td>
</tr>
{% empty %}
<tr>
<td colspan="4" class="text-center">No items available.</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
<div class="d-flex justify-content-center mt-3">
<nav>
<ul class="pagination">
{% if page_obj.has_previous %}
<li class="page-item">
<a class="page-link" href="#" data-page="1">First</a>
</li>
<li class="page-item">
<a class="page-link" href="#" data-page="{{ page_obj.previous_page_number }}">Previous</a>
</li>
{% endif %}
<li class="page-item active">
<span class="page-link">Page {{ page_obj.number }} of {{ page_obj.paginator.num_pages }}</span>
</li>
{% if page_obj.has_next %}
<li class="page-item">
<a class="page-link" href="#" data-page="{{ page_obj.next_page_number }}">Next</a>
</li>
<li class="page-item">
<a class="page-link" href="#" data-page="{{ page_obj.paginator.num_pages }}">Last</a>
</li>
{% endif %}
</ul>
</nav>
</div>

View File

@@ -1,211 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{% block title %}News{% endblock %}</title>
<!-- Bootstrap CSS -->
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
<!-- Add jQuery from CDN (before other scripts) -->
<script src="https://code.jquery.com/jquery-3.6.4.min.js"></script>
<!-- Markdown -->
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
<!-- Custom Styles -->
<style>
body {
background-color: #f4f4f4;
}
.navbar-dark .navbar-nav .nav-link {
color: rgba(255,255,255,0.75);
}
.chat-box {
background-color: #fff;
border: 1px solid #ddd;
padding: 15px;
border-radius: 8px;
overflow-y: auto; /* Enable vertical scrolling */
max-width: 100%;
min-height: 150px;
max-height: 450px;
white-space: normal;
word-wrap: break-word;
word-break: break-word;
}
</style>
</head>
<script>
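        // fetchDetails: request a streamed LLM answer for this URL and progressively render the response as Markdown in the chat box.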
function fetchDetails(urlId, url) {
// Show the loading spinner
document.getElementById("loading-spinner").style.display = "block";
// Get the input value
let inputText = document.getElementById(`custom-input-${urlId}`).value;
// Get the input model
let selectedModel = document.getElementById(`options-${urlId}`).value;
            // Check if a model is selected
            if (!selectedModel) {
                alert("Please select a model before fetching details.");
                document.getElementById("loading-spinner").style.display = "none"; // hide the spinner shown above
                return;
            }
// Fetch URL
let fetchUrl = `/news/url/${urlId}/fetch/?url=${encodeURIComponent(url)}&model=${encodeURIComponent(selectedModel)}&text=${encodeURIComponent(inputText)}`;
let resultContainer = $("#chat-output");
resultContainer.html(""); // Clear previous content before fetching
let fetchButton = $("button[onclick^='fetchDetails']"); // Select the button
fetchButton.prop("disabled", true); // Disable button
fetch(fetchUrl)
.then(response => {
if (!response.ok) {
throw new Error("Error on network response");
}
const reader = response.body.getReader();
const decoder = new TextDecoder();
//////////////////////////////////////
let accumulatedText = ""; // Store streamed text before rendering Markdown
// Create a temporary container for streaming response
let messageContainer = $('<div class="chat-message"></div>');
//let messageContainer = $('');
resultContainer.append(messageContainer);
//////////////////////////////////////
function read() {
return reader.read().then(({ done, value }) => {
if (done) {
//////////////////////////////////////
messageContainer.html(marked.parse(accumulatedText));
//////////////////////////////////////
fetchButton.prop("disabled", false); // Re-enable button when done
return;
}
//////////////////////////////////////
// Decode the streamed chunk
let chunk = decoder.decode(value);
// Append to the accumulated text
accumulatedText += chunk;
// Render Markdown progressively (but safely)
messageContainer.html(marked.parse(accumulatedText));
//////////////////////////////////////
//////////////////////////////////////
// ORIGINAL:
//let text = decoder.decode(value).replace(/\n/g, "<br>");
//resultContainer.append(text); // Append streamed text
//////////////////////////////////////
resultContainer.scrollTop(resultContainer[0].scrollHeight); // Auto-scroll to bottom
return read();
});
}
return read();
})
.catch(error => {
resultContainer.html(`<p class="text-danger">Error fetching details: ${error.message}</p>`);
fetchButton.prop("disabled", false); // Re-enable button on error
})
.finally(() => {
// Hide the loading spinner after request is complete
document.getElementById("loading-spinner").style.display = "none";
});
}
</script>
<body>
<!-- Main Content -->
<div class="container mt-4">
<h2>URL Details</h2>
<table class="table table-bordered">
<tr>
<th>URL</th>
<td><a href="{{ url_item.url }}" target="_blank">{{ url_item.url }}</a></td>
</tr>
<tr>
<th>Fetch Date</th>
<td>{{ url_item.ts_fetch }}</td>
</tr>
<tr>
<th>Sources</th>
<td>{{ sources|join:", " }}</td>
</tr>
<tr>
<th>Status</th>
<td>{{ url_item.status }}</td>
</tr>
<tr>
<th>Title</th>
<td>{{ url_content.title }}</td>
</tr>
<tr>
<th>Description</th>
<td>{{ url_content.description }}</td>
</tr>
<tr>
<th>Content</th>
<td>{{ url_content.content }}</td>
</tr>
<tr>
<th>Tags</th>
<td>{{ url_content.tags }}</td>
</tr>
<tr>
<th>Authors</th>
<td>{{ url_content.authors }}</td>
</tr>
<tr>
<th>Image URLs</th>
<td>{{ url_content.image_urls }}</td>
</tr>
</table>
<!-- Independent form for optional values -->
<form onsubmit="fetchDetailsWithSelection(event, {{ url_item.id }}, '{{ url_item.url }}')">
<label for="options-{{ url_item.id }}">Model:</label>
<select id="options-{{ url_item.id }}" class="form-control mb-2">
<!-- <option value="">-- Select an option --</option> -->
{% for model in models %}
<option value="{{ model }}">{{ model }}</option>
{% endfor %}
</select>
</form>
<!-- Input field with a default value -->
<label for="custom-input-{{ url_item.id }}">Prompt:</label>
<textarea id="custom-input-{{ url_item.id }}" class="form-control mb-2" rows="3">{{ prompt }} {{ url_item.url }}</textarea>
<!-- Fetch details button -->
<button class="btn btn-primary" onclick="fetchDetails({{ url_item.id }}, '{{ url_item.url }}')">
Fetch Details
</button>
<!-- Chatbot-style response box -->
<div class="chat-box mt-3 p-3 border rounded">
<div id="chat-output"></div>
</div>
<!-- Loading Spinner (Hidden by Default) -->
<div id="loading-spinner" class="spinner-border text-primary mt-3" role="status" style="display: none;">
<span class="visually-hidden">Loading...</span>
</div>
</div>
<!-- Bootstrap JS -->
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
{% block extra_js %}{% endblock %}
</body>
</html>

View File

@@ -1,8 +0,0 @@
from django import template
register = template.Library()
@register.filter
def dict_get(dictionary, key):
"""Custom filter to get a value from a dictionary in Django templates."""
return dictionary.get(key, [])

View File

@@ -1,3 +0,0 @@
from django.test import TestCase
# Create your tests here.

View File

@@ -1,8 +0,0 @@
from django.urls import path
from . import views
urlpatterns = [
path("", views.news, name="home"),
path('url/<int:id>/', views.url_detail_view, name='url_detail'),
    path('url/<int:id>/fetch/', views.fetch_details, name='fetch_details'),
]

View File

@@ -1,104 +0,0 @@
from django.http import StreamingHttpResponse, HttpResponse, JsonResponse
from django.shortcuts import render, get_object_or_404
from django.core.paginator import Paginator
import ollama
from .models import Urls, Source, UrlsSource, UrlContent
# Create your views here.
def index(request):
return HttpResponse("Hello, world. You're at the news index.")
def news(request):
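    """List URLs with pagination and source/status filters; AJAX requests receive the rendered table partial as JSON."""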
# URLs
urls = Urls.objects.all()
# Sources
sources = Source.objects.all()
# Parameters
page_number = request.GET.get("page", 1)
num_items = request.GET.get("items", 15)
source_ids = request.GET.get("sources", ','.join([str(s.id) for s in sources]))
status_filters = request.GET.get("status", None)
# Filters
if (status_filters) and (status_filters != "all"):
urls = urls.filter(status__in=status_filters.split(","))
if (source_ids) and (source_ids != "all"):
        # distinct() avoids duplicate rows when a URL matches more than one selected source
urls = urls.filter(urlssource__id_source__in=source_ids.split(",")).distinct()
# Pagination
paginator = Paginator(urls, num_items)
page_obj = paginator.get_page(page_number)
# Map URL IDs to their sources, only for subset of URLs (page of interest)
sources_map = {
url.id: list(Source.objects.filter(urlssource__id_url=url).values_list('source', flat=True))
for url in page_obj.object_list
}
context = {
"page_obj": page_obj,
"sources": sources,
"sources_map": sources_map,
"list_status": Urls.STATUS_ENUM.values,
"list_urls_per_page": [15, 50, 100],
}
# If request is AJAX, return JSON response
if request.headers.get("X-Requested-With") == "XMLHttpRequest":
return JsonResponse({'items_html': render(request, 'item_list_partial.html', context).content.decode('utf-8')})
return render(request, "item_list.html", context)
def url_detail_view(request, id):
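    """Show the stored details for one URL together with the Ollama models available for summarization."""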
url_item = get_object_or_404(Urls, id=id)
url_sources = list(Source.objects.filter(urlssource__id_url=url_item).values_list('source', flat=True))
try:
url_content = UrlContent.objects.get(pk=id)
except UrlContent.DoesNotExist:
url_content = {}
# TODO: https://github.com/ollama/ollama-python?tab=readme-ov-file#async-client
# LLM models available
    client = ollama.Client(host='https://ollamamodel.matitos.org')
models = sorted([m.model for m in client.list().models])
# default_model = "llama3.2:3b"
context = {
'url_item': url_item,
'sources': url_sources,
'models': models,
#'default_model': default_model,
'prompt': "Provide in one paragraph the what, why, when, where, who, and how of the content below. Also provide a one paragraph summary of the content:",
#"prompt": "Image you are a journalist, TLDR in a paragraph:",
#"prompt": "Below you will find the whole content of a news article:\n{}\nProvide a concise summary of one paragraph maximum of the content.".format(content)
'url_content': url_content,
}
return render(request, 'url_detail.html', context)
def fetch_details(request, id):
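    """Stream an LLM chat completion for the submitted prompt back to the client as plain text."""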
url_item = get_object_or_404(Urls, id=id)
url_param = request.GET.get("url", "") # Get URL
model = request.GET.get("model", "") # Get LLM model
text = request.GET.get("text", "") # Get LLM prompt
# LLM
    client = ollama.Client(host='https://ollamamodel.matitos.org')
def stream_response():
msg_content = {
"role": "user",
"content": text,
}
response = client.chat(model=model, messages=[msg_content], stream=True)
for chunk in response:
yield chunk["message"]["content"] # Stream each chunk of text
return StreamingHttpResponse(stream_response(), content_type="text/plain")