URLs web visualization, cleaning obsolete code
1-DB.ipynb
@@ -2,7 +2,7 @@
 "cells": [
 {
 "cell_type": "code",
-"execution_count": 8,
+"execution_count": 1,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -11,7 +11,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 9,
+"execution_count": 2,
 "metadata": {},
 "outputs": [
 {
@@ -20,17 +20,108 @@
 "text": [
 "db_postgres\n",
 "db_redis\n",
-"\u001b[1A\u001b[1B\u001b[0G\u001b[?25l[+] Running 1/0\n",
-" ⠿ Container db_postgres \u001b[39mStarting\u001b[0m \u001b[34m0.1s \u001b[0m\n",
-" ⠿ Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.1s \u001b[0m\n",
-" \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n",
+"\u001b[1A\u001b[1B\u001b[0G\u001b[?25l[+] Running 0/0\n",
+" ⠙ matitos_dozzle Pulling \u001b[39m\u001b[0m \u001b[34m0.1s \u001b[0m\n",
+"\u001b[?25h\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 0/1\n",
+" ⠹ matitos_dozzle Pulling \u001b[39m\u001b[0m \u001b[34m0.2s \u001b[0m\n",
[84 further added output lines omitted: docker compose spinner frames while the matitos_dozzle image layers b5b68a794063, 764914624645 and 82780b9b6d69 are pulled and the db_redis, db_postgres and dozzle containers are created]
 "\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/3\n",
-" ⠿ Container db_postgres \u001b[39mStarting\u001b[0m \u001b[34m0.2s \u001b[0m\n",
-" ⠿ Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.2s \u001b[0m\n",
+" ⠿ Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.1s \u001b[0m\n",
+" ⠿ Container db_postgres \u001b[39mStarting\u001b[0m \u001b[34m0.1s \u001b[0m\n",
+" ⠿ Container dozzle \u001b[39mStarting\u001b[0m \u001b[34m0.1s \u001b[0m\n",
 " \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n",
-"\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l\u001b[34m[+] Running 3/3\u001b[0m\n",
-" \u001b[32m✔\u001b[0m Container db_postgres \u001b[32mStarted\u001b[0m \u001b[34m0.2s \u001b[0m\n",
-" \u001b[32m✔\u001b[0m Container db_redis \u001b[32mStarted\u001b[0m \u001b[34m0.2s \u001b[0m\n",
+"\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/4\n",
+" ⠿ Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.2s \u001b[0m\n",
+" ⠿ Container db_postgres \u001b[39mStarting\u001b[0m \u001b[34m0.2s \u001b[0m\n",
+" ⠿ Container dozzle \u001b[39mStarting\u001b[0m \u001b[34m0.2s \u001b[0m\n",
+" \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n",
+"\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l\u001b[34m[+] Running 4/4\u001b[0m\n",
+" \u001b[32m✔\u001b[0m Container db_redis \u001b[32mStarted\u001b[0m \u001b[34m0.3s \u001b[0m\n",
+" \u001b[32m✔\u001b[0m Container db_postgres \u001b[32mStarted\u001b[0m \u001b[34m0.3s \u001b[0m\n",
+" \u001b[32m✔\u001b[0m Container dozzle \u001b[32mStarted\u001b[0m \u001b[34m0.3s \u001b[0m\n",
 " \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n",
 "\u001b[?25h"
 ]
@@ -42,7 +133,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 10,
+"execution_count": 3,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -143,6 +234,7 @@
 " # Feeds\n",
 " cur.execute( \"INSERT INTO SEARCH (search, type) VALUES ('https://api.missingkids.org/missingkids/servlet/XmlServlet?act=rss&LanguageCountry=en_US&orgPrefix=NCMC', 'rss_feed');\" )\n",
 " # Websites of interest\n",
+" cur.execute( \"INSERT INTO SEARCH (search, type) VALUES ('www.missingkids.org/poster', 'url_host');\" )\n",
 " cur.execute( \"INSERT INTO SEARCH (search, type) VALUES ('www.breitbart.com', 'url_host');\" )\n",
 " # Search keywords\n",
 " cur.execute( \"INSERT INTO SEARCH (search, type) VALUES ('child abuse', 'keyword_search');\" )\n",
@@ -159,7 +251,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 11,
+"execution_count": 4,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -211,7 +303,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 12,
+"execution_count": 5,
 "metadata": {},
 "outputs": [
 {
@@ -260,7 +352,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 13,
+"execution_count": 6,
 "metadata": {},
 "outputs": [
 {
@@ -285,7 +377,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 14,
+"execution_count": 7,
 "metadata": {},
 "outputs": [
 {
@@ -1,46 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"conda create -n matitos_fetcher python=3.12\n",
"conda activate matitos_fetcher\n",
"conda install -c conda-forge curl\n",
"pip install ipykernel \"psycopg[binary]\" git+https://github.com/ranahaani/GNews.git GoogleNews duckduckgo_search newspaper4k numpy beautifulsoup4 requests feedparser pytz redis fastapi uvicorn fastapi-utils lxml[html_clean]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!uvicorn app:app --host 0.0.0.0 --port 5000 --reload"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "matitos_fetcher",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
@@ -1,17 +0,0 @@
FROM continuumio/miniconda3:25.1.1-2

# App repository
COPY . /opt/app/

RUN conda install -c conda-forge curl
RUN pip install --no-cache-dir --upgrade "psycopg[binary]" git+https://github.com/ranahaani/GNews.git GoogleNews duckduckgo_search newspaper4k numpy beautifulsoup4 requests feedparser pytz redis fastapi uvicorn fastapi-utils lxml[html_clean]
RUN pip freeze
# GoogleNews-1.6.10 Pillow-10.1.0 PyYAML-6.0.1 aiofiles-23.2.1 anyio-3.7.1 beautifulsoup4-4.9.3 bs4-0.0.1 click-8.1.7 cssselect-1.2.0 dateparser-1.2.0 dnspython-1.16.0 duckduckgo_search-3.9.8 fastapi-0.104.1 fastapi-utils-0.2.1 feedfinder2-0.0.4 feedparser-6.0.10 filelock-3.13.1 gnews-0.3.6 greenlet-3.0.1 h11-0.14.0 h2-4.1.0 hpack-4.0.0 httpcore-1.0.2 httpx-0.25.2 hyperframe-6.0.1 jieba3k-0.35.1 joblib-1.3.2 lxml-4.9.3 newspaper3k-0.2.8 nltk-3.8.1 numpy-1.26.2 psycopg-3.1.13 psycopg-binary-3.1.13 pydantic-1.10.13 pymongo-3.12.3 python-dateutil-2.8.2 python-dotenv-0.19.2 pytz-2023.3.post1 redis-5.0.1 regex-2023.10.3 requests-2.26.0 requests-file-1.5.1 sgmllib3k-1.0.0 six-1.16.0 sniffio-1.3.0 socksio-1.0.0 soupsieve-2.5 sqlalchemy-1.4.50 starlette-0.27.0 tinysegmenter-0.3 tldextract-5.1.1 typing-extensions-4.8.0 tzlocal-5.2 uvicorn-0.24.0.post1

WORKDIR /opt/app

# https://www.uvicorn.org/settings/#resource-limits
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "80"]

# docker build -t fetch_app .
# docker run --rm --name container_fetch_app fetch_app
@@ -1,20 +0,0 @@
# Fetcher

```
conda create -n matitos_fetcher python=3.12
conda activate matitos_fetcher
conda install -c conda-forge curl
pip install ipykernel "psycopg[binary]" git+https://github.com/ranahaani/GNews.git GoogleNews duckduckgo_search newspaper4k numpy beautifulsoup4 requests feedparser pytz redis fastapi uvicorn fastapi-utils lxml[html_clean]
```


* Fetcher app
  - Contains several endpoints to perform a specific fetching type task
  - For more details, check in [app.py](app.py) /{fetch_type}

* Build and run
  - Important: To be deployed with other micro-services, [docker-compose.yml](../docker-compose.yml)
```
docker build -t fetch_app .
docker run --rm --name container_fetch_app fetch_app
```
@@ -1,79 +0,0 @@
from src.fetch_feed import FetchFeed
from src.fetch_parser import FetchParser
from src.fetch_search import FetchSearch

from src.missing_kids_fetch import MissingKidsFetch
from src.missing_kids_status import MissingKidsStatus

from src.url_status import UpdateErrorURLs
from src.db_utils import DB_Handler

import src.credentials as cred
from logging_ import get_logger

from fastapi import FastAPI, BackgroundTasks
##################################################################################################

logger = get_logger()
logger.info("Environment: {}".format(cred.ENVIRONMENT))

db_handler = DB_Handler(cred.db_connect_info, cred.redis_connect_info)

app = FastAPI()

@app.get("/")
def hello_world():
    return {"message": "Ok"}

@app.get("/{process_type}")
async def process(background_tasks: BackgroundTasks, process_type: str):
    # Concurrent job running
    logger.info("Triggered: {}".format(process_type))

    if (process_type == "fetch_feeds"):
        task_run = FetchFeed(db_handler).run
    elif (process_type == "fetch_parser"):
        task_run = FetchParser(db_handler).run
    elif (process_type == "search") or (process_type == "search_full"):
        task_run = FetchSearch(cred.db_connect_info, cred.redis_connect_info, full=True).run
    elif (process_type == "search_reduced"):
        task_run = FetchSearch(cred.db_connect_info, cred.redis_connect_info, full=False).run

    # Selenium based
    elif (process_type == "fetch_missing_kids_reduced"):
        task_run = MissingKidsFetch(db_handler, num_pages=4).run
    elif (process_type == "fetch_missing_kids_full"):
        task_run = MissingKidsFetch(db_handler, num_pages=100000).run

    elif (process_type == "update_missing_kids_status_reduced"):
        task_run = MissingKidsStatus(cred.db_connect_info, cred.redis_connect_info, num_urls=50).update_missing_kids_status
    elif (process_type == "update_missing_kids_status_full"):
        task_run = MissingKidsStatus(cred.db_connect_info, cred.redis_connect_info, num_urls=None).update_missing_kids_status

    elif (process_type == "update_error_urls"):
        task_run = UpdateErrorURLs(cred.db_connect_info, cred.redis_connect_info, num_urls=100).update_error_urls_status
    else:
        return {"message": "ERROR. Unknown fetcher type!"}

    # Run task
    background_tasks.add_task(task_run)
    # Return message
    return {"message": "Started {}: Ok".format(process_type)}

"""
# TODO: Instead of background tasks!

import rq
import redis

# Redis connection
redis_conn = redis.Redis(host='localhost', port=6379, db=0)
queue = rq.Queue(connection=redis_conn)

# ...
# Queue the processing task
dict_args= {"db_handler": db_handler, }
queue.enqueue(task_run, **dict_args)

# https://python-rq.org/
"""
@@ -1,502 +0,0 @@
import psycopg
import redis
import traceback
import random
import requests
import json
import os
from .url_utils import process_article
from .logger import get_logger
logger = get_logger()

# TODO: URL_DB_HANDLER, _get_search_list, _get_url_host, _get_url_host_list, ...
# The rest, elsewhere

class DB_Handler():
    def __init__(self, db_connect_info, redis_connect_info):
        logger.debug("Initializing URL DB writer")
        self.db_connect_info = db_connect_info
        self.redis_instance = redis.Redis(host=redis_connect_info.get("host"), port=redis_connect_info.get("port"))
        self.redis_expiry_seconds = redis_connect_info.get("expiry_seconds", 172800) # Default: 48 hours

        try:
            self.redis_instance.ping()
logger.debug("Succesfully pinged Redis")
|
|
||||||
        except Exception as e:
            logger.warning("Error trying to ping Redis: {}".format(str(e)))

    def get_urls_count(self, last_minutes_check):
        #####################
        ### Get number of URLs within last X minutes
        #####################
        try:
            # Update
            with psycopg.connect(self.db_connect_info) as conn:
                # Open cursor
                cursor = conn.cursor()
                num_urls = cursor.execute("SELECT COUNT(*) FROM URLS WHERE ts_fetch >= current_timestamp - interval '{} minutes';".format(last_minutes_check)).fetchone()[0]
        except Exception as e:
            logger.warning("Error updating URLs status: {}".format(str(e)))
            num_urls = None
        return num_urls

    def _get_url_host_list(self):
        try:
            with psycopg.connect(self.db_connect_info) as conn:
                # List of URL host
                list_url_host = [l[0] for l in conn.execute("SELECT url_host FROM WEBSITE_OF_INTEREST;").fetchall()]
                # Clean http / https from URLs
                list_url_host = [l.replace("https://", "").replace("http://", "") for l in list_url_host]
                # Clean last slash if exists
                list_url_host = [ l if not l.endswith("/") else l[:-1] for l in list_url_host]
        except Exception as e:
            logger.warning("Exception fetching URL host list: " + str(e))
            list_url_host = []
        return list_url_host

    def _get_search_list(self):
        try:
            with psycopg.connect(self.db_connect_info) as conn:
                # List of keyword searches
                list_search_text = [l[0] for l in conn.execute("SELECT keyword_search FROM SEARCH;").fetchall()]
        except Exception as e:
            logger.warning("Exception fetching searches list: " + str(e))
            list_search_text = []
        return list_search_text

    def _get_feed_urls(self):
        try:
            with psycopg.connect(self.db_connect_info) as conn:
                list_url_feeds = conn.execute("SELECT rss_feed FROM FEED;").fetchall()
                # Decode (tuple with 1 element)
                list_url_feeds = [l[0] for l in list_url_feeds]
        except Exception as e:
            logger.warning("Exception fetching RSS sites: " + str(e))
            list_url_feeds = []
        return list_url_feeds

    def _get_url_hosts(self):
        try:
            with psycopg.connect(self.db_connect_info) as conn:
                list_url_hosts = conn.execute("SELECT url_host FROM WEBSITE_OF_INTEREST;").fetchall()
                # Decode (tuple with 1 element)
                list_url_hosts = [l[0] for l in list_url_hosts]
        except Exception as e:
            logger.warning("Exception fetching RSS sites: " + str(e))
            list_url_hosts = []
        return list_url_hosts

    def _format(self, values):
        # Replace single quote ' with ''. Based on https://stackoverflow.com/a/12320729
        # String -> 'string', Int -> '1' (string-based), None -> NULL (no quotes for pgSQL to interpret Null value)
        if (type(values) == list) or (type(values) == tuple):
            insert_args = "(" + ", ".join([ "NULL" if v is None else "'" + str(v).replace("'", "''") + "'" for v in values]) + ")"
        elif (type(values) == str):
            insert_args = "({})".format( "NULL" if values is None else "'" + values.replace("'", "''") + "'" )
        else:
            logger.warning("Error formatting input values: {}".format(values))
            assert False
        return insert_args

    def _get_cached_canonical_url(self, url):
        ### Redis: URL processed recently? -> Avoid increasing SERIAL counter & efficiency of DB
        try:
            filter_url = self.redis_instance.get(url)
            if (filter_url is not None):
                filter_url = filter_url.decode("utf-8")
        except Exception as e:
            logger.warning("Exception querying Redis: {}".format(str(e)))
            filter_url = None
        return filter_url

    def _update_urls_status(self, dict_status_ids):
        #####################
        ### Update status to array of URL IDs
        #####################
        try:
            # Update
            with psycopg.connect(self.db_connect_info) as conn:
                # Open cursor
                cursor = conn.cursor()
                # Autocommit at end of transaction (Atomic insert of URLs and sources)
                with conn.transaction() as tx:
                    for key_status, value_ids in dict_status_ids.items():
                        cursor.execute("UPDATE URLS SET status='{}' WHERE id IN ({});".format(key_status, ",".join([str(v) for v in value_ids])))
        except Exception as e:
            logger.warning("Error updating URLs status: {}".format(str(e)))

    def _get_missing_kids_urls(self, num_urls=None):
        #####################
        ### Get list of Missing Kids URLs
        #####################
        try:
            missing_kids_ids_and_urls = []
            if (num_urls is None):
                limit = 500
            else:
                limit = num_urls
            offset = 0
            with psycopg.connect(self.db_connect_info) as conn:
                # Open cursor
                cursor = conn.cursor()
                while True:
                    # Query
                    missing_kids_ids_and_urls_query = cursor.execute("SELECT id, url, status FROM URLS WHERE url LIKE '%missingkids.org/poster%' ORDER BY ts_fetch DESC LIMIT {} OFFSET {};".format(limit, offset)).fetchall()
                    # Finished?
                    if (len(missing_kids_ids_and_urls_query) == 0):
                        break
                    # Extend
                    missing_kids_ids_and_urls = missing_kids_ids_and_urls + missing_kids_ids_and_urls_query
                    # Offset
                    offset += len(missing_kids_ids_and_urls_query)
                    # Stop?
                    if (num_urls is not None) and (len(missing_kids_ids_and_urls) >= num_urls):
                        break

        except Exception as e:
            logger.warning("Error getting Missing Kids URLs: {}".format(str(e)))
            missing_kids_ids_and_urls = []
        return missing_kids_ids_and_urls

    def _get_error_urls(self, num_urls=None):
        #####################
        ### Get list of Missing Kids URLs
        #####################
        try:
            error_urls = []
            if (num_urls is None):
                limit = 500
            else:
                limit = num_urls
            offset = 0
            with psycopg.connect(self.db_connect_info) as conn:
                # Open cursor
                cursor = conn.cursor()
                while True:
                    # Query
                    error_urls_query = cursor.execute("SELECT id, url FROM URLS WHERE status='error' ORDER BY ts_fetch DESC LIMIT {} OFFSET {};".format(limit, offset)).fetchall()
                    # Finished?
                    if (len(error_urls_query) == 0):
                        break
                    # Extend
                    error_urls = error_urls + error_urls_query
                    # Offset
                    offset += len(error_urls_query)
                    # Stop?
                    if (num_urls is not None) and (len(error_urls) >= num_urls):
                        break

        except Exception as e:
            logger.warning("Error getting Error URLs: {}".format(str(e)))
            error_urls = []
        return error_urls

    def _decode_urls(self, urls_fetched, list_domains_to_filter, list_pattern_status_tuple): # TODO: language for urls_fetched...
        """
        # TODO: REFACTOR
        For each input url

        Already processed?
        -> Update on Redis expire time
        -> Associate to source
        Not processed? Get main URL:
        -> URL Canonical valid?
        -> Rely on this as main URL
        -> URL Canonical not valid?
        -> Use input url, unless it's a news.google.com link
        -> If news.google.com link, filter out. REDIS?
        Main URL processing:
        -> Update in REDIS, association url -> url_canonical
        -> url != url_canonical: Add in duplicate table
        If both != news.google.com
        """

        # URLs to insert, URLs duplicated association, URL to Canonical form
        list_insert_url_tuple_args, list_tuple_canonical_duplicate_urls, dict_full_urls_to_canonical = [], [], {}

        # URL VS CANONICAL:
        # News URL returned: https://news.google.com/articles/CBMifmh0dHBzOi8vd3d3LmJyZWl0YmFydC5jb20vMm5kLWFtZW5kbWVudC8yMDIzLzA0LzAzL2dvdi1kZXNhbnRpcy1zaWducy1iaWxsLW1ha2luZy1mbG9yaWRhLXRoZS0yNnRoLWNvbnN0aXR1dGlvbmFsLWNhcnJ5LXN0YXRlL9IBAA?hl=en-US&gl=US&ceid=US%3Aen
        # Corresponds to canonical URL: https://www.breitbart.com/2nd-amendment/2023/04/03/gov-desantis-signs-bill-making-florida-the-26th-constitutional-carry-state/

        for url in urls_fetched:
            # Domain to filter? Input url
            filter_due_to_domain = False
            for domain_to_filter in list_domains_to_filter:
                if (domain_to_filter in url):
                    logger.debug("Domain filter applied based on {} for input URL: {}".format(domain_to_filter, url))
                    filter_due_to_domain = True
            if (filter_due_to_domain):
                continue

            # URL processed recently? -> Filter and avoid increasing SERIAL counter & efficiency of DB
            cached_canonical_url = self._get_cached_canonical_url(url)
            if (cached_canonical_url is not None):
                # Even if url processed, need to add url_canonical to list_filtered_urls, so as to associate search source to canonical URL (canonical is the main URL entry)
                dict_full_urls_to_canonical[url] = cached_canonical_url # X -> Y
                # If url has been processed, so was its canonical form
                logger.debug("Filtering out already inserted (processed) URL and its canonical form: {} {}".format(url, cached_canonical_url))
                continue

            # Process TODO: Add language...
            url_canonical, article_elements, article_status = process_article(url, list_pattern_status_tuple)
            # TODO: Store article_elements information to insert into OS after inserted into DB (and therefore having associated url_id)

            # Could not retrieve redirection for news.google.com based URL? Continue (avoid inserting in DB)
            if (url_canonical is None) and ("news.google.com" in url):
                logger.debug("Filtering empty canonical link for base URL based on news.google.com: {}".format(url))
                continue
            # Canonical URL still news.google.com? Continue (avoid inserting in DB)
            if (url_canonical is not None) and ("news.google.com" in url_canonical):
                logger.debug("Filtering canonical news.google.com based URL: {}".format(url_canonical))
                continue

            # Domain to filter? Input canonical_url
            filter_due_to_domain = False
            for domain_to_filter in list_domains_to_filter:
                if (url_canonical is not None) and (domain_to_filter in url_canonical):
                    filter_due_to_domain = True
            if (filter_due_to_domain):
                logger.info("Filtering due to domain input URL, Canonical_URL: {} {}".format(url, url_canonical))
                continue

            if (url_canonical is None) or (article_status == "error"):
                logger.debug("Processing failed for URL: {}".format(url))
                # Still insert URL with "error"? -> If processed later, might have inconsistent sources (url vs url_canonical). Only store if not news.google.com based
                if ("news.google.com" in url) or ("consent.google.com" in url):
logging.debug("Not able to process Google News link, skipping: {}".format(url))
|
|
||||||
                else:
                    dict_full_urls_to_canonical[url] = url # X -> X
                    list_insert_url_tuple_args.append( (url, article_status) )
                continue

            # URL was not processed (not sure canonical yet). Generate URL_CANONICAL <-> URL_ORIGINAL association if they're different
            if (url_canonical != url):
                list_tuple_canonical_duplicate_urls.append( (url_canonical, url) )
            # Dict: url -> canonical (update association)
            dict_full_urls_to_canonical[url] = url_canonical # X -> Y or X

            # Canonical URL processed recently? -> Filter and avoid increasing SERIAL counter & efficiency of DB
            if (self._get_cached_canonical_url(url_canonical) is not None):
                # Canonical URL was already processed
                logger.debug("Filtering out already inserted (processed) URL canonical: {}".format(url_canonical))
            else:
                # Insert url_canonical to DB formatted
                list_insert_url_tuple_args.append( (url_canonical, article_status) )
            # Canonical URL different? Process
            if (url_canonical != url):
                if ("news.google.com" in url) or ("consent.google.com" in url):
logging.debug("Not adding google.news.com based link, skipping: {}".format(url))
|
|
||||||
                else:
                    # Fetched url -> duplicate (using canonical as main link)
                    article_status = "duplicate"
                    # Insert url (non-canonical) to DB formatted
                    list_insert_url_tuple_args.append( (url, article_status) )

        return list_insert_url_tuple_args, list_tuple_canonical_duplicate_urls, dict_full_urls_to_canonical

    def _insert_urls(self, cursor, list_insert_url_tuple_args):
        #####################
        ### Insert URLs with status
        #####################
        if (len(list_insert_url_tuple_args) > 0):
            insert_args = ', '.join( [ self._format(t) for t in list_insert_url_tuple_args] )
            # Insert. (url_1, status_1), (url_2, status_2), ...
            sql_code = "INSERT INTO URLS {} VALUES {} ON CONFLICT (url) DO NOTHING;".format("(url, status)", insert_args)
            # logger.debug("SQL CODE: {}".format(sql_code))
            c = cursor.execute(sql_code)
            # NOTE: Not using "RETURNING id" since previously inserted URLs are not returned (ON CONFLICT)
            # https://stackoverflow.com/questions/35949877/how-to-include-excluded-rows-in-returning-from-insert-on-conflict/35953488#35953488

    def _insert_urls_duplicated(self, cursor, list_tuple_canonical_duplicate_urls):
        #####################
        ### Insert duplicated URLs
        #####################
        if (len(list_tuple_canonical_duplicate_urls) > 0):
            # Flatten, format, set to remove duplicates
            args_duplicated_urls_set = "(" + ', '.join( set( [ "'" + str(y).replace("'", "''") + "'" for x in list_tuple_canonical_duplicate_urls for y in x] ) ) + ")"

            # Dict: url -> id
            dict_url_to_id = {}
            # Get url -> id association to populate duplicated URLs
            for (id_, url_) in cursor.execute("SELECT id, url FROM URLS WHERE url IN {};".format(args_duplicated_urls_set)).fetchall():
                dict_url_to_id[url_] = id_

            # Convert tuples (url_canonical, url) -> (id_url_canonical, id_url) to insert in DB
            # ORIGINAL CODE. Issue, might not have found association to all urls
            ### list_tuple_canonical_duplicate_urls_ids = [ (dict_url_to_id[t[0]], dict_url_to_id[t[1]]) for t in list_tuple_canonical_duplicate_urls]

            list_tuple_canonical_duplicate_urls_ids = []
            for (url_1, url_2) in list_tuple_canonical_duplicate_urls:
                id_url_1, id_url_2 = dict_url_to_id.get(url_1), dict_url_to_id.get(url_2)
                if (id_url_1 is None) or (id_url_2 is None):
                    logger.debug("Skipping duplicate association due to no url -> id_url mapping available for tuple: {} {}".format(url_1, url_2))
                else:
                    list_tuple_canonical_duplicate_urls_ids.append( (id_url_1, id_url_2) )

            if (len(list_tuple_canonical_duplicate_urls_ids) > 0):
                insert_args = ', '.join( [ self._format(t) for t in list_tuple_canonical_duplicate_urls_ids] )
                # Insert. (id_url_canonical_1, id_url_1), ...
                sql_code = "INSERT INTO URLS_DUPLICATE {} VALUES {} ON CONFLICT DO NOTHING;".format("(id_url_canonical, id_url_duplicated)", insert_args)
                # logger.debug("SQL CODE: {}".format(sql_code))
                c = cursor.execute(sql_code)

    def _get_pattern_status_list(self):
        #####################
        ### Get list of domains to filter
        #####################
        # TODO: Cache on redis and query once every N hours? ...
        try:
            with psycopg.connect(self.db_connect_info) as conn:
                # Open cursor
                cursor = conn.cursor()
                # TODO: Cache on Redis
                list_pattern_status = cursor.execute("SELECT pattern, priority, status FROM STATUS_PATTERN_MATCHING;").fetchall()
        except Exception as e:
            logger.warning("Error getting pattern status list: {}".format(str(e)))
            list_pattern_status = []
        return list_pattern_status

    def _get_domains_to_filter(self):
        #####################
        ### Get list of domains to filter
        #####################
        # TODO: Cache on redis and query once every N hours? ...
        try:
            with psycopg.connect(self.db_connect_info) as conn:
                # Open cursor
                cursor = conn.cursor()
                # TODO: Cache on Redis
                sites_to_filter = [e[0] for e in cursor.execute("SELECT url_host FROM WEBSITE_TO_FILTER;").fetchall() ]
        except Exception as e:
            logger.warning("Error getting domains to filter: {}".format(str(e)))
            sites_to_filter = []
        return sites_to_filter

    def _get_cached_source_id(self, source):
        ### Redis: URL processed recently? -> Avoid increasing SERIAL counter & efficiency of DB
        try:
            source_id = self.redis_instance.get(source)
            if (source_id is not None):
                source_id = source_id.decode("utf-8")
        except Exception as e:
            logger.warning("Exception querying Redis: {}".format(str(e)))
            source_id = None
        return source_id

    def _get_source_id(self, cursor, source):
        #####################
        ### Get source corresponding id
        #####################
        # Cached?
        id_source = self._get_cached_source_id(source)
        if (id_source is None):
            c = cursor.execute("SELECT id FROM SOURCE WHERE source='{}'".format(source.replace("'", "''"))).fetchone()
            if (c is None) or (len(c) == 0):
                # Source does not exist, insert and get id
                c = cursor.execute("INSERT INTO SOURCE (source) VALUES ('{}') RETURNING id;".format(source.replace("'", "''"))).fetchone()
            # Decode source id
            id_source = c[0]
            # Cache
            print("*"*10, source, id_source)
            self.redis_instance.set(source, id_source, ex=self.redis_expiry_seconds)
        return id_source

    def _get_urls_id(self, cursor, urls_full):
        #####################
        ### Get id of inserted and filtered URLs
        #####################
        # TODO: Cache url -> url_id, url_canonical
        if (len(urls_full) == 0):
            return []
        # Get inserted and filtered URL ids (unnested). Filtered URLs are also retrieved since they might have been fetched from a new source
        in_inserted_filtered_urls = "(" + ', '.join(["'" + u.replace("'", "''") + "'" for u in urls_full]) + ")"
        id_urls_related = [ i[0] for i in cursor.execute("SELECT id FROM URLS WHERE url IN {};".format(in_inserted_filtered_urls)).fetchall() ]
        return id_urls_related

    def _insert_urls_source(self, cursor, id_urls_related, id_source):
        #####################
        ### Insert URL sources: (id_url_1, id_source), (id_url_2, id_source), ...
        #####################
        if (len(id_urls_related) == 0) or (id_source is None):
            return
        columns = "(id_url, id_source)"
        insert_args = ', '.join( [ self._format([id_url, id_source]) for id_url in id_urls_related ] )
        # Insert
        sql_code = "INSERT INTO URLS_SOURCE {} VALUES {} ON CONFLICT DO NOTHING;".format(columns, insert_args)
        # logger.debug("SQL CODE: {}".format(sql_code))
        c = cursor.execute(sql_code)

    def write_batch(self, urls_fetched, source):
        # Chunks of 50 elements
        n = 50
        # Divide in small chunks
        urls_fetched_chunks = [urls_fetched[i:i + n] for i in range(0, len(urls_fetched), n)]
        # Process
        for urls_fetched_chunk_i in urls_fetched_chunks:
            self._write_small_batch(urls_fetched_chunk_i, source)

    def _write_small_batch(self, urls_fetched, source):
        try:
            logger.info("Fetched #{} URLs, source: {}".format(len(urls_fetched), source))

            if (len(urls_fetched) == 0):
                logger.debug("Empty batch of urls (not writing to DB) for source: {}".format(source))
                return

            # Shuffle URLs to reduce continuous URLs of same URL host (minimize chance of being blocked for too many continuous requests)
            random.shuffle(urls_fetched)

            # Get list of domains to filter
            list_domains_to_filter = self._get_domains_to_filter()
            # Get list of (pattern, priority, status) tuples to override status if required
            list_pattern_status_tuple = self._get_pattern_status_list()
            # Sort pattern tuples by priority
            list_pattern_status_tuple.sort(key=lambda tup: tup[1], reverse=True)

            # Process URLs to update DB
            list_insert_url_tuple_args, list_tuple_canonical_duplicate_urls, dict_full_urls_to_canonical = self._decode_urls(urls_fetched, list_domains_to_filter, list_pattern_status_tuple)
            # Full set of URL and its canonical form (to associate them to a search), both to insert and filter
            urls_full = set(dict_full_urls_to_canonical.keys()).union( set(dict_full_urls_to_canonical.values()) )

            # Insert
            with psycopg.connect(self.db_connect_info) as conn:
                # Open cursor
                cursor = conn.cursor()
                # Autocommit at end of transaction (Atomic insert of URLs and sources)
                with conn.transaction() as tx:
                    # Insert processed URLs
                    self._insert_urls(cursor, list_insert_url_tuple_args)
                    # Insert URLs duplicated (canonical != fetched url)
                    self._insert_urls_duplicated(cursor, list_tuple_canonical_duplicate_urls)

                    # Get source id in DB
                    id_source = self._get_source_id(cursor, source)
                    # Get IDs of all related URLs
                    id_urls_related = self._get_urls_id(cursor, urls_full)
                    # Insert search source associated to URLs
                    self._insert_urls_source(cursor, id_urls_related, id_source)

            # Update Redis status of inserted and filtered URLs after writing to DB
            for url, url_canonical in dict_full_urls_to_canonical.items():
                try:
                    # Set with updated expiry time
                    self.redis_instance.set(url, url_canonical, ex=self.redis_expiry_seconds)
                    if (url != url_canonical):
                        self.redis_instance.set(url_canonical, url_canonical, ex=self.redis_expiry_seconds)
                except Exception as e:
                    logger.warning("Exception running set in Redis: {}".format(str(e)))

            if (len(list_insert_url_tuple_args) > 0):
                try:
                    webhook_token = os.environ.get("CLIQ_WEBHOOK_TOKEN")
                    endpoint_message = "https://cliq.zoho.com/api/v2/channelsbyname/urlretrievalbot/message?zapikey={}".format(webhook_token)

                    payload = json.dumps({"text": "Fetched #{} new URLs, source: {}".format(len(list_insert_url_tuple_args), source) })
                    r = requests.post(endpoint_message, data=payload)
                except Exception as e:
                    logger.warning("Webhook failed: {}".format(str(e)))

            logger.debug("URL DB write finished")
        except Exception as e:
            logger.warning( "Exception writing to URL_DB:\n{}".format(traceback.format_exc()) )
            logger.debug( "Exception --- List of URLs: {}".format(str(urls_fetched)) )
@@ -1,48 +0,0 @@
from .db_utils import DB_Handler
import feedparser
import dateutil.parser
from .logger import get_logger
logger = get_logger()

class FetchFeed():
    def __init__(self, db_handler: DB_Handler) -> None:
        logger.debug("Initializing News feed")
        self.db_handler = db_handler

    def run(self):
        try:
            logger.debug("Starting NewsFeed.run()")
            # Get feeds
            list_url_feeds = self.db_handler._get_feed_urls()
            logger.debug("Fetching news from feeds: {}".format(str(list_url_feeds)))

            # Process via RSS feeds
            for url_feed in list_url_feeds:
                # Initialize
                urls_fetched, urls_publish_date = [], []
                # Fetch feeds
                feeds = feedparser.parse(url_feed)
                # Parse
                for f in feeds.get("entries", []):
                    # Get URL
                    url = f.get("link", None)
                    # Process?
                    if (url is not None):
                        # Available publish date?
                        publish_date_parsed = f.get("published_parsed")
                        if (publish_date_parsed is None):
                            publish_date = f.get("published", None)
                            if (publish_date is not None):
                                publish_date_parsed = dateutil.parser.parse(publish_date)

                        # Published date
                        urls_publish_date.append(publish_date_parsed)
                        # URL
                        urls_fetched.append(url)

                # URL fetching source
                source = "feed {}".format(url_feed)
                # Write to DB
                self.db_handler.write_batch(urls_fetched, source)
        except Exception as e:
            logger.warning("Exception in NewsFeed.run(): {}".format(str(e)))
@@ -1,45 +0,0 @@
from .db_utils import DB_Handler
import newspaper
from .logger import get_logger
logger = get_logger()

class FetchParser():
    def __init__(self, db_handler: DB_Handler) -> None:
        logger.debug("Initializing News SiteParsing newspaper4k")
        self.db_handler = db_handler

    # TODO: MOVE LOGIC ELSEWHERE!
    def _postprocess(self, article_urls):
        return [url.replace("#comment-stream", "") for url in article_urls]

    def run(self):
        try:
            logger.debug("Starting NewsSiteParsing.run() for {}")

            # Get URL hosts
            list_url_hosts = self.db_handler._get_url_hosts()
            logger.info("Fetching news by parsing URL hosts: {}".format(str(list_url_hosts)))

            # Process newspaper4k build method
            for url_host_feed in list_url_hosts:
                # Protocol
                if not (url_host_feed.startswith("http")):
                    url_host_feed_formatted = "https://" + url_host_feed
                else:
                    url_host_feed_formatted = url_host_feed

                logger.debug("Fetching newspaper4k parsing based on URL: {}".format(url_host_feed_formatted))
                # Source object
                url_host_built = newspaper.build(url_host_feed_formatted)
                # Get articles URL list
                urls_fetched = url_host_built.article_urls()
                # TODO: MOVE!
                # Post-processing
                urls_fetched = self._postprocess(urls_fetched)

                # URL fetching source
                source = "newspaper4k {}".format(url_host_feed)
                # Write to DB
                self.db_handler.write_batch(urls_fetched, source)
        except Exception as e:
            logger.warning("Exception in NewsSiteParsing.run(): {}".format(str(e)))
@@ -1,73 +0,0 @@
from .db_utils import DB_Handler
from .utils import get_searxng_instances
from .fetch_search_sources import FetcherDuckDuckGo, FetcherGNews, FetcherGoogleNews, FetcherSearxNews, FetcherPreSearch
from .logger import get_logger
logger = get_logger()

class FetchSearch():
    def __init__(self, db_handler: DB_Handler, full=True) -> None:
        logger.debug("Initializing News feed")
        self.db_handler = db_handler
        self.full_search = full

    def _run_fetching(self, search_text):
        logger.debug("Starting _run_fetching() for {}".format(search_text))

        # Common parameters
        lang, region = "en", "US"

        ### PreSearch
        dict_params_news = {"search": search_text}
        FetcherPreSearch(**dict_params_news).fetch_articles(self.db_handler)

        ### DuckDuckGo
        period = "d"
        dict_params_news = {"search": search_text, "lang": "wt", "region": "wt", "search_category": "news", "period": period}
        FetcherDuckDuckGo(**dict_params_news).fetch_articles(self.db_handler)
        dict_params_general = {"search": search_text, "lang": "wt", "region": "wt", "search_category": "general", "period": period}
        FetcherDuckDuckGo(**dict_params_general).fetch_articles(self.db_handler)

        if (self.full_search):
            # Avoid site:{} search due to G-Bypass required time
            if ("site:" not in search_text):
                ### GNews
                dict_params = {"search": search_text, "lang": "wt", "region": "wt", "period": period}
                FetcherGNews(**dict_params).fetch_articles(self.db_handler)

                ### GoogleNews
                dict_params_news = {"search": search_text, "lang": lang, "region": region, "search_category": "news", "period": period}
                FetcherGoogleNews(**dict_params_news).fetch_articles(self.db_handler)
                # dict_params_general = {"search": search_text, "lang": lang, "region": region, "search_category": "general", "period": period}

        if False:
            ### SearxNG
            period = "day"
            for searx_instance in get_searxng_instances():
                dict_params_news = {"search": search_text, "searx_instance": searx_instance, "lang": lang, "region": region, "search_category": "news", "period": period}
                dict_params_general = {"search": search_text, "searx_instance": searx_instance, "lang": lang, "region": region, "search_category": "general", "period": period}
                # Append thread
                FetcherSearxNews(**dict_params_news).fetch_articles(self.db_handler)
                FetcherSearxNews(**dict_params_general).fetch_articles(self.db_handler)

        logger.debug("Finished _run_fetching()")

    def run(self):
        try:
            logger.info("Fetching text searches & URL hosts of interest")

            # Get text searches of interest
            list_search_text_of_interest = self.db_handler._get_search_list()

            # Get URL host of interest
            list_url_host = self.db_handler._get_url_host_list()
            # Get text searches for URL hosts
            list_search_text_url_host = ["site:{}".format(l) for l in list_url_host]

            for search_text in list_search_text_of_interest + list_search_text_url_host:
                logger.debug("Fetching news for search: {}".format(search_text))
                self._run_fetching(search_text)

            logger.info("Finished fetching text searches & URL hosts of interest")
        except Exception as e:
            logger.warning("Exception in NewsSearch.run(): {}".format(str(e)))
@@ -1,384 +0,0 @@
|
|||||||
from duckduckgo_search import DDGS
|
|
||||||
from gnews import GNews
|
|
||||||
from GoogleNews import GoogleNews
|
|
||||||
|
|
||||||
import requests
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
import os
|
|
||||||
import time
|
|
||||||
import json
|
|
||||||
import numpy as np
|
|
||||||
import random
|
|
||||||
from .google_bypass import GoogleByPass
|
|
||||||
from abc import ABC, abstractmethod
|
|
||||||
from .logger import get_logger
|
|
||||||
logger = get_logger()
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Generic fetcher (fetches articles, writes to DB)
|
|
||||||
class FetcherAbstract(ABC):
|
|
||||||
@abstractmethod
|
|
||||||
def _fetch(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def fetch_articles(self, db_writer):
|
|
||||||
logger.debug("Starting fetch() for {}".format(self.name))
|
|
||||||
# Fetch articles
|
|
||||||
list_news = self._fetch()
|
|
||||||
logger.info("Found #{} articles for search: {}".format(len(list_news), self.name))
|
|
||||||
# Write to DB
|
|
||||||
db_writer.write_batch(list_news, self.name)
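# Illustrative sketch only (not part of the original module; class name and URL are
# hypothetical): a concrete fetcher just implements _fetch(), and inherits
# fetch_articles(), which handles the logging and the DB write.
class FetcherStatic(FetcherAbstract):
    def __init__(self, urls):
        self.name = "static"
        self.urls = urls

    def _fetch(self):
        # Return the given article URLs unchanged
        return list(self.urls)

# Usage sketch: FetcherStatic(["https://example.com/article"]).fetch_articles(db_writer)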
|
|
||||||
|
|
||||||
# https://techblog.willshouse.com/2012/01/03/most-common-user-agents/
|
|
||||||
|
|
||||||
user_agents_list = [
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
|
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
|
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
|
|
||||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
|
|
||||||
"Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/111.0",
|
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.3 Safari/605.1.15",
|
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/111.0",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/112.0",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; rv:111.0) Gecko/20100101 Firefox/111.0",
|
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
|
|
||||||
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/111.0",
|
|
||||||
"Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0",
|
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.4 Safari/605.1.15",
|
|
||||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
|
|
||||||
"Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/112.0",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.44",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.54",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.62",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 OPR/96.0.0.0",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 OPR/97.0.0.0",
|
|
||||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.48",
|
|
||||||
"Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/110.0",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.34",
|
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.39",
|
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.2 Safari/605.1.15",
|
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; rv:112.0) Gecko/20100101 Firefox/112.0",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.51",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/109.0",
|
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/112.0",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
|
|
||||||
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/112.0",
|
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/110.0",
|
|
||||||
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/110.0",
|
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
|
|
||||||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0",
|
|
||||||
"Mozilla/5.0 (X11; CrOS x86_64 14541.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 YaBrowser/23.3.0.2246 Yowser/2.5 Safari/537.36",
|
|
||||||
"Mozilla/5.0 (X11; Linux x86_64; rv:108.0) Gecko/20100101 Firefox/108.0",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36",
|
|
||||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
|
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36",
|
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15",
|
|
||||||
"Mozilla/5.0 (Windows NT 6.1; rv:102.0) Gecko/20100101 Goanna/6.0 Firefox/102.0 PaleMoon/32.0.0",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.41",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; rv:110.0) Gecko/20100101 Firefox/110.0",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 YaBrowser/23.1.5.708 Yowser/2.5 Safari/537.36",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/113.0",
|
|
||||||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"
|
|
||||||
]
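# Illustrative helper (not part of the original module; the function name is hypothetical):
# the list above is meant to be sampled when building request headers, as FetcherSearxNews
# does further down with np.random.choice. Reuses the module-level "random" import.
def random_user_agent_header():
    # Rotate user agents to reduce the chance of scraper detection
    return {"User-agent": random.choice(user_agents_list)}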
class FetcherPreSearch(FetcherAbstract):
|
|
||||||
def __init__(self, search):
|
|
||||||
"""
|
|
||||||
# period ->
|
|
||||||
- h = hours (eg: 12h)
|
|
||||||
- d = days (eg: 7d)
|
|
||||||
- m = months (eg: 6m)
|
|
||||||
- y = years (eg: 1y)
|
|
||||||
"""
|
|
||||||
self.search = search
|
|
||||||
self.period = "1d" # TODO Fixed for the moment
|
|
||||||
# self.lang = lang
|
|
||||||
# self.region = region
|
|
||||||
search_category = "news"
|
|
||||||
self.name = "presearch {} {} {}".format(search, search_category, self.period)
|
|
||||||
|
|
||||||
def _fetch(self):
|
|
||||||
try:
|
|
||||||
# PreSearch fetching endpoint, parameter search keyword
|
|
||||||
presearch_fetch_endpoint = "http://selenium_app:80/fetch_presearch/?search_keyword={}".format(self.search)
|
|
||||||
# Timeout: 15 minutes
|
|
||||||
r = requests.get(presearch_fetch_endpoint, timeout=900)
|
|
||||||
# Decode
|
|
||||||
list_news = json.loads(r.text).get("list_urls", [])
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning("Timeout on request: {}. {}".format(presearch_fetch_endpoint, str(e)))
|
|
||||||
list_news = []
|
|
||||||
return list_news
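# For reference (derived from the request above; example values are illustrative): the
# selenium_app service is expected to answer
#   GET http://selenium_app:80/fetch_presearch/?search_keyword=<search>
# with a JSON body of the form {"list_urls": ["https://publisher.example/story", ...]},
# so _fetch() returns a plain list of article URLs.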
class FetcherGNews(FetcherAbstract):
|
|
||||||
def __init__(self, search, period, lang="en", region="US"):
|
|
||||||
"""
|
|
||||||
# period ->
|
|
||||||
- h = hours (eg: 12h)
|
|
||||||
- d = days (eg: 7d)
|
|
||||||
- m = months (eg: 6m)
|
|
||||||
- y = years (eg: 1y)
|
|
||||||
"""
|
|
||||||
self.search = search
|
|
||||||
self.period = period
|
|
||||||
self.lang = lang
|
|
||||||
self.region = region
|
|
||||||
search_category = "news"
|
|
||||||
self.name = "gnews {} {} {} {}".format(search, search_category, period, "{}-{}".format(lang, region))
|
|
||||||
|
|
||||||
def _fetch(self):
|
|
||||||
try:
|
|
||||||
list_dict_news = GNews(self.lang, self.region, period=self.period).get_news(self.search)
|
|
||||||
# Decode
|
|
||||||
list_news = []
|
|
||||||
for l in list_dict_news:
|
|
||||||
list_news.append(l.get("url"))
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning("Exception fetching {}: {}".format(self.name, str(e)))
|
|
||||||
list_news = []
|
|
||||||
|
|
||||||
# Bypass Google links
|
|
||||||
list_news_redirections = GoogleByPass().bypass_google_urls(list_news)
|
|
||||||
|
|
||||||
return list_news_redirections
|
|
||||||
|
|
||||||
class FetcherGoogleNews(FetcherAbstract):
|
|
||||||
def __init__(self, search, search_category="news", period="1d", lang="en", region="US"):
|
|
||||||
assert(search_category in ["news", "general"])
|
|
||||||
|
|
||||||
self.lang = lang
|
|
||||||
self.region = region
|
|
||||||
self.period = period
|
|
||||||
self.search_category = search_category
|
|
||||||
self.search = search
|
|
||||||
self.name = "googlenews {} {} {} {}".format(search, search_category, period, "{}-{}".format(lang, region))
|
|
||||||
|
|
||||||
def _fetch(self):
|
|
||||||
try:
|
|
||||||
# Initialize
|
|
||||||
g = GoogleNews(encode="utf-8", period=self.period, lang=self.lang, region=self.region)
|
|
||||||
g.enableException(True)
|
|
||||||
|
|
||||||
if (self.search_category == "general"):
|
|
||||||
set_links = set()
|
|
||||||
# Search
|
|
||||||
g.search(self.search)
|
|
||||||
|
|
||||||
# Iterate pages
|
|
||||||
MAX_ITER_PAGES = 15
|
|
||||||
for i in range(MAX_ITER_PAGES):
|
|
||||||
time.sleep(random.uniform(1, 1.5))
|
|
||||||
num_before = len(set_links)
|
|
||||||
|
|
||||||
# Get page
|
|
||||||
try:
|
|
||||||
links = g.page_at(i)
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning("Exception fetching page in GoogleNews {}: {}".format(self.name, str(e)))
|
|
||||||
break
|
|
||||||
# Links
|
|
||||||
for l in links:
|
|
||||||
# '/url?esrc=s&q=&rct=j&sa=U&url=https://www.breitbart.com/news/scent-of-luxury-indias-jasmine-infuses-global-perfume/&ved=2ahUKEwjOybGSiN-AAxX1gv0HHfqSBpMQxfQBegQICBAC&usg=AOvVaw06GdoHyzPbIopUaEuUSQPQ'
|
|
||||||
url = l.get("link").split("url=")[-1]
|
|
||||||
set_links.add(url)
|
|
||||||
|
|
||||||
num_after = len(set_links)
|
|
||||||
|
|
||||||
# Finished?
|
|
||||||
if (num_before == num_after):
|
|
||||||
logger.debug("Iterated {} pages on GoogleNews general search".format(i))
|
|
||||||
break
|
|
||||||
# To list
|
|
||||||
list_news = list(set_links)
|
|
||||||
elif (self.search_category == "news"):
|
|
||||||
# Search
|
|
||||||
g.get_news(self.search)
|
|
||||||
# Fetch
|
|
||||||
list_news = g.get_links()
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning("Exception fetching {}: {}".format(self.name, str(e)))
|
|
||||||
list_news = []
|
|
||||||
|
|
||||||
# Bypass Google links
|
|
||||||
list_news_redirections = GoogleByPass().bypass_google_urls(list_news)
|
|
||||||
|
|
||||||
return list_news_redirections
|
|
||||||
|
|
||||||
class FetcherDuckDuckGo(FetcherAbstract):
|
|
||||||
def __init__(self, search, search_category, period, lang="wt", region="wt"):
|
|
||||||
assert(search_category in ["news", "general"])
|
|
||||||
assert(period in ["d", "w", "m", "y"])
|
|
||||||
self.search = search
|
|
||||||
self.search_category = search_category
|
|
||||||
self.period = period
|
|
||||||
self.lang_region = "{}-{}".format(lang, region)
|
|
||||||
self.name = "duckduckgo {} {} {} {}".format(search, search_category, "1{}".format(period), region)
|
|
||||||
|
|
||||||
def _fetch(self):
|
|
||||||
try:
|
|
||||||
list_news = []
|
|
||||||
with DDGS(timeout=10) as ddgs:
|
|
||||||
if (self.search_category == "general"):
|
|
||||||
generator_links = ddgs.text(keywords=self.search, timelimit=self.period, region=self.lang_region)
|
|
||||||
elif (self.search_category == "news"):
|
|
||||||
generator_links = ddgs.news(keywords=self.search, timelimit=self.period, region=self.lang_region)
|
|
||||||
|
|
||||||
for l in generator_links:
|
|
||||||
list_news.append( l.get("url", l.get("href")) )
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning("Exception fetching {}: {}".format(self.name, str(e)))
|
|
||||||
list_news = []
|
|
||||||
return list_news
class FetcherSearxNews(FetcherAbstract):
|
|
||||||
def __init__(self, search="child abuse", searx_instance="https://serx.ml/", lang="en", region="US", search_category="news", period="day"):
|
|
||||||
assert(search_category in ["news", "general"])
|
|
||||||
assert(period in [None, "day", "week", "month", "year"])
|
|
||||||
# Random header (minimize probability of web-scraping detection)
|
|
||||||
self.headers = {
|
|
||||||
'User-agent': str(np.random.choice(user_agents_list)),
|
|
||||||
'Accept-Encoding': 'gzip, deflate',
|
|
||||||
'Accept': '*/*',
|
|
||||||
'Connection': 'keep-alive',
|
|
||||||
}
|
|
||||||
""" # Optional header
|
|
||||||
self.headers = {
|
|
||||||
'User-agent': str(np.random.choice(user_agents_list)),
|
|
||||||
'Accept-Encoding': 'gzip, deflate, br',
|
|
||||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
|
|
||||||
'Connection': 'keep-alive',
|
|
||||||
'Upgrade-Insecure-Requests': '1',
|
|
||||||
'TE': 'trailers',
|
|
||||||
'Sec-Fetch-Site': 'cross-site',
|
|
||||||
'Sec-Fetch-Mode': 'navigate',
|
|
||||||
'Sec-Fetch-Dest': 'document',
|
|
||||||
}
|
|
||||||
"""
|
|
||||||
self.search = search
|
|
||||||
self.searx_instance = searx_instance
|
|
||||||
self.lang_region = "{}-{}".format(lang, region)
|
|
||||||
self.search_category = search_category
|
|
||||||
self.period = period
|
|
||||||
self.t_sleep_lower, self.t_sleep_higher = 0.5, 1.5
|
|
||||||
self.request_timeout = 240
|
|
||||||
|
|
||||||
period_name_mapping = {
|
|
||||||
None: "no_date_range",
|
|
||||||
"day": "1d",
|
|
||||||
"week": "1w",
|
|
||||||
"month": "1m",
|
|
||||||
"year": "1y",
|
|
||||||
}
|
|
||||||
self.name = "searxng {} {} {} {} {}".format(searx_instance.replace("https://", "").replace("/", ""), search, search_category, period_name_mapping[period], self.lang_region)
|
|
||||||
logger.info("SearX - Initialized SearX fetcher: {}".format(self.name))
|
|
||||||
|
|
||||||
def _request_and_decode(self, url_search):
|
|
||||||
# Initial random time sleep (minimize chance of getting blocked)
|
|
||||||
time.sleep(random.uniform(self.t_sleep_lower, self.t_sleep_higher))
|
|
||||||
# Request
|
|
||||||
logger.debug("SearX - Searching: {}".format(url_search))
|
|
||||||
try:
|
|
||||||
r = requests.get(url_search, headers=self.headers, timeout=self.request_timeout)
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning("SearX - Exception in request: {}".format(url_search), "\n", str(e))
|
|
||||||
return []
|
|
||||||
|
|
||||||
if (r.status_code == 200):
|
|
||||||
# Status code Ok
|
|
||||||
pass
|
|
||||||
elif (r.status_code == 429):
|
|
||||||
# TooManyRequests, "Rate limit exceeded"
|
|
||||||
logger.warning("SearX {} - Too many requests while running: {}. Request output: {}".format(self.name, r.url, r.text))
|
|
||||||
return []
|
|
||||||
else:
logger.warning("SearX {} - Status code: {}. Request output: {}".format(self.name, r.status_code, r.text))
return []
|
|
||||||
|
|
||||||
# Decode request
|
|
||||||
soup = BeautifulSoup(r.text, 'html.parser')
|
|
||||||
page_url_set = set()
|
|
||||||
# h3 links
|
|
||||||
for elem in soup.find_all('h3'):
|
|
||||||
# Get url
|
|
||||||
url = elem.find('a').get('href')
|
|
||||||
page_url_set.add(url)
|
|
||||||
return page_url_set
|
|
||||||
|
|
||||||
def _get_news_list(self):
|
|
||||||
############################################################
|
|
||||||
# Domain & search parameter
|
|
||||||
search_domain = os.path.join(self.searx_instance, "search?q=")
|
|
||||||
# Search keywords
|
|
||||||
search_formatted = self.search.replace(" ", "+").replace(":", "%3A")
|
|
||||||
# Period formatted
|
|
||||||
period_formatted = "&time_range={}".format(self.period) if self.period is not None else ""
|
|
||||||
# Search parameters
|
|
||||||
search_parameters = "&category_{}=on&language={}{}".format(self.search_category, self.lang_region, period_formatted)
|
|
||||||
# Combined url search
|
|
||||||
url_search_nopage = "{}{}{}".format(search_domain, search_formatted, search_parameters)
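# Illustration only (hypothetical values): with searx_instance="https://searx.be/",
# search="child safety", search_category="news", lang-region "en-US" and period="day",
# the assembled URL is
#   https://searx.be/search?q=child+safety&category_news=on&language=en-US&time_range=day
# and the pagination loop below appends "&pageno=2", "&pageno=3", ...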
|
|
||||||
############################################################
|
|
||||||
|
|
||||||
# Request and decode on page=1
|
|
||||||
url_set = self._request_and_decode(url_search_nopage)
|
|
||||||
# No results?
|
|
||||||
if (len(url_set) == 0):
|
|
||||||
logger.warning("SearX {} - Empty results on search: {}".format(self.name, url_search_nopage))
|
|
||||||
return []
|
|
||||||
|
|
||||||
# Iterate pages
|
|
||||||
search_numpage = 2
|
|
||||||
while True:
|
|
||||||
# Combine url search with page number
|
|
||||||
url_search_with_page = "{}&pageno={}".format(url_search_nopage, search_numpage)
|
|
||||||
# Request and decode on page=X
|
|
||||||
url_set_i = self._request_and_decode(url_search_with_page)
|
|
||||||
|
|
||||||
# Length before merging
|
|
||||||
length_current = len(url_set)
|
|
||||||
# Merge
|
|
||||||
url_set = url_set.union(url_set_i)
|
|
||||||
# Length after merging
|
|
||||||
length_merged = len(url_set)
|
|
||||||
|
|
||||||
# No new elements?
|
|
||||||
if (length_current == length_merged):
|
|
||||||
logger.debug("SearX {} - Finished processing search, #pages: {}".format(self.name, search_numpage))
|
|
||||||
break
|
|
||||||
# Next page
|
|
||||||
search_numpage += 1
|
|
||||||
|
|
||||||
return list(url_set)
|
|
||||||
|
|
||||||
def _fetch(self):
|
|
||||||
try:
|
|
||||||
# Fetch news
|
|
||||||
list_news = self._get_news_list()
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning("Exception fetching {}: {}".format(self.name, str(e)))
|
|
||||||
list_news = []
|
|
||||||
return list_news
|
|
||||||
@@ -1,26 +0,0 @@
|
|||||||
import requests
|
|
||||||
import json
|
|
||||||
from .logger import get_logger
|
|
||||||
logger = get_logger()
|
|
||||||
|
|
||||||
class GoogleByPass():
|
|
||||||
def __init__(self) -> None:
|
|
||||||
pass
|
|
||||||
|
|
||||||
def bypass_google_urls(self, list_urls):
|
|
||||||
if (len(list_urls) == 0):
|
|
||||||
return []
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Endpoint
|
|
||||||
gbypass_endpoint = "http://selenium_app:80/get_redirection"
|
|
||||||
# Timeout: 20 minutes
|
|
||||||
timeout = 60*20
|
|
||||||
r = requests.post(gbypass_endpoint, json={"list_urls": list_urls}, timeout=timeout)
|
|
||||||
# Decode
|
|
||||||
list_urls_redirections = json.loads(r.text).get("list_urls_redirections", [])
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning("Exception on request: {}. {}".format(gbypass_endpoint, str(e)))
|
|
||||||
list_urls_redirections = []
|
|
||||||
|
|
||||||
return list_urls_redirections
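# For reference (derived from the code above; the response value is illustrative): the
# bypass service is expected to answer
#   POST http://selenium_app:80/get_redirection  with body {"list_urls": [<google links>]}
# with a JSON body of the form {"list_urls_redirections": ["https://publisher.example/story", ...]},
# i.e. the resolved publisher URLs behind the Google redirections.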
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
import logging
import logging.handlers
|
|
||||||
|
|
||||||
import os
|
|
||||||
os.makedirs("logs", exist_ok=True)
|
|
||||||
|
|
||||||
logging.basicConfig(format='%(filename)s | %(levelname)s | %(asctime)s | %(message)s')
|
|
||||||
logger = logging.getLogger("news_fetcher")
|
|
||||||
logger.setLevel(logging.INFO)
|
|
||||||
|
|
||||||
# To file log: INFO / WARNING / ERROR
|
|
||||||
fh = logging.handlers.RotatingFileHandler(filename="logs/log_app_fetcher.log", mode="a", maxBytes=10000000, backupCount=4)
|
|
||||||
fh.setFormatter(logging.Formatter('%(levelname)s | %(asctime)s | %(message)s'))
|
|
||||||
logger.addHandler(fh)
|
|
||||||
|
|
||||||
# To file log: WARNING / ERROR
|
|
||||||
fh_ = logging.handlers.RotatingFileHandler(filename="logs/log_app_fetcher_error.log", mode="a", maxBytes=10000000, backupCount=1)
|
|
||||||
fh_.setFormatter(logging.Formatter('%(levelname)s | %(asctime)s | %(message)s'))
|
|
||||||
fh_.setLevel(logging.WARNING)
|
|
||||||
logger.addHandler(fh_)
|
|
||||||
|
|
||||||
def get_logger():
|
|
||||||
return logger
|
|
||||||
@@ -1,36 +0,0 @@
|
|||||||
from .db_utils import DB_Handler
|
|
||||||
import requests
|
|
||||||
import json
|
|
||||||
from .logger import get_logger
|
|
||||||
logger = get_logger()
|
|
||||||
|
|
||||||
class MissingKidsFetch():
|
|
||||||
def __init__(self, db_handler: DB_Handler, num_pages) -> None:
|
|
||||||
logger.debug("Initializing News MissingKids")
|
|
||||||
self.db_handler = db_handler
|
|
||||||
self.num_pages = num_pages
|
|
||||||
self.missingkids_fetch_endpoint = "http://selenium_app:80/get_missing_kids/?pages={}"
|
|
||||||
|
|
||||||
def run(self):
|
|
||||||
try:
|
|
||||||
logger.debug("Starting NewsMissingKids.run()")
|
|
||||||
try:
|
|
||||||
# Timeout
|
|
||||||
if (self.num_pages > 15):
|
|
||||||
timeout = 60*90 # 1.5h
|
|
||||||
else:
|
|
||||||
timeout = 60*5 # 5 min
|
|
||||||
# Request
|
|
||||||
r = requests.get(self.missingkids_fetch_endpoint.format(self.num_pages), timeout=timeout)
|
|
||||||
# Decode
|
|
||||||
urls_fetched = json.loads(r.text).get("list_urls", [])
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning("Timeout on request: {}. {}".format(missingkids_fetch_endpoint, str(e)))
|
|
||||||
urls_fetched = []
|
|
||||||
|
|
||||||
# URL fetching source
|
|
||||||
source = "missingkids fetcher"
|
|
||||||
# Write to DB
|
|
||||||
self.db_handler.write_batch(urls_fetched, source)
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning("Exception in NewsMissingKids.run(): {}".format(str(e)))
|
|
||||||
@@ -1,98 +0,0 @@
|
|||||||
from .db_utils import URL_DB_Writer
|
|
||||||
from .url_utils import get_missing_kid_status
|
|
||||||
from .logger import get_logger
|
|
||||||
logger = get_logger()
|
|
||||||
|
|
||||||
|
|
||||||
def get_missing_kid_status(url, return_canonical_url=False):
|
|
||||||
import time
|
|
||||||
import requests
|
|
||||||
|
|
||||||
# Sleep
|
|
||||||
time.sleep(0.75)
|
|
||||||
try:
|
|
||||||
# Request
|
|
||||||
r = requests.get(url, timeout=300)
|
|
||||||
# Decode
|
|
||||||
status_code = r.status_code
|
|
||||||
# Canonical URL removing parameters
|
|
||||||
url_canonical = r.url
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning("Exception on get URL status request: {}. {}".format(url, str(e)))
|
|
||||||
status_code = None
|
|
||||||
url_canonical = url
|
|
||||||
|
|
||||||
if (status_code == 200):
|
|
||||||
status = "valid"
|
|
||||||
elif (status_code == 404):
|
|
||||||
status = "invalid"
|
|
||||||
else:
|
|
||||||
status = "unknown"
|
|
||||||
|
|
||||||
logger.debug("Missing Kid URL {} status: {}".format(url, status))
|
|
||||||
if (return_canonical_url):
|
|
||||||
return status, url_canonical
|
|
||||||
else:
|
|
||||||
return status
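# Usage sketch (the URL is illustrative):
#   status = get_missing_kid_status("https://www.missingkids.org/poster/NCMC/000000/1")
#   status, url_canonical = get_missing_kid_status(url, return_canonical_url=True)
# where status is "valid" (HTTP 200), "invalid" (HTTP 404) or "unknown" (anything else).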
|
|
||||||
|
|
||||||
class MissingKidsStatus():
|
|
||||||
def __init__(self, db_connect_info, redis_connect_info, num_urls) -> None:
|
|
||||||
self.num_urls = num_urls
|
|
||||||
self.db_connect_info = db_connect_info
|
|
||||||
self.redis_connect_info = redis_connect_info
|
|
||||||
self.db_writer = URL_DB_Writer(db_connect_info, redis_connect_info)
|
|
||||||
|
|
||||||
def update_missing_kids_status(self):
|
|
||||||
try:
|
|
||||||
logger.info("Starting updating status to Missing Kids URLs, limit #URLs: {}".format(self.num_urls))
|
|
||||||
# List of URLs
|
|
||||||
list_ids_and_urls = self.db_writer._get_missing_kids_urls(self.num_urls)
|
|
||||||
# Dict: status -> IDs to update to new status
|
|
||||||
dict_status_ids, dict_status_urls = {}, {}
|
|
||||||
# Check URLs with invalid status?
|
|
||||||
skip_invalid_check = False
|
|
||||||
|
|
||||||
flush_every, flush_current = 20, 0
|
|
||||||
# Iterate URLs
|
|
||||||
for (id, url, current_status) in list_ids_and_urls:
|
|
||||||
# Skip duplicate URLs
|
|
||||||
if (current_status == "duplicate"):
|
|
||||||
continue
|
|
||||||
# Skip invalid URLs?
|
|
||||||
if (skip_invalid_check):
|
|
||||||
if (current_status == "invalid"):
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Get status
|
|
||||||
new_status = get_missing_kid_status(url)
|
|
||||||
# Different? Update
|
|
||||||
if (current_status != new_status):
|
|
||||||
# Extend array
|
|
||||||
dict_status_ids[new_status] = dict_status_ids.get(new_status, []) + [id]
|
|
||||||
# Debugging dict
|
|
||||||
dict_status_urls[new_status] = dict_status_urls.get(new_status, []) + [url]
|
|
||||||
# +1 processed
|
|
||||||
flush_current += 1
|
|
||||||
|
|
||||||
# Flush batch?
|
|
||||||
if (flush_every == flush_current):
|
|
||||||
logger.info("Updating status to Missing Kids URLs: {}".format(dict_status_urls))
|
|
||||||
# Update DB
|
|
||||||
self.db_writer._update_urls_status(dict_status_ids)
|
|
||||||
# Reset
|
|
||||||
flush_current = 0
|
|
||||||
dict_status_ids, dict_status_urls = {}, {}
|
|
||||||
|
|
||||||
# Flush remaining batch
|
|
||||||
if (flush_current > 0):
|
|
||||||
logger.info("Updating status to Missing Kids URLs: {}".format(dict_status_urls))
|
|
||||||
# Update DB
|
|
||||||
self.db_writer._update_urls_status(dict_status_ids)
|
|
||||||
# Reset
|
|
||||||
flush_current = 0
|
|
||||||
dict_status_ids, dict_status_urls = {}, {}
|
|
||||||
|
|
||||||
logger.info("Finished updating status to Missing Kids URLs")
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning("Exception in MissingKidsStatus.run(): {}".format(str(e)))
|
|
||||||
|
|
||||||
@@ -1,62 +0,0 @@
|
|||||||
from .db_utils import URL_DB_Writer
|
|
||||||
from .url_utils import process_article
|
|
||||||
from .logger import get_logger
|
|
||||||
logger = get_logger()
|
|
||||||
|
|
||||||
class UpdateErrorURLs():
|
|
||||||
def __init__(self, db_connect_info, redis_connect_info, num_urls) -> None:
|
|
||||||
self.num_urls = num_urls
|
|
||||||
self.db_connect_info = db_connect_info
|
|
||||||
self.redis_connect_info = redis_connect_info
|
|
||||||
self.db_writer = URL_DB_Writer(db_connect_info, redis_connect_info)
|
|
||||||
|
|
||||||
def update_error_urls_status(self):
|
|
||||||
try:
|
|
||||||
logger.info("Starting updating status to URLs with error, limit #URLs: {}".format(self.num_urls))
|
|
||||||
# List of URLs with status 'error'
|
|
||||||
list_ids_and_urls = self.db_writer._get_error_urls(self.num_urls)
|
|
||||||
# Current status
|
|
||||||
current_status = "error"
|
|
||||||
# Dict: status -> IDs to update to new status
|
|
||||||
dict_status_ids, dict_status_urls = {}, {}
|
|
||||||
|
|
||||||
# Get list of (pattern, priority, status) tuples to override status if required
|
|
||||||
list_pattern_status_tuple = self.db_writer._get_pattern_status_list()
|
|
||||||
# Sort pattern tuples by priority
|
|
||||||
list_pattern_status_tuple.sort(key=lambda tup: tup[1], reverse=True)
|
|
||||||
|
|
||||||
flush_every, flush_current = 20, 0
|
|
||||||
# Iterate URLs
|
|
||||||
for (id, url) in list_ids_and_urls:
|
|
||||||
# Get status
|
|
||||||
url_canonical, article_elements, new_status = process_article(url, list_pattern_status_tuple)
|
|
||||||
# Different? Update
|
|
||||||
if (current_status != new_status):
|
|
||||||
# Extend array
|
|
||||||
dict_status_ids[new_status] = dict_status_ids.get(new_status, []) + [id]
|
|
||||||
# Debugging dict
|
|
||||||
dict_status_urls[new_status] = dict_status_urls.get(new_status, []) + [url]
|
|
||||||
# +1 processed
|
|
||||||
flush_current += 1
|
|
||||||
|
|
||||||
# Flush batch?
|
|
||||||
if (flush_every == flush_current):
|
|
||||||
logger.info("Updating status to URLs with error: {}".format(dict_status_urls))
|
|
||||||
# Update DB
|
|
||||||
self.db_writer._update_urls_status(dict_status_ids)
|
|
||||||
# Reset
|
|
||||||
flush_current = 0
|
|
||||||
dict_status_ids, dict_status_urls = {}, {}
|
|
||||||
|
|
||||||
# Flush remaining batch
|
|
||||||
if (flush_current > 0):
|
|
||||||
logger.info("Updating status to URLs with error: {}".format(dict_status_urls))
|
|
||||||
# Update DB
|
|
||||||
self.db_writer._update_urls_status(dict_status_ids)
|
|
||||||
# Reset
|
|
||||||
flush_current = 0
|
|
||||||
dict_status_ids, dict_status_urls = {}, {}
|
|
||||||
|
|
||||||
logger.info("Finished updating status to URLs with error")
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning("Exception in UpdateErrorURLs.run(): {}".format(str(e)))
|
|
||||||
@@ -1,262 +0,0 @@
|
|||||||
from gnews import GNews
|
|
||||||
import dateutil.parser
|
|
||||||
from datetime import datetime, timedelta
|
|
||||||
from .utils import remove_http_s
|
|
||||||
import time
|
|
||||||
import random
|
|
||||||
import traceback
|
|
||||||
import requests
|
|
||||||
import json
|
|
||||||
import re
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
|
|
||||||
from .logger import get_logger
|
|
||||||
logger = get_logger()
|
|
||||||
|
|
||||||
def get_published_date(article):
|
|
||||||
try:
|
|
||||||
"""
|
|
||||||
# Already fetched publish date information?
|
|
||||||
if (publish_date_ is not None):
|
|
||||||
return publish_date_
|
|
||||||
"""
|
|
||||||
|
|
||||||
# List of potential publish dates
|
|
||||||
potential_dates = []
|
|
||||||
# Publish date is the best match
|
|
||||||
potential_dates.append(article.publish_date)
|
|
||||||
# Publish date metadata is the following best match
|
|
||||||
potential_dates.append(article.meta_data.get('article', {}).get("published_time", None))
|
|
||||||
# Iterate remaining keys
|
|
||||||
for key in article.meta_data.keys():
|
|
||||||
if ("date" in key):
|
|
||||||
potential_dates.append(article.meta_data[key])
|
|
||||||
|
|
||||||
def invalid_date(p_date):
|
|
||||||
# Today + 2 days, article from the future?
|
|
||||||
today_plus_two = datetime.utcnow() + timedelta(days=2)
|
|
||||||
# Article from the future?
|
|
||||||
return p_date.timestamp() > today_plus_two.timestamp()
|
|
||||||
|
|
||||||
for date_ in potential_dates:
|
|
||||||
# String date? parse
|
|
||||||
if (type(date_) == str):
|
|
||||||
try:
|
|
||||||
date_ = dateutil.parser.parse(date_)
|
|
||||||
except Exception as e:
|
|
||||||
logger.info("Invalid date found while parsing potential date: {} for URL: {}".format(date_, article.url))
|
|
||||||
date_ = None
|
|
||||||
# Valid?
|
|
||||||
if (date_ is not None) and (not invalid_date(date_)):
|
|
||||||
return date_
|
|
||||||
|
|
||||||
logger.debug("Article with no published date: {}".format(article.url))
|
|
||||||
return None
|
|
||||||
except Exception as e:
|
|
||||||
logger.info("Error while retrieving published date for URL: {}".format(article.url))
|
|
||||||
return None
|
|
||||||
|
|
||||||
def get_url_host(article_source_url, url):
|
|
||||||
# https://www.blabla.com/blabla -> www.blabla.com
|
|
||||||
if (article_source_url != ""):
|
|
||||||
# Article source URL already extracted, save path if any
|
|
||||||
return remove_http_s(article_source_url) # .split("/")[0]
|
|
||||||
else:
|
|
||||||
return remove_http_s(url).split("/")[0]
|
|
||||||
|
|
||||||
def get_status_pattern_matching(url, article_status, list_pattern_status_tuple):
|
|
||||||
# Regex pattern to update status on "valid", "invalid", and "unknown" status only
|
|
||||||
# Status "raw", "duplicated" and "error" should remain the way they are
|
|
||||||
# Assumption: List of patterns sorted by importance
|
|
||||||
if (article_status in ["valid", "invalid", "unknown"]):
|
|
||||||
# Regular expression pattern matching: https://regexr.com/
|
|
||||||
for regex_pattern, regex_priority, status_if_match in list_pattern_status_tuple:
|
|
||||||
# Matching?
|
|
||||||
matching = bool(re.match(regex_pattern, url))
|
|
||||||
# Update article status
|
|
||||||
if (matching):
|
|
||||||
if (status_if_match != article_status):
|
|
||||||
logger.debug("Regex pattern found, updating status from '{}' to '{}' for URL: {}".format(article_status, status_if_match, url))
|
|
||||||
return status_if_match
|
|
||||||
# Pattern matching not required or not found, original article status
|
|
||||||
return article_status
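# Illustrative example (patterns and priorities are hypothetical, not the DB contents):
example_patterns = [
    (r"https?://(www\.)?youtube\.com/.*", 10, "invalid"),
    (r"https?://(www\.)?example\.org/news/.*", 5, "valid"),
]
# The caller sorts the tuples by priority (descending) and the first matching pattern wins:
# get_status_pattern_matching("https://www.youtube.com/watch?v=abc", "valid", example_patterns)
# -> "invalid"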
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def bypass_google_link(article_url):
|
|
||||||
|
|
||||||
def bypass_google_consent(article_url):
|
|
||||||
# Sample URL: https://consent.google.com/m?continue=https://news.google.com/rss/articles/CBMiMGh0dHBzOi8vd3d3Lm1pc3NpbmdraWRzLm9yZy9wb3N0ZXIvbmNtYy84NjAxMTkvMdIBAA?oc%3D5&gl=NL&m=0&pc=n&cm=2&hl=en-US&src=1
|
|
||||||
article_url_no_consent = article_url.replace("https://consent.google.com/m?continue=", "")
|
|
||||||
|
|
||||||
# https://stackoverflow.com/questions/76063646/how-can-i-have-redirection-link-from-google-news-link-using-requests
|
|
||||||
headers = {
|
|
||||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36'
|
|
||||||
}
|
|
||||||
cookies = {'CONSENT': 'YES+cb.20220419-08-p0.cs+FX+111'}
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Request
|
|
||||||
r = requests.get(article_url_no_consent, headers=headers, cookies=cookies, timeout=300)
|
|
||||||
# Decode
|
|
||||||
soup = BeautifulSoup(r.text, 'html.parser')
|
|
||||||
url_of_interest = soup.a['href']
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning("Exception on request trying to G_bypass with headers: {}. {}".format(article_url_no_consent, str(e)))
|
|
||||||
url_of_interest = None
|
|
||||||
|
|
||||||
# Not able to bypass?
|
|
||||||
if (url_of_interest == "") or ("support.google.com" in url_of_interest) or ("news.google.com" in url_of_interest):
|
|
||||||
url_of_interest = None
|
|
||||||
return url_of_interest
|
|
||||||
|
|
||||||
def bypass_google_using_service(article_url):
|
|
||||||
try:
|
|
||||||
# e.g.: url = "https://news.google.com/articles/CBMiX2h0dHBzOi8vd3d3LmZveGJ1c2luZXNzLmNvbS9wb2xpdGljcy9kaXNuZXktc3Vlcy1mbG9yaWRhLWdvdi1yb24tZGVzYW50aXMtbG9zcy1zcGVjaWFsLWRpc3RyaWN00gEA?hl=en-US&gl=US&ceid=US%3Aen"
|
|
||||||
gbypass_endpoint = "http://selenium_app:80/get_redirection"
|
|
||||||
# Timeout: 5 minutes
|
|
||||||
r = requests.post(gbypass_endpoint, json={"url": article_url}, timeout=300)
|
|
||||||
# Decode
|
|
||||||
redirect_url = json.loads(r.text).get("redirect_url", "")
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning("Exception on request: {}. {}".format(gbypass_endpoint, str(e)))
|
|
||||||
redirect_url = ""
|
|
||||||
|
|
||||||
return redirect_url
|
|
||||||
|
|
||||||
logger.debug("Starting gbypass_endpoint()")
|
|
||||||
|
|
||||||
article_url_bypassed = None
|
|
||||||
# Bypass using request
|
|
||||||
if ("consent.google.com" in article_url):
|
|
||||||
article_url_bypassed = bypass_google_consent(article_url)
|
|
||||||
# Not bypassed yet? Bypass using service
|
|
||||||
if (article_url_bypassed is None):
|
|
||||||
article_url_bypassed = bypass_google_using_service(article_url)
|
|
||||||
|
|
||||||
# if (article_url_bypassed is None) or (article_url_bypassed == "") or ("news.google.com" in article_url_bypassed):
|
|
||||||
if (article_url_bypassed == "") or (article_url_bypassed is None):
|
|
||||||
# Empty URL returned by Gbypass
|
|
||||||
logger.warning("Error while bypassing Gnews for URL: {}".format(article_url))
|
|
||||||
return None
|
|
||||||
else:
|
|
||||||
logger.debug("Correctly bypassed GNews to URL_redirect, from URL: {} {}".format(article_url_bypassed, article_url))
|
|
||||||
return article_url_bypassed
|
|
||||||
|
|
||||||
def process_article(article_url, list_pattern_status_tuple, language="en"):
|
|
||||||
# TODO:
|
|
||||||
"""
|
|
||||||
https://github.com/fhamborg/news-please
|
|
||||||
https://github.com/fhamborg/Giveme5W1H
|
|
||||||
https://github.com/santhoshse7en/news-fetch
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
logger.debug("Starting process_article()")
|
|
||||||
|
|
||||||
if ("news.google.com" in article_url) or ("consent.google.com" in article_url):
|
|
||||||
# Bypass to get redirection
|
|
||||||
article_url = bypass_google_link(article_url)
|
|
||||||
# Error?
|
|
||||||
if (article_url is None):
|
|
||||||
return None, {}, "error"
|
|
||||||
elif ("missingkids.org/poster" in article_url):
|
|
||||||
# Get status
|
|
||||||
article_status, url_canonical = get_missing_kid_status(article_url, return_canonical_url=True)
|
|
||||||
article_elements = {
|
|
||||||
"url_full": article_url,
|
|
||||||
"url_canonical": url_canonical
|
|
||||||
}
|
|
||||||
return url_canonical, article_elements, article_status
|
|
||||||
else:
|
|
||||||
# Avoid Too many requests (feeds, ...)
|
|
||||||
time.sleep(0.75)
|
|
||||||
|
|
||||||
logger.debug("Processing: {}".format(article_url))
|
|
||||||
|
|
||||||
# Default status unless something happens
|
|
||||||
article_status = "valid"
|
|
||||||
|
|
||||||
# Parse article
|
|
||||||
# TODO: :param proxy: The proxy parameter is a dictionary with a single key-value pair. self._proxy = {'http': proxy, 'https': proxy} if proxy else None
|
|
||||||
# TODO: Language per config
|
|
||||||
article = GNews(language).get_full_article(url=article_url)
|
|
||||||
|
|
||||||
# Article parsed?
|
|
||||||
if (article is None) or (not article.is_parsed):
|
|
||||||
logger.debug("Article not parsed: {}".format(article_url))
|
|
||||||
return article_url, {}, "error"
|
|
||||||
|
|
||||||
# Canonical link as main URL
|
|
||||||
url_canonical = article.canonical_link
|
|
||||||
# Empty canonical URL?
|
|
||||||
if (article.canonical_link is None) or (article.canonical_link == ""):
|
|
||||||
# URL with parameters? e.g. some zerohedge news fetched from newspaper3k end with #comment-stream -> Remove extra parameter in link
|
|
||||||
if ("?" in article.url) or (article.url.endswith("#comment-stream")) or (article.url.endswith("#disqus_thread")):
|
|
||||||
logger.debug("Article URL contains parameters, trying to clean URL: {}".format(article.url))
|
|
||||||
try:
|
|
||||||
# Remove text after parameter call
|
|
||||||
url = article.url.split("?")[0]
|
|
||||||
# Remove comment-stream
|
|
||||||
url = url.replace("#comment-stream", "").replace("#disqus_thread", "")
|
|
||||||
# Article
|
|
||||||
article_attempt = GNews(language).get_full_article(url=url)
|
|
||||||
# Retrieving same title? Update article based on clean URL
|
|
||||||
if (article_attempt is not None) and (article_attempt.title == article.title):
|
|
||||||
article = article_attempt
|
|
||||||
except Exception as e:
|
|
||||||
logger.info("Article parsing of URL without parameters failed: {}".format(article.url))
|
|
||||||
else: # Default behaviour
|
|
||||||
logger.debug("Article canonical link is empty, assuming URL=URL_CANONICAL: {}".format(article.url))
|
|
||||||
|
|
||||||
# By default, URL same as canonical
|
|
||||||
url_canonical = article.url
|
|
||||||
|
|
||||||
elif (article.url != article.canonical_link):
|
|
||||||
# If different, stick to canonical URL
|
|
||||||
logger.debug("Article URL and canonical link are different: {} {}".format(article.url, article.canonical_link))
|
|
||||||
else:
|
|
||||||
# If same, continue...
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Update config to determine if content is valid
|
|
||||||
article.config.MIN_WORD_COUNT = 150
|
|
||||||
article.config.MIN_SENT_COUNT = 6
|
|
||||||
|
|
||||||
# Valid URL?
|
|
||||||
if (not article.is_valid_url()):
|
|
||||||
logger.debug("Not a valid news article: {}".format(url_canonical))
|
|
||||||
article_status = "invalid"
|
|
||||||
# Is the article's body text long enough to meet standard article requirements?
|
|
||||||
if (not article.is_valid_body()):
|
|
||||||
logger.debug("Article body not valid: {}".format(url_canonical))
|
|
||||||
article_status = "unknown"
|
|
||||||
|
|
||||||
if (article.images != article.imgs):
|
|
||||||
logger.debug("Article images and imgs are different: {} {}".format(article.images, article.imgs))
|
|
||||||
|
|
||||||
# article.keywords, article.meta_keywords, article.summary
|
|
||||||
# article.movies
|
|
||||||
# article.top_image
|
|
||||||
|
|
||||||
# Check if article status needs to be updated
|
|
||||||
article_status = get_status_pattern_matching(url_canonical, article_status, list_pattern_status_tuple)
|
|
||||||
|
|
||||||
article_elements = {
|
|
||||||
'url_full': article.url, # https://www.breitbart.com/tech/2022/10/03/report-election-integrity-project-worked-with-feds-to-censor-news-sites-in-2020/
|
|
||||||
'url_host': get_url_host(article.source_url, url_canonical), # www.breitbart.com
|
|
||||||
'title': article.title, # Report: ‘Election Integrity’ Partnership Worked with Feds to Censor News Sites in 2020
|
|
||||||
'description': article.meta_description, # Coalition committed to respond in ‘early 2022’ but failed to do so, while Labor has not issued a full response since taking office
|
|
||||||
'text': article.text, # ${Article content}
|
|
||||||
'published_date': get_published_date(article), # python.datetime format, obtained from "YYYY-MM-DD" or '2022-10-03T20:54:17+00:00'
|
|
||||||
'authors': article.authors, # ['Christopher Knaus']
|
|
||||||
'language': article.meta_lang, # en
|
|
||||||
'tags': list(article.tags), # ['Wide Open Border', '’My Son Hunter’ Movie', ...]
|
|
||||||
'images': list(article.images), # [URL_IMAGE_1, URL_IMAGE_2, ...]
|
|
||||||
'url_canonical': url_canonical, # Canonical URL (redirection)
|
|
||||||
# 'html': article.html, # HTML article
|
|
||||||
}
|
|
||||||
logger.debug("Processing OK: {}".format(url_canonical))
|
|
||||||
return url_canonical, article_elements, article_status
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning("Exception processing url: {}\n{}".format(article_url, traceback.format_exc()))
|
|
||||||
return None, {}, "error"
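# Usage sketch (the URL is illustrative; the pattern list may be empty):
#   url_canonical, article_elements, article_status = process_article("https://publisher.example/story", [])
# article_status is one of "valid", "invalid", "unknown" or "error", and article_elements
# carries the parsed fields listed above (title, text, published_date, ...).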
|
|
||||||
@@ -1,33 +0,0 @@
|
|||||||
|
|
||||||
def remove_http_s(url):
|
|
||||||
url = url.replace("https://", "") if url.startswith("https://") else url
|
|
||||||
url = url.replace("http://", "") if url.startswith("http://") else url
|
|
||||||
return url
|
|
||||||
|
|
||||||
def is_valid_url(url):
|
|
||||||
if (url.startswith("https://")):
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
return False
|
|
||||||
|
|
||||||
def get_searxng_instances():
|
|
||||||
# SearxNG instances: https://searx.space/
|
|
||||||
searx_instances = set()
|
|
||||||
searx_instances.add("https://searx.work/")
|
|
||||||
searx_instances.add("https://search.ononoki.org/")
|
|
||||||
searx_instances.add("https://searxng.nicfab.eu/")
|
|
||||||
searx_instances.add("https://searx.be/")
|
|
||||||
|
|
||||||
# searx_instances.add("https://searx.fmac.xyz/")
|
|
||||||
# searx_instances.add("https://northboot.xyz/") # FIX
|
|
||||||
|
|
||||||
# searx_instances.add("https://serx.ml/") # Offline
|
|
||||||
# searx_instances.add("https://searx.ru/")
|
|
||||||
# searx_instances.add("https://searx.sp-codes.de/")
|
|
||||||
# searx_instances.add("https://searxng.nicfab.eu/")
|
|
||||||
# searx_instances.add("https://s.frlt.one/")
|
|
||||||
# searx_instances.add("https://search.sapti.me/")
|
|
||||||
|
|
||||||
# To list
|
|
||||||
list_searx_instances = list(searx_instances)
|
|
||||||
return list_searx_instances
|
|
||||||
3 app_selenium/README.md Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
|
||||||
|
* Missing kids posters fetch (num_pages=X)
|
||||||
|
* ...
|
||||||
@@ -17,7 +17,7 @@ class Search(models.Model):
|
|||||||
db_table = 'search'
|
db_table = 'search'
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return "[{}]->{}".format(self.type, self.search)
|
return "[{}: {}]".format(self.type, self.search)
|
||||||
|
|
||||||
class Source(models.Model):
|
class Source(models.Model):
|
||||||
id = models.SmallAutoField(primary_key=True)
|
id = models.SmallAutoField(primary_key=True)
|
||||||
|
|||||||
@@ -130,7 +130,7 @@ class DB_Handler():
|
|||||||
# Get or create URL with canonical form
|
# Get or create URL with canonical form
|
||||||
obj_url_canonical, created = Urls.objects.get_or_create(url=dict_url_data.get("url_canonical"))
|
obj_url_canonical, created = Urls.objects.get_or_create(url=dict_url_data.get("url_canonical"))
|
||||||
# Get the source-search IDs associated to obj_url.id
|
# Get the source-search IDs associated to obj_url.id
|
||||||
list_url_source_search = UrlsSourceSearch.objects.fiter(id_url=obj_url)
|
list_url_source_search = UrlsSourceSearch.objects.filter(id_url=obj_url)
|
||||||
for obj_url_source_search in list_url_source_search:
|
for obj_url_source_search in list_url_source_search:
|
||||||
# Associate same sources to url_canonical (it might already exist)
|
# Associate same sources to url_canonical (it might already exist)
|
||||||
UrlsSourceSearch.objects.get_or_create(id_url=obj_url_canonical, id_source=obj_url_source_search.id_source, id_search=obj_url_source_search.id_search)
|
UrlsSourceSearch.objects.get_or_create(id_url=obj_url_canonical, id_source=obj_url_source_search.id_source, id_search=obj_url_source_search.id_search)
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
|
|
||||||
<script>
|
<script>
|
||||||
|
|
||||||
function getQueryString(pageNumber, itemsNumber, sources, statuses){
|
function getQueryString(pageNumber, itemsNumber, sources, searches, statuses){
|
||||||
// Query parameters. If input is null, get most recent value
|
// Query parameters. If input is null, get most recent value
|
||||||
let queryParams = new URLSearchParams(window.location.search);
|
let queryParams = new URLSearchParams(window.location.search);
|
||||||
// page
|
// page
|
||||||
@@ -21,6 +21,9 @@
|
|||||||
// sources
|
// sources
|
||||||
if (sources == null) sources = queryParams.get("sources") ?? "all";
|
if (sources == null) sources = queryParams.get("sources") ?? "all";
|
||||||
queryParams.set("sources", sources);
|
queryParams.set("sources", sources);
|
||||||
|
// searches
|
||||||
|
if (searches == null) searches = queryParams.get("searches") ?? "all";
|
||||||
|
queryParams.set("searches", searches);
|
||||||
// status
|
// status
|
||||||
if (statuses == null) statuses = queryParams.get("status") ?? "all";
|
if (statuses == null) statuses = queryParams.get("status") ?? "all";
|
||||||
queryParams.set("status", statuses);
|
queryParams.set("status", statuses);
|
||||||
@@ -33,11 +36,11 @@
|
|||||||
return queryParamsString;
|
return queryParamsString;
|
||||||
}
|
}
|
||||||
|
|
||||||
function loadPage(pageNumber, itemsNumber, sources, statuses) {
|
function loadPage(pageNumber, itemsNumber, sources, searches, statuses) {
|
||||||
$("#item-list").fadeTo(100, 0.5); // Smooth fade effect
|
$("#item-list").fadeTo(100, 0.5); // Smooth fade effect
|
||||||
$("#loading").show();
|
$("#loading").show();
|
||||||
|
|
||||||
queryParamsString = getQueryString(pageNumber, itemsNumber, sources, statuses);
|
queryParamsString = getQueryString(pageNumber, itemsNumber, sources, searches, statuses);
|
||||||
|
|
||||||
$.ajax({
|
$.ajax({
|
||||||
url: "?" + queryParamsString,
|
url: "?" + queryParamsString,
|
||||||
@@ -58,7 +61,7 @@
|
|||||||
$(document).on("click", ".pagination a", function (event) {
|
$(document).on("click", ".pagination a", function (event) {
|
||||||
event.preventDefault();
|
event.preventDefault();
|
||||||
let page = $(this).attr("data-page");
|
let page = $(this).attr("data-page");
|
||||||
loadPage(pageNumber=page, itemsNumber=null, sources=null, statuses=null);
|
loadPage(pageNumber=page, itemsNumber=null, sources=null, searches=null, statuses=null);
|
||||||
});
|
});
|
||||||
|
|
||||||
$(document).ready(function () {
|
$(document).ready(function () {
|
||||||
@@ -68,25 +71,63 @@
|
|||||||
////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////
|
||||||
const sourcesToggleAll = $("#toggle-all-sources");
|
const sourcesToggleAll = $("#toggle-all-sources");
|
||||||
const sourcesCheckboxes = $(".source-checkbox");
|
const sourcesCheckboxes = $(".source-checkbox");
|
||||||
|
const searchesToggleAll = $("#toggle-all-searches");
|
||||||
|
const searchesCheckboxes = $(".search-checkbox");
|
||||||
const statusesToggleAll = $("#toggle-all-status");
|
const statusesToggleAll = $("#toggle-all-status");
|
||||||
const statusCheckboxes = $(".status-checkbox");
|
const statusCheckboxes = $(".status-checkbox");
|
||||||
|
|
||||||
function updateFilters() {
|
function updateFilters() {
|
||||||
// Get selected sources
|
// Get selected sources
|
||||||
let selectedSources = sourcesCheckboxes.filter(":checked").map(function () {
|
if (sourcesToggleAll.prop("checked")) {
|
||||||
return $(this).val();
|
selectedSources = "all";
|
||||||
}).get().join(",");
|
}
|
||||||
|
else {
|
||||||
|
if (sourcesCheckboxes.filter(":checked").length > 0 ){
|
||||||
|
selectedSources = sourcesCheckboxes.filter(":checked").map(function () {
|
||||||
|
return $(this).val();
|
||||||
|
}).get().join(",");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
selectedSources = "none";
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get selected searches
|
||||||
|
if (searchesToggleAll.prop("checked")) {
|
||||||
|
selectedSearches = "all";
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (searchesCheckboxes.filter(":checked").length > 0 ){
|
||||||
|
selectedSearches = searchesCheckboxes.filter(":checked").map(function () {
|
||||||
|
return $(this).val();
|
||||||
|
}).get().join(",");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
selectedSearches = "none";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Get selected URL statuses
|
// Get selected URL statuses
|
||||||
let selectedStatuses = statusCheckboxes.filter(":checked").map(function () {
|
if (statusesToggleAll.prop("checked")) {
|
||||||
return $(this).val();
|
selectedStatuses = "all";
|
||||||
}).get().join(",");
|
}
|
||||||
|
else {
|
||||||
|
if (statusCheckboxes.filter(":checked").length > 0 ){
|
||||||
|
selectedStatuses = statusCheckboxes.filter(":checked").map(function () {
|
||||||
|
return $(this).val();
|
||||||
|
}).get().join(",");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
selectedStatuses = "none";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Get selected items per page
|
// Get selected items per page
|
||||||
let selectedItems = $("input[name='items']:checked").val();
|
let selectedItems = $("input[name='items']:checked").val();
|
||||||
|
|
||||||
// Update pagination and reload data
|
// Update pagination and reload data
|
||||||
loadPage(1, selectedItems, selectedSources, selectedStatuses);
|
loadPage(1, selectedItems, selectedSources, selectedSearches, selectedStatuses);
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////
|
||||||
@@ -101,6 +142,15 @@
|
|||||||
sourcesToggleAll.prop("checked", sourcesCheckboxes.length === sourcesCheckboxes.filter(":checked").length);
|
sourcesToggleAll.prop("checked", sourcesCheckboxes.length === sourcesCheckboxes.filter(":checked").length);
|
||||||
updateFilters();
|
updateFilters();
|
||||||
});
|
});
|
||||||
|
// Searches
|
||||||
|
searchesToggleAll.on("change", function () {
|
||||||
|
searchesCheckboxes.prop("checked", searchesToggleAll.prop("checked"));
|
||||||
|
updateFilters();
|
||||||
|
});
|
||||||
|
searchesCheckboxes.on("change", function () {
|
||||||
|
searchesToggleAll.prop("checked", searchesCheckboxes.length === searchesCheckboxes.filter(":checked").length);
|
||||||
|
updateFilters();
|
||||||
|
});
|
||||||
// Status
|
// Status
|
||||||
statusesToggleAll.on("change", function () {
|
statusesToggleAll.on("change", function () {
|
||||||
statusCheckboxes.prop("checked", statusesToggleAll.prop("checked"));
|
statusCheckboxes.prop("checked", statusesToggleAll.prop("checked"));
|
||||||
@@ -121,11 +171,15 @@
|
|||||||
// Sources
|
// Sources
|
||||||
sourcesCheckboxes.each(function () { $(this).prop("checked", true); });
|
sourcesCheckboxes.each(function () { $(this).prop("checked", true); });
|
||||||
sourcesToggleAll.prop("checked", true);
|
sourcesToggleAll.prop("checked", true);
|
||||||
|
// Searches
|
||||||
|
searchesCheckboxes.each(function () { $(this).prop("checked", true); });
|
||||||
|
searchesToggleAll.prop("checked", true);
|
||||||
// Statuses
|
// Statuses
|
||||||
statusCheckboxes.each(function () { $(this).prop("checked", true); });
|
statusCheckboxes.each(function () { $(this).prop("checked", true); });
|
||||||
statusesToggleAll.prop("checked", true);
|
statusesToggleAll.prop("checked", true);
|
||||||
// Items
|
// Items
|
||||||
$("input[name='items'][value='" + 15 + "']").prop("checked", true);
|
// $("input[name='items'][value='" + 15 + "']").prop("checked", true);
|
||||||
|
// loadPage(pageNumber=page, itemsNumber=null, sources=null, searches=null, statuses=null);
|
||||||
});
|
});
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////
|
||||||
@@ -148,6 +202,23 @@
|
|||||||
let savedTheme = localStorage.getItem("theme") ||
|
let savedTheme = localStorage.getItem("theme") ||
|
||||||
(window.matchMedia("(prefers-color-scheme: dark)").matches ? "dark" : "light");
|
(window.matchMedia("(prefers-color-scheme: dark)").matches ? "dark" : "light");
|
||||||
setTheme(savedTheme);
|
setTheme(savedTheme);
|
||||||
|
// Local browser timestamp aware for ts_fetch print
|
||||||
|
document.querySelectorAll(".timestamp").forEach(function (el) {
|
||||||
|
const ts = el.getAttribute("data-ts");
|
||||||
|
if (ts) {
|
||||||
|
const options = {
|
||||||
|
day: "2-digit",
|
||||||
|
month: "2-digit",
|
||||||
|
year: "numeric",
|
||||||
|
hour: "2-digit",
|
||||||
|
minute: "2-digit",
|
||||||
|
second: "2-digit",
|
||||||
|
hour12: false // Use 24-hour format
|
||||||
|
}; // "en-GB" for DD-MM-YYYY
|
||||||
|
const localDate = new Date(ts).toLocaleString("en-GB", options); // Adjust to browser's timezone
|
||||||
|
el.innerHTML = `${localDate}`;
|
||||||
|
}
|
||||||
|
});
|
||||||
});
|
});
|
||||||
////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////
|
||||||
</script>
|
</script>
|
||||||
@@ -174,6 +245,9 @@
|
|||||||
box-shadow: 2px 0 5px rgba(0, 0, 0, 0.1);
|
box-shadow: 2px 0 5px rgba(0, 0, 0, 0.1);
|
||||||
padding: 15px;
|
padding: 15px;
|
||||||
transition: width 0.3s ease;
|
transition: width 0.3s ease;
|
||||||
|
/* Enable scrolling */
|
||||||
|
overflow-y: auto;
|
||||||
|
max-height: 100vh;
|
||||||
}
|
}
|
||||||
|
|
||||||
#sidebar .nav-link {
|
#sidebar .nav-link {
|
||||||
@@ -313,10 +387,10 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
th:nth-child(1), td:nth-child(1) { width: 50%; } /* URL column */
|
th:nth-child(1), td:nth-child(1) { width: 50%; } /* URL column */
|
||||||
th:nth-child(2), td:nth-child(2) { width: 20%; } /* Fetch Date */
|
th:nth-child(2), td:nth-child(2) { width: 27.5%; } /* Fetch Date */
|
||||||
th:nth-child(3), td:nth-child(3) { width: 20%; } /* Sources */
|
th:nth-child(3), td:nth-child(3) { width: 10%; } /* Sources */
|
||||||
th:nth-child(4), td:nth-child(4) { width: 5%; } /* Status */
|
th:nth-child(4), td:nth-child(4) { width: 10%; } /* Searches */
|
||||||
th:nth-child(5), td:nth-child(5) { width: 5%; } /* Action */
|
th:nth-child(5), td:nth-child(5) { width: 2.5%; } /* Status */
|
||||||
|
|
||||||
/* ============================= */
|
/* ============================= */
|
||||||
/* Pagination Styling */
|
/* Pagination Styling */
|
||||||
@@ -407,33 +481,23 @@
|
|||||||
<span id="theme-icon">🌙</span>
|
<span id="theme-icon">🌙</span>
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Sources -->
|
|
||||||
<div class="nav-item mt-3">
|
|
||||||
<strong>Select sources</strong>
|
|
||||||
<form id="source-filter-form">
|
|
||||||
<!-- Toggle All Checkbox -->
|
|
||||||
<div class="form-check">
|
|
||||||
<input class="form-check-input" type="checkbox" id="toggle-all-sources">
|
|
||||||
<label class="form-check-label fw-bold" for="toggle-all-sources">
|
|
||||||
Toggle all
|
|
||||||
</label>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<!-- Individual Source Checkboxes -->
|
<!-- URLs per page -->
|
||||||
{% for source in sources %}
|
<div class="nav-item mt-3">
|
||||||
<div class="form-check">
|
<strong>URLs per page</strong>
|
||||||
<input class="form-check-input source-checkbox" type="checkbox" value="{{ source.id }}" id="source-{{ source.id }}">
|
<div class="card-body">
|
||||||
<label class="form-check-label" for="source-{{ source.id }}">
|
<!-- URLs-per-page radio options -->
|
||||||
{{ source.source }}
|
{% for url_per_page in list_urls_per_page %}
|
||||||
</label>
|
<div class="items-form-check">
|
||||||
|
<input class="form-check-input items" type="radio" name="items" id="value-{{ url_per_page }}" value="{{ url_per_page }}">
|
||||||
|
<label class="form-check-label" for="value-{{ url_per_page }}">{{ url_per_page }}</label>
|
||||||
</div>
|
</div>
|
||||||
{% empty %}
|
{% empty %}
|
||||||
<tr>
|
<tr>
|
||||||
<td colspan="2" class="text-center">No sources available.</td>
|
<td colspan="2" class="text-center">No options available.</td>
|
||||||
</tr>
|
</tr>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</form>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Status -->
|
<!-- Status -->
|
||||||
@@ -457,6 +521,33 @@
|
|||||||
</label>
|
</label>
|
||||||
</div>
|
</div>
|
||||||
{% empty %}
|
{% empty %}
|
||||||
|
<tr>
|
||||||
|
<td colspan="2" class="text-center">No statuses available.</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Sources -->
|
||||||
|
<div class="nav-item mt-3">
|
||||||
|
<strong>Select sources</strong>
|
||||||
|
<form id="source-filter-form">
|
||||||
|
<!-- Toggle All Checkbox -->
|
||||||
|
<div class="form-check">
|
||||||
|
<input class="form-check-input" type="checkbox" id="toggle-all-sources">
|
||||||
|
<label class="form-check-label fw-bold" for="toggle-all-sources">
|
||||||
|
Toggle all
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
<!-- Individual Source Checkboxes -->
|
||||||
|
{% for source in sources %}
|
||||||
|
<div class="form-check">
|
||||||
|
<input class="form-check-input source-checkbox" type="checkbox" value="{{ source.id }}" id="source-{{ source.id }}">
|
||||||
|
<label class="form-check-label" for="source-{{ source.id }}">
|
||||||
|
{{ source.source }}
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
{% empty %}
|
||||||
<tr>
|
<tr>
|
||||||
<td colspan="2" class="text-center">No sources available.</td>
|
<td colspan="2" class="text-center">No sources available.</td>
|
||||||
</tr>
|
</tr>
|
||||||
@@ -464,26 +555,34 @@
|
|||||||
</form>
|
</form>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- URLs per page -->
|
<!-- Searches -->
|
||||||
<div class="nav-item mt-3">
|
<div class="nav-item mt-3">
|
||||||
<strong>URLs per page</strong>
|
<strong>Select searches</strong>
|
||||||
<div class="card-body">
|
<form id="search-filter-form">
|
||||||
<!-- Individual Status Checkboxes -->
|
<!-- Toggle All Checkbox -->
|
||||||
{% for url_per_page in list_urls_per_page %}
|
<div class="form-check">
|
||||||
<div class="items-form-check">
|
<input class="form-check-input" type="checkbox" id="toggle-all-searches">
|
||||||
<input class="form-check-input items" type="radio" name="items" id="value-{{ url_per_page }}" value="{{ url_per_page }}">
|
<label class="form-check-label fw-bold" for="toggle-all-searches">
|
||||||
<label class="form-check-label" for="value-{{ url_per_page }}">{{ url_per_page }}</label>
|
Toggle all
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
<!-- Individual Search Checkboxes -->
|
||||||
|
{% for search in searches %}
|
||||||
|
<div class="form-check">
|
||||||
|
<input class="form-check-input search-checkbox" type="checkbox" value="{{ search.id }}" id="search-{{ search.id }}">
|
||||||
|
<label class="form-check-label" for="search-{{ search.id }}">
|
||||||
|
[{{ search.type }}] {{ search.search }}
|
||||||
|
</label>
|
||||||
</div>
|
</div>
|
||||||
{% empty %}
|
{% empty %}
|
||||||
<tr>
|
<tr>
|
||||||
<td colspan="2" class="text-center">No options available.</td>
|
<td colspan="2" class="text-center">No search available.</td>
|
||||||
</tr>
|
</tr>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</div>
|
</form>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
</ul>
|
</ul>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|||||||
@@ -7,15 +7,18 @@
|
|||||||
<th scope="col"><strong>URL</strong></th>
|
<th scope="col"><strong>URL</strong></th>
|
||||||
<th scope="col"><strong>Fetch date</strong></th>
|
<th scope="col"><strong>Fetch date</strong></th>
|
||||||
<th scope="col"><strong>Sources</strong></th>
|
<th scope="col"><strong>Sources</strong></th>
|
||||||
|
<th scope="col"><strong>Search</strong></th>
|
||||||
<th scope="col"><strong>Status</strong></th>
|
<th scope="col"><strong>Status</strong></th>
|
||||||
<th scope="col"><strong>Action</strong></th>
|
|
||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
{% for item in page_obj %}
|
{% for item in page_obj %}
|
||||||
<tr>
|
<tr>
|
||||||
<td><a href="{{ item.url }}/" target="_blank">{{ item.url }}</a></td>
|
<td>
|
||||||
<td>{{ item.ts_fetch }}</td>
|
<a href="./{{ item.id }}" class="btn btn-primary btn-sm" target="_blank">➤ </a>
|
||||||
|
<a href="{{ item.url }}/" target="_blank">{{ item.url }}</a>
|
||||||
|
</td>
|
||||||
|
<td class="timestamp" data-ts="{{ item.ts_fetch|date:'c' }}">{{ item.ts_fetch }}</td>
|
||||||
<td>
|
<td>
|
||||||
{% with sources_map|dict_get:item.id as sources %}
|
{% with sources_map|dict_get:item.id as sources %}
|
||||||
{% if sources %}
|
{% if sources %}
|
||||||
@@ -27,6 +30,17 @@
|
|||||||
{% endif %}
|
{% endif %}
|
||||||
{% endwith %}
|
{% endwith %}
|
||||||
</td>
|
</td>
|
||||||
|
<td>
|
||||||
|
{% with searches_map|dict_get:item.id as searches %}
|
||||||
|
{% if searches %}
|
||||||
|
{% for search in searches %}
|
||||||
|
<span class="badge bg-secondary">{{ search }}</span>
|
||||||
|
{% endfor %}
|
||||||
|
{% else %}
|
||||||
|
<span class="text-muted">No searches</span>
|
||||||
|
{% endif %}
|
||||||
|
{% endwith %}
|
||||||
|
</td>
|
||||||
<td>
|
<td>
|
||||||
{% if item.status == 'raw' %}
|
{% if item.status == 'raw' %}
|
||||||
<span class="badge bg-secondary">{{ item.status|capfirst }}</span>
|
<span class="badge bg-secondary">{{ item.status|capfirst }}</span>
|
||||||
@@ -43,11 +57,7 @@
|
|||||||
{% else %}
|
{% else %}
|
||||||
<span class="badge bg-light">Unknown</span>
|
<span class="badge bg-light">Unknown</span>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</td>
|
</td>
|
||||||
<td>
|
|
||||||
<a href="url/{{ item.id }}" class="btn btn-primary btn-sm" target="_blank">Details</a>
|
|
||||||
</td>
|
|
||||||
|
|
||||||
</tr>
|
</tr>
|
||||||
{% empty %}
|
{% empty %}
|
||||||
<tr>
|
<tr>
|
||||||
|
|||||||
@@ -54,7 +54,7 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Fetch URL
|
// Fetch URL
|
||||||
let fetchUrl = `/news/url/${urlId}/fetch/?url=${encodeURIComponent(url)}&model=${encodeURIComponent(selectedModel)}&text=${encodeURIComponent(inputText)}`;
|
let fetchUrl = `/api/url/${urlId}/fetch/?url=${encodeURIComponent(url)}&model=${encodeURIComponent(selectedModel)}&text=${encodeURIComponent(inputText)}`;
|
||||||
|
|
||||||
let resultContainer = $("#chat-output");
|
let resultContainer = $("#chat-output");
|
||||||
resultContainer.html(""); // Clear previous content before fetching
|
resultContainer.html(""); // Clear previous content before fetching
|
||||||
@@ -99,12 +99,6 @@
|
|||||||
// Render Markdown progressively (but safely)
|
// Render Markdown progressively (but safely)
|
||||||
messageContainer.html(marked.parse(accumulatedText));
|
messageContainer.html(marked.parse(accumulatedText));
|
||||||
//////////////////////////////////////
|
//////////////////////////////////////
|
||||||
|
|
||||||
//////////////////////////////////////
|
|
||||||
// ORIGINAL:
|
|
||||||
//let text = decoder.decode(value).replace(/\n/g, "<br>");
|
|
||||||
//resultContainer.append(text); // Append streamed text
|
|
||||||
//////////////////////////////////////
|
|
||||||
|
|
||||||
resultContainer.scrollTop(resultContainer[0].scrollHeight); // Auto-scroll to bottom
|
resultContainer.scrollTop(resultContainer[0].scrollHeight); // Auto-scroll to bottom
|
||||||
return read();
|
return read();
|
||||||
@@ -135,12 +129,16 @@
|
|||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<th>Fetch Date</th>
|
<th>Fetch Date</th>
|
||||||
<td>{{ url_item.ts_fetch }}</td>
|
<td>{{ url_item.ts_fetch }} UTC</td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<th>Sources</th>
|
<th>Source</th>
|
||||||
<td>{{ sources|join:", " }}</td>
|
<td>{{ sources|join:", " }}</td>
|
||||||
</tr>
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<th>Search</th>
|
||||||
|
<td>{{ searches|join:", " }}</td>
|
||||||
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<th>Status</th>
|
<th>Status</th>
|
||||||
<td>{{ url_item.status }}</td>
|
<td>{{ url_item.status }}</td>
|
||||||
@@ -175,7 +173,6 @@
|
|||||||
<form onsubmit="fetchDetailsWithSelection(event, {{ url_item.id }}, '{{ url_item.url }}')">
|
<form onsubmit="fetchDetailsWithSelection(event, {{ url_item.id }}, '{{ url_item.url }}')">
|
||||||
<label for="options-{{ url_item.id }}">Model:</label>
|
<label for="options-{{ url_item.id }}">Model:</label>
|
||||||
<select id="options-{{ url_item.id }}" class="form-control mb-2">
|
<select id="options-{{ url_item.id }}" class="form-control mb-2">
|
||||||
<!-- <option value="">-- Select an option --</option> -->
|
|
||||||
{% for model in models %}
|
{% for model in models %}
|
||||||
<option value="{{ model }}">{{ model }}</option>
|
<option value="{{ model }}">{{ model }}</option>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
@@ -185,21 +182,23 @@
|
|||||||
<!-- Input field with a default value -->
|
<!-- Input field with a default value -->
|
||||||
<label for="custom-input-{{ url_item.id }}">Prompt:</label>
|
<label for="custom-input-{{ url_item.id }}">Prompt:</label>
|
||||||
<textarea id="custom-input-{{ url_item.id }}" class="form-control mb-2" rows="3">{{ prompt }} {{ url_item.url }}</textarea>
|
<textarea id="custom-input-{{ url_item.id }}" class="form-control mb-2" rows="3">{{ prompt }} {{ url_item.url }}</textarea>
|
||||||
|
|
||||||
<!-- Fetch details button -->
|
<div class="d-flex align-items-center">
|
||||||
<button class="btn btn-primary" onclick="fetchDetails({{ url_item.id }}, '{{ url_item.url }}')">
|
<!-- Fetch details button -->
|
||||||
Fetch Details
|
<button class="btn btn-primary" onclick="fetchDetails({{ url_item.id }}, '{{ url_item.url }}')">
|
||||||
</button>
|
Fetch Details
|
||||||
|
</button>
|
||||||
|
|
||||||
|
<!-- Loading Spinner (Hidden by Default) -->
|
||||||
|
<div id="loading-spinner" class="spinner-border text-primary ms-2" role="status" style="display: none;">
|
||||||
|
<span class="visually-hidden">Loading...</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<!-- Chatbot-style response box -->
|
<!-- Chatbot-style response box -->
|
||||||
<div class="chat-box mt-3 p-3 border rounded">
|
<div class="chat-box mt-3 p-3 border rounded">
|
||||||
<div id="chat-output"></div>
|
<div id="chat-output"></div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Loading Spinner (Hidden by Default) -->
|
|
||||||
<div id="loading-spinner" class="spinner-border text-primary mt-3" role="status" style="display: none;">
|
|
||||||
<span class="visually-hidden">Loading...</span>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ from . import views
|
|||||||
|
|
||||||
urlpatterns = [
|
urlpatterns = [
|
||||||
path('', views.link_list, name='link_list'),
|
path('', views.link_list, name='link_list'),
|
||||||
path('url/', views.news, name='url_detail'),
|
path('url/', views.urls, name='url_detail'),
|
||||||
path('url/<int:id>/', views.url_detail_view, name='url_detail'),
|
path('url/<int:id>/', views.url_detail_view, name='url_detail'),
|
||||||
path('url/<int:id>/fetch/', views.fetch_details, name='fetch_details'),
|
path('url/<int:id>/fetch/', views.fetch_details, name='fetch_details'),
|
||||||
path('task/<str:task>', views.trigger_task, name='trigger_task'),
|
path('task/<str:task>', views.trigger_task, name='trigger_task'),
|
||||||
|
|||||||
@@ -18,64 +18,80 @@ def link_list(request):
|
|||||||
prefix = "http://localhost:8000/api/task"
|
prefix = "http://localhost:8000/api/task"
|
||||||
links = ["fetch_feeds", "fetch_parser", "fetch_search", "process_raw_urls_50", "process_error_urls_50", "process_missing_kids_urls_50", "process_missing_kids_urls_500000"]
|
links = ["fetch_feeds", "fetch_parser", "fetch_search", "process_raw_urls_50", "process_error_urls_50", "process_missing_kids_urls_50", "process_missing_kids_urls_500000"]
|
||||||
|
|
||||||
db_links = ["http://localhost:8080/?pgsql=matitos_db&username=supermatitos&db=matitos&ns=public&select=urls&order%5B0%5D=id&limit=500"]
|
list_links = [
|
||||||
return JsonResponse({"links": ["http://localhost:8000/api/url"] + db_links + [os.path.join(prefix, l) for l in links]})
|
# DB
|
||||||
|
"http://localhost:8080/?pgsql=matitos_db&username=supermatitos&db=matitos&ns=public&select=urls&order%5B0%5D=id&limit=500",
|
||||||
|
# Admin panel
|
||||||
|
"http://localhost:8000/admin",
|
||||||
|
# URLs
|
||||||
|
"http://localhost:8000/api/url",
|
||||||
|
# API tasks
|
||||||
|
] + [os.path.join(prefix, l) for l in links]
|
||||||
|
# Json
|
||||||
|
return JsonResponse({"links": list_links })
|
||||||
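A minimal usage sketch, assuming the development server is running locally: link_list is routed at the root of the api/ include, so the reorganized list above is returned as one flat JSON array.

import requests  # sketch only; not part of this commit

links = requests.get("http://localhost:8000/api/").json()["links"]
for link in links:
    print(link)  # adminer, admin panel, /api/url, then the /api/task/... triggers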
|
|
||||||
|
|
||||||
from django.http import StreamingHttpResponse, HttpResponse, JsonResponse
|
from django.http import StreamingHttpResponse, JsonResponse
|
||||||
from django.shortcuts import render, get_object_or_404
|
from django.shortcuts import render, get_object_or_404
|
||||||
from django.core.paginator import Paginator
|
from django.core.paginator import Paginator
|
||||||
import requests
|
|
||||||
from django.http import StreamingHttpResponse
|
|
||||||
import json
|
|
||||||
import time
|
|
||||||
import ollama
|
import ollama
|
||||||
|
|
||||||
from .models import Urls, Source, Search, UrlsSourceSearch, UrlContent
|
from .models import Urls, Source, Search, UrlContent, UrlsSourceSearch
|
||||||
|
|
||||||
# Create your views here.
|
# Create your views here.
|
||||||
def news(request):
|
def urls(request):
|
||||||
# URLs
|
# URLs
|
||||||
urls = Urls.objects.all()
|
urls = Urls.objects.all()
|
||||||
# Sources
|
# Sources
|
||||||
sources = Source.objects.all()
|
sources = Source.objects.all()
|
||||||
seaerches = Search.objects.all()
|
searches = Search.objects.all()
|
||||||
|
|
||||||
# Parameters
|
# Parameters
|
||||||
page_number = request.GET.get("page", 1)
|
page_number = request.GET.get("page", 1)
|
||||||
num_items = request.GET.get("items", 15)
|
num_items = request.GET.get("items", 15)
|
||||||
source_ids = request.GET.get("sources", ','.join([str(s.id) for s in sources]))
|
source_ids = request.GET.get("sources", ','.join([str(s.id) for s in sources]))
|
||||||
|
search_ids = request.GET.get("searches", ','.join([str(s.id) for s in searches]))
|
||||||
status_filters = request.GET.get("status", None)
|
status_filters = request.GET.get("status", None)
|
||||||
|
|
||||||
# Filters
|
# Filters
|
||||||
if (status_filters) and (status_filters != "all"):
|
if (status_filters) and (status_filters != "all"):
|
||||||
urls = urls.filter(status__in=status_filters.split(","))
|
if (status_filters == "none"):
|
||||||
|
urls = []
|
||||||
|
else:
|
||||||
|
urls = urls.filter(status__in=status_filters.split(","))
|
||||||
if (source_ids) and (source_ids != "all"):
|
if (source_ids) and (source_ids != "all"):
|
||||||
# TODO: Distinct needed?
|
if (source_ids == "none"):
|
||||||
# urls = urls.filter(urlssource__id_source__in=source_ids.split(",")).distinct()
|
urls = []
|
||||||
pass
|
else:
|
||||||
|
urls = urls.filter(urlssourcesearch__id_source__in=source_ids.split(",")) # .distinct()
|
||||||
|
if (search_ids) and (search_ids != "all"):
|
||||||
|
if (search_ids == "none"):
|
||||||
|
urls = []
|
||||||
|
else:
|
||||||
|
urls = urls.filter(urlssourcesearch__id_search__in=search_ids.split(",")) # .distinct()
|
||||||
|
|
||||||
# Pagination
|
# Pagination
|
||||||
paginator = Paginator(urls, num_items)
|
paginator = Paginator(urls, num_items)
|
||||||
page_obj = paginator.get_page(page_number)
|
page_obj = paginator.get_page(page_number)
|
||||||
|
|
||||||
# Map URL IDs to their sources, only for subset of URLs (page of interest)
|
# Map URL IDs to their sources & searches, restricted to the URLs on the current page
|
||||||
sources_map= {}
|
|
||||||
"""
|
|
||||||
sources_map = {
|
sources_map = {
|
||||||
url.id: list(Source.objects.filter(urlssource__id_url=url).values_list('source', flat=True))
|
url.id: list(Source.objects.filter(urlssourcesearch__id_url=url).distinct()) for url in page_obj.object_list
|
||||||
for url in page_obj.object_list
|
}
|
||||||
|
searches_map = {
|
||||||
|
url.id: list(Search.objects.filter(urlssourcesearch__id_url=url).distinct()) for url in page_obj.object_list
|
||||||
}
|
}
|
||||||
"""
|
|
||||||
|
|
||||||
context = {
|
context = {
|
||||||
"page_obj": page_obj,
|
"page_obj": page_obj,
|
||||||
"sources": sources,
|
"sources": sources,
|
||||||
|
"searches": searches,
|
||||||
"sources_map": sources_map,
|
"sources_map": sources_map,
|
||||||
|
"searches_map": searches_map,
|
||||||
"list_status": Urls.STATUS_ENUM.values,
|
"list_status": Urls.STATUS_ENUM.values,
|
||||||
"list_urls_per_page": [15, 50, 100],
|
"list_urls_per_page": [15, 100, 500],
|
||||||
}
|
}
|
||||||
|
|
||||||
# If request is AJAX, return JSON response
|
# If request is AJAX, return JSON response
|
||||||
if request.headers.get("X-Requested-With") == "XMLHttpRequest":
|
if request.headers.get("X-Requested-With") == "XMLHttpRequest":
|
||||||
return JsonResponse({'items_html': render(request, 'item_list_partial.html', context).content.decode('utf-8')})
|
return JsonResponse({'items_html': render(request, 'item_list_partial.html', context).content.decode('utf-8')})
|
||||||
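The three filter parameters above share one convention: "all" (or an absent value) means no filtering, "none" means an empty result, and anything else is a comma-separated list of ids. A compact sketch of that rule as a helper; apply_id_filter is hypothetical and not part of this commit. Using queryset.none() keeps the result chainable, whereas the code above assigns a plain [] (which the Paginator also accepts).

def apply_id_filter(queryset, raw_value, field_lookup):
    # "all" / missing -> unchanged, "none" -> empty, otherwise a CSV of ids
    if not raw_value or raw_value == "all":
        return queryset
    if raw_value == "none":
        return queryset.none()
    return queryset.filter(**{field_lookup + "__in": raw_value.split(",")})

# e.g. urls = apply_id_filter(urls, request.GET.get("sources"), "urlssourcesearch__id_source")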
@@ -83,32 +99,54 @@ def news(request):
|
|||||||
return render(request, "item_list.html", context)
|
return render(request, "item_list.html", context)
|
||||||
|
|
||||||
|
|
||||||
|
class OllamaClient():
|
||||||
|
def __init__(self):
|
||||||
|
self.client = ollama.Client(host=os.getenv("ENDPOINT_OLLAMA", "https://ollamamodel.matitos.org"))
|
||||||
|
|
||||||
|
def _get_default_model(self):
|
||||||
|
return "gemma3:1b"
|
||||||
|
|
||||||
|
def get_models(self):
|
||||||
|
models = sorted([m.model for m in self.client.list().models])
|
||||||
|
if (self._get_default_model() in models):
|
||||||
|
return [self._get_default_model()] + [m for m in models if m != self._get_default_model()]
|
||||||
|
else:
|
||||||
|
return models
|
||||||
|
|
||||||
|
def get_prompt(self):
|
||||||
|
return "Provide a summary of the content below, avoid mentioning the source of information, and only answer with the summary. The summary needs to be brief and compact, consisting of one paragraph."
|
||||||
|
#return "Explain in a single and compact paragraph the what, why, when, where, who, and how of the content below. Also provide a single paragraph summary of the content:"
|
||||||
|
#return "Provide in one paragraph the what, why, when, where, who, and how of the content below. Also provide a one paragraph summary of the content:"
|
||||||
|
#return "Provide two summaries of the content below, and avoid mentioning the source of information. First, provide a very brief and compact paragraph summary. Second, provide a larger and more detailed summary, which describe the what, why, when, where, who, and how of the content:"
|
||||||
|
# return "Imagine you are a journalist, TLDR in a paragraph. Only answer with the summary:"
|
||||||
|
#return "Below you will find the whole content of a news article:\n{}\nProvide a concise summary of one paragraph maximum of the content.".format(content)
|
||||||
|
|
||||||
|
|
||||||
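A minimal sketch of how the OllamaClient wrapper above can be exercised on its own; the host and the default model name come from the diff and are not verified here.

client = OllamaClient()
models = client.get_models()    # default model ("gemma3:1b") listed first when available
print(models)
print(client.get_prompt())      # the shared one-paragraph summary prompt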
def url_detail_view(request, id):
|
def url_detail_view(request, id):
|
||||||
url_item = get_object_or_404(Urls, id=id)
|
url_item = get_object_or_404(Urls, id=id)
|
||||||
url_sources = list(Source.objects.filter(urlssource__id_url=url_item).values_list('source', flat=True))
|
url_sources = list(Source.objects.filter(urlssourcesearch__id_url=url_item).distinct())
|
||||||
|
url_searches = list(Search.objects.filter(urlssourcesearch__id_url=url_item).distinct())
|
||||||
|
# url_source_search = UrlsSourceSearch.objects.filter(id_url=url_item)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
url_content = UrlContent.objects.get(pk=id)
|
url_content = UrlContent.objects.get(pk=id)
|
||||||
except UrlContent.DoesNotExist:
|
except UrlContent.DoesNotExist:
|
||||||
url_content = {}
|
url_content = {}
|
||||||
|
|
||||||
# TODO: https://github.com/ollama/ollama-python?tab=readme-ov-file#async-client
|
# TODO: https://github.com/ollama/ollama-python?tab=readme-ov-file#async-client
|
||||||
# LLM models available
|
ollama = OllamaClient()
|
||||||
client = ollama.Client(host = 'https://ollamamodel.matitos.org')
|
|
||||||
models = sorted([m.model for m in client.list().models])
|
|
||||||
# default_model = "llama3.2:3b"
|
|
||||||
|
|
||||||
context = {
|
context = {
|
||||||
'url_item': url_item,
|
'url_item': url_item,
|
||||||
'sources': url_sources,
|
'sources': url_sources,
|
||||||
'models': models,
|
'searches': url_searches,
|
||||||
#'default_model': default_model,
|
'models': ollama.get_models(),
|
||||||
'prompt': "Provide in one paragraph the what, why, when, where, who, and how of the content below. Also provide a one paragraph summary of the content:",
|
'prompt': ollama.get_prompt(),
|
||||||
#"prompt": "Image you are a journalist, TLDR in a paragraph:",
|
|
||||||
#"prompt": "Below you will find the whole content of a news article:\n{}\nProvide a concise summary of one paragraph maximum of the content.".format(content)
|
|
||||||
'url_content': url_content,
|
'url_content': url_content,
|
||||||
}
|
}
|
||||||
return render(request, 'url_detail.html', context)
|
return render(request, 'url_detail.html', context)
|
||||||
|
|
||||||
|
# TODO: move to ollamajs...
|
||||||
def fetch_details(request, id):
|
def fetch_details(request, id):
|
||||||
url_item = get_object_or_404(Urls, id=id)
|
url_item = get_object_or_404(Urls, id=id)
|
||||||
url_param = request.GET.get("url", "") # Get URL
|
url_param = request.GET.get("url", "") # Get URL
|
||||||
@@ -116,14 +154,14 @@ def fetch_details(request, id):
|
|||||||
text = request.GET.get("text", "") # Get LLM prompt
|
text = request.GET.get("text", "") # Get LLM prompt
|
||||||
|
|
||||||
# LLM
|
# LLM
|
||||||
client = ollama.Client(host = 'https://ollamamodel.matitos.org')
|
ollama = OllamaClient()
|
||||||
|
|
||||||
def stream_response():
|
def stream_response():
|
||||||
msg_content = {
|
msg_content = {
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": text,
|
"content": text,
|
||||||
}
|
}
|
||||||
response = client.chat(model=model, messages=[msg_content], stream=True)
|
response = ollama.client.chat(model=model, messages=[msg_content], stream=True)
|
||||||
for chunk in response:
|
for chunk in response:
|
||||||
yield chunk["message"]["content"] # Stream each chunk of text
|
yield chunk["message"]["content"] # Stream each chunk of text
|
||||||
|
|
||||||
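The return statement of fetch_details falls outside this hunk; a sketch of how a chunk generator like stream_response is typically wrapped, using the StreamingHttpResponse already imported in views.py.

from django.http import StreamingHttpResponse

def as_streaming_response(stream_response):
    # sketch only: stream the generator's chunks to the client as plain text
    return StreamingHttpResponse(stream_response(), content_type="text/plain")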
|
|||||||
@@ -124,9 +124,6 @@ SCHEDULER_QUEUES = {
|
|||||||
'PORT': os.environ.get("REDIS_PORT", 6379),
|
'PORT': os.environ.get("REDIS_PORT", 6379),
|
||||||
'DB': os.environ.get("REDIS_DB", 0),
|
'DB': os.environ.get("REDIS_DB", 0),
|
||||||
'DEFAULT_TIMEOUT': os.environ.get("RQ_DEFAULT_TIMEOUT", 60*15),
|
'DEFAULT_TIMEOUT': os.environ.get("RQ_DEFAULT_TIMEOUT", 60*15),
|
||||||
#'USERNAME': 'some-user',
|
|
||||||
#'PASSWORD': 'some-password',
|
|
||||||
#'DEFAULT_TIMEOUT': 360,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
SCHEDULER_CONFIG = {
|
SCHEDULER_CONFIG = {
|
||||||
|
|||||||
@@ -20,6 +20,5 @@ from django.urls import path, include
|
|||||||
urlpatterns = [
|
urlpatterns = [
|
||||||
path('admin/', admin.site.urls),
|
path('admin/', admin.site.urls),
|
||||||
path('api/', include('api.urls')),
|
path('api/', include('api.urls')),
|
||||||
#path('scheduler/', include('django_rq.urls')),
|
|
||||||
path('scheduler/', include('scheduler.urls')),
|
path('scheduler/', include('scheduler.urls')),
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -1,22 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
"""Django's command-line utility for administrative tasks."""
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""Run administrative tasks."""
|
|
||||||
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'mysite.settings')
|
|
||||||
try:
|
|
||||||
from django.core.management import execute_from_command_line
|
|
||||||
except ImportError as exc:
|
|
||||||
raise ImportError(
|
|
||||||
"Couldn't import Django. Are you sure it's installed and "
|
|
||||||
"available on your PYTHONPATH environment variable? Did you "
|
|
||||||
"forget to activate a virtual environment?"
|
|
||||||
) from exc
|
|
||||||
execute_from_command_line(sys.argv)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main()
|
|
||||||
@@ -1,16 +0,0 @@
|
|||||||
"""
|
|
||||||
ASGI config for mysite project.
|
|
||||||
|
|
||||||
It exposes the ASGI callable as a module-level variable named ``application``.
|
|
||||||
|
|
||||||
For more information on this file, see
|
|
||||||
https://docs.djangoproject.com/en/5.1/howto/deployment/asgi/
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
|
|
||||||
from django.core.asgi import get_asgi_application
|
|
||||||
|
|
||||||
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'mysite.settings')
|
|
||||||
|
|
||||||
application = get_asgi_application()
|
|
||||||
@@ -1,132 +0,0 @@
|
|||||||
"""
|
|
||||||
Django settings for mysite project.
|
|
||||||
|
|
||||||
Generated by 'django-admin startproject' using Django 5.1.6.
|
|
||||||
|
|
||||||
For more information on this file, see
|
|
||||||
https://docs.djangoproject.com/en/5.1/topics/settings/
|
|
||||||
|
|
||||||
For the full list of settings and their values, see
|
|
||||||
https://docs.djangoproject.com/en/5.1/ref/settings/
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
# Build paths inside the project like this: BASE_DIR / 'subdir'.
|
|
||||||
BASE_DIR = Path(__file__).resolve().parent.parent
|
|
||||||
|
|
||||||
|
|
||||||
# Quick-start development settings - unsuitable for production
|
|
||||||
# See https://docs.djangoproject.com/en/5.1/howto/deployment/checklist/
|
|
||||||
|
|
||||||
# SECURITY WARNING: keep the secret key used in production secret!
|
|
||||||
SECRET_KEY = 'django-insecure-0+jg0u+%s@sj759i7@jn*%-#jl)8&#=siclb5908pwe!7=*$qb'
|
|
||||||
|
|
||||||
# SECURITY WARNING: don't run with debug turned on in production!
|
|
||||||
DEBUG = True
|
|
||||||
|
|
||||||
ALLOWED_HOSTS = []
|
|
||||||
|
|
||||||
|
|
||||||
# Application definition
|
|
||||||
|
|
||||||
INSTALLED_APPS = [
|
|
||||||
'news.apps.NewsConfig',
|
|
||||||
'django.contrib.admin',
|
|
||||||
'django.contrib.auth',
|
|
||||||
'django.contrib.contenttypes',
|
|
||||||
'django.contrib.sessions',
|
|
||||||
'django.contrib.messages',
|
|
||||||
'django.contrib.staticfiles',
|
|
||||||
]
|
|
||||||
|
|
||||||
MIDDLEWARE = [
|
|
||||||
'django.middleware.security.SecurityMiddleware',
|
|
||||||
'django.contrib.sessions.middleware.SessionMiddleware',
|
|
||||||
'django.middleware.common.CommonMiddleware',
|
|
||||||
'django.middleware.csrf.CsrfViewMiddleware',
|
|
||||||
'django.contrib.auth.middleware.AuthenticationMiddleware',
|
|
||||||
'django.contrib.messages.middleware.MessageMiddleware',
|
|
||||||
'django.middleware.clickjacking.XFrameOptionsMiddleware',
|
|
||||||
]
|
|
||||||
|
|
||||||
ROOT_URLCONF = 'mysite.urls'
|
|
||||||
|
|
||||||
TEMPLATES = [
|
|
||||||
{
|
|
||||||
'BACKEND': 'django.template.backends.django.DjangoTemplates',
|
|
||||||
'DIRS': [],
|
|
||||||
'APP_DIRS': True,
|
|
||||||
'OPTIONS': {
|
|
||||||
'context_processors': [
|
|
||||||
'django.template.context_processors.debug',
|
|
||||||
'django.template.context_processors.request',
|
|
||||||
'django.contrib.auth.context_processors.auth',
|
|
||||||
'django.contrib.messages.context_processors.messages',
|
|
||||||
],
|
|
||||||
},
|
|
||||||
},
|
|
||||||
]
|
|
||||||
|
|
||||||
WSGI_APPLICATION = 'mysite.wsgi.application'
|
|
||||||
|
|
||||||
|
|
||||||
# Database
|
|
||||||
# https://docs.djangoproject.com/en/5.1/ref/settings/#databases
|
|
||||||
|
|
||||||
DATABASES = {
|
|
||||||
'default': {
|
|
||||||
'ENGINE': 'django.db.backends.postgresql',
|
|
||||||
'NAME': os.environ.get("DJANGO_DB_NAME", "matitos"),
|
|
||||||
'USER': os.environ.get("DJANGO_DB_USER", "supermatitos"),
|
|
||||||
'PASSWORD': os.environ.get("DJANGO_DB_PASSWORD", "supermatitos"),
|
|
||||||
'HOST': os.environ.get("DJANGO_DB_HOST", "localhost"),
|
|
||||||
'PORT': os.environ.get("DJANGO_DB_PORT", "5432"),
|
|
||||||
#'OPTIONS': {
|
|
||||||
# 'options': '-c default_transaction_read_only=on'
|
|
||||||
#}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
# Password validation
|
|
||||||
# https://docs.djangoproject.com/en/5.1/ref/settings/#auth-password-validators
|
|
||||||
|
|
||||||
AUTH_PASSWORD_VALIDATORS = [
|
|
||||||
{
|
|
||||||
'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
|
|
||||||
},
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
# Internationalization
|
|
||||||
# https://docs.djangoproject.com/en/5.1/topics/i18n/
|
|
||||||
|
|
||||||
LANGUAGE_CODE = 'en-us'
|
|
||||||
|
|
||||||
TIME_ZONE = 'UTC'
|
|
||||||
|
|
||||||
USE_I18N = True
|
|
||||||
|
|
||||||
USE_TZ = True
|
|
||||||
|
|
||||||
|
|
||||||
# Static files (CSS, JavaScript, Images)
|
|
||||||
# https://docs.djangoproject.com/en/5.1/howto/static-files/
|
|
||||||
|
|
||||||
STATIC_URL = 'static/'
|
|
||||||
|
|
||||||
# Default primary key field type
|
|
||||||
# https://docs.djangoproject.com/en/5.1/ref/settings/#default-auto-field
|
|
||||||
|
|
||||||
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'
|
|
||||||
@@ -1,26 +0,0 @@
|
|||||||
"""
|
|
||||||
URL configuration for mysite project.
|
|
||||||
|
|
||||||
The `urlpatterns` list routes URLs to views. For more information please see:
|
|
||||||
https://docs.djangoproject.com/en/5.1/topics/http/urls/
|
|
||||||
Examples:
|
|
||||||
Function views
|
|
||||||
1. Add an import: from my_app import views
|
|
||||||
2. Add a URL to urlpatterns: path('', views.home, name='home')
|
|
||||||
Class-based views
|
|
||||||
1. Add an import: from other_app.views import Home
|
|
||||||
2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
|
|
||||||
Including another URLconf
|
|
||||||
1. Import the include() function: from django.urls import include, path
|
|
||||||
2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
|
|
||||||
"""
|
|
||||||
from django.contrib import admin
|
|
||||||
from django.urls import include, path
|
|
||||||
from django.views.generic.base import RedirectView
|
|
||||||
|
|
||||||
urlpatterns = [
|
|
||||||
path("", RedirectView.as_view(url='news/', permanent=False)),
|
|
||||||
path("news/", include("news.urls")),
|
|
||||||
path('admin/', admin.site.urls),
|
|
||||||
# path("facerecognition", include("facerecognition.urls")),
|
|
||||||
]
|
|
||||||
@@ -1,16 +0,0 @@
|
|||||||
"""
|
|
||||||
WSGI config for mysite project.
|
|
||||||
|
|
||||||
It exposes the WSGI callable as a module-level variable named ``application``.
|
|
||||||
|
|
||||||
For more information on this file, see
|
|
||||||
https://docs.djangoproject.com/en/5.1/howto/deployment/wsgi/
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
|
|
||||||
from django.core.wsgi import get_wsgi_application
|
|
||||||
|
|
||||||
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'mysite.settings')
|
|
||||||
|
|
||||||
application = get_wsgi_application()
|
|
||||||
@@ -1,9 +0,0 @@
|
|||||||
from django.contrib import admin
|
|
||||||
|
|
||||||
# Register your models here.
|
|
||||||
|
|
||||||
from .models import Urls, UrlsSource, Source
|
|
||||||
|
|
||||||
admin.site.register(Urls)
|
|
||||||
admin.site.register(UrlsSource)
|
|
||||||
admin.site.register(Source)
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
from django.apps import AppConfig
|
|
||||||
|
|
||||||
|
|
||||||
class NewsConfig(AppConfig):
|
|
||||||
default_auto_field = 'django.db.models.BigAutoField'
|
|
||||||
name = 'news'
|
|
||||||
@@ -1,38 +0,0 @@
|
|||||||
# Generated by Django 5.1.6 on 2025-02-20 15:36
|
|
||||||
|
|
||||||
import django.db.models.deletion
|
|
||||||
from django.db import migrations, models
|
|
||||||
|
|
||||||
|
|
||||||
class Migration(migrations.Migration):
|
|
||||||
|
|
||||||
initial = True
|
|
||||||
|
|
||||||
dependencies = [
|
|
||||||
]
|
|
||||||
|
|
||||||
operations = [
|
|
||||||
migrations.CreateModel(
|
|
||||||
name='SOURCE',
|
|
||||||
fields=[
|
|
||||||
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
|
||||||
('source', models.TextField()),
|
|
||||||
],
|
|
||||||
),
|
|
||||||
migrations.CreateModel(
|
|
||||||
name='URL',
|
|
||||||
fields=[
|
|
||||||
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
|
||||||
('url', models.TextField()),
|
|
||||||
('pub_date', models.DateTimeField(verbose_name='date published')),
|
|
||||||
],
|
|
||||||
),
|
|
||||||
migrations.CreateModel(
|
|
||||||
name='URL_SOURCE',
|
|
||||||
fields=[
|
|
||||||
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
|
||||||
('source', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, to='news.source')),
|
|
||||||
('url', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, to='news.url')),
|
|
||||||
],
|
|
||||||
),
|
|
||||||
]
|
|
||||||
@@ -1,25 +0,0 @@
|
|||||||
# Generated by Django 5.1.6 on 2025-02-20 16:11
|
|
||||||
|
|
||||||
from django.db import migrations
|
|
||||||
|
|
||||||
|
|
||||||
class Migration(migrations.Migration):
|
|
||||||
|
|
||||||
dependencies = [
|
|
||||||
('news', '0001_initial'),
|
|
||||||
]
|
|
||||||
|
|
||||||
operations = [
|
|
||||||
migrations.AlterModelTable(
|
|
||||||
name='source',
|
|
||||||
table='source',
|
|
||||||
),
|
|
||||||
migrations.AlterModelTable(
|
|
||||||
name='url',
|
|
||||||
table='urls',
|
|
||||||
),
|
|
||||||
migrations.AlterModelTable(
|
|
||||||
name='url_source',
|
|
||||||
table='urls_source',
|
|
||||||
),
|
|
||||||
]
|
|
||||||
@@ -1,33 +0,0 @@
|
|||||||
# Generated by Django 5.1.6 on 2025-02-20 16:18
|
|
||||||
|
|
||||||
import django.db.models.functions.datetime
|
|
||||||
from django.db import migrations, models
|
|
||||||
|
|
||||||
|
|
||||||
class Migration(migrations.Migration):
|
|
||||||
|
|
||||||
dependencies = [
|
|
||||||
('news', '0002_alter_source_table_alter_url_table_and_more'),
|
|
||||||
]
|
|
||||||
|
|
||||||
operations = [
|
|
||||||
migrations.RemoveField(
|
|
||||||
model_name='url',
|
|
||||||
name='pub_date',
|
|
||||||
),
|
|
||||||
migrations.AddField(
|
|
||||||
model_name='url',
|
|
||||||
name='status',
|
|
||||||
field=models.CharField(choices=[('raw', 'Raw'), ('error', 'Error'), ('valid', 'Valid'), ('unknown', 'Unknown'), ('invalid', 'Invalid'), ('duplicate', 'Duplicate')], default='raw'),
|
|
||||||
),
|
|
||||||
migrations.AddField(
|
|
||||||
model_name='url',
|
|
||||||
name='ts_fetch',
|
|
||||||
field=models.DateTimeField(db_default=django.db.models.functions.datetime.Now(), verbose_name='Date fetched'),
|
|
||||||
),
|
|
||||||
migrations.AlterField(
|
|
||||||
model_name='url',
|
|
||||||
name='url',
|
|
||||||
field=models.TextField(verbose_name='URL'),
|
|
||||||
),
|
|
||||||
]
|
|
||||||
@@ -1,17 +0,0 @@
|
|||||||
# Generated by Django 5.1.6 on 2025-02-20 16:32
|
|
||||||
|
|
||||||
from django.db import migrations
|
|
||||||
|
|
||||||
|
|
||||||
class Migration(migrations.Migration):
|
|
||||||
|
|
||||||
dependencies = [
|
|
||||||
('news', '0003_remove_url_pub_date_url_status_url_ts_fetch_and_more'),
|
|
||||||
]
|
|
||||||
|
|
||||||
operations = [
|
|
||||||
migrations.AlterUniqueTogether(
|
|
||||||
name='url_source',
|
|
||||||
unique_together={('url', 'source')},
|
|
||||||
),
|
|
||||||
]
|
|
||||||
@@ -1,59 +0,0 @@
|
|||||||
# Generated by Django 5.1.6 on 2025-02-20 16:53
|
|
||||||
|
|
||||||
import django.db.models.deletion
|
|
||||||
from django.db import migrations, models
|
|
||||||
|
|
||||||
|
|
||||||
class Migration(migrations.Migration):
|
|
||||||
|
|
||||||
dependencies = [
|
|
||||||
('news', '0004_alter_url_source_unique_together'),
|
|
||||||
]
|
|
||||||
|
|
||||||
operations = [
|
|
||||||
migrations.CreateModel(
|
|
||||||
name='Urls',
|
|
||||||
fields=[
|
|
||||||
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
|
||||||
('url', models.TextField(unique=True)),
|
|
||||||
('ts_fetch', models.DateTimeField()),
|
|
||||||
('status', models.TextField(choices=[('raw', 'Raw'), ('error', 'Error'), ('valid', 'Valid'), ('unknown', 'Unknown'), ('invalid', 'Invalid'), ('duplicate', 'Duplicate')], default='raw')),
|
|
||||||
],
|
|
||||||
options={
|
|
||||||
'db_table': 'urls',
|
|
||||||
'managed': False,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
migrations.RemoveField(
|
|
||||||
model_name='url_source',
|
|
||||||
name='url',
|
|
||||||
),
|
|
||||||
migrations.AlterUniqueTogether(
|
|
||||||
name='url_source',
|
|
||||||
unique_together=None,
|
|
||||||
),
|
|
||||||
migrations.RemoveField(
|
|
||||||
model_name='url_source',
|
|
||||||
name='source',
|
|
||||||
),
|
|
||||||
migrations.AlterModelOptions(
|
|
||||||
name='source',
|
|
||||||
options={'managed': False},
|
|
||||||
),
|
|
||||||
migrations.CreateModel(
|
|
||||||
name='UrlsSource',
|
|
||||||
fields=[
|
|
||||||
('id_url', models.OneToOneField(db_column='id_url', on_delete=django.db.models.deletion.DO_NOTHING, primary_key=True, serialize=False, to='news.urls')),
|
|
||||||
],
|
|
||||||
options={
|
|
||||||
'db_table': 'urls_source',
|
|
||||||
'managed': False,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
migrations.DeleteModel(
|
|
||||||
name='URL',
|
|
||||||
),
|
|
||||||
migrations.DeleteModel(
|
|
||||||
name='URL_SOURCE',
|
|
||||||
),
|
|
||||||
]
|
|
||||||
@@ -1,17 +0,0 @@
|
|||||||
# Generated by Django 5.1.6 on 2025-03-06 09:36
|
|
||||||
|
|
||||||
from django.db import migrations
|
|
||||||
|
|
||||||
|
|
||||||
class Migration(migrations.Migration):
|
|
||||||
|
|
||||||
dependencies = [
|
|
||||||
('news', '0005_urls_remove_url_source_url_and_more'),
|
|
||||||
]
|
|
||||||
|
|
||||||
operations = [
|
|
||||||
migrations.AlterModelOptions(
|
|
||||||
name='urls',
|
|
||||||
options={'managed': False, 'ordering': ['-ts_fetch']},
|
|
||||||
),
|
|
||||||
]
|
|
||||||
@@ -1,61 +0,0 @@
|
|||||||
from django.db import models
|
|
||||||
from django.contrib.postgres.fields import ArrayField
|
|
||||||
|
|
||||||
# Create your models here.
|
|
||||||
class Urls(models.Model):
|
|
||||||
class STATUS_ENUM(models.TextChoices):
|
|
||||||
RAW = "raw"
|
|
||||||
ERROR = "error"
|
|
||||||
VALID = "valid"
|
|
||||||
UNKNOWN = "unknown"
|
|
||||||
INVALID = "invalid"
|
|
||||||
DUPLICATE = "duplicate"
|
|
||||||
|
|
||||||
url = models.TextField(unique=True)
|
|
||||||
ts_fetch = models.DateTimeField()
|
|
||||||
status = models.TextField(choices=STATUS_ENUM, default=STATUS_ENUM.RAW) # This field type is a guess.
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
return self.url
|
|
||||||
|
|
||||||
class Meta:
|
|
||||||
managed = False
|
|
||||||
db_table = 'urls' # db_table = '{}_urls'.format(project_name)
|
|
||||||
ordering = ["-ts_fetch"]
|
|
||||||
|
|
||||||
class Source(models.Model):
|
|
||||||
id = models.SmallAutoField(primary_key=True)
|
|
||||||
source = models.TextField(unique=True)
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
return self.source
|
|
||||||
|
|
||||||
class Meta:
|
|
||||||
managed = False
|
|
||||||
db_table = 'source'
|
|
||||||
|
|
||||||
class UrlsSource(models.Model):
|
|
||||||
id_url = models.OneToOneField(Urls, models.DO_NOTHING, db_column='id_url', primary_key=True) # The composite primary key (id_url, id_source) found, that is not supported. The first column is selected.
|
|
||||||
id_source = models.ForeignKey(Source, models.DO_NOTHING, db_column='id_source')
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
return "Source: {}, URL: {}".format(self.id_source, self.id_url)
|
|
||||||
|
|
||||||
class Meta:
|
|
||||||
managed = False
|
|
||||||
db_table = 'urls_source'
|
|
||||||
unique_together = (('id_url', 'id_source'),)
|
|
||||||
|
|
||||||
class UrlContent(models.Model):
|
|
||||||
id_url = models.OneToOneField(Urls, models.DO_NOTHING, db_column='id_url', primary_key=True)
|
|
||||||
date_published = models.DateTimeField(blank=True, null=True)
|
|
||||||
title = models.TextField(blank=True, null=True)
|
|
||||||
description = models.TextField(blank=True, null=True)
|
|
||||||
content = models.TextField(blank=True, null=True)
|
|
||||||
tags = ArrayField(models.TextField(blank=True, null=True))
|
|
||||||
authors = ArrayField(models.TextField(blank=True, null=True))
|
|
||||||
image_urls = ArrayField(models.TextField(blank=True, null=True))
|
|
||||||
|
|
||||||
class Meta:
|
|
||||||
managed = False
|
|
||||||
db_table = 'url_content'
|
|
||||||
@@ -1,508 +0,0 @@
|
|||||||
<!DOCTYPE html>
|
|
||||||
<html lang="en">
|
|
||||||
<head>
|
|
||||||
<meta charset="UTF-8">
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
||||||
<title>News</title>
|
|
||||||
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
|
|
||||||
<script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
|
|
||||||
|
|
||||||
<script>
|
|
||||||
|
|
||||||
function getQueryString(pageNumber, itemsNumber, sources, statuses){
|
|
||||||
// Query parameters. If input is null, get most recent value
|
|
||||||
let queryParams = new URLSearchParams(window.location.search);
|
|
||||||
// page
|
|
||||||
if (pageNumber == null) pageNumber = queryParams.get("page") ?? 1;
|
|
||||||
queryParams.set("page", pageNumber);
|
|
||||||
// items
|
|
||||||
if (itemsNumber == null) itemsNumber = queryParams.get("items") ?? 15;
|
|
||||||
queryParams.set("items", itemsNumber);
|
|
||||||
// sources
|
|
||||||
if (sources == null) sources = queryParams.get("sources") ?? "all";
|
|
||||||
queryParams.set("sources", sources);
|
|
||||||
// status
|
|
||||||
if (statuses == null) statuses = queryParams.get("status") ?? "all";
|
|
||||||
queryParams.set("status", statuses);
|
|
||||||
|
|
||||||
// Encoding fix: %2C -> ,
|
|
||||||
let queryParamsString = queryParams.toString();
|
|
||||||
while (queryParamsString.includes("%2C")) {
|
|
||||||
queryParamsString = queryParamsString.replace("%2C", ",");
|
|
||||||
}
|
|
||||||
return queryParamsString;
|
|
||||||
}
|
|
||||||
|
|
||||||
function loadPage(pageNumber, itemsNumber, sources, statuses) {
|
|
||||||
$("#item-list").fadeTo(100, 0.5); // Smooth fade effect
|
|
||||||
$("#loading").show();
|
|
||||||
|
|
||||||
queryParamsString = getQueryString(pageNumber, itemsNumber, sources, statuses);
|
|
||||||
|
|
||||||
$.ajax({
|
|
||||||
url: "?" + queryParamsString,
|
|
||||||
type: "GET",
|
|
||||||
headers: { "X-Requested-With": "XMLHttpRequest" },
|
|
||||||
success: function (data) {
|
|
||||||
$("#item-list").fadeTo(0, 1).html(data.items_html); // Restore opacity smoothly
|
|
||||||
$("#loading").hide();
|
|
||||||
// Update URL without reloading
|
|
||||||
window.history.pushState({}, "", "?" + queryParamsString);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Pagination
|
|
||||||
////////////////////////////////////////////////////////////////////////////
|
|
||||||
$(document).on("click", ".pagination a", function (event) {
|
|
||||||
event.preventDefault();
|
|
||||||
let page = $(this).attr("data-page");
|
|
||||||
loadPage(pageNumber=page, itemsNumber=null, sources=null, statuses=null);
|
|
||||||
});
|
|
||||||
|
|
||||||
$(document).ready(function () {
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Filter updates
|
|
||||||
////////////////////////////////////////////////////////////////////////////
|
|
||||||
const sourcesToggleAll = $("#toggle-all-sources");
|
|
||||||
const sourcesCheckboxes = $(".source-checkbox");
|
|
||||||
const statusesToggleAll = $("#toggle-all-status");
|
|
||||||
const statusCheckboxes = $(".status-checkbox");
|
|
||||||
|
|
||||||
function updateFilters() {
|
|
||||||
// Get selected sources
|
|
||||||
let selectedSources = sourcesCheckboxes.filter(":checked").map(function () {
|
|
||||||
return $(this).val();
|
|
||||||
}).get().join(",");
|
|
||||||
|
|
||||||
// Get selected URL statuses
|
|
||||||
let selectedStatuses = statusCheckboxes.filter(":checked").map(function () {
|
|
||||||
return $(this).val();
|
|
||||||
}).get().join(",");
|
|
||||||
|
|
||||||
// Get selected items per page
|
|
||||||
let selectedItems = $("input[name='items']:checked").val();
|
|
||||||
|
|
||||||
// Update pagination and reload data
|
|
||||||
loadPage(1, selectedItems, selectedSources, selectedStatuses);
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Change triggers
|
|
||||||
////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Sources
|
|
||||||
sourcesToggleAll.on("change", function () {
|
|
||||||
sourcesCheckboxes.prop("checked", sourcesToggleAll.prop("checked"));
|
|
||||||
updateFilters();
|
|
||||||
});
|
|
||||||
sourcesCheckboxes.on("change", function () {
|
|
||||||
sourcesToggleAll.prop("checked", sourcesCheckboxes.length === sourcesCheckboxes.filter(":checked").length);
|
|
||||||
updateFilters();
|
|
||||||
});
|
|
||||||
// Status
|
|
||||||
statusesToggleAll.on("change", function () {
|
|
||||||
statusCheckboxes.prop("checked", statusesToggleAll.prop("checked"));
|
|
||||||
updateFilters();
|
|
||||||
});
|
|
||||||
statusCheckboxes.on("change", function () {
|
|
||||||
// If all checkboxes are checked, mark "Toggle All" as checked
|
|
||||||
statusesToggleAll.prop("checked", statusCheckboxes.length === statusCheckboxes.filter(":checked").length);
|
|
||||||
updateFilters();
|
|
||||||
});
|
|
||||||
|
|
||||||
// Items change trigger update
|
|
||||||
$(".items").on("change", updateFilters);
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Default values
|
|
||||||
////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Sources
|
|
||||||
sourcesCheckboxes.each(function () { $(this).prop("checked", true); });
|
|
||||||
sourcesToggleAll.prop("checked", true);
|
|
||||||
// Statuses
|
|
||||||
statusCheckboxes.each(function () { $(this).prop("checked", true); });
|
|
||||||
statusesToggleAll.prop("checked", true);
|
|
||||||
// Items
|
|
||||||
$("input[name='items'][value='" + 15 + "']").prop("checked", true);
|
|
||||||
});
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Theme logic
|
|
||||||
////////////////////////////////////////////////////////////////////////////
|
|
||||||
function setTheme(mode) {
|
|
||||||
document.documentElement.setAttribute("data-theme", mode);
|
|
||||||
document.documentElement.setAttribute("data-bs-theme", mode);
|
|
||||||
localStorage.setItem("theme", mode);
|
|
||||||
document.getElementById("theme-icon").innerHTML = mode === "dark" ? "🌞" : "🌙";
|
|
||||||
document.body.classList.toggle("dark-mode", mode === "dark");
|
|
||||||
}
|
|
||||||
|
|
||||||
function toggleTheme() {
|
|
||||||
let currentTheme = document.documentElement.getAttribute("data-theme");
|
|
||||||
setTheme(currentTheme === "dark" ? "light" : "dark");
|
|
||||||
}
|
|
||||||
|
|
||||||
document.addEventListener("DOMContentLoaded", function () {
|
|
||||||
let savedTheme = localStorage.getItem("theme") ||
|
|
||||||
(window.matchMedia("(prefers-color-scheme: dark)").matches ? "dark" : "light");
|
|
||||||
setTheme(savedTheme);
|
|
||||||
});
|
|
||||||
////////////////////////////////////////////////////////////////////////////
|
|
||||||
</script>
|
|
||||||
|
|
||||||
<style>
|
|
||||||
/* Content Area */
|
|
||||||
#content {
|
|
||||||
margin-left: 170px; /* Match sidebar width */
|
|
||||||
min-width: calc(100vw - 170px); /* Ensure it doesn't shrink into the sidebar */
|
|
||||||
width: calc(100vw - 170px); /* Expands based on screen size */
|
|
||||||
padding: 20px;
|
|
||||||
overflow-x: auto; /* Prevent content from being squeezed */
|
|
||||||
transition: margin-left 0.3s ease;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Sidebar Styles */
|
|
||||||
#sidebar {
|
|
||||||
height: 100vh;
|
|
||||||
position: fixed;
|
|
||||||
top: 0;
|
|
||||||
left: 0;
|
|
||||||
width: 170px; /* Default width */
|
|
||||||
background-color: var(--bg-color);
|
|
||||||
box-shadow: 2px 0 5px rgba(0, 0, 0, 0.1);
|
|
||||||
padding: 15px;
|
|
||||||
transition: width 0.3s ease;
|
|
||||||
}
|
|
||||||
|
|
||||||
#sidebar .nav-link {
|
|
||||||
color: var(--text-color);
|
|
||||||
}
|
|
||||||
|
|
||||||
#sidebar .nav-link:hover {
|
|
||||||
background-color: var(--pagination-hover-bg);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ============================= */
|
|
||||||
/* Responsive Enhancements */
|
|
||||||
/* ============================= */
|
|
||||||
@media (min-width: 1200px) {
|
|
||||||
.table {
|
|
||||||
width: 95%; /* Allows table to take more space */
|
|
||||||
margin: 0 auto; /* Centers the table */
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@media (max-width: 768px) {
|
|
||||||
#sidebar {
|
|
||||||
width: 70px; /* Collapse sidebar to smaller width */
|
|
||||||
/*padding: 10px;*/
|
|
||||||
}
|
|
||||||
|
|
||||||
#content {
|
|
||||||
margin-left: 70px; /* Adjust margin to match collapsed sidebar */
|
|
||||||
min-width: calc(100vw - 70px); /* Prevent overlap */
|
|
||||||
/*padding: 10px;*/
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Adjust table for small screens */
|
|
||||||
.table-responsive {
|
|
||||||
overflow-x: auto;
|
|
||||||
}
|
|
||||||
|
|
||||||
.table th,
|
|
||||||
.table td {
|
|
||||||
white-space: nowrap; /* Prevent text wrapping in cells */
|
|
||||||
}
|
|
||||||
|
|
||||||
.table a {
|
|
||||||
word-break: break-word; /* Ensure long URLs break properly */
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ============================= */
|
|
||||||
/* Global Styles */
|
|
||||||
/* ============================= */
|
|
||||||
body {
|
|
||||||
background-color: var(--bg-color);
|
|
||||||
color: var(--text-color);
|
|
||||||
transition: background-color 0.3s, color 0.3s;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ============================= */
|
|
||||||
/* Light & Dark Mode Variables */
|
|
||||||
/* ============================= */
|
|
||||||
:root {
|
|
||||||
--bg-color: #ffffff;
|
|
||||||
--text-color: #212529;
|
|
||||||
--table-bg: #ffffff;
|
|
||||||
--table-text: #000000;
|
|
||||||
--table-border: #dee2e6;
|
|
||||||
--link-color: #007bff;
|
|
||||||
--pagination-bg: #ffffff;
|
|
||||||
--pagination-border: #dee2e6;
|
|
||||||
--pagination-hover-bg: #f8f9fa;
|
|
||||||
--pagination-active-bg: #007bff;
|
|
||||||
--pagination-active-text: #ffffff;
|
|
||||||
--button-bg: #f8f9fa;
|
|
||||||
--button-border: #ced4da;
|
|
||||||
--button-text: #212529;
|
|
||||||
}
|
|
||||||
|
|
||||||
[data-theme="dark"] {
|
|
||||||
--bg-color: #121212;
|
|
||||||
--text-color: #e0e0e0;
|
|
||||||
--table-bg: #1e1e1e;
|
|
||||||
--table-text: #ffffff;
|
|
||||||
--table-border: #2c2c2c;
|
|
||||||
--link-color: #9ec5fe;
|
|
||||||
--pagination-bg: #1e1e1e;
|
|
||||||
--pagination-border: #444;
|
|
||||||
--pagination-hover-bg: #333;
|
|
||||||
--pagination-active-bg: #007bff;
|
|
||||||
--pagination-active-text: #ffffff;
|
|
||||||
--button-bg: #1e1e1e;
|
|
||||||
--button-border: #444;
|
|
||||||
--button-text: #e0e0e0;
}

/* ============================= */
/* Table Styling */
/* ============================= */
.table-responsive {
    width: 100%; /* Ensure it spans the full width of its container */
    max-width: 100%;
    overflow-x: auto;
}

.table {
    background-color: var(--table-bg);
    color: var(--table-text);
    border: 1px solid var(--table-border);
    transition: background-color 0.3s, color 0.3s;

    width: 100%; /* Ensures it takes full width of its container */
    table-layout: auto; /* Allows columns to adjust dynamically */
    /*white-space: nowrap;*/ /* Prevents text wrapping in cells */
}

.table th,
.table td {
    border-color: var(--table-border);
}

.table thead {
    background-color: var(--pagination-active-bg);
    color: var(--pagination-active-text);
}

[data-theme="dark"] .table {
    background-color: var(--table-bg);
    color: var(--table-text);
}

[data-theme="dark"] .table th,
[data-theme="dark"] .table td {
    border-color: var(--table-border);
}

[data-theme="dark"] .table thead {
    background-color: #333;
    color: #fff;
}

th:nth-child(1), td:nth-child(1) { width: 50%; } /* URL column */
th:nth-child(2), td:nth-child(2) { width: 20%; } /* Fetch Date */
th:nth-child(3), td:nth-child(3) { width: 20%; } /* Sources */
th:nth-child(4), td:nth-child(4) { width: 5%; } /* Status */
th:nth-child(5), td:nth-child(5) { width: 5%; } /* Action */

/* ============================= */
/* Pagination Styling */
/* ============================= */
.pagination {
    display: flex;
    justify-content: center;
    padding: 10px 0;
}

.pagination .page-link {
    background-color: var(--pagination-bg);
    border-color: var(--pagination-border);
    color: var(--text-color);
    padding: 10px 14px;
    margin: 0 5px;
    border-radius: 8px;
    transition: background-color 0.3s, color 0.3s, transform 0.2s;
}

.pagination .page-link:hover {
    background-color: var(--pagination-hover-bg);
    transform: scale(1.05);
}

.pagination .active .page-link {
    background-color: var(--pagination-active-bg);
    color: var(--pagination-active-text);
    border-color: var(--pagination-active-bg);
}

/* ============================= */
/* Theme Toggle Button */
/* ============================= */
.theme-toggle-btn {
    background-color: var(--button-bg);
    border: 1px solid var(--button-border);
    color: var(--button-text);
    border-radius: 50%;
    width: 40px;
    height: 40px;
    font-size: 20px;
    display: flex;
    align-items: center;
    justify-content: center;
    transition: background-color 0.3s, color 0.3s, transform 0.2s;
    cursor: pointer;
}

.theme-toggle-btn:hover {
    background-color: var(--pagination-hover-bg);
    transform: rotate(20deg);
}

.theme-toggle-btn:active {
    transform: scale(0.95);
}

/* ============================= */
/* Loading Spinner Styling */
/* ============================= */
#loading {
    position: fixed;
    left: 50%;
    top: 50%;
    transform: translate(-50%, -50%);
    z-index: 1050;
    display: none;
}

.spinner-border {
    width: 4rem;
    height: 4rem;
}

</style>

</head>

<body>

    <!-- Left Sidebar -->
    <div id="sidebar" class="d-flex flex-column">
        <ul class="nav flex-column">

            <!-- Theme Toggle Button -->
            <div class="nav-item">
                <button onclick="toggleTheme()" class="theme-toggle-btn">
                    <span id="theme-icon">🌙</span>
                </button>
            </div>

            <!-- Sources -->
            <div class="nav-item mt-3">
                <strong>Select sources</strong>
                <form id="source-filter-form">
                    <!-- Toggle All Checkbox -->
                    <div class="form-check">
                        <input class="form-check-input" type="checkbox" id="toggle-all-sources">
                        <label class="form-check-label fw-bold" for="toggle-all-sources">
                            Toggle all
                        </label>
                    </div>

                    <!-- Individual Source Checkboxes -->
                    {% for source in sources %}
                    <div class="form-check">
                        <input class="form-check-input source-checkbox" type="checkbox" value="{{ source.id }}" id="source-{{ source.id }}">
                        <label class="form-check-label" for="source-{{ source.id }}">
                            {{ source.source }}
                        </label>
                    </div>
                    {% empty %}
                    <div class="text-muted">No sources available.</div>
                    {% endfor %}
                </form>
            </div>

            <!-- Status -->
            <div class="nav-item mt-3">
                <strong>Select status</strong>
                <form id="status-filter-form">
                    <!-- Toggle All Checkbox -->
                    <div class="status-form-check">
                        <input class="form-check-input" type="checkbox" id="toggle-all-status">
                        <label class="form-check-label fw-bold" for="toggle-all-status">
                            Toggle all
                        </label>
                    </div>

                    <!-- Individual Status Checkboxes -->
                    {% for status in list_status %}
                    <div class="status-form-check">
                        <input class="form-check-input status-checkbox" type="checkbox" value="{{ status }}" id="status-{{ status }}">
                        <label class="form-check-label" for="status-{{ status }}">
                            {{ status }}
                        </label>
                    </div>
                    {% empty %}
                    <div class="text-muted">No statuses available.</div>
                    {% endfor %}
                </form>
            </div>

            <!-- URLs per page -->
            <div class="nav-item mt-3">
                <strong>URLs per page</strong>
                <div class="card-body">
                    <!-- Page size options -->
                    {% for url_per_page in list_urls_per_page %}
                    <div class="items-form-check">
                        <input class="form-check-input items" type="radio" name="items" id="value-{{ url_per_page }}" value="{{ url_per_page }}">
                        <label class="form-check-label" for="value-{{ url_per_page }}">{{ url_per_page }}</label>
                    </div>
                    {% empty %}
                    <div class="text-muted">No options available.</div>
                    {% endfor %}
                </div>
            </div>

        </ul>
    </div>

    <!-- Main Content Area -->
    <div id="content" class="main-content">
        <div class="container mt-4">

            <!-- Table -->
            <div id="item-list">
                {% include 'item_list_partial.html' %}
            </div>
            <!-- Loading... -->
            <div id="loading" class="text-center mt-3" style="display:none;">
                <div class="spinner-border text-primary" role="status">
                    <span class="visually-hidden">Loading...</span>
                </div>
            </div>
        </div>
    </div>

</body>
</html>
@@ -1,87 +0,0 @@
{% load custom_filters %}

<div class="table-responsive">
    <table class="table table-hover">
        <thead>
            <tr>
                <th scope="col"><strong>URL</strong></th>
                <th scope="col"><strong>Fetch date</strong></th>
                <th scope="col"><strong>Sources</strong></th>
                <th scope="col"><strong>Status</strong></th>
                <th scope="col"><strong>Action</strong></th>
            </tr>
        </thead>
        <tbody>
            {% for item in page_obj %}
            <tr>
                <td><a href="{{ item.url }}/" target="_blank">{{ item.url }}</a></td>
                <td>{{ item.ts_fetch }}</td>
                <td>
                    {% with sources_map|dict_get:item.id as sources %}
                    {% if sources %}
                        {% for source in sources %}
                        <span class="badge bg-secondary">{{ source }}</span>
                        {% endfor %}
                    {% else %}
                        <span class="text-muted">No sources</span>
                    {% endif %}
                    {% endwith %}
                </td>
                <td>
                    {% if item.status == 'raw' %}
                        <span class="badge bg-secondary">{{ item.status|capfirst }}</span>
                    {% elif item.status == 'error' %}
                        <span class="badge bg-danger">{{ item.status|capfirst }}</span>
                    {% elif item.status == 'valid' %}
                        <span class="badge bg-success">{{ item.status|capfirst }}</span>
                    {% elif item.status == 'unknown' %}
                        <span class="badge bg-warning">{{ item.status|capfirst }}</span>
                    {% elif item.status == 'invalid' %}
                        <span class="badge bg-danger">{{ item.status|capfirst }}</span>
                    {% elif item.status == 'duplicate' %}
                        <span class="badge bg-info">{{ item.status|capfirst }}</span>
                    {% else %}
                        <span class="badge bg-light">Unknown</span>
                    {% endif %}
                </td>
                <td>
                    <a href="url/{{ item.id }}" class="btn btn-primary btn-sm" target="_blank">Details</a>
                </td>

            </tr>
            {% empty %}
            <tr>
                <td colspan="5" class="text-center">No items available.</td>
            </tr>
            {% endfor %}
        </tbody>
    </table>
</div>

<div class="d-flex justify-content-center mt-3">
    <nav>
        <ul class="pagination">
            {% if page_obj.has_previous %}
            <li class="page-item">
                <a class="page-link" href="#" data-page="1">First</a>
            </li>
            <li class="page-item">
                <a class="page-link" href="#" data-page="{{ page_obj.previous_page_number }}">Previous</a>
            </li>
            {% endif %}

            <li class="page-item active">
                <span class="page-link">Page {{ page_obj.number }} of {{ page_obj.paginator.num_pages }}</span>
            </li>

            {% if page_obj.has_next %}
            <li class="page-item">
                <a class="page-link" href="#" data-page="{{ page_obj.next_page_number }}">Next</a>
            </li>
            <li class="page-item">
                <a class="page-link" href="#" data-page="{{ page_obj.paginator.num_pages }}">Last</a>
            </li>
            {% endif %}
        </ul>
    </nav>
</div>
@@ -1,211 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{% block title %}News{% endblock %}</title>

    <!-- Bootstrap CSS -->
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
    <!-- Add jQuery from CDN (before other scripts) -->
    <script src="https://code.jquery.com/jquery-3.6.4.min.js"></script>
    <!-- Markdown -->
    <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>

    <!-- Custom Styles -->
    <style>
        body {
            background-color: #f4f4f4;
        }
        .navbar-dark .navbar-nav .nav-link {
            color: rgba(255,255,255,0.75);
        }
        .chat-box {
            background-color: #fff;
            border: 1px solid #ddd;
            padding: 15px;
            border-radius: 8px;
            overflow-y: auto; /* Enable vertical scrolling */
            max-width: 100%;
            min-height: 150px;
            max-height: 450px;
            white-space: normal;
            word-wrap: break-word;
            word-break: break-word;
        }

    </style>

</head>
<script>

function fetchDetails(urlId, url) {
    // Show the loading spinner
    document.getElementById("loading-spinner").style.display = "block";

    // Get the input value
    let inputText = document.getElementById(`custom-input-${urlId}`).value;
    // Get the input model
    let selectedModel = document.getElementById(`options-${urlId}`).value;
    // Check if a model is selected
    if (!selectedModel) {
        alert("Please select a model before fetching details.");
        return;
    }

    // Fetch URL
    let fetchUrl = `/news/url/${urlId}/fetch/?url=${encodeURIComponent(url)}&model=${encodeURIComponent(selectedModel)}&text=${encodeURIComponent(inputText)}`;

    let resultContainer = $("#chat-output");
    resultContainer.html(""); // Clear previous content before fetching

    let fetchButton = $("button[onclick^='fetchDetails']"); // Select the button
    fetchButton.prop("disabled", true); // Disable button

    fetch(fetchUrl)
        .then(response => {
            if (!response.ok) {
                throw new Error("Error on network response");
            }
            const reader = response.body.getReader();
            const decoder = new TextDecoder();

            //////////////////////////////////////
            let accumulatedText = ""; // Store streamed text before rendering Markdown
            // Create a temporary container for streaming response
            let messageContainer = $('<div class="chat-message"></div>');
            //let messageContainer = $('');
            resultContainer.append(messageContainer);
            //////////////////////////////////////

            function read() {
                return reader.read().then(({ done, value }) => {
                    if (done) {
                        //////////////////////////////////////
                        messageContainer.html(marked.parse(accumulatedText));
                        //////////////////////////////////////
                        fetchButton.prop("disabled", false); // Re-enable button when done
                        return;
                    }

                    //////////////////////////////////////
                    // Decode the streamed chunk
                    let chunk = decoder.decode(value);
                    // Append to the accumulated text
                    accumulatedText += chunk;
                    // Render Markdown progressively (but safely)
                    messageContainer.html(marked.parse(accumulatedText));
                    //////////////////////////////////////

                    //////////////////////////////////////
                    // ORIGINAL:
                    //let text = decoder.decode(value).replace(/\n/g, "<br>");
                    //resultContainer.append(text); // Append streamed text
                    //////////////////////////////////////

                    resultContainer.scrollTop(resultContainer[0].scrollHeight); // Auto-scroll to bottom
                    return read();
                });
            }
            return read();
        })
        .catch(error => {
            resultContainer.html(`<p class="text-danger">Error fetching details: ${error.message}</p>`);
            fetchButton.prop("disabled", false); // Re-enable button on error
        })
        .finally(() => {
            // Hide the loading spinner after request is complete
            document.getElementById("loading-spinner").style.display = "none";
        });
}
</script>
<body>

    <!-- Main Content -->
    <div class="container mt-4">
        <h2>URL Details</h2>
        <table class="table table-bordered">
            <tr>
                <th>URL</th>
                <td><a href="{{ url_item.url }}" target="_blank">{{ url_item.url }}</a></td>
            </tr>
            <tr>
                <th>Fetch Date</th>
                <td>{{ url_item.ts_fetch }}</td>
            </tr>
            <tr>
                <th>Sources</th>
                <td>{{ sources|join:", " }}</td>
            </tr>
            <tr>
                <th>Status</th>
                <td>{{ url_item.status }}</td>
            </tr>
            <tr>
                <th>Title</th>
                <td>{{ url_content.title }}</td>
            </tr>
            <tr>
                <th>Description</th>
                <td>{{ url_content.description }}</td>
            </tr>
            <tr>
                <th>Content</th>
                <td>{{ url_content.content }}</td>
            </tr>
            <tr>
                <th>Tags</th>
                <td>{{ url_content.tags }}</td>
            </tr>
            <tr>
                <th>Authors</th>
                <td>{{ url_content.authors }}</td>
            </tr>
            <tr>
                <th>Image URLs</th>
                <td>{{ url_content.image_urls }}</td>
            </tr>
        </table>

        <!-- Independent form for optional values -->
        <form onsubmit="fetchDetailsWithSelection(event, {{ url_item.id }}, '{{ url_item.url }}')">
            <label for="options-{{ url_item.id }}">Model:</label>
            <select id="options-{{ url_item.id }}" class="form-control mb-2">
                <!-- <option value="">-- Select an option --</option> -->
                {% for model in models %}
                <option value="{{ model }}">{{ model }}</option>
                {% endfor %}
            </select>
        </form>

        <!-- Input field with a default value -->
        <label for="custom-input-{{ url_item.id }}">Prompt:</label>
        <textarea id="custom-input-{{ url_item.id }}" class="form-control mb-2" rows="3">{{ prompt }} {{ url_item.url }}</textarea>

        <!-- Fetch details button -->
        <button class="btn btn-primary" onclick="fetchDetails({{ url_item.id }}, '{{ url_item.url }}')">
            Fetch Details
        </button>

        <!-- Chatbot-style response box -->
        <div class="chat-box mt-3 p-3 border rounded">
            <div id="chat-output"></div>
        </div>

        <!-- Loading Spinner (Hidden by Default) -->
        <div id="loading-spinner" class="spinner-border text-primary mt-3" role="status" style="display: none;">
            <span class="visually-hidden">Loading...</span>
        </div>

    </div>

    <!-- Bootstrap JS -->
    <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>

    {% block extra_js %}{% endblock %}
</body>
</html>
@@ -1,8 +0,0 @@
from django import template

register = template.Library()

@register.filter
def dict_get(dictionary, key):
    """Custom filter to get a value from a dictionary in Django templates."""
    return dictionary.get(key, [])
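For reference, dict_get is just dict.get with an empty-list default, so templates can write {{ sources_map|dict_get:item.id }} after {% load custom_filters %} and get [] for unknown ids. A minimal sketch of that behavior outside a template (the mapping values below are hypothetical, not from this commit):

    sources_map = {1: ["rss", "crawler"]}   # hypothetical id -> sources mapping
    dict_get(sources_map, 1)                # ["rss", "crawler"]
    dict_get(sources_map, 99)               # [] - missing keys fall back to an empty list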
@@ -1,3 +0,0 @@
from django.test import TestCase

# Create your tests here.
@@ -1,8 +0,0 @@
from django.urls import path

from . import views

urlpatterns = [
    path("", views.news, name="home"),
    path('url/<int:id>/', views.url_detail_view, name='url_detail'),
    path('url/<int:id>/fetch/', views.fetch_details, name='fetch_details'),
]
@@ -1,104 +0,0 @@
from django.http import StreamingHttpResponse, HttpResponse, JsonResponse
from django.shortcuts import render, get_object_or_404
from django.core.paginator import Paginator
import requests
from django.http import StreamingHttpResponse
import json
import time
import ollama

from .models import Urls, Source, UrlsSource, UrlContent

# Create your views here.
def index(request):
    return HttpResponse("Hello, world. You're at the news index.")

def news(request):
    # URLs
    urls = Urls.objects.all()
    # Sources
    sources = Source.objects.all()

    # Parameters
    page_number = request.GET.get("page", 1)
    num_items = request.GET.get("items", 15)
    source_ids = request.GET.get("sources", ','.join([str(s.id) for s in sources]))
    status_filters = request.GET.get("status", None)

    # Filters
    if (status_filters) and (status_filters != "all"):
        urls = urls.filter(status__in=status_filters.split(","))
    if (source_ids) and (source_ids != "all"):
        # TODO: Distinct needed?
        urls = urls.filter(urlssource__id_source__in=source_ids.split(",")).distinct()

    # Pagination
    paginator = Paginator(urls, num_items)
    page_obj = paginator.get_page(page_number)

    # Map URL IDs to their sources, only for subset of URLs (page of interest)
    sources_map = {
        url.id: list(Source.objects.filter(urlssource__id_url=url).values_list('source', flat=True))
        for url in page_obj.object_list
    }

    context = {
        "page_obj": page_obj,
        "sources": sources,
        "sources_map": sources_map,
        "list_status": Urls.STATUS_ENUM.values,
        "list_urls_per_page": [15, 50, 100],
    }

    # If request is AJAX, return JSON response
    if request.headers.get("X-Requested-With") == "XMLHttpRequest":
        return JsonResponse({'items_html': render(request, 'item_list_partial.html', context).content.decode('utf-8')})

    return render(request, "item_list.html", context)
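The news() view above is driven entirely by query parameters, and its AJAX branch returns the rendered partial as JSON. A minimal sketch of exercising it outside the browser; the host, port, and parameter values are assumptions, not part of this commit:

    import requests

    params = {
        "page": 2,               # page number for the paginator
        "items": 50,             # URLs per page
        "sources": "1,3",        # comma-separated Source ids, or "all"
        "status": "valid,error", # comma-separated status values, or "all"
    }
    headers = {"X-Requested-With": "XMLHttpRequest"}  # triggers the JsonResponse branch
    resp = requests.get("http://localhost:8000/news/", params=params, headers=headers)
    items_html = resp.json()["items_html"]  # HTML fragment rendered from item_list_partial.html

Without the X-Requested-With header the same request returns the full item_list.html page.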

def url_detail_view(request, id):
    url_item = get_object_or_404(Urls, id=id)
    url_sources = list(Source.objects.filter(urlssource__id_url=url_item).values_list('source', flat=True))
    try:
        url_content = UrlContent.objects.get(pk=id)
    except UrlContent.DoesNotExist:
        url_content = {}

    # TODO: https://github.com/ollama/ollama-python?tab=readme-ov-file#async-client
    # LLM models available
    client = ollama.Client(host='https://ollamamodel.matitos.org')
    models = sorted([m.model for m in client.list().models])
    # default_model = "llama3.2:3b"

    context = {
        'url_item': url_item,
        'sources': url_sources,
        'models': models,
        #'default_model': default_model,
        'prompt': "Provide in one paragraph the what, why, when, where, who, and how of the content below. Also provide a one paragraph summary of the content:",
        #"prompt": "Imagine you are a journalist, TLDR in a paragraph:",
        #"prompt": "Below you will find the whole content of a news article:\n{}\nProvide a concise summary of one paragraph maximum of the content.".format(content)
        'url_content': url_content,
    }
    return render(request, 'url_detail.html', context)

def fetch_details(request, id):
    url_item = get_object_or_404(Urls, id=id)
    url_param = request.GET.get("url", "")  # Get URL
    model = request.GET.get("model", "")    # Get LLM model
    text = request.GET.get("text", "")      # Get LLM prompt

    # LLM
    client = ollama.Client(host='https://ollamamodel.matitos.org')

    def stream_response():
        msg_content = {
            "role": "user",
            "content": text,
        }
        response = client.chat(model=model, messages=[msg_content], stream=True)
        for chunk in response:
            yield chunk["message"]["content"]  # Stream each chunk of text

    return StreamingHttpResponse(stream_response(), content_type="text/plain")
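fetch_details streams the Ollama reply as plain text, so a caller simply reads the response incrementally. A minimal consumption sketch assuming a local dev server; the URL id, article URL, and model name are illustrative, not taken from this commit:

    import requests

    fetch_url = "http://localhost:8000/news/url/42/fetch/"  # hypothetical URL id
    params = {
        "url": "https://example.com/article",               # echoed into the prompt by the detail page
        "model": "llama3.2:3b",                             # any model reported by client.list()
        "text": "Summarize the article in one paragraph:",
    }
    with requests.get(fetch_url, params=params, stream=True) as resp:
        resp.raise_for_status()
        for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
            print(chunk, end="", flush=True)  # chunks arrive as stream_response() yields them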