tasks scheduler, news views, Obsolete code cleaning
1-DB.ipynb
@@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -11,7 +11,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -21,24 +21,16 @@
|
||||
"db_postgres\n",
|
||||
"db_redis\n",
|
||||
"\u001b[1A\u001b[1B\u001b[0G\u001b[?25l[+] Running 1/0\n",
|
||||
" ⠿ Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.1s \u001b[0m\n",
|
||||
" ⠿ Container db_postgres \u001b[39mStarting\u001b[0m \u001b[34m0.1s \u001b[0m\n",
|
||||
" ⠿ Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.1s \u001b[0m\n",
|
||||
" \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n",
|
||||
"\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/3\n",
|
||||
" ⠿ Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.2s \u001b[0m\n",
|
||||
" ⠿ Container db_postgres \u001b[39mStarting\u001b[0m \u001b[34m0.2s \u001b[0m\n",
|
||||
" \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n",
|
||||
"\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/3\n",
|
||||
" ⠿ Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.3s \u001b[0m\n",
|
||||
" ⠿ Container db_postgres \u001b[39mStarting\u001b[0m \u001b[34m0.3s \u001b[0m\n",
|
||||
" \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n",
|
||||
"\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/3\n",
|
||||
" ⠿ Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.4s \u001b[0m\n",
|
||||
" ⠿ Container db_postgres \u001b[39mStarting\u001b[0m \u001b[34m0.4s \u001b[0m\n",
|
||||
" ⠿ Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.2s \u001b[0m\n",
|
||||
" \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n",
|
||||
"\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l\u001b[34m[+] Running 3/3\u001b[0m\n",
|
||||
" \u001b[32m✔\u001b[0m Container db_redis \u001b[32mStarted\u001b[0m \u001b[34m0.4s \u001b[0m\n",
|
||||
" \u001b[32m✔\u001b[0m Container db_postgres \u001b[32mStarted\u001b[0m \u001b[34m0.4s \u001b[0m\n",
|
||||
" \u001b[32m✔\u001b[0m Container db_postgres \u001b[32mStarted\u001b[0m \u001b[34m0.2s \u001b[0m\n",
|
||||
" \u001b[32m✔\u001b[0m Container db_redis \u001b[32mStarted\u001b[0m \u001b[34m0.2s \u001b[0m\n",
|
||||
" \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n",
|
||||
"\u001b[?25h"
|
||||
]
|
||||
@@ -50,7 +42,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -167,7 +159,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -219,7 +211,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -268,7 +260,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -293,7 +285,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
||||
@@ -2,8 +2,13 @@
```
conda create -n matitos_urls python=3.12
conda activate matitos_urls
pip install django psycopg[binary] django-redis django-rq
# Core
pip install django psycopg[binary] django-redis django-tasks-scheduler
# django-rq
# Fetcher
pip install feedparser python-dateutil newspaper4k lxml[html_clean] googlenewsdecoder gnews duckduckgo_search GoogleNews
# News visualization
pip install ollama
```

* From automated inspectdb
@@ -89,6 +94,8 @@ RQ_DEFAULT_RESULT_TTL=${RQ_DEFAULT_RESULT_TTL:-3600}
python manage.py inspectdb
# Migrations
python manage.py makemigrations api; python manage.py migrate --fake-initial
# Create user
python manage.py createsuperuser
```

* Deploy

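* Settings (illustrative)

A minimal sketch of the Django settings the packages above imply; the `db_postgres` / `db_redis` host names follow the containers started in the notebook, while the database name, credentials and the django-tasks-scheduler queue configuration are placeholders or omitted here:

```
# settings.py -- illustrative excerpt, not the project's actual configuration
DATABASES = {
    "default": {
        "ENGINE": "django.db.backends.postgresql",   # served by psycopg[binary]
        "NAME": "postgres",                          # placeholder database name
        "HOST": "db_postgres",
        "PORT": "5432",
    }
}

CACHES = {
    "default": {
        "BACKEND": "django_redis.cache.RedisCache",  # django-redis backend
        "LOCATION": "redis://db_redis:6379/0",
    }
}
```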
@@ -17,7 +17,7 @@ class Search(models.Model):
db_table = 'search'

def __str__(self):
return "[{}] {}".format(self.type, self.search)
return "[{}]->{}".format(self.type, self.search)

class Source(models.Model):
id = models.SmallAutoField(primary_key=True)
@@ -28,7 +28,7 @@ class Source(models.Model):
db_table = 'source'

def __str__(self):
return self.source
return "[{}]".format(self.source)

class StatusPatternMatching(models.Model):
pattern = models.TextField(primary_key=True)
@@ -82,7 +82,7 @@ class Urls(models.Model):
ordering = ["-ts_fetch"]

def __str__(self):
return "{} {} {}".format(self.url, self.ts_fetch, self.status)
return "URL: {} Fetch:{} Status:{}".format(self.url, self.ts_fetch, self.status)


class UrlsDuplicate(models.Model):
@@ -95,8 +95,7 @@ class UrlsDuplicate(models.Model):
unique_together = (('id_url_canonical', 'id_url_duplicated'),)

def __str__(self):
return Urls(id=self.id_url_duplicated), Urls(id=self.id_url_canonical)

return "{} {} ".format(self.id_url_duplicated, self.id_url_canonical)

class UrlsSourceSearch(models.Model):
id_url = models.OneToOneField(Urls, models.DO_NOTHING, db_column='id_url', primary_key=True) # The composite primary key (id_url, id_source, id_search) found, that is not supported. The first column is selected.
@@ -109,4 +108,4 @@ class UrlsSourceSearch(models.Model):
unique_together = (('id_url', 'id_source', 'id_search'),)

def __str__(self):
return Urls(id=self.id_url), Source(id=self.id_source), Search(id=self.id_search)
return "{} {} {}".format(self.id_source, self.id_search, self.id_url)

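The `__str__` rewrites above replace tuple returns with formatted strings. Python requires `__str__` to return a `str` (Django's admin and shell call it for every object they display), so the old tuple-returning versions fail at render time. A minimal illustration, independent of the project models:

```
class Broken:
    def __str__(self):
        return "a", "b"                      # tuple, not a string

class Fixed:
    def __str__(self):
        return "{} {}".format("a", "b")

str(Fixed())   # "a b"
str(Broken())  # TypeError: __str__ returned non-string (type tuple)
```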
@@ -1,502 +0,0 @@
|
||||
import psycopg
|
||||
import redis
|
||||
import traceback
|
||||
import random
|
||||
import requests
|
||||
import json
|
||||
import os
|
||||
from .url_utils import process_article
|
||||
from .logger import get_logger
|
||||
logger = get_logger()
|
||||
|
||||
# TODO: URL_DB_HANDLER, _get_search_list, _get_url_host, _get_url_host_list, ...
|
||||
# The rest, elsewhere
|
||||
|
||||
class DB_Handler():
|
||||
def __init__(self, db_connect_info, redis_connect_info):
|
||||
logger.debug("Initializing URL DB writer")
|
||||
self.db_connect_info = db_connect_info
|
||||
self.redis_instance = redis.Redis(host=redis_connect_info.get("host"), port=redis_connect_info.get("port"))
|
||||
self.redis_expiry_seconds = redis_connect_info.get("expiry_seconds", 172800) # Default: 48 hours
|
||||
|
||||
try:
|
||||
self.redis_instance.ping()
|
||||
logger.debug("Succesfully pinged Redis")
|
||||
except Exception as e:
|
||||
logger.warning("Error trying to ping Redis: {}".format(str(e)))
|
||||
|
||||
def get_urls_count(self, last_minutes_check):
|
||||
#####################
|
||||
### Get number of URLs within last X minutes
|
||||
#####################
|
||||
try:
|
||||
# Update
|
||||
with psycopg.connect(self.db_connect_info) as conn:
|
||||
# Open cursor
|
||||
cursor = conn.cursor()
|
||||
num_urls = cursor.execute("SELECT COUNT(*) FROM URLS WHERE ts_fetch >= current_timestamp - interval '{} minutes';".format(last_minutes_check)).fetchone()[0]
|
||||
except Exception as e:
|
||||
logger.warning("Error updating URLs status: {}".format(str(e)))
|
||||
num_urls = None
|
||||
return num_urls
|
||||
|
||||
def _get_url_host_list(self):
|
||||
try:
|
||||
with psycopg.connect(self.db_connect_info) as conn:
|
||||
# List of URL host
|
||||
list_url_host = [l[0] for l in conn.execute("SELECT url_host FROM WEBSITE_OF_INTEREST;").fetchall()]
|
||||
# Clean http / https from URLs
|
||||
list_url_host = [l.replace("https://", "").replace("http://", "") for l in list_url_host]
|
||||
# Clean last slash if exists
|
||||
list_url_host = [ l if not l.endswith("/") else l[:-1] for l in list_url_host]
|
||||
except Exception as e:
|
||||
logger.warning("Exception fetching URL host list: " + str(e))
|
||||
list_url_host = []
|
||||
return list_url_host
|
||||
|
||||
def _get_search_list(self):
|
||||
try:
|
||||
with psycopg.connect(self.db_connect_info) as conn:
|
||||
# List of keyword searches
|
||||
list_search_text = [l[0] for l in conn.execute("SELECT keyword_search FROM SEARCH;").fetchall()]
|
||||
except Exception as e:
|
||||
logger.warning("Exception fetching searches list: " + str(e))
|
||||
list_search_text = []
|
||||
return list_search_text
|
||||
|
||||
def _get_feed_urls(self):
|
||||
try:
|
||||
with psycopg.connect(self.db_connect_info) as conn:
|
||||
list_url_feeds = conn.execute("SELECT rss_feed FROM FEED;").fetchall()
|
||||
# Decode (tuple with 1 element)
|
||||
list_url_feeds = [l[0] for l in list_url_feeds]
|
||||
except Exception as e:
|
||||
logger.warning("Exception fetching RSS sites: " + str(e))
|
||||
list_url_feeds = []
|
||||
return list_url_feeds
|
||||
|
||||
def _get_url_hosts(self):
|
||||
try:
|
||||
with psycopg.connect(self.db_connect_info) as conn:
|
||||
list_url_hosts = conn.execute("SELECT url_host FROM WEBSITE_OF_INTEREST;").fetchall()
|
||||
# Decode (tuple with 1 element)
|
||||
list_url_hosts = [l[0] for l in list_url_hosts]
|
||||
except Exception as e:
|
||||
logger.warning("Exception fetching RSS sites: " + str(e))
|
||||
list_url_hosts = []
|
||||
return list_url_hosts
|
||||
|
||||
def _format(self, values):
|
||||
# Replace single quote ' with ''. Based on https://stackoverflow.com/a/12320729
|
||||
# String -> 'string', Int -> '1' (string-based), None -> NULL (no quotes for pgSQL to interpret Null value)
|
||||
if (type(values) == list) or (type(values) == tuple):
|
||||
insert_args = "(" + ", ".join([ "NULL" if v is None else "'" + str(v).replace("'", "''") + "'" for v in values]) + ")"
|
||||
elif (type(values) == str):
|
||||
insert_args = "({})".format( "NULL" if values is None else "'" + values.replace("'", "''") + "'" )
|
||||
else:
|
||||
logger.warning("Error formatting input values: {}".format(values))
|
||||
assert False
|
||||
return insert_args
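# Illustrative behaviour (hypothetical inputs, not from the original module):
#   self._format(("O'Brien", None, 3))  ->  "('O''Brien', NULL, '3')"
#   self._format("error")               ->  "('error')"
# The values are interpolated into SQL text rather than bound as parameters,
# which is why the quote escaping above matters.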
|
||||
|
||||
def _get_cached_canonical_url(self, url):
|
||||
### Redis: URL processed recently? -> Avoid increasing SERIAL counter & efficiency of DB
|
||||
try:
|
||||
filter_url = self.redis_instance.get(url)
|
||||
if (filter_url is not None):
|
||||
filter_url = filter_url.decode("utf-8")
|
||||
except Exception as e:
|
||||
logger.warning("Exception querying Redis: {}".format(str(e)))
|
||||
filter_url = None
|
||||
return filter_url
|
||||
|
||||
def _update_urls_status(self, dict_status_ids):
|
||||
#####################
|
||||
### Update status to array of URL IDs
|
||||
#####################
|
||||
try:
|
||||
# Update
|
||||
with psycopg.connect(self.db_connect_info) as conn:
|
||||
# Open cursor
|
||||
cursor = conn.cursor()
|
||||
# Autocommit at end of transaction (Atomic insert of URLs and sources)
|
||||
with conn.transaction() as tx:
|
||||
for key_status, value_ids in dict_status_ids.items():
|
||||
cursor.execute("UPDATE URLS SET status='{}' WHERE id IN ({});".format(key_status, ",".join([str(v) for v in value_ids])))
|
||||
except Exception as e:
|
||||
logger.warning("Error updating URLs status: {}".format(str(e)))
|
||||
|
||||
def _get_missing_kids_urls(self, num_urls=None):
|
||||
#####################
|
||||
### Get list of Missing Kids URLs
|
||||
#####################
|
||||
try:
|
||||
missing_kids_ids_and_urls = []
|
||||
if (num_urls is None):
|
||||
limit = 500
|
||||
else:
|
||||
limit = num_urls
|
||||
offset = 0
|
||||
with psycopg.connect(self.db_connect_info) as conn:
|
||||
# Open cursor
|
||||
cursor = conn.cursor()
|
||||
while True:
|
||||
# Query
|
||||
missing_kids_ids_and_urls_query = cursor.execute("SELECT id, url, status FROM URLS WHERE url LIKE '%missingkids.org/poster%' ORDER BY ts_fetch DESC LIMIT {} OFFSET {};".format(limit, offset)).fetchall()
|
||||
# Finished?
|
||||
if (len(missing_kids_ids_and_urls_query) == 0):
|
||||
break
|
||||
# Extend
|
||||
missing_kids_ids_and_urls = missing_kids_ids_and_urls + missing_kids_ids_and_urls_query
|
||||
# Offset
|
||||
offset += len(missing_kids_ids_and_urls_query)
|
||||
# Stop?
|
||||
if (num_urls is not None) and (len(missing_kids_ids_and_urls) >= num_urls):
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
logger.warning("Error getting Missing Kids URLs: {}".format(str(e)))
|
||||
missing_kids_ids_and_urls = []
|
||||
return missing_kids_ids_and_urls
|
||||
|
||||
def _get_error_urls(self, num_urls=None):
|
||||
#####################
|
||||
### Get list of Error URLs
|
||||
#####################
|
||||
try:
|
||||
error_urls = []
|
||||
if (num_urls is None):
|
||||
limit = 500
|
||||
else:
|
||||
limit = num_urls
|
||||
offset = 0
|
||||
with psycopg.connect(self.db_connect_info) as conn:
|
||||
# Open cursor
|
||||
cursor = conn.cursor()
|
||||
while True:
|
||||
# Query
|
||||
error_urls_query = cursor.execute("SELECT id, url FROM URLS WHERE status='error' ORDER BY ts_fetch DESC LIMIT {} OFFSET {};".format(limit, offset)).fetchall()
|
||||
# Finished?
|
||||
if (len(error_urls_query) == 0):
|
||||
break
|
||||
# Extend
|
||||
error_urls = error_urls + error_urls_query
|
||||
# Offset
|
||||
offset += len(error_urls_query)
|
||||
# Stop?
|
||||
if (num_urls is not None) and (len(error_urls) >= num_urls):
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
logger.warning("Error getting Error URLs: {}".format(str(e)))
|
||||
error_urls = []
|
||||
return error_urls
|
||||
|
||||
def _decode_urls(self, urls_fetched, list_domains_to_filter, list_pattern_status_tuple): # TODO: language for urls_fetched...
|
||||
"""
|
||||
# TODO: REFACTOR
|
||||
For each input url
|
||||
|
||||
Already processed?
|
||||
-> Update on Redis expire time
|
||||
-> Associate to source
|
||||
Not processed? Get main URL:
|
||||
-> URL Canonical valid?
|
||||
-> Rely on this as main URL
|
||||
-> URL Canonical not valid?
|
||||
-> Use input url, unless it's a news.google.com link
|
||||
-> If news.google.com link, filter out. REDIS?
|
||||
Main URL processing:
|
||||
-> Update in REDIS, association url -> url_canonical
|
||||
-> url != url_canonical: Add in duplicate table
|
||||
If both != news.google.com
|
||||
"""
|
||||
|
||||
# URLs to insert, URLs duplicated association, URL to Canonical form
|
||||
list_insert_url_tuple_args, list_tuple_canonical_duplicate_urls, dict_full_urls_to_canonical = [], [], {}
|
||||
|
||||
# URL VS CANONICAL:
|
||||
# News URL returned: https://news.google.com/articles/CBMifmh0dHBzOi8vd3d3LmJyZWl0YmFydC5jb20vMm5kLWFtZW5kbWVudC8yMDIzLzA0LzAzL2dvdi1kZXNhbnRpcy1zaWducy1iaWxsLW1ha2luZy1mbG9yaWRhLXRoZS0yNnRoLWNvbnN0aXR1dGlvbmFsLWNhcnJ5LXN0YXRlL9IBAA?hl=en-US&gl=US&ceid=US%3Aen
|
||||
# Corresponds to canonical URL: https://www.breitbart.com/2nd-amendment/2023/04/03/gov-desantis-signs-bill-making-florida-the-26th-constitutional-carry-state/
|
||||
|
||||
for url in urls_fetched:
|
||||
# Domain to filter? Input url
|
||||
filter_due_to_domain = False
|
||||
for domain_to_filter in list_domains_to_filter:
|
||||
if (domain_to_filter in url):
|
||||
logger.debug("Domain filter applied based on {} for input URL: {}".format(domain_to_filter, url))
|
||||
filter_due_to_domain = True
|
||||
if (filter_due_to_domain):
|
||||
continue
|
||||
|
||||
# URL processed recently? -> Filter and avoid increasing SERIAL counter & efficiency of DB
|
||||
cached_canonical_url = self._get_cached_canonical_url(url)
|
||||
if (cached_canonical_url is not None):
|
||||
# Even if url processed, need to add url_canonical to list_filtered_urls, so as to associate search source to canonical URL (canonical is the main URL entry)
|
||||
dict_full_urls_to_canonical[url] = cached_canonical_url # X -> Y
|
||||
# If url has been processed, so was its canonical form
|
||||
logger.debug("Filtering out already inserted (processed) URL and its canonical form: {} {}".format(url, cached_canonical_url))
|
||||
continue
|
||||
|
||||
# Process TODO: Add language...
|
||||
url_canonical, article_elements, article_status = process_article(url, list_pattern_status_tuple)
|
||||
# TODO: Store article_elements information to insert into OS after inserted into DB (and therefore having associated url_id)
|
||||
|
||||
# Could not retrieve redirection for news.google.com based URL? Continue (avoid inserting in DB)
|
||||
if (url_canonical is None) and ("news.google.com" in url):
|
||||
logger.debug("Filtering empty canonical link for base URL based on news.google.com: {}".format(url))
|
||||
continue
|
||||
# Canonical URL still news.google.com? Continue (avoid inserting in DB)
|
||||
if (url_canonical is not None) and ("news.google.com" in url_canonical):
|
||||
logger.debug("Filtering canonical news.google.com based URL: {}".format(url_canonical))
|
||||
continue
|
||||
|
||||
# Domain to filter? Input canonical_url
|
||||
filter_due_to_domain = False
|
||||
for domain_to_filter in list_domains_to_filter:
|
||||
if (url_canonical is not None) and (domain_to_filter in url_canonical):
|
||||
filter_due_to_domain = True
|
||||
if (filter_due_to_domain):
|
||||
logger.info("Filtering due to domain input URL, Canonical_URL: {} {}".format(url, url_canonical))
|
||||
continue
|
||||
|
||||
if (url_canonical is None) or (article_status == "error"):
|
||||
logger.debug("Processing failed for URL: {}".format(url))
|
||||
# Still insert URL with "error"? -> If processed later, might have inconsistent sources (url vs url_canonical). Only store if not news.google.com based
|
||||
if ("news.google.com" in url) or ("consent.google.com" in url):
|
||||
logging.debug("Not able to process Google News link, skipping: {}".format(url))
|
||||
else:
|
||||
dict_full_urls_to_canonical[url] = url # X -> X
|
||||
list_insert_url_tuple_args.append( (url, article_status) )
|
||||
continue
|
||||
|
||||
# URL was not processed (not sure canonical yet). Generate URL_CANONICAL <-> URL_ORIGINAL association if they're different
|
||||
if (url_canonical != url):
|
||||
list_tuple_canonical_duplicate_urls.append( (url_canonical, url) )
|
||||
# Dict: url -> canonical (update association)
|
||||
dict_full_urls_to_canonical[url] = url_canonical # X -> Y or X
|
||||
|
||||
# Canonical URL processed recently? -> Filter and avoid increasing SERIAL counter & efficiency of DB
|
||||
if (self._get_cached_canonical_url(url_canonical) is not None):
|
||||
# Canonical URL was already processed
|
||||
logger.debug("Filtering out already inserted (processed) URL canonical: {}".format(url_canonical))
|
||||
else:
|
||||
# Insert url_canonical to DB formatted
|
||||
list_insert_url_tuple_args.append( (url_canonical, article_status) )
|
||||
# Canonical URL different? Process
|
||||
if (url_canonical != url):
|
||||
if ("news.google.com" in url) or ("consent.google.com" in url):
|
||||
logging.debug("Not adding google.news.com based link, skipping: {}".format(url))
|
||||
else:
|
||||
# Fetched url -> duplicate (using canonical as main link)
|
||||
article_status = "duplicate"
|
||||
# Insert url (non-canonical) to DB formatted
|
||||
list_insert_url_tuple_args.append( (url, article_status) )
|
||||
|
||||
return list_insert_url_tuple_args, list_tuple_canonical_duplicate_urls, dict_full_urls_to_canonical
|
||||
|
||||
def _insert_urls(self, cursor, list_insert_url_tuple_args):
|
||||
#####################
|
||||
### Insert URLs with status
|
||||
#####################
|
||||
if (len(list_insert_url_tuple_args) > 0):
|
||||
insert_args = ', '.join( [ self._format(t) for t in list_insert_url_tuple_args] )
|
||||
# Insert. (url_1, status_1), (url_2, status_2), ...
|
||||
sql_code = "INSERT INTO URLS {} VALUES {} ON CONFLICT (url) DO NOTHING;".format("(url, status)", insert_args)
|
||||
# logger.debug("SQL CODE: {}".format(sql_code))
|
||||
c = cursor.execute(sql_code)
|
||||
# NOTE: Not using "RETURNING id" since previously inserted URLs are not returned (ON CONFLICT)
|
||||
# https://stackoverflow.com/questions/35949877/how-to-include-excluded-rows-in-returning-from-insert-on-conflict/35953488#35953488
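# Illustrative consequence (hypothetical row): an INSERT such as
#   INSERT INTO URLS (url, status) VALUES ('https://a.example/x', 'valid')
#   ON CONFLICT (url) DO NOTHING RETURNING id;
# returns no row when the URL already exists, so the ids of both new and
# pre-existing URLs are recovered afterwards with a plain SELECT
# (see _get_urls_id and _insert_urls_duplicated below).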
|
||||
|
||||
def _insert_urls_duplicated(self, cursor, list_tuple_canonical_duplicate_urls):
|
||||
#####################
|
||||
### Insert duplicated URLs
|
||||
#####################
|
||||
if (len(list_tuple_canonical_duplicate_urls) > 0):
|
||||
# Flatten, format, set to remove duplicates
|
||||
args_duplicated_urls_set = "(" + ', '.join( set( [ "'" + str(y).replace("'", "''") + "'" for x in list_tuple_canonical_duplicate_urls for y in x] ) ) + ")"
|
||||
|
||||
# Dict: url -> id
|
||||
dict_url_to_id = {}
|
||||
# Get url -> id association to populate duplicated URLs
|
||||
for (id_, url_) in cursor.execute("SELECT id, url FROM URLS WHERE url IN {};".format(args_duplicated_urls_set)).fetchall():
|
||||
dict_url_to_id[url_] = id_
|
||||
|
||||
# Convert tuples (url_canonical, url) -> (id_url_canonical, id_url) to insert in DB
|
||||
# ORIGINAL CODE. Issue, might not have found association to all urls
|
||||
### list_tuple_canonical_duplicate_urls_ids = [ (dict_url_to_id[t[0]], dict_url_to_id[t[1]]) for t in list_tuple_canonical_duplicate_urls]
|
||||
|
||||
list_tuple_canonical_duplicate_urls_ids = []
|
||||
for (url_1, url_2) in list_tuple_canonical_duplicate_urls:
|
||||
id_url_1, id_url_2 = dict_url_to_id.get(url_1), dict_url_to_id.get(url_2)
|
||||
if (id_url_1 is None) or (id_url_2 is None):
|
||||
logger.debug("Skipping duplicate association due to no url -> id_url mapping available for tuple: {} {}".format(url_1, url_2))
|
||||
else:
|
||||
list_tuple_canonical_duplicate_urls_ids.append( (id_url_1, id_url_2) )
|
||||
|
||||
if (len(list_tuple_canonical_duplicate_urls_ids) > 0):
|
||||
insert_args = ', '.join( [ self._format(t) for t in list_tuple_canonical_duplicate_urls_ids] )
|
||||
# Insert. (id_url_canonical_1, id_url_1), ...
|
||||
sql_code = "INSERT INTO URLS_DUPLICATE {} VALUES {} ON CONFLICT DO NOTHING;".format("(id_url_canonical, id_url_duplicated)", insert_args)
|
||||
# logger.debug("SQL CODE: {}".format(sql_code))
|
||||
c = cursor.execute(sql_code)
|
||||
|
||||
def _get_pattern_status_list(self):
|
||||
#####################
|
||||
### Get list of (pattern, priority, status) tuples
|
||||
#####################
|
||||
# TODO: Cache on redis and query once every N hours? ...
|
||||
try:
|
||||
with psycopg.connect(self.db_connect_info) as conn:
|
||||
# Open cursor
|
||||
cursor = conn.cursor()
|
||||
# TODO: Cache on Redis
|
||||
list_pattern_status = cursor.execute("SELECT pattern, priority, status FROM STATUS_PATTERN_MATCHING;").fetchall()
|
||||
except Exception as e:
|
||||
logger.warning("Error getting pattern status list: {}".format(str(e)))
|
||||
list_pattern_status = []
|
||||
return list_pattern_status
|
||||
|
||||
def _get_domains_to_filter(self):
|
||||
#####################
|
||||
### Get list of domains to filter
|
||||
#####################
|
||||
# TODO: Cache on redis and query once every N hours? ...
|
||||
try:
|
||||
with psycopg.connect(self.db_connect_info) as conn:
|
||||
# Open cursor
|
||||
cursor = conn.cursor()
|
||||
# TODO: Cache on Redis
|
||||
sites_to_filter = [e[0] for e in cursor.execute("SELECT url_host FROM WEBSITE_TO_FILTER;").fetchall() ]
|
||||
except Exception as e:
|
||||
logger.warning("Error getting domains to filter: {}".format(str(e)))
|
||||
sites_to_filter = []
|
||||
return sites_to_filter
|
||||
|
||||
def _get_cached_source_id(self, source):
|
||||
### Redis: URL processed recently? -> Avoid increasing SERIAL counter & efficiency of DB
|
||||
try:
|
||||
source_id = self.redis_instance.get(source)
|
||||
if (source_id is not None):
|
||||
source_id = source_id.decode("utf-8")
|
||||
except Exception as e:
|
||||
logger.warning("Exception querying Redis: {}".format(str(e)))
|
||||
source_id = None
|
||||
return source_id
|
||||
|
||||
def _get_source_id(self, cursor, source):
|
||||
#####################
|
||||
### Get source corresponding id
|
||||
#####################
|
||||
# Cached?
|
||||
id_source = self._get_cached_source_id(source)
|
||||
if (id_source is None):
|
||||
c = cursor.execute("SELECT id FROM SOURCE WHERE source='{}'".format(source.replace("'", "''"))).fetchone()
|
||||
if (c is None) or (len(c) == 0):
|
||||
# Source does not exist, insert and get id
|
||||
c = cursor.execute("INSERT INTO SOURCE (source) VALUES ('{}') RETURNING id;".format(source.replace("'", "''"))).fetchone()
|
||||
# Decode source id
|
||||
id_source = c[0]
|
||||
# Cache
|
||||
print("*"*10, source, id_source)
|
||||
self.redis_instance.set(source, id_source, ex=self.redis_expiry_seconds)
|
||||
return id_source
|
||||
|
||||
def _get_urls_id(self, cursor, urls_full):
|
||||
#####################
|
||||
### Get id of inserted and filtered URLs
|
||||
#####################
|
||||
# TODO: Cache url -> url_id, url_canonical
|
||||
if (len(urls_full) == 0):
|
||||
return []
|
||||
# Get inserted and filtered URL ids (unnested). Filtered URLs are also retrieved since they might have been fetched from a new source
|
||||
in_inserted_filtered_urls = "(" + ', '.join(["'" + u.replace("'", "''") + "'" for u in urls_full]) + ")"
|
||||
id_urls_related = [ i[0] for i in cursor.execute("SELECT id FROM URLS WHERE url IN {};".format(in_inserted_filtered_urls)).fetchall() ]
|
||||
return id_urls_related
|
||||
|
||||
def _insert_urls_source(self, cursor, id_urls_related, id_source):
|
||||
#####################
|
||||
### Insert URL sources: (id_url_1, id_source), (id_url_2, id_source), ...
|
||||
#####################
|
||||
if (len(id_urls_related) == 0) or (id_source is None):
|
||||
return
|
||||
columns = "(id_url, id_source)"
|
||||
insert_args = ', '.join( [ self._format([id_url, id_source]) for id_url in id_urls_related ] )
|
||||
# Insert
|
||||
sql_code = "INSERT INTO URLS_SOURCE {} VALUES {} ON CONFLICT DO NOTHING;".format(columns, insert_args)
|
||||
# logger.debug("SQL CODE: {}".format(sql_code))
|
||||
c = cursor.execute(sql_code)
|
||||
|
||||
def write_batch(self, urls_fetched, source):
|
||||
# Chunks of 50 elements
|
||||
n = 50
|
||||
# Divide in small chunks
|
||||
urls_fetched_chunks = [urls_fetched[i:i + n] for i in range(0, len(urls_fetched), n)]
|
||||
# Process
|
||||
for urls_fetched_chunk_i in urls_fetched_chunks:
|
||||
self._write_small_batch(urls_fetched_chunk_i, source)
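# Illustrative chunking (hypothetical batch): 120 fetched URLs are written as
# three consecutive small batches of 50, 50 and 20 URLs.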
|
||||
|
||||
def _write_small_batch(self, urls_fetched, source):
|
||||
try:
|
||||
logger.info("Fetched #{} URLs, source: {}".format(len(urls_fetched), source))
|
||||
|
||||
if (len(urls_fetched) == 0):
|
||||
logger.debug("Empty batch of urls (not writing to DB) for source: {}".format(source))
|
||||
return
|
||||
|
||||
# Shuffle URLs to reduce continuous URLs of same URL host (minimize chance of being blocked for too many continuous requests)
|
||||
random.shuffle(urls_fetched)
|
||||
|
||||
# Get list of domains to filter
|
||||
list_domains_to_filter = self._get_domains_to_filter()
|
||||
# Get list of (pattern, priority, status) tuples to override status if required
|
||||
list_pattern_status_tuple = self._get_pattern_status_list()
|
||||
# Sort pattern tuples by priority
|
||||
list_pattern_status_tuple.sort(key=lambda tup: tup[1], reverse=True)
|
||||
|
||||
# Process URLs to update DB
|
||||
list_insert_url_tuple_args, list_tuple_canonical_duplicate_urls, dict_full_urls_to_canonical = self._decode_urls(urls_fetched, list_domains_to_filter, list_pattern_status_tuple)
|
||||
# Full set of URL and its canonical form (to associate them to a search), both to insert and filter
|
||||
urls_full = set(dict_full_urls_to_canonical.keys()).union( set(dict_full_urls_to_canonical.values()) )
|
||||
|
||||
# Insert
|
||||
with psycopg.connect(self.db_connect_info) as conn:
|
||||
# Open cursor
|
||||
cursor = conn.cursor()
|
||||
# Autocommit at end of transaction (Atomic insert of URLs and sources)
|
||||
with conn.transaction() as tx:
|
||||
# Insert processed URLs
|
||||
self._insert_urls(cursor, list_insert_url_tuple_args)
|
||||
# Insert URLs duplicated (canonical != fetched url)
|
||||
self._insert_urls_duplicated(cursor, list_tuple_canonical_duplicate_urls)
|
||||
|
||||
# Get source id in DB
|
||||
id_source = self._get_source_id(cursor, source)
|
||||
# Get IDs of all related URLs
|
||||
id_urls_related = self._get_urls_id(cursor, urls_full)
|
||||
# Insert search source associated to URLs
|
||||
self._insert_urls_source(cursor, id_urls_related, id_source)
|
||||
|
||||
# Update Redis status of inserted and filtered URLs after writing to DB
|
||||
for url, url_canonical in dict_full_urls_to_canonical.items():
|
||||
try:
|
||||
# Set with updated expiry time
|
||||
self.redis_instance.set(url, url_canonical, ex=self.redis_expiry_seconds)
|
||||
if (url != url_canonical):
|
||||
self.redis_instance.set(url_canonical, url_canonical, ex=self.redis_expiry_seconds)
|
||||
except Exception as e:
|
||||
logger.warning("Exception running set in Redis: {}".format(str(e)))
|
||||
|
||||
if (len(list_insert_url_tuple_args) > 0):
|
||||
try:
|
||||
webhook_token = os.environ.get("CLIQ_WEBHOOK_TOKEN")
|
||||
endpoint_message = "https://cliq.zoho.com/api/v2/channelsbyname/urlretrievalbot/message?zapikey={}".format(webhook_token)
|
||||
|
||||
payload = json.dumps({"text": "Fetched #{} new URLs, source: {}".format(len(list_insert_url_tuple_args), source) })
|
||||
r = requests.post(endpoint_message, data=payload)
|
||||
except Exception as e:
|
||||
logger.warning("Webhook failed: {}".format(str(e)))
|
||||
|
||||
logger.debug("URL DB write finished")
|
||||
except Exception as e:
|
||||
logger.warning( "Exception writing to URL_DB:\n{}".format(traceback.format_exc()) )
|
||||
logger.debug( "Exception --- List of URLs: {}".format(str(urls_fetched)) )
|
||||
@@ -1,48 +0,0 @@
|
||||
from .db_utils import DB_Handler
|
||||
import feedparser
|
||||
import dateutil.parser
|
||||
from .logger import get_logger
|
||||
logger = get_logger()
|
||||
|
||||
class FetchFeeds():
|
||||
def __init__(self, db_handler: DB_Handler) -> None:
|
||||
logger.debug("Initializing News feed")
|
||||
self.db_handler = db_handler
|
||||
|
||||
def run(self):
|
||||
try:
|
||||
logger.debug("Starting NewsFeed.run()")
|
||||
# Get feeds
|
||||
list_url_feeds = self.db_handler._get_feed_urls()
|
||||
logger.debug("Fetching news from feeds: {}".format(str(list_url_feeds)))
|
||||
|
||||
# Process via RSS feeds
|
||||
for url_feed in list_url_feeds:
|
||||
# Initialize
|
||||
urls_fetched, urls_publish_date = [], []
|
||||
# Fetch feeds
|
||||
feeds = feedparser.parse(url_feed)
|
||||
# Parse
|
||||
for f in feeds.get("entries", []):
|
||||
# Get URL
|
||||
url = f.get("link", None)
|
||||
# Process?
|
||||
if (url is not None):
|
||||
# Available publish date?
|
||||
publish_date_parsed = f.get("published_parsed")
|
||||
if (publish_date_parsed is None):
|
||||
publish_date = f.get("published", None)
|
||||
if (publish_date is not None):
|
||||
publish_date_parsed = dateutil.parser.parse(publish_date)
|
||||
|
||||
# Published date
|
||||
urls_publish_date.append(publish_date_parsed)
|
||||
# URL
|
||||
urls_fetched.append(url)
|
||||
|
||||
# URL fetching source
|
||||
source = "feed {}".format(url_feed)
|
||||
# Write to DB
|
||||
self.db_handler.write_batch(urls_fetched, source)
|
||||
except Exception as e:
|
||||
logger.warning("Exception in NewsFeed.run(): {}".format(str(e)))
|
||||
@@ -1,45 +0,0 @@
|
||||
from .db_utils import DB_Handler
|
||||
import newspaper
|
||||
from .logger import get_logger
|
||||
logger = get_logger()
|
||||
|
||||
class FetchParser():
|
||||
def __init__(self, db_handler: DB_Handler) -> None:
|
||||
logger.debug("Initializing News SiteParsing newspaper4k")
|
||||
self.db_handler = db_handler
|
||||
|
||||
# TODO: MOVE LOGIC ELSEWHERE!
|
||||
def _postprocess(self, article_urls):
|
||||
return [url.replace("#comment-stream", "") for url in article_urls]
|
||||
|
||||
def run(self):
|
||||
try:
|
||||
logger.debug("Starting NewsSiteParsing.run() for {}")
|
||||
|
||||
# Get URL hosts
|
||||
list_url_hosts = self.db_handler._get_url_hosts()
|
||||
logger.info("Fetching news by parsing URL hosts: {}".format(str(list_url_hosts)))
|
||||
|
||||
# Process newspaper4k build method
|
||||
for url_host_feed in list_url_hosts:
|
||||
# Protocol
|
||||
if not (url_host_feed.startswith("http")):
|
||||
url_host_feed_formatted = "https://" + url_host_feed
|
||||
else:
|
||||
url_host_feed_formatted = url_host_feed
|
||||
|
||||
logger.debug("Fetching newspaper4k parsing based on URL: {}".format(url_host_feed_formatted))
|
||||
# Source object
|
||||
url_host_built = newspaper.build(url_host_feed_formatted)
|
||||
# Get articles URL list
|
||||
urls_fetched = url_host_built.article_urls()
|
||||
# TODO: MOVE!
|
||||
# Post-processing
|
||||
urls_fetched = self._postprocess(urls_fetched)
|
||||
|
||||
# URL fetching source
|
||||
source = "newspaper4k {}".format(url_host_feed)
|
||||
# Write to DB
|
||||
self.db_handler.write_batch(urls_fetched, source)
|
||||
except Exception as e:
|
||||
logger.warning("Exception in NewsSiteParsing.run(): {}".format(str(e)))
|
||||
@@ -1,73 +0,0 @@
|
||||
from .db_utils import DB_Handler
|
||||
from .utils import get_searxng_instances
|
||||
from .fetch_search_sources import FetcherDuckDuckGo, FetcherGNews, FetcherGoogleNews, FetcherSearxNews, FetcherPreSearch
|
||||
from .logger import get_logger
|
||||
logger = get_logger()
|
||||
|
||||
class FetchSearcher():
|
||||
def __init__(self, db_handler: DB_Handler, full=True) -> None:
|
||||
logger.debug("Initializing News feed")
|
||||
self.db_handler = db_handler
|
||||
self.full_search = full
|
||||
|
||||
def _run_fetching(self, search_text):
|
||||
logger.debug("Starting _run_fetching() for {}".format(search_text))
|
||||
|
||||
# Common parameters
|
||||
lang, region = "en", "US"
|
||||
|
||||
### PreSearch
|
||||
dict_params_news = {"search": search_text}
|
||||
FetcherPreSearch(**dict_params_news).fetch_articles(self.db_handler)
|
||||
|
||||
### DuckDuckGo
|
||||
period = "d"
|
||||
dict_params_news = {"search": search_text, "lang": "wt", "region": "wt", "search_category": "news", "period": period}
|
||||
FetcherDuckDuckGo(**dict_params_news).fetch_articles(self.db_handler)
|
||||
dict_params_general = {"search": search_text, "lang": "wt", "region": "wt", "search_category": "general", "period": period}
|
||||
FetcherDuckDuckGo(**dict_params_general).fetch_articles(self.db_handler)
|
||||
|
||||
if (self.full_search):
|
||||
# Avoid site:{} search due to G-Bypass required time
|
||||
if ("site:" not in search_text):
|
||||
### GNews
|
||||
dict_params = {"search": search_text, "lang": "wt", "region": "wt", "period": period}
|
||||
FetcherGNews(**dict_params).fetch_articles(self.db_handler)
|
||||
|
||||
### GoogleNews
|
||||
dict_params_news = {"search": search_text, "lang": lang, "region": region, "search_category": "news", "period": period}
|
||||
FetcherGoogleNews(**dict_params_news).fetch_articles(self.db_handler)
|
||||
# dict_params_general = {"search": search_text, "lang": lang, "region": region, "search_category": "general", "period": period}
|
||||
|
||||
if False:
|
||||
### SearxNG
|
||||
period = "day"
|
||||
for searx_instance in get_searxng_instances():
|
||||
dict_params_news = {"search": search_text, "searx_instance": searx_instance, "lang": lang, "region": region, "search_category": "news", "period": period}
|
||||
dict_params_general = {"search": search_text, "searx_instance": searx_instance, "lang": lang, "region": region, "search_category": "general", "period": period}
|
||||
# Append thread
|
||||
FetcherSearxNews(**dict_params_news).fetch_articles(self.db_handler)
|
||||
FetcherSearxNews(**dict_params_general).fetch_articles(self.db_handler)
|
||||
|
||||
logger.debug("Finished _run_fetching()")
|
||||
|
||||
def run(self):
|
||||
try:
|
||||
logger.info("Fetching text searches & URL hosts of interest")
|
||||
|
||||
# Get text searches of interest
|
||||
list_search_text_of_interest = self.db_handler._get_search_list()
|
||||
|
||||
# Get URL host of interest
|
||||
list_url_host = self.db_handler._get_url_host_list()
|
||||
# Get text searches for URL hosts
|
||||
list_search_text_url_host = ["site:{}".format(l) for l in list_url_host]
|
||||
|
||||
for search_text in list_search_text_of_interest + list_search_text_url_host:
|
||||
logger.debug("Fetching news for search: {}".format(search_text))
|
||||
self._run_fetching(search_text)
|
||||
|
||||
logger.info("Finished fetching text searches & URL hosts of interest")
|
||||
except Exception as e:
|
||||
logger.warning("Exception in NewsSearch.run(): {}".format(str(e)))
|
||||
|
||||
@@ -1,384 +0,0 @@
|
||||
from duckduckgo_search import DDGS
|
||||
from gnews import GNews
|
||||
from GoogleNews import GoogleNews
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import os
|
||||
import time
|
||||
import json
|
||||
import numpy as np
|
||||
import random
|
||||
from .google_bypass import GoogleByPass
|
||||
from abc import ABC, abstractmethod
|
||||
from .logger import get_logger
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
|
||||
# Generic fetcher (fetches articles, writes to DB)
|
||||
class FetcherAbstract(ABC):
|
||||
@abstractmethod
|
||||
def _fetch(self):
|
||||
pass
|
||||
|
||||
def fetch_articles(self, db_writer):
|
||||
logger.debug("Starting fetch() for {}".format(self.name))
|
||||
# Fetch articles
|
||||
list_news = self._fetch()
|
||||
logger.info("Found #{} articles for search: {}".format(len(list_news), self.name))
|
||||
# Write to DB
|
||||
db_writer.write_batch(list_news, self.name)
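# Illustrative subclass (hypothetical, not part of the original module): the
# abstract base acts as a template -- concrete fetchers only implement _fetch()
# returning a list of URLs, while fetch_articles() handles logging and the DB write.
class FetcherStaticList(FetcherAbstract):
    def __init__(self, urls):
        self.name = "static-list example"
        self.urls = list(urls)

    def _fetch(self):
        return self.urls

# Usage sketch: FetcherStaticList(["https://example.org/a"]).fetch_articles(db_handler)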
|
||||
|
||||
# https://techblog.willshouse.com/2012/01/03/most-common-user-agents/
|
||||
|
||||
user_agents_list = [
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/111.0",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.3 Safari/605.1.15",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/111.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/112.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; rv:111.0) Gecko/20100101 Firefox/111.0",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/111.0",
|
||||
"Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.4 Safari/605.1.15",
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/112.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.44",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.54",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.62",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 OPR/96.0.0.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 OPR/97.0.0.0",
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.48",
|
||||
"Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/110.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.34",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.39",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.2 Safari/605.1.15",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; rv:112.0) Gecko/20100101 Firefox/112.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.51",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/109.0",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/112.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/112.0",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/110.0",
|
||||
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/110.0",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0",
|
||||
"Mozilla/5.0 (X11; CrOS x86_64 14541.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 YaBrowser/23.3.0.2246 Yowser/2.5 Safari/537.36",
|
||||
"Mozilla/5.0 (X11; Linux x86_64; rv:108.0) Gecko/20100101 Firefox/108.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36",
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15",
|
||||
"Mozilla/5.0 (Windows NT 6.1; rv:102.0) Gecko/20100101 Goanna/6.0 Firefox/102.0 PaleMoon/32.0.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.41",
|
||||
"Mozilla/5.0 (Windows NT 10.0; rv:110.0) Gecko/20100101 Firefox/110.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 YaBrowser/23.1.5.708 Yowser/2.5 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/113.0",
|
||||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"
|
||||
]
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class FetcherPreSearch(FetcherAbstract):
|
||||
def __init__(self, search):
|
||||
"""
|
||||
# period ->
|
||||
- h = hours (eg: 12h)
|
||||
- d = days (eg: 7d)
|
||||
- m = months (eg: 6m)
|
||||
- y = years (eg: 1y)
|
||||
"""
|
||||
self.search = search
|
||||
self.period = "1d" # TODO Fixed for the moment
|
||||
# self.lang = lang
|
||||
# self.region = region
|
||||
search_category = "news"
|
||||
self.name = "presearch {} {} {}".format(search, search_category, self.period)
|
||||
|
||||
def _fetch(self):
|
||||
try:
|
||||
# PreSearch fetching endpoint, parameter search keyword
|
||||
presearch_fetch_endpoint = "http://selenium_app:80/fetch_presearch/?search_keyword={}".format(self.search)
|
||||
# Timeout: 15 minutes
|
||||
r = requests.get(presearch_fetch_endpoint, timeout=900)
|
||||
# Decode
|
||||
list_news = json.loads(r.text).get("list_urls", [])
|
||||
except Exception as e:
|
||||
logger.warning("Timeout on request: {}. {}".format(presearch_fetch_endpoint, str(e)))
|
||||
list_news = []
|
||||
return list_news
|
||||
|
||||
|
||||
|
||||
class FetcherGNews(FetcherAbstract):
|
||||
def __init__(self, search, period, lang="en", region="US"):
|
||||
"""
|
||||
# period ->
|
||||
- h = hours (eg: 12h)
|
||||
- d = days (eg: 7d)
|
||||
- m = months (eg: 6m)
|
||||
- y = years (eg: 1y)
|
||||
"""
|
||||
self.search = search
|
||||
self.period = period
|
||||
self.lang = lang
|
||||
self.region = region
|
||||
search_category = "news"
|
||||
self.name = "gnews {} {} {} {}".format(search, search_category, period, "{}-{}".format(lang, region))
|
||||
|
||||
def _fetch(self):
|
||||
try:
|
||||
list_dict_news = GNews(self.lang, self.region, period=self.period).get_news(self.search)
|
||||
# Decode
|
||||
list_news = []
|
||||
for l in list_dict_news:
|
||||
list_news.append(l.get("url"))
|
||||
except Exception as e:
|
||||
logger.warning("Exception fetching {}: {}".format(self.name, str(e)))
|
||||
list_news = []
|
||||
|
||||
# Bypass Google links
|
||||
list_news_redirections = GoogleByPass().bypass_google_urls(list_news)
|
||||
|
||||
return list_news_redirections
|
||||
|
||||
class FetcherGoogleNews(FetcherAbstract):
|
||||
def __init__(self, search, search_category="news", period="1d", lang="en", region="US"):
|
||||
assert(search_category in ["news", "general"])
|
||||
|
||||
self.lang = lang
|
||||
self.region = region
|
||||
self.period = period
|
||||
self.search_category = search_category
|
||||
self.search = search
|
||||
self.name = "googlenews {} {} {} {}".format(search, search_category, period, "{}-{}".format(lang, region))
|
||||
|
||||
def _fetch(self):
|
||||
try:
|
||||
# Initialize
|
||||
g = GoogleNews(encode="utf-8", period=self.period, lang=self.lang, region=self.region)
|
||||
g.enableException(True)
|
||||
|
||||
if (self.search_category == "general"):
|
||||
set_links = set()
|
||||
# Search
|
||||
g.search(self.search)
|
||||
|
||||
# Iterate pages
|
||||
MAX_ITER_PAGES = 15
|
||||
for i in range(MAX_ITER_PAGES):
|
||||
time.sleep(random.uniform(1, 1.5))
|
||||
num_before = len(set_links)
|
||||
|
||||
# Get page
|
||||
try:
|
||||
links = g.page_at(i)
|
||||
except Exception as e:
|
||||
logger.warning("Exception fetching page in GoogleNews {}: {}".format(self.name, str(e)))
|
||||
break
|
||||
# Links
|
||||
for l in links:
|
||||
# '/url?esrc=s&q=&rct=j&sa=U&url=https://www.breitbart.com/news/scent-of-luxury-indias-jasmine-infuses-global-perfume/&ved=2ahUKEwjOybGSiN-AAxX1gv0HHfqSBpMQxfQBegQICBAC&usg=AOvVaw06GdoHyzPbIopUaEuUSQPQ'
|
||||
url = l.get("link").split("url=")[-1]
|
||||
set_links.add(url)
|
||||
|
||||
num_after = len(set_links)
|
||||
|
||||
# Finished?
|
||||
if (num_before == num_after):
|
||||
logger.debug("Iterated {} pages on GoogleNews general search".format(i))
|
||||
break
|
||||
# To list
|
||||
list_news = list(set_links)
|
||||
elif (self.search_category == "news"):
|
||||
# Search
|
||||
g.get_news(self.search)
|
||||
# Fetch
|
||||
list_news = g.get_links()
|
||||
|
||||
except Exception as e:
|
||||
logger.warning("Exception fetching {}: {}".format(self.name, str(e)))
|
||||
list_news = []
|
||||
|
||||
# Bypass Google links
|
||||
list_news_redirections = GoogleByPass().bypass_google_urls(list_news)
|
||||
|
||||
return list_news_redirections
|
||||
|
||||
class FetcherDuckDuckGo(FetcherAbstract):
|
||||
def __init__(self, search, search_category, period, lang="wt", region="wt"):
|
||||
assert(search_category in ["news", "general"])
|
||||
assert(period in ["d", "w", "m", "y"])
|
||||
self.search = search
|
||||
self.search_category = search_category
|
||||
self.period = period
|
||||
self.lang_region = "{}-{}".format(lang, region)
|
||||
self.name = "duckduckgo {} {} {} {}".format(search, search_category, "1{}".format(period), region)
|
||||
|
||||
def _fetch(self):
|
||||
try:
|
||||
list_news = []
|
||||
with DDGS(timeout=10) as ddgs:
|
||||
if (self.search_category == "general"):
|
||||
generator_links = ddgs.text(keywords=self.search, timelimit=self.period, region=self.lang_region)
|
||||
elif (self.search_category == "news"):
|
||||
generator_links = ddgs.news(keywords=self.search, timelimit=self.period, region=self.lang_region)
|
||||
|
||||
for l in generator_links:
|
||||
list_news.append( l.get("url", l.get("href")) )
|
||||
|
||||
except Exception as e:
|
||||
logger.warning("Exception fetching {}: {}".format(self.name, str(e)))
|
||||
list_news = []
|
||||
return list_news
|
||||
|
||||
|
||||
class FetcherSearxNews(FetcherAbstract):
|
||||
def __init__(self, search="child abuse", searx_instance="https://serx.ml/", lang="en", region="US", search_category="news", period="day"):
|
||||
assert(search_category in ["news", "general"])
|
||||
assert(period in [None, "day", "week", "month", "year"])
|
||||
# Random header (minimize probability of web-scraping detection)
|
||||
self.headers = {
|
||||
'User-agent': str(np.random.choice(user_agents_list)),
|
||||
'Accept-Encoding': 'gzip, deflate',
|
||||
'Accept': '*/*',
|
||||
'Connection': 'keep-alive',
|
||||
}
|
||||
""" # Optional header
|
||||
self.headers = {
|
||||
'User-agent': str(np.random.choice(user_agents_list)),
|
||||
'Accept-Encoding': 'gzip, deflate, br',
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
|
||||
'Connection': 'keep-alive',
|
||||
'Upgrade-Insecure-Requests': '1',
|
||||
'TE': 'trailers',
|
||||
'Sec-Fetch-Site': 'cross-site',
|
||||
'Sec-Fetch-Mode': 'navigate',
|
||||
'Sec-Fetch-Dest': 'document',
|
||||
}
|
||||
"""
|
||||
self.search = search
|
||||
self.searx_instance = searx_instance
|
||||
self.lang_region = "{}-{}".format(lang, region)
|
||||
self.search_category = search_category
|
||||
self.period = period
|
||||
self.t_sleep_lower, self.t_sleep_higher = 0.5, 1.5
|
||||
self.request_timeout = 240
|
||||
|
||||
period_name_mapping = {
|
||||
None: "no_date_range",
|
||||
"day": "1d",
|
||||
"week": "1w",
|
||||
"month": "1m",
|
||||
"year": "1y",
|
||||
}
|
||||
self.name = "searxng {} {} {} {} {}".format(searx_instance.replace("https://", "").replace("/", ""), search, search_category, period_name_mapping[period], self.lang_region)
|
||||
logger.info("SearX - Initialized SearX fetcher: {}".format(self.name))
|
||||
|
||||
def _request_and_decode(self, url_search):
|
||||
# Initial random time sleep (minimize chance of getting blocked)
|
||||
time.sleep(random.uniform(self.t_sleep_lower, self.t_sleep_higher))
|
||||
# Request
|
||||
logger.debug("SearX - Searching: {}".format(url_search))
|
||||
try:
|
||||
r = requests.get(url_search, headers=self.headers, timeout=self.request_timeout)
|
||||
except Exception as e:
|
||||
logger.warning("SearX - Exception in request: {}".format(url_search), "\n", str(e))
|
||||
return []
|
||||
|
||||
if (r.status_code == 200):
|
||||
# Status code Ok
|
||||
pass
|
||||
elif (r.status_code == 429):
|
||||
# TooManyRequests, "Rate limit exceeded"
|
||||
logger.warning("SearX {} - Too many requests while running: {}. Request output: {}".format(self.name, r.url, r.text))
|
||||
return []
|
||||
else:
# Any other status code: log and stop
logger.warning("SearX {} - Status code: {}. Request output: {}".format(self.name, r.status_code, r.text))
return []
|
||||
|
||||
# Decode request
|
||||
soup = BeautifulSoup(r.text, 'html.parser')
|
||||
page_url_set = set()
|
||||
# h3 links
|
||||
for elem in soup.find_all('h3'):
|
||||
# Get url
|
||||
url = elem.find('a').get('href')
|
||||
page_url_set.add(url)
|
||||
return page_url_set
|
||||
|
||||
def _get_news_list(self):
|
||||
############################################################
|
||||
# Domain & search parameter
|
||||
search_domain = os.path.join(self.searx_instance, "search?q=")
|
||||
# Search keywords
|
||||
search_formatted = self.search.replace(" ", "+").replace(":", "%3A")
|
||||
# Period formatted
|
||||
period_formatted = "&time_range={}".format(self.period) if self.period is not None else ""
|
||||
# Search parameters
|
||||
search_parameters = "&category_{}=on&language={}{}".format(self.search_category, self.lang_region, period_formatted)
|
||||
# Combined url search
|
||||
url_search_nopage = "{}{}{}".format(search_domain, search_formatted, search_parameters)
|
||||
############################################################
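# Illustrative result (hypothetical defaults): for search "child abuse",
# category "news", language "en-US" and period "day" this builds
#   https://serx.ml/search?q=child+abuse&category_news=on&language=en-US&time_range=day
# with "&pageno=N" appended for the paginated requests below.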
|
||||
|
||||
# Request and decode on page=1
|
||||
url_set = self._request_and_decode(url_search_nopage)
|
||||
# No results?
|
||||
if (len(url_set) == 0):
|
||||
logger.warning("SearX {} - Empty results on search: {}".format(self.name, url_search_nopage))
|
||||
return []
|
||||
|
||||
# Iterate pages
|
||||
search_numpage = 2
|
||||
while True:
|
||||
# Combine url search with page number
|
||||
url_search_with_page = "{}&pageno={}".format(url_search_nopage, search_numpage)
|
||||
# Request and decode on page=X
|
||||
url_set_i = self._request_and_decode(url_search_with_page)
|
||||
|
||||
# Length before merging
|
||||
length_current = len(url_set)
|
||||
# Merge
|
||||
url_set = url_set.union(url_set_i)
|
||||
# Length after merging
|
||||
length_merged = len(url_set)
|
||||
|
||||
# No new elements?
|
||||
if (length_current == length_merged):
|
||||
logger.debug("SearX {} - Finished processing search, #pages: {}".format(self.name, search_numpage))
|
||||
break
|
||||
# Next page
|
||||
search_numpage += 1
|
||||
|
||||
return list(url_set)
|
||||
|
||||
def _fetch(self):
|
||||
try:
|
||||
# Fetch news
|
||||
list_news = self._get_news_list()
|
||||
except Exception as e:
|
||||
logger.warning("Exception fetching {}: {}".format(self.name, str(e)))
|
||||
list_news = []
|
||||
return list_news
|
||||
@@ -1,26 +0,0 @@
|
||||
import requests
|
||||
import json
|
||||
from .logger import get_logger
|
||||
logger = get_logger()
|
||||
|
||||
class GoogleByPass():
|
||||
def __init__(self) -> None:
|
||||
pass
|
||||
|
||||
def bypass_google_urls(self, list_urls):
|
||||
if (len(list_urls) == 0):
|
||||
return []
|
||||
|
||||
try:
|
||||
# Endpoint
|
||||
gbypass_endpoint = "http://selenium_app:80/get_redirection"
|
||||
# Timeout: 20 minutes
|
||||
timeout = 60*20
|
||||
r = requests.post(gbypass_endpoint, json={"list_urls": list_urls}, timeout=timeout)
|
||||
# Decode
|
||||
list_urls_redirections = json.loads(r.text).get("list_urls_redirections", [])
|
||||
except Exception as e:
|
||||
logger.warning("Exception on request: {}. {}".format(gbypass_endpoint, str(e)))
|
||||
list_urls_redirections = []
|
||||
|
||||
return list_urls_redirections
|
||||
@@ -1,22 +0,0 @@
|
||||
import logging
import logging.handlers
|
||||
|
||||
import os
|
||||
os.makedirs("logs", exist_ok=True)
|
||||
|
||||
logging.basicConfig(format='%(filename)s | %(levelname)s | %(asctime)s | %(message)s')
|
||||
logger = logging.getLogger("news_fetcher")
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
# To file log: INFO / WARNING / ERROR
|
||||
fh = logging.handlers.RotatingFileHandler(filename="logs/log_app_fetcher.log", mode="a", maxBytes=10000000, backupCount=4)
|
||||
fh.setFormatter(logging.Formatter('%(levelname)s | %(asctime)s | %(message)s'))
|
||||
logger.addHandler(fh)
|
||||
|
||||
# To file log: WARNING / ERROR
|
||||
fh_ = logging.handlers.RotatingFileHandler(filename="logs/log_app_fetcher_error.log", mode="a", maxBytes=10000000, backupCount=1)
|
||||
fh_.setFormatter(logging.Formatter('%(levelname)s | %(asctime)s | %(message)s'))
|
||||
fh_.setLevel(logging.WARNING)
|
||||
logger.addHandler(fh_)
|
||||
|
||||
def get_logger():
|
||||
return logger
|
||||
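For context, the two handlers above split output by level: the first file receives INFO and above, the second only WARNING and above. A hedged usage sketch (the import path is an assumption):

# Assuming this module is importable as app.logger inside the fetcher package
from app.logger import get_logger

logger = get_logger()
logger.info("fetcher started")      # -> logs/log_app_fetcher.log only
logger.warning("fetcher degraded")  # -> both log files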
@@ -1,36 +0,0 @@
|
||||
from .db_utils import DB_Handler
|
||||
import requests
|
||||
import json
|
||||
from .logger import get_logger
|
||||
logger = get_logger()
|
||||
|
||||
class MissingKidsFetch():
|
||||
def __init__(self, db_handler: DB_Handler, num_pages) -> None:
|
||||
logger.debug("Initializing News MissingKids")
|
||||
self.db_handler = db_handler
|
||||
self.num_pages = num_pages
|
||||
self.missingkids_fetch_endpoint = "http://selenium_app:80/get_missing_kids/?pages={}"
|
||||
|
||||
def run(self):
|
||||
try:
|
||||
logger.debug("Starting NewsMissingKids.run()")
|
||||
try:
|
||||
# Timeout
|
||||
if (self.num_pages > 15):
|
||||
timeout = 60*90 # 1.5h
|
||||
else:
|
||||
timeout = 60*5 # 5 min
|
||||
# Request
|
||||
r = requests.get(self.missingkids_fetch_endpoint.format(self.num_pages), timeout=timeout)
|
||||
# Decode
|
||||
urls_fetched = json.loads(r.text).get("list_urls", [])
|
||||
except Exception as e:
|
||||
logger.warning("Timeout on request: {}. {}".format(missingkids_fetch_endpoint, str(e)))
|
||||
urls_fetched = []
|
||||
|
||||
# URL fetching source
|
||||
source = "missingkids fetcher"
|
||||
# Write to DB
|
||||
self.db_handler.write_batch(urls_fetched, source)
|
||||
except Exception as e:
|
||||
logger.warning("Exception in NewsMissingKids.run(): {}".format(str(e)))
|
||||
@@ -1,98 +0,0 @@
|
||||
from .db_utils import URL_DB_Writer
|
||||
from .url_utils import get_missing_kid_status
|
||||
from .logger import get_logger
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
def get_missing_kid_status(url, return_canonical_url=False):
|
||||
import time
|
||||
import requests
|
||||
|
||||
# Sleep
|
||||
time.sleep(0.75)
|
||||
try:
|
||||
# Request
|
||||
r = requests.get(url, timeout=300)
|
||||
# Decode
|
||||
status_code = r.status_code
|
||||
# Final URL after redirects, used as the canonical URL
|
||||
url_canonical = r.url
|
||||
except Exception as e:
|
||||
logger.warning("Exception on get URL status request: {}. {}".format(url, str(e)))
|
||||
status_code = None
|
||||
url_canonical = url
|
||||
|
||||
if (status_code == 200):
|
||||
status = "valid"
|
||||
elif (status_code == 404):
|
||||
status = "invalid"
|
||||
else:
|
||||
status = "unknown"
|
||||
|
||||
logger.debug("Missing Kid URL {} status: {}".format(url, status))
|
||||
if (return_canonical_url):
|
||||
return status, url_canonical
|
||||
else:
|
||||
return status
|
||||
|
||||
class MissingKidsStatus():
|
||||
def __init__(self, db_connect_info, redis_connect_info, num_urls) -> None:
|
||||
self.num_urls = num_urls
|
||||
self.db_connect_info = db_connect_info
|
||||
self.redis_connect_info = redis_connect_info
|
||||
self.db_writer = URL_DB_Writer(db_connect_info, redis_connect_info)
|
||||
|
||||
def update_missing_kids_status(self):
|
||||
try:
|
||||
logger.info("Starting updating status to Missing Kids URLs, limit #URLs: {}".format(self.num_urls))
|
||||
# List of URLs
|
||||
list_ids_and_urls = self.db_writer._get_missing_kids_urls(self.num_urls)
|
||||
# Dict: status -> IDs to update to new status
|
||||
dict_status_ids, dict_status_urls = {}, {}
|
||||
# Check URLs with invalid status?
|
||||
skip_invalid_check = False
|
||||
|
||||
flush_every, flush_current = 20, 0
|
||||
# Iterate URLs
|
||||
for (id, url, current_status) in list_ids_and_urls:
|
||||
# Skip duplicate URLs
|
||||
if (current_status == "duplicate"):
|
||||
continue
|
||||
# Skip invalid URLs?
|
||||
if (skip_invalid_check):
|
||||
if (current_status == "invalid"):
|
||||
continue
|
||||
|
||||
# Get status
|
||||
new_status = get_missing_kid_status(url)
|
||||
# Different? Update
|
||||
if (current_status != new_status):
|
||||
# Extend array
|
||||
dict_status_ids[new_status] = dict_status_ids.get(new_status, []) + [id]
|
||||
# Debugging dict
|
||||
dict_status_urls[new_status] = dict_status_urls.get(new_status, []) + [url]
|
||||
# +1 processed
|
||||
flush_current += 1
|
||||
|
||||
# Flush batch?
|
||||
if (flush_every == flush_current):
|
||||
logger.info("Updating status to Missing Kids URLs: {}".format(dict_status_urls))
|
||||
# Update DB
|
||||
self.db_writer._update_urls_status(dict_status_ids)
|
||||
# Reset
|
||||
flush_current = 0
|
||||
dict_status_ids, dict_status_urls = {}, {}
|
||||
|
||||
# Flush remaining batch
|
||||
if (flush_current > 0):
|
||||
logger.info("Updating status to Missing Kids URLs: {}".format(dict_status_urls))
|
||||
# Update DB
|
||||
self.db_writer._update_urls_status(dict_status_ids)
|
||||
# Reset
|
||||
flush_current = 0
|
||||
dict_status_ids, dict_status_urls = {}, {}
|
||||
|
||||
logger.info("Finished updating status to Missing Kids URLs")
|
||||
except Exception as e:
|
||||
logger.warning("Exception in MissingKidsStatus.run(): {}".format(str(e)))
|
||||
|
||||
@@ -1,62 +0,0 @@
|
||||
from .db_utils import URL_DB_Writer
|
||||
from .url_utils import process_article
|
||||
from .logger import get_logger
|
||||
logger = get_logger()
|
||||
|
||||
class UpdateErrorURLs():
|
||||
def __init__(self, db_connect_info, redis_connect_info, num_urls) -> None:
|
||||
self.num_urls = num_urls
|
||||
self.db_connect_info = db_connect_info
|
||||
self.redis_connect_info = redis_connect_info
|
||||
self.db_writer = URL_DB_Writer(db_connect_info, redis_connect_info)
|
||||
|
||||
def update_error_urls_status(self):
|
||||
try:
|
||||
logger.info("Starting updating status to URLs with error, limit #URLs: {}".format(self.num_urls))
|
||||
# List of URLs with status 'error'
|
||||
list_ids_and_urls = self.db_writer._get_error_urls(self.num_urls)
|
||||
# Current status
|
||||
current_status = "error"
|
||||
# Dict: status -> IDs to update to new status
|
||||
dict_status_ids, dict_status_urls = {}, {}
|
||||
|
||||
# Get list of (pattern, priority, status) tuples to override status if required
|
||||
list_pattern_status_tuple = self.db_writer._get_pattern_status_list()
|
||||
# Sort pattern tuples by priority
|
||||
list_pattern_status_tuple.sort(key=lambda tup: tup[1], reverse=True)
|
||||
|
||||
flush_every, flush_current = 20, 0
|
||||
# Iterate URLs
|
||||
for (id, url) in list_ids_and_urls:
|
||||
# Get status
|
||||
url_canonical, article_elements, new_status = process_article(url, list_pattern_status_tuple)
|
||||
# Different? Update
|
||||
if (current_status != new_status):
|
||||
# Extend array
|
||||
dict_status_ids[new_status] = dict_status_ids.get(new_status, []) + [id]
|
||||
# Debugging dict
|
||||
dict_status_urls[new_status] = dict_status_urls.get(new_status, []) + [url]
|
||||
# +1 processed
|
||||
flush_current += 1
|
||||
|
||||
# Flush batch?
|
||||
if (flush_every == flush_current):
|
||||
logger.info("Updating status to URLs with error: {}".format(dict_status_urls))
|
||||
# Update DB
|
||||
self.db_writer._update_urls_status(dict_status_ids)
|
||||
# Reset
|
||||
flush_current = 0
|
||||
dict_status_ids, dict_status_urls = {}, {}
|
||||
|
||||
# Flush remaining batch
|
||||
if (flush_current > 0):
|
||||
logger.info("Updating status to URLs with error: {}".format(dict_status_urls))
|
||||
# Update DB
|
||||
self.db_writer._update_urls_status(dict_status_ids)
|
||||
# Reset
|
||||
flush_current = 0
|
||||
dict_status_ids, dict_status_urls = {}, {}
|
||||
|
||||
logger.info("Finished updating status to URLs with error")
|
||||
except Exception as e:
|
||||
logger.warning("Exception in UpdateErrorURLs.run(): {}".format(str(e)))
|
||||
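Both MissingKidsStatus and UpdateErrorURLs share the same flush-every-20 accumulation pattern; a generic sketch of it, with hypothetical stand-ins for the DB writer and status function:

def batch_updates(rows, get_new_status, flush, flush_every=20):
    # rows: iterable of (id, url, current_status); flush: callable taking
    # a dict {status: [ids]} -- both are illustrative stand-ins.
    pending, count = {}, 0
    for id_, url, current_status in rows:
        new_status = get_new_status(url)
        if new_status == current_status:
            continue
        pending.setdefault(new_status, []).append(id_)
        count += 1
        if count == flush_every:
            flush(pending)            # write this chunk to the DB
            pending, count = {}, 0
    if count:
        flush(pending)                # flush the remaining partial batch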
@@ -1,263 +0,0 @@
|
||||
from gnews import GNews
|
||||
import dateutil.parser
|
||||
from datetime import datetime, timedelta
|
||||
from .utils import remove_http_s
|
||||
import time
|
||||
import random
|
||||
import traceback
|
||||
import requests
|
||||
import json
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from .logger import get_logger
|
||||
logger = get_logger()
|
||||
|
||||
def get_published_date(article):
|
||||
try:
|
||||
"""
|
||||
# Already fetched publish date information?
|
||||
if (publish_date_ is not None):
|
||||
return publish_date_
|
||||
"""
|
||||
|
||||
# List of potential publish dates
|
||||
potential_dates = []
|
||||
# Publish date is the best match
|
||||
potential_dates.append(article.publish_date)
|
||||
# Publish date metadata is the following best match
|
||||
potential_dates.append(article.meta_data.get('article', {}).get("published_time", None))
|
||||
# Iterate remaining keys
|
||||
for key in article.meta_data.keys():
|
||||
if ("date" in key):
|
||||
potential_dates.append(article.meta_data[key])
|
||||
|
||||
def invalid_date(p_date):
|
||||
# Today + 2 days, article from the future?
|
||||
today_plus_two = datetime.utcnow() + timedelta(days=2)
|
||||
# Article from the future?
|
||||
return p_date.timestamp() > today_plus_two.timestamp()
|
||||
|
||||
for date_ in potential_dates:
|
||||
# String date? parse
|
||||
if (type(date_) == str):
|
||||
try:
|
||||
date_ = dateutil.parser.parse(date_)
|
||||
except Exception as e:
|
||||
logger.info("Invalid date found while parsing potential date: {} for URL: {}".format(date_, article.url))
|
||||
date_ = None
|
||||
# Valid?
|
||||
if (date_ is not None) and (not invalid_date(date_)):
|
||||
return date_
|
||||
|
||||
logger.debug("Article with no published date: {}".format(article.url))
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.info("Error while retrieving published date for URL: {}".format(article.url))
|
||||
return None
|
||||
|
||||
def get_url_host(article_source_url, url):
|
||||
# https://www.blabla.com/blabla -> www.blabla.com
|
||||
if (article_source_url != ""):
|
||||
# Article source URL already extracted, save path if any
|
||||
return remove_http_s(article_source_url) # .split("/")[0]
|
||||
else:
|
||||
return remove_http_s(url).split("/")[0]
|
||||
|
||||
def get_status_pattern_matching(url, article_status, list_pattern_status_tuple):
|
||||
# Regex pattern to update status on "valid", "invalid", and "unknown" status only
|
||||
# Status "raw", "duplicated" and "error" should remain the way they are
|
||||
# Assumption: List of patterns sorted by importance
|
||||
if (article_status in ["valid", "invalid", "unknown"]):
|
||||
# Regular expression pattern matching: https://regexr.com/
|
||||
for regex_pattern, regex_priority, status_if_match in list_pattern_status_tuple:
|
||||
# Matching?
|
||||
matching = bool(re.match(regex_pattern, url))
|
||||
# Update article status
|
||||
if (matching):
|
||||
if (status_if_match != article_status):
|
||||
logger.debug("Regex pattern found, updating status from '{}' to '{}' for URL: {}".format(article_status, status_if_match, url))
|
||||
return status_if_match
|
||||
# Pattern matching not required or not found, original article status
|
||||
return article_status
|
||||
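A small, self-contained example of how the (pattern, priority, status) tuples drive get_status_pattern_matching(); the patterns and URL below are made up for illustration:

import re

# Hypothetical (regex_pattern, priority, status_if_match) tuples
list_pattern_status_tuple = [
    (r"^https://www\.example\.com/news/.*", 5, "valid"),
    (r"^https://www\.example\.com/tag/.*", 10, "invalid"),
]
# Highest priority first, as assumed by the matching loop
list_pattern_status_tuple.sort(key=lambda tup: tup[1], reverse=True)

url, status = "https://www.example.com/tag/opinion", "valid"
for regex_pattern, _priority, status_if_match in list_pattern_status_tuple:
    if re.match(regex_pattern, url):
        status = status_if_match      # first (highest-priority) match wins
        break
print(status)  # -> invalid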
|
||||
|
||||
|
||||
def bypass_google_link(article_url):
|
||||
|
||||
def bypass_google_consent(article_url):
|
||||
# Sample URL: https://consent.google.com/m?continue=https://news.google.com/rss/articles/CBMiMGh0dHBzOi8vd3d3Lm1pc3NpbmdraWRzLm9yZy9wb3N0ZXIvbmNtYy84NjAxMTkvMdIBAA?oc%3D5&gl=NL&m=0&pc=n&cm=2&hl=en-US&src=1
|
||||
article_url_no_consent = article_url.replace("https://consent.google.com/m?continue=", "")
|
||||
|
||||
# https://stackoverflow.com/questions/76063646/how-can-i-have-redirection-link-from-google-news-link-using-requests
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36'
|
||||
}
|
||||
cookies = {'CONSENT': 'YES+cb.20220419-08-p0.cs+FX+111'}
|
||||
|
||||
try:
|
||||
# Request
|
||||
r = requests.get(article_url_no_consent, headers=headers, cookies=cookies, timeout=300)
|
||||
# Decode
|
||||
soup = BeautifulSoup(r.text, 'html.parser')
|
||||
url_of_interest = soup.a['href']
|
||||
except Exception as e:
|
||||
logger.warning("Exception on request trying to G_bypass with headers: {}. {}".format(article_url_no_consent, str(e)))
|
||||
url_of_interest = None
|
||||
|
||||
# Not able to bypass?
|
||||
if (url_of_interest is None) or (url_of_interest == "") or ("support.google.com" in url_of_interest) or ("news.google.com" in url_of_interest):
|
||||
url_of_interest = None
|
||||
return url_of_interest
|
||||
|
||||
def bypass_google_using_service(article_url):
|
||||
try:
|
||||
# e.g.: url = "https://news.google.com/articles/CBMiX2h0dHBzOi8vd3d3LmZveGJ1c2luZXNzLmNvbS9wb2xpdGljcy9kaXNuZXktc3Vlcy1mbG9yaWRhLWdvdi1yb24tZGVzYW50aXMtbG9zcy1zcGVjaWFsLWRpc3RyaWN00gEA?hl=en-US&gl=US&ceid=US%3Aen"
|
||||
gbypass_endpoint = "http://selenium_app:80/get_redirection"
|
||||
# Timeout: 5 minutes
|
||||
r = requests.post(gbypass_endpoint, json={"url": article_url}, timeout=300)
|
||||
# Decode
|
||||
redirect_url = json.loads(r.text).get("redirect_url", "")
|
||||
except Exception as e:
|
||||
logger.warning("Exception on request: {}. {}".format(gbypass_endpoint, str(e)))
|
||||
redirect_url = ""
|
||||
|
||||
return redirect_url
|
||||
|
||||
logger.debug("Starting gbypass_endpoint()")
|
||||
|
||||
article_url_bypassed = None
|
||||
# Bypass using request
|
||||
if ("consent.google.com" in article_url):
|
||||
article_url_bypassed = bypass_google_consent(article_url)
|
||||
# Not bypassed yet? Bypass using service
|
||||
if (article_url_bypassed is None):
|
||||
article_url_bypassed = bypass_google_using_service(article_url)
|
||||
|
||||
# if (article_url_bypassed is None) or (article_url_bypassed == "") or ("news.google.com" in article_url_bypassed):
|
||||
if (article_url_bypassed == "") or (article_url_bypassed is None):
|
||||
# Empty URL returned by Gbypass
|
||||
logger.warning("Error while bypassing Gnews for URL: {}".format(article_url))
|
||||
return None
|
||||
else:
|
||||
logger.debug("Correctly bypassed GNews to URL_redirect, from URL: {} {}".format(article_url_bypassed, article_url))
|
||||
return article_url_bypassed
|
||||
|
||||
def process_article(article_url, list_pattern_status_tuple, language="en"):
|
||||
# TODO:
|
||||
"""
|
||||
https://github.com/fhamborg/news-please
|
||||
https://github.com/fhamborg/Giveme5W1H
|
||||
|
||||
https://github.com/santhoshse7en/news-fetch
|
||||
"""
|
||||
try:
|
||||
logger.debug("Starting process_article()")
|
||||
|
||||
if ("news.google.com" in article_url) or ("consent.google.com" in article_url):
|
||||
# Bypass to get redirection
|
||||
article_url = bypass_google_link(article_url)
|
||||
# Error?
|
||||
if (article_url is None):
|
||||
return None, {}, "error"
|
||||
elif ("missingkids.org/poster" in article_url):
|
||||
# Get status
|
||||
article_status, url_canonical = get_missing_kid_status(article_url, return_canonical_url=True)
|
||||
article_elements = {
|
||||
"url_full": article_url,
|
||||
"url_canonical": url_canonical
|
||||
}
|
||||
return url_canonical, article_elements, article_status
|
||||
else:
|
||||
# Avoid Too many requests (feeds, ...)
|
||||
time.sleep(0.75)
|
||||
|
||||
logger.debug("Processing: {}".format(article_url))
|
||||
|
||||
# Default status unless something happens
|
||||
article_status = "valid"
|
||||
|
||||
# Parse article
|
||||
# TODO: :param proxy: The proxy parameter is a dictionary with a single key-value pair. self._proxy = {'http': proxy, 'https': proxy} if proxy else None
|
||||
# TODO: Language per config
|
||||
article = GNews(language).get_full_article(url=article_url)
|
||||
|
||||
# Article parsed?
|
||||
if (article is None) or (not article.is_parsed):
|
||||
logger.debug("Article not parsed: {}".format(article_url))
|
||||
return article_url, {}, "error"
|
||||
|
||||
# Canonical link as main URL
|
||||
url_canonical = article.canonical_link
|
||||
# Empty canonical URL?
|
||||
if (article.canonical_link is None) or (article.canonical_link == ""):
|
||||
# URL with parameters? e.g. some zerohedge news fetched from newspaper3k end with #comment-stream -> Remove extra parameter in link
|
||||
if ("?" in article.url) or (article.url.endswith("#comment-stream")) or (article.url.endswith("#disqus_thread")):
|
||||
logger.debug("Article URL contains parameters, trying to clean URL: {}".format(article.url))
|
||||
try:
|
||||
# Remove text after parameter call
|
||||
url = article.url.split("?")[0]
|
||||
# Remove comment-stream
|
||||
url = url.replace("#comment-stream", "").replace("#disqus_thread", "")
|
||||
# Article
|
||||
article_attempt = GNews(language).get_full_article(url=url)
|
||||
# Retrieving same title? Update article based on clean URL
|
||||
if (article_attempt is not None) and (article_attempt.title == article.title):
|
||||
article = article_attempt
|
||||
except Exception as e:
|
||||
logger.info("Article parsing of URL without parameters failed: {}".format(article.url))
|
||||
else: # Default behaviour
|
||||
logger.debug("Article canonical link is empty, assuming URL=URL_CANONICAL: {}".format(article.url))
|
||||
|
||||
# By default, URL same as canonical
|
||||
url_canonical = article.url
|
||||
|
||||
elif (article.url != article.canonical_link):
|
||||
# If different, stick to canonical URL
|
||||
logger.debug("Article URL and canonical link are different: {} {}".format(article.url, article.canonical_link))
|
||||
else:
|
||||
# If same, continue...
|
||||
pass
|
||||
|
||||
# Update config to determine if content is valid
|
||||
article.config.MIN_WORD_COUNT = 150
|
||||
article.config.MIN_SENT_COUNT = 6
|
||||
|
||||
# Valid URL?
|
||||
if (not article.is_valid_url()):
|
||||
logger.debug("Not a valid news article: {}".format(url_canonical))
|
||||
article_status = "invalid"
|
||||
# Is the article's body text long enough to meet standard article requirements?
|
||||
if (not article.is_valid_body()):
|
||||
logger.debug("Article body not valid: {}".format(url_canonical))
|
||||
article_status = "unknown"
|
||||
|
||||
if (article.images != article.imgs):
|
||||
logger.debug("Article images and imgs are different: {} {}".format(article.images, article.imgs))
|
||||
|
||||
# article.keywords, article.meta_keywords, article.summary
|
||||
# article.movies
|
||||
# article.top_image
|
||||
|
||||
# Check if article status needs to be updated
|
||||
article_status = get_status_pattern_matching(url_canonical, article_status, list_pattern_status_tuple)
|
||||
|
||||
article_elements = {
|
||||
'url_full': article.url, # https://www.breitbart.com/tech/2022/10/03/report-election-integrity-project-worked-with-feds-to-censor-news-sites-in-2020/
|
||||
'url_host': get_url_host(article.source_url, url_canonical), # www.breitbart.com
|
||||
'title': article.title, # Report: ‘Election Integrity’ Partnership Worked with Feds to Censor News Sites in 2020
|
||||
'description': article.meta_description, # Coalition committed to respond in ‘early 2022’ but failed to do so, while Labor has not issued a full response since taking office
|
||||
'text': article.text, # ${Article content}
|
||||
'published_date': get_published_date(article), # python.datetime format, obtained from "YYYY-MM-DD" or '2022-10-03T20:54:17+00:00'
|
||||
'authors': article.authors, # ['Christopher Knaus']
|
||||
'language': article.meta_lang, # en
|
||||
'tags': list(article.tags), # ['Wide Open Border', '’My Son Hunter’ Movie', ...]
|
||||
'images': list(article.images), # [URL_IMAGE_1, URL_IMAGE_2, ...]
|
||||
'url_canonical': url_canonical, # Canonical URL (redirection)
|
||||
# 'html': article.html, # HTML article
|
||||
}
|
||||
logger.debug("Processing OK: {}".format(url_canonical))
|
||||
return url_canonical, article_elements, article_status
|
||||
except Exception as e:
|
||||
logger.warning("Exception processing url: {}\n{}".format(article_url, traceback.format_exc()))
|
||||
return None, {}, "error"
|
||||
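The parameter/fragment cleanup that process_article() performs with string splitting can also be expressed with urllib; a minimal sketch, not the project's code:

from urllib.parse import urlsplit, urlunsplit

def strip_url_noise(url):
    # Drop the query string and fragments such as #comment-stream / #disqus_thread
    scheme, netloc, path, _query, _fragment = urlsplit(url)
    return urlunsplit((scheme, netloc, path, "", ""))

print(strip_url_noise("https://example.com/post?utm_source=x#comment-stream"))
# -> https://example.com/post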
@@ -1,33 +0,0 @@
|
||||
|
||||
def remove_http_s(url):
|
||||
url = url.replace("https://", "") if url.startswith("https://") else url
|
||||
url = url.replace("http://", "") if url.startswith("http://") else url
|
||||
return url
|
||||
|
||||
def is_valid_url(url):
|
||||
if (url.startswith("https://")):
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
def get_searxng_instances():
|
||||
# SearxNG instances: https://searx.space/
|
||||
searx_instances = set()
|
||||
searx_instances.add("https://searx.work/")
|
||||
searx_instances.add("https://search.ononoki.org/")
|
||||
searx_instances.add("https://searxng.nicfab.eu/")
|
||||
searx_instances.add("https://searx.be/")
|
||||
|
||||
# searx_instances.add("https://searx.fmac.xyz/")
|
||||
# searx_instances.add("https://northboot.xyz/") # FIX
|
||||
|
||||
# searx_instances.add("https://serx.ml/") # Offline
|
||||
# searx_instances.add("https://searx.ru/")
|
||||
# searx_instances.add("https://searx.sp-codes.de/")
|
||||
# searx_instances.add("https://searxng.nicfab.eu/")
|
||||
# searx_instances.add("https://s.frlt.one/")
|
||||
# searx_instances.add("https://search.sapti.me/")
|
||||
|
||||
# To list
|
||||
list_searx_instances = list(searx_instances)
|
||||
return list_searx_instances
|
||||
@@ -14,8 +14,6 @@ class DB_Handler():
|
||||
self._cache_timeout_insert_url = 86400
|
||||
# Processing error URL, cache time: 2 days
|
||||
self._cache_timeout_error_url = 86400*2
|
||||
# URL host slowdown
|
||||
self.url_host_slowdown_seconds = 5
|
||||
|
||||
def insert_raw_urls(self, urls, obj_source, obj_search):
|
||||
try:
|
||||
@@ -90,13 +88,6 @@ class DB_Handler():
|
||||
if (obj_url.status != status):
|
||||
obj_url.status = status
|
||||
obj_url.save()
|
||||
# updating_urls.append(obj_url)
|
||||
|
||||
# TODO: Fix enum type issue. Bulk update instead of .save() for each object
|
||||
# List of objects to bulk update
|
||||
# updating_urls = []
|
||||
# ... general processing, append to updating_urls
|
||||
# Urls.objects.bulk_update(updating_urls, ['status'])
|
||||
|
||||
##### Filter URL? -> Invalid
|
||||
if (status_pattern_match == "invalid"):
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
from django_rq import job
|
||||
# from django_rq import job
|
||||
from scheduler import job
|
||||
|
||||
from .src.fetch_feed import FetchFeeds
|
||||
from .src.fetch_parser import FetchParser
|
||||
from .src.fetch_search import FetchSearcher
|
||||
from .src.db_utils import DB_Handler
|
||||
'''
|
||||
from src.fetch_search import FetchSearcher
|
||||
from src.missing_kids_fetch import MissingKidsFetch
|
||||
from src.missing_kids_status import MissingKidsStatus
|
||||
'''
|
||||
@@ -13,12 +13,56 @@ from src.missing_kids_status import MissingKidsStatus
|
||||
from .src.logger import get_logger
|
||||
logger = get_logger()
|
||||
|
||||
@job
|
||||
@job('default')
|
||||
def fetch_feeds():
|
||||
logger.info("Task triggered: {}".format("FetchFeeds"))
|
||||
task = "Fetch Feeds"
|
||||
logger.info("Task triggered: {}".format(task))
|
||||
FetchFeeds().run()
|
||||
logger.info("Task completed: {}".format(task))
|
||||
|
||||
@job
|
||||
@job('default')
|
||||
def fetch_parser():
|
||||
task = "Fetch Parser"
|
||||
logger.info("Task triggered: {}".format(task))
|
||||
FetchParser().run()
|
||||
logger.info("Task completed: {}".format(task))
|
||||
|
||||
@job('default')
|
||||
def fetch_search():
|
||||
task = "Fetch Search"
|
||||
logger.info("Task triggered: {}".format(task))
|
||||
FetchSearcher().run()
|
||||
logger.info("Task completed: {}".format(task))
|
||||
|
||||
# TODO: fetch_missing_kids()
|
||||
|
||||
@job('default')
|
||||
def process_raw_urls(batch_size=50):
|
||||
task = "Process raw URLs"
|
||||
logger.info("Task triggered: {}".format(task))
|
||||
DB_Handler().process_raw_urls(batch_size=batch_size)
|
||||
logger.info("Task completed: {}".format(task))
|
||||
|
||||
@job('default')
|
||||
def process_error_urls(batch_size=50):
|
||||
task = "Process error URLs"
|
||||
logger.info("Task triggered: {}".format(task))
|
||||
DB_Handler().process_error_urls(batch_size=batch_size)
|
||||
logger.info("Task completed: {}".format(task))
|
||||
|
||||
@job('default')
|
||||
def process_missing_kids_urls(batch_size=50):
|
||||
task = "Process Missing Kids URLs"
|
||||
logger.info("Task triggered: {}".format(task))
|
||||
DB_Handler().process_missing_kids_urls(batch_size=batch_size)
|
||||
logger.info("Task completed: {}".format(task))
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@job('default')
|
||||
def background_task(process_type: str):
|
||||
logger.info("Task triggered: {}".format(process_type))
|
||||
|
||||
@@ -46,21 +90,11 @@ def background_task(process_type: str):
|
||||
|
||||
|
||||
'''
|
||||
|
||||
elif (process_type == "search") or (process_type == "search_full"):
|
||||
FetchSearcher(cred.db_connect_info, cred.redis_connect_info, full=True).run()
|
||||
elif (process_type == "search_reduced"):
|
||||
FetchSearcher(cred.db_connect_info, cred.redis_connect_info, full=False).run()
|
||||
|
||||
# Selenium based
|
||||
elif (process_type == "fetch_missing_kids_reduced"):
|
||||
MissingKidsFetch(db_handler, num_pages=4).run()
|
||||
elif (process_type == "fetch_missing_kids_full"):
|
||||
MissingKidsFetch(db_handler, num_pages=100000).run()
|
||||
|
||||
else:
|
||||
logger.error("Task error, unknown type: {}".format(process_type))
|
||||
return
|
||||
'''
|
||||
|
||||
logger.info("Task completed: {}".format(process_type))
|
||||
|
||||
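These @job-decorated functions are enqueued via .delay(), as the trigger_task view further down does; a hedged sketch of a manual trigger (the import path is an assumption):

# Assuming the tasks module is importable as api.tasks inside the Django project
from api.tasks import fetch_feeds, process_raw_urls

fetch_feeds.delay()                      # enqueue with defaults
process_raw_urls.delay(batch_size=100)   # enqueue with a custom batch size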
508
app_urls/api/templates/item_list.html
Normal file
@@ -0,0 +1,508 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>News</title>
|
||||
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
|
||||
<script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
|
||||
|
||||
<script>
|
||||
|
||||
function getQueryString(pageNumber, itemsNumber, sources, statuses){
|
||||
// Query parameters. If input is null, get most recent value
|
||||
let queryParams = new URLSearchParams(window.location.search);
|
||||
// page
|
||||
if (pageNumber == null) pageNumber = queryParams.get("page") ?? 1;
|
||||
queryParams.set("page", pageNumber);
|
||||
// items
|
||||
if (itemsNumber == null) itemsNumber = queryParams.get("items") ?? 15;
|
||||
queryParams.set("items", itemsNumber);
|
||||
// sources
|
||||
if (sources == null) sources = queryParams.get("sources") ?? "all";
|
||||
queryParams.set("sources", sources);
|
||||
// status
|
||||
if (statuses == null) statuses = queryParams.get("status") ?? "all";
|
||||
queryParams.set("status", statuses);
|
||||
|
||||
// Encoding fix: %2C -> ,
|
||||
let queryParamsString = queryParams.toString();
|
||||
while (queryParamsString.includes("%2C")) {
|
||||
queryParamsString = queryParamsString.replace("%2C", ",");
|
||||
}
|
||||
return queryParamsString;
|
||||
}
|
||||
|
||||
function loadPage(pageNumber, itemsNumber, sources, statuses) {
|
||||
$("#item-list").fadeTo(100, 0.5); // Smooth fade effect
|
||||
$("#loading").show();
|
||||
|
||||
let queryParamsString = getQueryString(pageNumber, itemsNumber, sources, statuses);
|
||||
|
||||
$.ajax({
|
||||
url: "?" + queryParamsString,
|
||||
type: "GET",
|
||||
headers: { "X-Requested-With": "XMLHttpRequest" },
|
||||
success: function (data) {
|
||||
$("#item-list").fadeTo(0, 1).html(data.items_html); // Restore opacity smoothly
|
||||
$("#loading").hide();
|
||||
// Update URL without reloading
|
||||
window.history.pushState({}, "", "?" + queryParamsString);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Pagination
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
$(document).on("click", ".pagination a", function (event) {
|
||||
event.preventDefault();
|
||||
let page = $(this).attr("data-page");
|
||||
loadPage(page, null, null, null);
|
||||
});
|
||||
|
||||
$(document).ready(function () {
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Filter updates
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
const sourcesToggleAll = $("#toggle-all-sources");
|
||||
const sourcesCheckboxes = $(".source-checkbox");
|
||||
const statusesToggleAll = $("#toggle-all-status");
|
||||
const statusCheckboxes = $(".status-checkbox");
|
||||
|
||||
function updateFilters() {
|
||||
// Get selected sources
|
||||
let selectedSources = sourcesCheckboxes.filter(":checked").map(function () {
|
||||
return $(this).val();
|
||||
}).get().join(",");
|
||||
|
||||
// Get selected URL statuses
|
||||
let selectedStatuses = statusCheckboxes.filter(":checked").map(function () {
|
||||
return $(this).val();
|
||||
}).get().join(",");
|
||||
|
||||
// Get selected items per page
|
||||
let selectedItems = $("input[name='items']:checked").val();
|
||||
|
||||
// Update pagination and reload data
|
||||
loadPage(1, selectedItems, selectedSources, selectedStatuses);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Change triggers
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Sources
|
||||
sourcesToggleAll.on("change", function () {
|
||||
sourcesCheckboxes.prop("checked", sourcesToggleAll.prop("checked"));
|
||||
updateFilters();
|
||||
});
|
||||
sourcesCheckboxes.on("change", function () {
|
||||
sourcesToggleAll.prop("checked", sourcesCheckboxes.length === sourcesCheckboxes.filter(":checked").length);
|
||||
updateFilters();
|
||||
});
|
||||
// Status
|
||||
statusesToggleAll.on("change", function () {
|
||||
statusCheckboxes.prop("checked", statusesToggleAll.prop("checked"));
|
||||
updateFilters();
|
||||
});
|
||||
statusCheckboxes.on("change", function () {
|
||||
// If all checkboxes are checked, mark "Toggle All" as checked
|
||||
statusesToggleAll.prop("checked", statusCheckboxes.length === statusCheckboxes.filter(":checked").length);
|
||||
updateFilters();
|
||||
});
|
||||
|
||||
// Items change trigger update
|
||||
$(".items").on("change", updateFilters);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Default values
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Sources
|
||||
sourcesCheckboxes.each(function () { $(this).prop("checked", true); });
|
||||
sourcesToggleAll.prop("checked", true);
|
||||
// Statuses
|
||||
statusCheckboxes.each(function () { $(this).prop("checked", true); });
|
||||
statusesToggleAll.prop("checked", true);
|
||||
// Items
|
||||
$("input[name='items'][value='" + 15 + "']").prop("checked", true);
|
||||
});
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Theme logic
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
function setTheme(mode) {
|
||||
document.documentElement.setAttribute("data-theme", mode);
|
||||
document.documentElement.setAttribute("data-bs-theme", mode);
|
||||
localStorage.setItem("theme", mode);
|
||||
document.getElementById("theme-icon").innerHTML = mode === "dark" ? "🌞" : "🌙";
|
||||
document.body.classList.toggle("dark-mode", mode === "dark");
|
||||
}
|
||||
|
||||
function toggleTheme() {
|
||||
let currentTheme = document.documentElement.getAttribute("data-theme");
|
||||
setTheme(currentTheme === "dark" ? "light" : "dark");
|
||||
}
|
||||
|
||||
document.addEventListener("DOMContentLoaded", function () {
|
||||
let savedTheme = localStorage.getItem("theme") ||
|
||||
(window.matchMedia("(prefers-color-scheme: dark)").matches ? "dark" : "light");
|
||||
setTheme(savedTheme);
|
||||
});
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
</script>
|
||||
|
||||
<style>
|
||||
/* Content Area */
|
||||
#content {
|
||||
margin-left: 170px; /* Match sidebar width */
|
||||
min-width: calc(100vw - 170px); /* Ensure it doesn't shrink into the sidebar */
|
||||
width: calc(100vw - 170px); /* Expands based on screen size */
|
||||
padding: 20px;
|
||||
overflow-x: auto; /* Prevent content from being squeezed */
|
||||
transition: margin-left 0.3s ease;
|
||||
}
|
||||
|
||||
/* Sidebar Styles */
|
||||
#sidebar {
|
||||
height: 100vh;
|
||||
position: fixed;
|
||||
top: 0;
|
||||
left: 0;
|
||||
width: 170px; /* Default width */
|
||||
background-color: var(--bg-color);
|
||||
box-shadow: 2px 0 5px rgba(0, 0, 0, 0.1);
|
||||
padding: 15px;
|
||||
transition: width 0.3s ease;
|
||||
}
|
||||
|
||||
#sidebar .nav-link {
|
||||
color: var(--text-color);
|
||||
}
|
||||
|
||||
#sidebar .nav-link:hover {
|
||||
background-color: var(--pagination-hover-bg);
|
||||
}
|
||||
|
||||
/* ============================= */
|
||||
/* Responsive Enhancements */
|
||||
/* ============================= */
|
||||
@media (min-width: 1200px) {
|
||||
.table {
|
||||
width: 95%; /* Allows table to take more space */
|
||||
margin: 0 auto; /* Centers the table */
|
||||
}
|
||||
}
|
||||
|
||||
@media (max-width: 768px) {
|
||||
#sidebar {
|
||||
width: 70px; /* Collapse sidebar to smaller width */
|
||||
/*padding: 10px;*/
|
||||
}
|
||||
|
||||
#content {
|
||||
margin-left: 70px; /* Adjust margin to match collapsed sidebar */
|
||||
min-width: calc(100vw - 70px); /* Prevent overlap */
|
||||
/*padding: 10px;*/
|
||||
}
|
||||
|
||||
/* Adjust table for small screens */
|
||||
.table-responsive {
|
||||
overflow-x: auto;
|
||||
}
|
||||
|
||||
.table th,
|
||||
.table td {
|
||||
white-space: nowrap; /* Prevent text wrapping in cells */
|
||||
}
|
||||
|
||||
.table a {
|
||||
word-break: break-word; /* Ensure long URLs break properly */
|
||||
}
|
||||
}
|
||||
|
||||
/* ============================= */
|
||||
/* Global Styles */
|
||||
/* ============================= */
|
||||
body {
|
||||
background-color: var(--bg-color);
|
||||
color: var(--text-color);
|
||||
transition: background-color 0.3s, color 0.3s;
|
||||
}
|
||||
|
||||
/* ============================= */
|
||||
/* Light & Dark Mode Variables */
|
||||
/* ============================= */
|
||||
:root {
|
||||
--bg-color: #ffffff;
|
||||
--text-color: #212529;
|
||||
--table-bg: #ffffff;
|
||||
--table-text: #000000;
|
||||
--table-border: #dee2e6;
|
||||
--link-color: #007bff;
|
||||
--pagination-bg: #ffffff;
|
||||
--pagination-border: #dee2e6;
|
||||
--pagination-hover-bg: #f8f9fa;
|
||||
--pagination-active-bg: #007bff;
|
||||
--pagination-active-text: #ffffff;
|
||||
--button-bg: #f8f9fa;
|
||||
--button-border: #ced4da;
|
||||
--button-text: #212529;
|
||||
}
|
||||
|
||||
[data-theme="dark"] {
|
||||
--bg-color: #121212;
|
||||
--text-color: #e0e0e0;
|
||||
--table-bg: #1e1e1e;
|
||||
--table-text: #ffffff;
|
||||
--table-border: #2c2c2c;
|
||||
--link-color: #9ec5fe;
|
||||
--pagination-bg: #1e1e1e;
|
||||
--pagination-border: #444;
|
||||
--pagination-hover-bg: #333;
|
||||
--pagination-active-bg: #007bff;
|
||||
--pagination-active-text: #ffffff;
|
||||
--button-bg: #1e1e1e;
|
||||
--button-border: #444;
|
||||
--button-text: #e0e0e0;
|
||||
}
|
||||
|
||||
/* ============================= */
|
||||
/* Table Styling */
|
||||
/* ============================= */
|
||||
.table-responsive {
|
||||
width: 100%; /* Ensure it spans the full width of its container */
|
||||
max-width: 100%;
|
||||
overflow-x: auto;
|
||||
}
|
||||
|
||||
.table {
|
||||
background-color: var(--table-bg);
|
||||
color: var(--table-text);
|
||||
border: 1px solid var(--table-border);
|
||||
transition: background-color 0.3s, color 0.3s;
|
||||
|
||||
width: 100%; /* Ensures it takes full width of its container */
|
||||
table-layout: auto; /* Allows columns to adjust dynamically */
|
||||
/*white-space: nowrap;*/ /* Prevents text wrapping in cells */
|
||||
}
|
||||
|
||||
.table th,
|
||||
.table td {
|
||||
border-color: var(--table-border);
|
||||
}
|
||||
|
||||
.table thead {
|
||||
background-color: var(--pagination-active-bg);
|
||||
color: var(--pagination-active-text);
|
||||
}
|
||||
|
||||
[data-theme="dark"] .table {
|
||||
background-color: var(--table-bg);
|
||||
color: var(--table-text);
|
||||
}
|
||||
|
||||
[data-theme="dark"] .table th,
|
||||
[data-theme="dark"] .table td {
|
||||
border-color: var(--table-border);
|
||||
}
|
||||
|
||||
[data-theme="dark"] .table thead {
|
||||
background-color: #333;
|
||||
color: #fff;
|
||||
}
|
||||
|
||||
th:nth-child(1), td:nth-child(1) { width: 50%; } /* URL column */
|
||||
th:nth-child(2), td:nth-child(2) { width: 20%; } /* Fetch Date */
|
||||
th:nth-child(3), td:nth-child(3) { width: 20%; } /* Sources */
|
||||
th:nth-child(4), td:nth-child(4) { width: 5%; } /* Status */
|
||||
th:nth-child(5), td:nth-child(5) { width: 5%; } /* Action */
|
||||
|
||||
/* ============================= */
|
||||
/* Pagination Styling */
|
||||
/* ============================= */
|
||||
.pagination {
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
padding: 10px 0;
|
||||
}
|
||||
|
||||
.pagination .page-link {
|
||||
background-color: var(--pagination-bg);
|
||||
border-color: var(--pagination-border);
|
||||
color: var(--text-color);
|
||||
padding: 10px 14px;
|
||||
margin: 0 5px;
|
||||
border-radius: 8px;
|
||||
transition: background-color 0.3s, color 0.3s, transform 0.2s;
|
||||
}
|
||||
|
||||
.pagination .page-link:hover {
|
||||
background-color: var(--pagination-hover-bg);
|
||||
transform: scale(1.05);
|
||||
}
|
||||
|
||||
.pagination .active .page-link {
|
||||
background-color: var(--pagination-active-bg);
|
||||
color: var(--pagination-active-text);
|
||||
border-color: var(--pagination-active-bg);
|
||||
}
|
||||
|
||||
/* ============================= */
|
||||
/* Theme Toggle Button */
|
||||
/* ============================= */
|
||||
.theme-toggle-btn {
|
||||
background-color: var(--button-bg);
|
||||
border: 1px solid var(--button-border);
|
||||
color: var(--button-text);
|
||||
border-radius: 50%;
|
||||
width: 40px;
|
||||
height: 40px;
|
||||
font-size: 20px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
transition: background-color 0.3s, color 0.3s, transform 0.2s;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.theme-toggle-btn:hover {
|
||||
background-color: var(--pagination-hover-bg);
|
||||
transform: rotate(20deg);
|
||||
}
|
||||
|
||||
.theme-toggle-btn:active {
|
||||
transform: scale(0.95);
|
||||
}
|
||||
|
||||
/* ============================= */
|
||||
/* Loading Spinner Styling */
|
||||
/* ============================= */
|
||||
#loading {
|
||||
position: fixed;
|
||||
left: 50%;
|
||||
top: 50%;
|
||||
transform: translate(-50%, -50%);
|
||||
z-index: 1050;
|
||||
display: none;
|
||||
}
|
||||
|
||||
.spinner-border {
|
||||
width: 4rem;
|
||||
height: 4rem;
|
||||
}
|
||||
|
||||
</style>
|
||||
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<!-- Left Sidebar -->
|
||||
<div id="sidebar" class="d-flex flex-column">
|
||||
<ul class="nav flex-column">
|
||||
|
||||
<!-- Theme Toggle Button -->
|
||||
<div class="nav-item">
|
||||
<button onclick="toggleTheme()" class="theme-toggle-btn">
|
||||
<span id="theme-icon">🌙</span>
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<!-- Sources -->
|
||||
<div class="nav-item mt-3">
|
||||
<strong>Select sources</strong>
|
||||
<form id="source-filter-form">
|
||||
<!-- Toggle All Checkbox -->
|
||||
<div class="form-check">
|
||||
<input class="form-check-input" type="checkbox" id="toggle-all-sources">
|
||||
<label class="form-check-label fw-bold" for="toggle-all-sources">
|
||||
Toggle all
|
||||
</label>
|
||||
</div>
|
||||
|
||||
<!-- Individual Source Checkboxes -->
|
||||
{% for source in sources %}
|
||||
<div class="form-check">
|
||||
<input class="form-check-input source-checkbox" type="checkbox" value="{{ source.id }}" id="source-{{ source.id }}">
|
||||
<label class="form-check-label" for="source-{{ source.id }}">
|
||||
{{ source.source }}
|
||||
</label>
|
||||
</div>
|
||||
{% empty %}
|
||||
<div class="text-muted">No sources available.</div>
|
||||
{% endfor %}
|
||||
</form>
|
||||
</div>
|
||||
|
||||
<!-- Status -->
|
||||
<div class="nav-item mt-3">
|
||||
<strong>Select status</strong>
|
||||
<form id="status-filter-form">
|
||||
<!-- Toggle All Checkbox -->
|
||||
<div class="status-form-check">
|
||||
<input class="form-check-input" type="checkbox" id="toggle-all-status">
|
||||
<label class="form-check-label fw-bold" for="toggle-all-status">
|
||||
Toggle all
|
||||
</label>
|
||||
</div>
|
||||
|
||||
<!-- Individual Status Checkboxes -->
|
||||
{% for status in list_status %}
|
||||
<div class="status-form-check">
|
||||
<input class="form-check-input status-checkbox" type="checkbox" value="{{ status }}" id="status-{{ status }}">
|
||||
<label class="form-check-label" for="status-{{ status }}">
|
||||
{{ status }}
|
||||
</label>
|
||||
</div>
|
||||
{% empty %}
|
||||
<div class="text-muted">No statuses available.</div>
|
||||
{% endfor %}
|
||||
</form>
|
||||
</div>
|
||||
|
||||
<!-- URLs per page -->
|
||||
<div class="nav-item mt-3">
|
||||
<strong>URLs per page</strong>
|
||||
<div class="card-body">
|
||||
<!-- Individual Status Checkboxes -->
|
||||
{% for url_per_page in list_urls_per_page %}
|
||||
<div class="items-form-check">
|
||||
<input class="form-check-input items" type="radio" name="items" id="value-{{ url_per_page }}" value="{{ url_per_page }}">
|
||||
<label class="form-check-label" for="value-{{ url_per_page }}">{{ url_per_page }}</label>
|
||||
</div>
|
||||
{% empty %}
|
||||
<div class="text-muted">No options available.</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<!-- Main Content Area -->
|
||||
<div id="content" class="main-content">
|
||||
<div class="container mt-4">
|
||||
|
||||
<!-- Table -->
|
||||
<div id="item-list">
|
||||
{% include 'item_list_partial.html' %}
|
||||
</div>
|
||||
<!-- Loading... -->
|
||||
<div id="loading" class="text-center mt-3" style="display:none;">
|
||||
<div class="spinner-border text-primary" role="status">
|
||||
<span class="visually-hidden">Loading...</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
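The loadPage() helper above relies on the news view returning the rendered partial as JSON when the X-Requested-With header is set; a hedged sketch of that contract from Python (host and the /api/url/ mount are assumptions based on the links this commit configures):

import requests

resp = requests.get(
    "http://localhost:8000/api/url/",
    params={"page": 2, "items": 15, "sources": "all", "status": "valid,unknown"},
    headers={"X-Requested-With": "XMLHttpRequest"},
    timeout=10,
)
print(resp.json()["items_html"][:200])  # rows rendered from item_list_partial.html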
87
app_urls/api/templates/item_list_partial.html
Normal file
@@ -0,0 +1,87 @@
|
||||
{% load custom_filters %}
|
||||
|
||||
<div class="table-responsive">
|
||||
<table class="table table-hover">
|
||||
<thead>
|
||||
<tr>
|
||||
<th scope="col"><strong>URL</strong></th>
|
||||
<th scope="col"><strong>Fetch date</strong></th>
|
||||
<th scope="col"><strong>Sources</strong></th>
|
||||
<th scope="col"><strong>Status</strong></th>
|
||||
<th scope="col"><strong>Action</strong></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for item in page_obj %}
|
||||
<tr>
|
||||
<td><a href="{{ item.url }}/" target="_blank">{{ item.url }}</a></td>
|
||||
<td>{{ item.ts_fetch }}</td>
|
||||
<td>
|
||||
{% with sources_map|dict_get:item.id as sources %}
|
||||
{% if sources %}
|
||||
{% for source in sources %}
|
||||
<span class="badge bg-secondary">{{ source }}</span>
|
||||
{% endfor %}
|
||||
{% else %}
|
||||
<span class="text-muted">No sources</span>
|
||||
{% endif %}
|
||||
{% endwith %}
|
||||
</td>
|
||||
<td>
|
||||
{% if item.status == 'raw' %}
|
||||
<span class="badge bg-secondary">{{ item.status|capfirst }}</span>
|
||||
{% elif item.status == 'error' %}
|
||||
<span class="badge bg-danger">{{ item.status|capfirst }}</span>
|
||||
{% elif item.status == 'valid' %}
|
||||
<span class="badge bg-success">{{ item.status|capfirst }}</span>
|
||||
{% elif item.status == 'unknown' %}
|
||||
<span class="badge bg-warning">{{ item.status|capfirst }}</span>
|
||||
{% elif item.status == 'invalid' %}
|
||||
<span class="badge bg-danger">{{ item.status|capfirst }}</span>
|
||||
{% elif item.status == 'duplicate' %}
|
||||
<span class="badge bg-info">{{ item.status|capfirst }}</span>
|
||||
{% else %}
|
||||
<span class="badge bg-light">Unknown</span>
|
||||
{% endif %}
|
||||
</td>
|
||||
<td>
|
||||
<a href="url/{{ item.id }}" class="btn btn-primary btn-sm" target="_blank">Details</a>
|
||||
</td>
|
||||
|
||||
</tr>
|
||||
{% empty %}
|
||||
<tr>
|
||||
<td colspan="4" class="text-center">No items available.</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<div class="d-flex justify-content-center mt-3">
|
||||
<nav>
|
||||
<ul class="pagination">
|
||||
{% if page_obj.has_previous %}
|
||||
<li class="page-item">
|
||||
<a class="page-link" href="#" data-page="1">First</a>
|
||||
</li>
|
||||
<li class="page-item">
|
||||
<a class="page-link" href="#" data-page="{{ page_obj.previous_page_number }}">Previous</a>
|
||||
</li>
|
||||
{% endif %}
|
||||
|
||||
<li class="page-item active">
|
||||
<span class="page-link">Page {{ page_obj.number }} of {{ page_obj.paginator.num_pages }}</span>
|
||||
</li>
|
||||
|
||||
{% if page_obj.has_next %}
|
||||
<li class="page-item">
|
||||
<a class="page-link" href="#" data-page="{{ page_obj.next_page_number }}">Next</a>
|
||||
</li>
|
||||
<li class="page-item">
|
||||
<a class="page-link" href="#" data-page="{{ page_obj.paginator.num_pages }}">Last</a>
|
||||
</li>
|
||||
{% endif %}
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
211
app_urls/api/templates/url_detail.html
Normal file
@@ -0,0 +1,211 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>{% block title %}News{% endblock %}</title>
|
||||
|
||||
<!-- Bootstrap CSS -->
|
||||
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
|
||||
<!-- Add jQuery from CDN (before other scripts) -->
|
||||
<script src="https://code.jquery.com/jquery-3.6.4.min.js"></script>
|
||||
<!-- Markdown -->
|
||||
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
||||
|
||||
<!-- Custom Styles -->
|
||||
<style>
|
||||
body {
|
||||
background-color: #f4f4f4;
|
||||
}
|
||||
.navbar-dark .navbar-nav .nav-link {
|
||||
color: rgba(255,255,255,0.75);
|
||||
}
|
||||
.chat-box {
|
||||
background-color: #fff;
|
||||
border: 1px solid #ddd;
|
||||
padding: 15px;
|
||||
border-radius: 8px;
|
||||
overflow-y: auto; /* Enable vertical scrolling */
|
||||
max-width: 100%;
|
||||
min-height: 150px;
|
||||
max-height: 450px;
|
||||
white-space: normal;
|
||||
word-wrap: break-word;
|
||||
word-break: break-word;
|
||||
}
|
||||
|
||||
</style>
|
||||
|
||||
</head>
|
||||
<script>
|
||||
|
||||
function fetchDetails(urlId, url) {
|
||||
// Show the loading spinner
|
||||
document.getElementById("loading-spinner").style.display = "block";
|
||||
|
||||
// Get the input value
|
||||
let inputText = document.getElementById(`custom-input-${urlId}`).value;
|
||||
// Get the input model
|
||||
let selectedModel = document.getElementById(`options-${urlId}`).value;
|
||||
// Check if a model is selected
|
||||
if (!selectedModel) {
|
||||
alert("Please select a model before fetching details.");
|
||||
return;
|
||||
}
|
||||
|
||||
// Fetch URL
|
||||
let fetchUrl = `/news/url/${urlId}/fetch/?url=${encodeURIComponent(url)}&model=${encodeURIComponent(selectedModel)}&text=${encodeURIComponent(inputText)}`;
|
||||
|
||||
let resultContainer = $("#chat-output");
|
||||
resultContainer.html(""); // Clear previous content before fetching
|
||||
|
||||
let fetchButton = $("button[onclick^='fetchDetails']"); // Select the button
|
||||
fetchButton.prop("disabled", true); // Disable button
|
||||
|
||||
|
||||
fetch(fetchUrl)
|
||||
.then(response => {
|
||||
if (!response.ok) {
|
||||
throw new Error("Error on network response");
|
||||
}
|
||||
const reader = response.body.getReader();
|
||||
const decoder = new TextDecoder();
|
||||
|
||||
|
||||
//////////////////////////////////////
|
||||
|
||||
let accumulatedText = ""; // Store streamed text before rendering Markdown
|
||||
// Create a temporary container for streaming response
|
||||
let messageContainer = $('<div class="chat-message"></div>');
|
||||
//let messageContainer = $('');
|
||||
resultContainer.append(messageContainer);
|
||||
//////////////////////////////////////
|
||||
|
||||
function read() {
|
||||
return reader.read().then(({ done, value }) => {
|
||||
if (done) {
|
||||
//////////////////////////////////////
|
||||
messageContainer.html(marked.parse(accumulatedText));
|
||||
//////////////////////////////////////
|
||||
fetchButton.prop("disabled", false); // Re-enable button when done
|
||||
return;
|
||||
}
|
||||
|
||||
//////////////////////////////////////
|
||||
// Decode the streamed chunk
|
||||
let chunk = decoder.decode(value);
|
||||
// Append to the accumulated text
|
||||
accumulatedText += chunk;
|
||||
// Render Markdown progressively (but safely)
|
||||
messageContainer.html(marked.parse(accumulatedText));
|
||||
//////////////////////////////////////
|
||||
|
||||
//////////////////////////////////////
|
||||
// ORIGINAL:
|
||||
//let text = decoder.decode(value).replace(/\n/g, "<br>");
|
||||
//resultContainer.append(text); // Append streamed text
|
||||
//////////////////////////////////////
|
||||
|
||||
resultContainer.scrollTop(resultContainer[0].scrollHeight); // Auto-scroll to bottom
|
||||
return read();
|
||||
});
|
||||
}
|
||||
return read();
|
||||
})
|
||||
.catch(error => {
|
||||
resultContainer.html(`<p class="text-danger">Error fetching details: ${error.message}</p>`);
|
||||
fetchButton.prop("disabled", false); // Re-enable button on error
|
||||
})
|
||||
.finally(() => {
|
||||
// Hide the loading spinner after request is complete
|
||||
document.getElementById("loading-spinner").style.display = "none";
|
||||
});
|
||||
|
||||
}
|
||||
</script>
|
||||
<body>
|
||||
|
||||
<!-- Main Content -->
|
||||
<div class="container mt-4">
|
||||
<h2>URL Details</h2>
|
||||
<table class="table table-bordered">
|
||||
<tr>
|
||||
<th>URL</th>
|
||||
<td><a href="{{ url_item.url }}" target="_blank">{{ url_item.url }}</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Fetch Date</th>
|
||||
<td>{{ url_item.ts_fetch }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Sources</th>
|
||||
<td>{{ sources|join:", " }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Status</th>
|
||||
<td>{{ url_item.status }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Title</th>
|
||||
<td>{{ url_content.title }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Description</th>
|
||||
<td>{{ url_content.description }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Content</th>
|
||||
<td>{{ url_content.content }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Tags</th>
|
||||
<td>{{ url_content.tags }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Authors</th>
|
||||
<td>{{ url_content.authors }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Image URLs</th>
|
||||
<td>{{ url_content.image_urls }}</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<!-- Independent form for optional values -->
|
||||
<form onsubmit="fetchDetailsWithSelection(event, {{ url_item.id }}, '{{ url_item.url }}')">
|
||||
<label for="options-{{ url_item.id }}">Model:</label>
|
||||
<select id="options-{{ url_item.id }}" class="form-control mb-2">
|
||||
<!-- <option value="">-- Select an option --</option> -->
|
||||
{% for model in models %}
|
||||
<option value="{{ model }}">{{ model }}</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
</form>
|
||||
|
||||
<!-- Input field with a default value -->
|
||||
<label for="custom-input-{{ url_item.id }}">Prompt:</label>
|
||||
<textarea id="custom-input-{{ url_item.id }}" class="form-control mb-2" rows="3">{{ prompt }} {{ url_item.url }}</textarea>
|
||||
|
||||
<!-- Fetch details button -->
|
||||
<button class="btn btn-primary" onclick="fetchDetails({{ url_item.id }}, '{{ url_item.url }}')">
|
||||
Fetch Details
|
||||
</button>
|
||||
|
||||
<!-- Chatbot-style response box -->
|
||||
<div class="chat-box mt-3 p-3 border rounded">
|
||||
<div id="chat-output"></div>
|
||||
</div>
|
||||
|
||||
<!-- Loading Spinner (Hidden by Default) -->
|
||||
<div id="loading-spinner" class="spinner-border text-primary mt-3" role="status" style="display: none;">
|
||||
<span class="visually-hidden">Loading...</span>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
<!-- Bootstrap JS -->
|
||||
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
|
||||
|
||||
{% block extra_js %}{% endblock %}
|
||||
</body>
|
||||
</html>
|
||||
0
app_urls/api/templatetags/__init__.py
Normal file
8
app_urls/api/templatetags/custom_filters.py
Normal file
@@ -0,0 +1,8 @@
|
||||
from django import template
|
||||
|
||||
register = template.Library()
|
||||
|
||||
@register.filter
|
||||
def dict_get(dictionary, key):
|
||||
"""Custom filter to get a value from a dictionary in Django templates."""
|
||||
return dictionary.get(key, [])
|
||||
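A quick way to exercise the dict_get filter is from a configured Django shell (python manage.py shell); the mapping below is illustrative:

from django.template import Context, Template

tpl = Template("{% load custom_filters %}{{ sources_map|dict_get:3 }}")
print(tpl.render(Context({"sources_map": {3: ["feed", "search"]}})))
# -> ['feed', 'search']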
@@ -1,7 +1,10 @@
|
||||
from django.urls import path
|
||||
from .views import trigger_task, link_list
|
||||
from . import views
|
||||
|
||||
urlpatterns = [
|
||||
path('links', link_list, name='link_list'),
|
||||
path('<str:task>', trigger_task, name='trigger_task'),
|
||||
path('', views.link_list, name='link_list'),
|
||||
path('url/', views.news, name='url_detail'),
|
||||
path('url/<int:id>/', views.url_detail_view, name='url_detail'),
|
||||
path('url/<int:id>/fetch/', views.fetch_details, name='fetch_details'),
|
||||
path('task/<str:task>', views.trigger_task, name='trigger_task'),
|
||||
]
|
||||
|
||||
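Taken together with the /api/ prefix implied by link_list below, the new routes can be smoke-tested as follows (a hedged sketch; host and mount point are assumptions based on the links that view returns):

import requests

base = "http://localhost:8000/api"
print(requests.get(base + "/", timeout=10).json())                            # link_list
print(requests.get(base + "/url/?page=1&items=15", timeout=10).status_code)   # news list
print(requests.get(base + "/task/fetch_feeds", timeout=10).json())            # enqueue a task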
@@ -1,30 +1,130 @@
|
||||
import django_rq
|
||||
from django.http import JsonResponse
|
||||
# import django_rq
|
||||
from .tasks import background_task
|
||||
from django.http import JsonResponse
|
||||
import os
|
||||
from .src.logger import get_logger
|
||||
logger = get_logger()
|
||||
|
||||
# TODO: Queues with priorities, process_raw_urls, process_error_urls least priority due to slowdown logic
|
||||
|
||||
def trigger_task(request, task):
|
||||
"""View that enqueues a task."""
|
||||
# View that enqueues a task
|
||||
|
||||
"""
|
||||
if ("fetch_" in task):
|
||||
priority = "low"
|
||||
job_timeout="30m"
|
||||
elif ("process_" in task):
|
||||
priority = "medium"
|
||||
job_timeout="30m"
|
||||
"""
|
||||
# Enqueue function in "default" queue
|
||||
background_task.delay(task)
|
||||
return JsonResponse({"message": "Task has been enqueued!", "task": task})
|
||||
|
||||
queue = django_rq.get_queue('default') # Get the default queue
|
||||
job = queue.enqueue(background_task, task, job_timeout="30m")
|
||||
return JsonResponse({"message": "Task has been enqueued!", "job_id": job.id})
|
||||
# queue = django_rq.get_queue('default') # Get the default queue
|
||||
# job = queue.enqueue(background_task, task, job_timeout="30m")
|
||||
# return JsonResponse({"message": "Task has been enqueued!", "job_id": job.id})
|
||||
|
||||
def link_list(request):
    prefix = "http://localhost:8000/api"
    prefix = "http://localhost:8000/api/task"
    links = ["fetch_feeds", "fetch_parser", "fetch_search", "process_raw_urls_50", "process_error_urls_50", "process_missing_kids_urls_50", "process_missing_kids_urls_500000"]

    db_links = ["http://localhost:8080/?pgsql=matitos_db&username=supermatitos&db=matitos&ns=public&select=urls&order%5B0%5D=id&limit=500"]
    return JsonResponse({"links": db_links + [os.path.join(prefix, l) for l in links]})
    return JsonResponse({"links": ["http://localhost:8000/api/url"] + db_links + [os.path.join(prefix, l) for l in links]})

from django.http import StreamingHttpResponse, HttpResponse, JsonResponse
from django.shortcuts import render, get_object_or_404
from django.core.paginator import Paginator
import requests
from django.http import StreamingHttpResponse
import json
import time
import ollama

from .models import Urls, Source, Search, UrlsSourceSearch, UrlContent

# Create your views here.
def news(request):
    # URLs
    urls = Urls.objects.all()
    # Sources
    sources = Source.objects.all()
    searches = Search.objects.all()

    # Parameters
    page_number = request.GET.get("page", 1)
    num_items = request.GET.get("items", 15)
    source_ids = request.GET.get("sources", ','.join([str(s.id) for s in sources]))
    status_filters = request.GET.get("status", None)

    # Filters
    if (status_filters) and (status_filters != "all"):
        urls = urls.filter(status__in=status_filters.split(","))
    if (source_ids) and (source_ids != "all"):
        # TODO: Distinct needed?
        # urls = urls.filter(urlssource__id_source__in=source_ids.split(",")).distinct()
        pass

    # Pagination
    paginator = Paginator(urls, num_items)
    page_obj = paginator.get_page(page_number)

    # Map URL IDs to their sources, only for the subset of URLs on the page of interest
    sources_map = {}
    """
    sources_map = {
        url.id: list(Source.objects.filter(urlssource__id_url=url).values_list('source', flat=True))
        for url in page_obj.object_list
    }
    """

    context = {
        "page_obj": page_obj,
        "sources": sources,
        "sources_map": sources_map,
        "list_status": Urls.STATUS_ENUM.values,
        "list_urls_per_page": [15, 50, 100],
    }

    # If the request is AJAX, return a JSON response
    if request.headers.get("X-Requested-With") == "XMLHttpRequest":
        return JsonResponse({'items_html': render(request, 'item_list_partial.html', context).content.decode('utf-8')})

    return render(request, "item_list.html", context)

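A sketch (not part of the commit) of the AJAX path through this view: the same query parameters plus the X-Requested-With header return the rendered partial as JSON. The host, port, and filter values are assumptions:

import requests

resp = requests.get(
    "http://localhost:8000/api/url/",
    params={"page": 2, "items": 50, "status": "fetched"},
    headers={"X-Requested-With": "XMLHttpRequest"},
)
items_html = resp.json()["items_html"]  # HTML fragment rendered from item_list_partial.html
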
def url_detail_view(request, id):
    url_item = get_object_or_404(Urls, id=id)
    url_sources = list(Source.objects.filter(urlssource__id_url=url_item).values_list('source', flat=True))
    try:
        url_content = UrlContent.objects.get(pk=id)
    except UrlContent.DoesNotExist:
        url_content = {}

    # TODO: https://github.com/ollama/ollama-python?tab=readme-ov-file#async-client
    # LLM models available
    client = ollama.Client(host='https://ollamamodel.matitos.org')
    models = sorted([m.model for m in client.list().models])
    # default_model = "llama3.2:3b"

    context = {
        'url_item': url_item,
        'sources': url_sources,
        'models': models,
        # 'default_model': default_model,
        'prompt': "Provide in one paragraph the what, why, when, where, who, and how of the content below. Also provide a one paragraph summary of the content:",
        # "prompt": "Imagine you are a journalist, TLDR in a paragraph:",
        # "prompt": "Below you will find the whole content of a news article:\n{}\nProvide a concise summary of one paragraph maximum of the content.".format(content),
        'url_content': url_content,
    }
    return render(request, 'url_detail.html', context)

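Regarding the async-client TODO above, a minimal sketch (not part of the commit) of what the switch might look like, assuming the same host and the AsyncClient described in the linked ollama-python README; the function name is illustrative:

import asyncio
from ollama import AsyncClient

async def list_models():
    # Mirror of the synchronous call above, but awaitable
    client = AsyncClient(host="https://ollamamodel.matitos.org")
    resp = await client.list()
    return sorted(m.model for m in resp.models)

print(asyncio.run(list_models()))
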
def fetch_details(request, id):
    url_item = get_object_or_404(Urls, id=id)
    url_param = request.GET.get("url", "")  # Get URL
    model = request.GET.get("model", "")    # Get LLM model
    text = request.GET.get("text", "")      # Get LLM prompt

    # LLM
    client = ollama.Client(host='https://ollamamodel.matitos.org')

    def stream_response():
        msg_content = {
            "role": "user",
            "content": text,
        }
        response = client.chat(model=model, messages=[msg_content], stream=True)
        for chunk in response:
            yield chunk["message"]["content"]  # Stream each chunk of text

    return StreamingHttpResponse(stream_response(), content_type="text/plain")

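The view above streams plain text chunk by chunk. A small client-side sketch (not part of the commit) for consuming it with requests; the host, URL id, model, and prompt are assumptions:

import requests

params = {"model": "llama3.2:3b", "text": "Summarize this article in one paragraph."}
with requests.get("http://localhost:8000/api/url/42/fetch/", params=params, stream=True) as r:
    # Print each streamed chunk as it arrives, without buffering the full response
    for chunk in r.iter_content(chunk_size=None, decode_unicode=True):
        print(chunk, end="", flush=True)
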
@@ -38,7 +38,8 @@ INSTALLED_APPS = [
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    'django_rq',
    # 'django_rq',
    'scheduler',
    'api',
]

@@ -92,7 +93,6 @@ DATABASES = {

CACHES = {
    "default": {
        # "BACKEND": "django.core.cache.backends.redis.RedisCache",
        "BACKEND": "django_redis.cache.RedisCache",
        "LOCATION": "redis://{}:{}".format(
            os.environ.get("REDIS_HOST", "localhost"),
@@ -105,15 +105,36 @@ CACHES = {
    }
}

'''
RQ_QUEUES = {
    'default': {
        'HOST': os.environ.get("REDIS_HOST", "localhost"),
        'PORT': os.environ.get("REDIS_PORT", 6379),
        'DB': os.environ.get("REDIS_DB", 0),
        'DEFAULT_TIMEOUT': os.environ.get("RQ_DEFAULT_TIMEOUT", 900),
        'DEFAULT_RESULT_TTL': os.environ.get("RQ_DEFAULT_RESULT_TTL", 3600),
        # 'DEFAULT_RESULT_TTL': os.environ.get("RQ_DEFAULT_RESULT_TTL", 3600),
    }
}
'''

# https://django-tasks-scheduler.readthedocs.io/en/latest/configuration/
SCHEDULER_QUEUES = {
    'default': {
        'HOST': os.environ.get("REDIS_HOST", "localhost"),
        'PORT': os.environ.get("REDIS_PORT", 6379),
        'DB': os.environ.get("REDIS_DB", 0),
        'DEFAULT_TIMEOUT': os.environ.get("RQ_DEFAULT_TIMEOUT", 60*15),
        # 'USERNAME': 'some-user',
        # 'PASSWORD': 'some-password',
        # 'DEFAULT_TIMEOUT': 360,
    }
}
SCHEDULER_CONFIG = {
    'EXECUTIONS_IN_PAGE': 20,
    'DEFAULT_RESULT_TTL': 60*60*12,  # 12 hours
    'DEFAULT_TIMEOUT': os.environ.get("RQ_DEFAULT_TIMEOUT", 60*15),  # 15 minutes
    'SCHEDULER_INTERVAL': 10,  # 10 seconds
}

# Password validation
# https://docs.djangoproject.com/en/5.1/ref/settings/#auth-password-validators

@@ -19,5 +19,7 @@ from django.urls import path, include

urlpatterns = [
    path('admin/', admin.site.urls),
    path('api/', include('api.urls'))
    path('api/', include('api.urls')),
    # path('scheduler/', include('django_rq.urls')),
    path('scheduler/', include('scheduler.urls')),
]

@@ -41,6 +41,17 @@ services:
    ports:
      - 8080:8080

  matitos_dozzle:
    container_name: dozzle
    image: amir20/dozzle:latest
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock:ro
    ports:
      - 8888:8080
    environment:
      - DOZZLE_FILTER="name=matitos_"  # Needs container names prefixed with "matitos_"?

  # django:
  #   Env: DB_HOST=matitos_db
  #   DJANGO_DB_NAME=${DB_DATABASE_NAME:-matitos}
