106 Commits
fr ... main

Author SHA1 Message Date
Luciano Gervasoni
cbc422df36 Notify status bot token 2025-10-16 10:38:32 +02:00
Luciano Gervasoni
dc784dabec Notify status schedule 2025-10-16 10:12:17 +02:00
Luciano Gervasoni
d8ef738d19 Notifier fix 2025-10-14 13:10:54 +02:00
Luciano Gervasoni
2f035a4222 Notifier fix 2025-10-14 12:23:05 +02:00
Luciano Gervasoni
e057568af0 Telegram bot tokens 2025-10-14 11:36:19 +02:00
Luciano Gervasoni
7924857fe5 Schools NL tuples, traceback on notify err 2025-10-14 11:33:17 +02:00
Luciano Gervasoni
f44b784715 Notifications, info and warning, try catch 2025-09-09 22:06:23 +02:00
Luciano Gervasoni
24510d26e2 Notifications, info and warning 2025-09-08 17:55:03 +02:00
Luciano Gervasoni
ef51a96db6 Process missing kids url based on API endpoint, fix2 2025-09-08 16:20:39 +02:00
Luciano Gervasoni
079b2473f8 Process missing kids url based on API endpoint 2025-09-08 16:12:27 +02:00
Luciano Gervasoni
1fbc5beb6e URL Logs 2025-09-08 12:45:53 +02:00
Luciano Gervasoni
7886d16264 Flower allow API handling 2025-09-08 12:44:48 +02:00
Luciano Gervasoni
2ed86e31ec Workers light,default,heavy 2025-09-08 12:34:47 +02:00
Luciano Gervasoni
892fb984d1 Debug enlarge 2025-09-05 14:18:33 +02:00
Luciano Gervasoni
c17f09a94f Debug enlarge 2025-09-05 14:06:10 +02:00
Luciano Gervasoni
e4a325d6b4 Request timeout debugging 2025-09-05 14:00:50 +02:00
Luciano Gervasoni
2fae0a3a9d Request timeout 2025-09-05 13:52:34 +02:00
Luciano Gervasoni
35f9260b94 Debug process raw url 2025-09-05 13:45:16 +02:00
Luciano Gervasoni
b40611bd3e Flower port update 2025-09-04 09:26:44 +02:00
Luciano Gervasoni
346d7c9187 Debug workers 2025-09-04 09:04:04 +02:00
Luciano Gervasoni
a21ff9c5d6 Celery scheduler DB based 2025-09-04 08:46:04 +02:00
Luciano Gervasoni
7b0d24309c Redis cache and celery, avoid overflow (3) 2025-09-03 23:20:56 +02:00
Luciano Gervasoni
334062b0ec Redis cache and celery, avoid overflow 2025-09-03 23:18:43 +02:00
Luciano Gervasoni
a9074f45b5 Redis cache and celery, avoid overflow 2025-09-03 23:07:03 +02:00
Luciano Gervasoni
569e7d4676 Disable fetch missing kids all 2025-08-28 11:23:47 +02:00
Luciano Gervasoni
4883b097db Disable missing kids all urls check 2025-08-26 10:16:18 +02:00
Luciano Gervasoni
a0ced90d7c Views base fix 2025-08-22 13:26:14 +02:00
Luciano Gervasoni
883dfcd3bd URL redirect get before newspaper processing 2025-08-22 13:11:02 +02:00
Luciano Gervasoni
9b0a84c16a Books category 2025-08-18 16:39:26 +02:00
Luciano Gervasoni
b08ea574d9 Notify status task 2025-08-14 15:13:18 +02:00
Luciano Gervasoni
da078a6f0f Notify status task 2025-08-14 15:09:20 +02:00
Luciano Gervasoni
4ccff2bc02 Notify status task 2025-08-14 15:06:53 +02:00
Luciano Gervasoni
ffb0f85475 View fix (10) 2025-08-14 14:58:16 +02:00
Luciano Gervasoni
c939624687 View fix (9) 2025-08-14 14:56:00 +02:00
Luciano Gervasoni
6fb14d5e72 View fix (8) 2025-08-14 14:53:56 +02:00
Luciano Gervasoni
260a505766 View fix (7) 2025-08-14 14:50:03 +02:00
Luciano Gervasoni
bae5329b1e View fix (6) 2025-08-14 14:48:03 +02:00
Luciano Gervasoni
b3d63f820e View fix (5) 2025-08-14 14:28:52 +02:00
Luciano Gervasoni
1fbf4cf3d4 View fix (4) 2025-08-14 14:25:56 +02:00
Luciano Gervasoni
856a9e7562 View fix (3) 2025-08-14 14:24:03 +02:00
Luciano Gervasoni
4080154f2b View fix (2) 2025-08-14 14:12:57 +02:00
Luciano Gervasoni
015f92a06b View fix 2025-08-14 14:04:33 +02:00
Luciano Gervasoni
3d09c1acff Notify status 2025-08-14 13:56:06 +02:00
Luciano Gervasoni
02f756d3c2 Ride missing kids exception 2025-08-14 10:59:19 +02:00
Luciano Gervasoni
6b5073d1b6 Pattern matching, foxnews request with header 2025-08-13 14:29:44 +02:00
Luciano Gervasoni
e3d6cf8000 Pattern matching, foxnews request with header 2025-08-13 14:12:54 +02:00
Luciano Gervasoni
30c586d49a Selenium docker psutil 2025-08-01 20:49:28 +02:00
Luciano Gervasoni
1502f09e22 Selenium kill process to release mem, supervisor conf rotate log file 2025-07-28 11:16:15 +02:00
Luciano Gervasoni
54e41139bb Duckduckgo search update 2025-07-22 22:53:53 +02:00
Luciano Gervasoni
b112da8bd0 Supervisor based run 2025-07-22 00:51:09 +02:00
Luciano Gervasoni
cb621c9d6b Switching to django celery for workers 2025-07-17 22:29:06 +02:00
Luciano Gervasoni
50e8666162 Django tasks workers logger 2025-07-17 00:46:48 +02:00
Luciano Gervasoni
202e58776d Django tasks workers 2025-07-17 00:21:26 +02:00
Luciano Gervasoni
7a91fc1a87 Django tasks workers 2025-07-17 00:11:02 +02:00
Luciano Gervasoni
b2b853b32f Django tasks workers 2025-07-17 00:06:23 +02:00
Luciano Gervasoni
d5d80ade55 Django tasks workers 2025-07-17 00:05:04 +02:00
Luciano Gervasoni
60f021fc2d Logger for worker 2025-07-15 17:05:06 +02:00
Luciano Gervasoni
1dcf69ab08 Logger for worker 2025-07-15 16:58:48 +02:00
Luciano Gervasoni
b9ba0d8f3d Logger for worker 2025-07-15 16:51:22 +02:00
Luciano Gervasoni
06ded0b37d Worker params 2025-07-15 10:28:51 +02:00
Luciano Gervasoni
a38e2bc5d1 Worker logs 2025-07-15 10:04:13 +02:00
Luciano Gervasoni
5a33012a64 Workers fix 2 2025-07-15 01:07:10 +02:00
Luciano Gervasoni
9d79a4e5c4 Workers fix 2025-07-15 01:04:57 +02:00
Luciano Gervasoni
6612a50d13 Logger fix, env sample ram 2025-07-14 23:48:34 +02:00
Luciano Gervasoni
6c88759e7b Workers ttl 2025-07-14 23:37:48 +02:00
Luciano Gervasoni
623dfbf95a Tasks priorities 2025-07-10 14:36:19 +02:00
Luciano Gervasoni
0cb68a876b Logger parent pid 2025-07-10 14:19:57 +02:00
Luciano Gervasoni
fdc3263785 Django multi worker, logging pid 2025-07-10 13:08:37 +02:00
Luciano Gervasoni
da5dfe5314 Timeout adjust 2025-07-08 21:33:28 +02:00
Luciano Gervasoni
0fa4482711 Missing kids trigger types 2025-07-08 21:30:20 +02:00
Luciano Gervasoni
4985f09e56 Filters fix 2025-07-08 18:28:43 +02:00
Luciano Gervasoni
0cf61026e8 Selenium based fetch of different sources 2025-07-08 18:18:26 +02:00
Luciano Gervasoni
f729bd1cb2 Selenium control loop 2025-07-08 10:19:37 +02:00
Luciano Gervasoni
9083021674 Scheduled tasks priorities 2025-07-08 10:15:21 +02:00
Luciano Gervasoni
8d72d3af0c debugging 2025-07-08 10:11:07 +02:00
Luciano Gervasoni
75de046dd9 selenium app wip 2025-07-08 10:01:35 +02:00
Luciano Gervasoni
7fdd93d35d docker compose base prod 2025-07-08 09:48:17 +02:00
Luciano Gervasoni
522c1cb8b3 Missing kids selenium fixes 2025-07-08 09:43:40 +02:00
Luciano Gervasoni
e81a96f4bd typo missing kid verif 2025-07-07 17:07:04 +02:00
Luciano Gervasoni
dd8e71aaa3 Missing kid verify timeout handle 2025-07-07 16:51:55 +02:00
Luciano Gervasoni
8cf2b52325 Selenium based missing kid verify url fix (2) 2025-07-07 16:34:21 +02:00
Luciano Gervasoni
a8b236bac0 Selenium based missing kid verify url 2025-07-07 16:02:11 +02:00
Luciano Gervasoni
15035c108d Missing kids processing fix 2025-07-07 13:22:18 +02:00
Luciano Gervasoni
4c0dd70bc3 missing kids status code handling 2025-07-07 12:57:57 +02:00
Luciano Gervasoni
b559f8cd8c Django tasks scheduler 2025-07-04 18:52:56 +02:00
Luciano Gervasoni
737483db9f Tasks timeout 2025-07-04 18:51:09 +02:00
Luciano Gervasoni
d0ae91bf35 quot parser issue fx 2025-07-03 14:13:47 +02:00
Luciano Gervasoni
80f40e1a74 unquote google general search 2025-07-03 13:52:18 +02:00
Luciano Gervasoni
969e08e84a Status pattern match fox news person 2025-07-03 13:43:30 +02:00
Luciano Gervasoni
68b56eafea furl remove parameters on search results 2025-07-03 13:35:40 +02:00
Luciano Gervasoni
e657c3bee1 Zombie processes, quot parser issue 2025-07-03 10:56:48 +02:00
Luciano Gervasoni
8b689729bf Docker and deployment to fetcher server 2025-06-27 09:14:44 +02:00
Luciano Gervasoni
f659d4adb3 Scheduled tasks interval, env vars, view fix 2025-06-20 09:59:27 +02:00
Luciano Gervasoni
03a2949b2b django tasks scheduler update, .env and docker compose towards fetcher sca 2025-06-20 00:35:48 +02:00
Luciano Gervasoni
490f01d66c Unknown instead of error for fetched urls 2025-06-19 22:43:29 +02:00
Luciano Gervasoni
a2cce62096 CV app docker fix compose 2025-04-30 22:32:52 +02:00
Luciano Gervasoni
aa7aca3e66 CV app docker fix 2025-04-30 22:31:24 +02:00
Luciano Gervasoni
d7df5b4ea4 CV app with fastapi, web nicegui based 2025-04-30 18:41:35 +02:00
Luciano Gervasoni
ccfd0f9188 Schools NL, Ghost post utils, nude + age detection 2025-04-30 15:50:54 +02:00
Luciano Gervasoni
aa369d0458 Publish from filtered URLs option 2025-04-25 16:39:13 +02:00
Luciano Gervasoni
f59d16b3fc Publish with hidden tag, don't publish if url id already processed, typo 2025-04-24 16:53:52 +02:00
Luciano Gervasoni
b3f7cb255c Publish with hidden tag, don't publish if url id already processed 2025-04-24 16:47:14 +02:00
Luciano Gervasoni
b8fdcae5ec Temperature and seed LLM 2025-04-23 17:46:47 +02:00
Luciano Gervasoni
cf55c586f7 Publisher fix 2025-04-23 17:34:10 +02:00
Luciano Gervasoni
e5c574ba33 LLM refactor, NPU ollama based, publisher update json query to llm 2025-04-23 16:35:50 +02:00
Luciano Gervasoni
8ea3ec1bda Utils 2025-04-23 16:26:08 +02:00
59 changed files with 3167 additions and 924 deletions

View File

@@ -1,3 +1,7 @@
# AutoSSH DB
REMOTE_HOST=''
REMOTE_USERNAME=''
# Initialization
INITIALIZE_DB=true
DJANGO_SUPERUSER_USERNAME=matitos
@@ -18,13 +22,12 @@ PATH_LOGS_DIRECTORY=/opt/logs
DB_NAME=matitos
DB_PASSWORD=supermatitos
DB_USER=supermatitos
PATH_DB_DATA=.
# Database: Django
DB_HOST=fetcher_db
DB_PORT=5432
REDIS_HOST=fetcher_redis
REDIS_PORT=6379
REDIS_CACHE_HOST=fetcher_redis_cache
REDIS_CACHE_PORT=6379
REDIS_CELERY_HOST=fetcher_redis_celery
REDIS_CELERY_PORT=6379
# Job timeout: 30 min
JOB_DEFAULT_TIMEOUT=1800
@@ -40,18 +43,23 @@ FETCHER_ERROR_URL_CACHE_TIME=172800
# Selenium
SELENIUM_ENDPOINT=http://fetcher_app_selenium:80
ENDPOINT_OLLAMA=https://ollamamodel.matitos.org
# APP: Selenium
ARCH=amd64 # arm64, amd64
SELENIUM_SLEEP_PER_PAGE=4
PATH_LOGS_DIRECTORY=/opt/logs
# Deploy resources per App
DEPLOY_CPUS=2
DEPLOY_RAM=4G
DEPLOY_RAM=3G
# Ghost
GHOST_ADMIN_API_URL=https://news.matitos.org/ghost/api/admin/
GHOST_ADMIN_API_KEY=67fffe1b8a57a80001ecec5b:59f580020c196f92e05e208d288702082f8edad6366e2b2c8940b54e41cc355a
GHOST_ADMIN_API_KEY=
PEXELS_API_KEY=Y6clJkY32eihf34ukX4JsINYu9lzxh3xDdNq2HMAmGwXp0a0tt6vr6S9
# Ollama
ENDPOINT_OLLAMA=https://ollamamodelnpu.matitos.org
OLLAMA_MODEL_DEFAULT=qwen2.5-instruct:3b
# Telegram
TELEGRAM_INFO_BOT_TOKEN="..."
TELEGRAM_INFO_CHAT_ID="..."
TELEGRAM_WARNING_BOT_TOKEN="..."
TELEGRAM_WARNING_CHAT_ID="..."
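These Telegram variables are consumed by the notification code elsewhere in the repository (not shown in this diff). A minimal sketch of how an info/warning message could be sent with them, assuming the standard Telegram Bot API sendMessage endpoint and a hypothetical notify() helper:

```python
import os
import requests

def notify(message: str, level: str = "INFO") -> None:
    # Hypothetical helper, not the repository's actual notifier implementation.
    # Pick the info or warning bot depending on the level.
    prefix = "TELEGRAM_WARNING" if level == "WARNING" else "TELEGRAM_INFO"
    token = os.environ[f"{prefix}_BOT_TOKEN"]
    chat_id = os.environ[f"{prefix}_CHAT_ID"]
    # Standard Telegram Bot API call
    requests.post(
        f"https://api.telegram.org/bot{token}/sendMessage",
        json={"chat_id": chat_id, "text": message},
        timeout=10,
    )
```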

7
.gitignore vendored
View File

@@ -1,6 +1,11 @@
.env
__pycache__/
*.pyc
**/credentials.py
logs/
postgres/
docker_data/
**/*.pt
**/*.pth
**/*.tar
**/*.onnx

View File

@@ -15,6 +15,8 @@
- TODO: Proxy / VPN?
- TooManyRequests, ...
- TODO: Search per locale (nl-NL, fr-FR, en-GB)
- Fetch keyword-search results from Selenium-based sources
- URLs Processing -> Updates raw URLs
- Extracts title, description, content, image and video URLs, main image URL, language, keywords, authors, tags, published date
@@ -52,6 +54,10 @@
* Dev mode
```
docker compose -f docker-compose-dev.yml down -v
docker compose -f docker-compose-dev.yml build --progress=plain
docker compose -f docker-compose-dev.yml up
docker compose -f docker-compose-dev.yml up --no-deps --build
```
* Prod mode
```
docker compose down -v
docker compose up -d --no-deps --build
```

157
app_cv/Demo.ipynb Normal file
View File

@@ -0,0 +1,157 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import base64\n",
"import json\n",
"import requests\n",
"import io\n",
"import numpy as np\n",
"import PIL.Image\n",
"import cv2\n",
"from pprint import pprint\n",
"\n",
"def process_image(path_img):\n",
" with open(path_img, \"rb\") as image_file:\n",
" encoded_string = base64.b64encode(image_file.read()).decode('utf-8')\n",
" response = requests.post(\n",
" 'http://localhost:5000/process',\n",
" headers={'Content-Type': 'application/json'},\n",
" data=json.dumps({'image': encoded_string})\n",
" )\n",
" response_dict = response.json()\n",
" pprint(response_dict)\n",
" # Decode\n",
" image_bytes = base64.b64decode(response_dict.get(\"image_b64\"))\n",
" img_array = np.frombuffer(io.BytesIO(image_bytes).getvalue(), dtype=np.uint8)\n",
" img_bgr = cv2.imdecode(img_array, cv2.IMREAD_COLOR)\n",
" img_rgb = img_bgr[:, :, ::-1]\n",
" return img_rgb"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"path_img = \"imgs/img_1p.jpg\"\n",
"PIL.Image.fromarray( process_image(path_img) )"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"path_img = \"imgs/img_nude.jpg\"\n",
"PIL.Image.fromarray( process_image(path_img) )"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"'''\n",
"# !git clone https://github.com/wildchlamydia/mivolo\n",
"# !pip install ultralytics yt_dlp pandas scipy timm==0.8.13.dev0\n",
"# !pip install ./mivolo\n",
"\n",
"!python mivolo/demo.py \\\n",
" --input \"face_data/sample_image.jpg\" \\\n",
" --output \"output\" \\\n",
" --detector-weights \"mivolo/pretrained/yolov8x_person_face.pt\" \\\n",
" --checkpoint \"mivolo/pretrained/model_imdb_cross_person_4.22_99.46.pth.tar\" \\\n",
" --device \"cpu\" \\\n",
" --draw\n",
"'''\n",
"\n",
"'''\n",
"# !git clone https://github.com/Kartik-3004/facexformer.git\n",
"# !pip install huggingface_hub torch torchvision torchaudio opencv-python facenet_pytorch\n",
"from huggingface_hub import hf_hub_download\n",
"hf_hub_download(repo_id=\"kartiknarayan/facexformer\", filename=\"ckpts/model.pt\", local_dir=\"./facexformer\")\n",
"\n",
"!python facexformer/inference.py \\\n",
" --model_path facexformer/ckpts/model.pt \\\n",
" --image_path face_data/sample_image.jpg \\\n",
" --results_path face_data \\\n",
" --task parsing\n",
" x\n",
"!python facexformer/inference.py \\\n",
" --model_path facexformer/ckpts/model.pt \\\n",
" --image_path face_data/face.png \\\n",
" --results_path face_data \\\n",
" --task landmarks\n",
"\n",
"!python facexformer/inference.py \\\n",
" --model_path facexformer/ckpts/model.pt \\\n",
" --image_path face_data/face.png \\\n",
" --results_path face_data \\\n",
" --task headpose\n",
"\n",
"!python facexformer/inference.py \\\n",
" --model_path facexformer/ckpts/model.pt \\\n",
" --image_path face_data/face.png \\\n",
" --results_path face_data \\\n",
" --task attributes\n",
"\n",
"!python facexformer/inference.py \\\n",
" --model_path facexformer/ckpts/model.pt \\\n",
" --image_path face_data/face.png \\\n",
" --results_path face_data \\\n",
" --task age_gender_race\n",
"'''"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "matitos_cv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

27
app_cv/Dockerfile Normal file
View File

@@ -0,0 +1,27 @@
FROM python:3.12
WORKDIR /app
# LibGL for OpenCV
RUN apt-get update && apt-get install libgl1 -y
# Download models
RUN mkdir models
# https://github.com/wildchlamydia/mivolo
RUN curl "https://drive.usercontent.google.com/download?id=11i8pKctxz3wVkDBlWKvhYIh7kpVFXSZ4&confirm=xxx" -o models/model_imdb_cross_person_4.22_99.46.pth.tar
RUN curl "https://drive.usercontent.google.com/download?id=1CGNCkZQNj5WkP3rLpENWAOgrBQkUWRdw&confirm=xxx" -o models/yolov8x_person_face.pt
# https://github.com/notAI-tech/NudeNet
# Upload to an accessible link: https://github.com/notAI-tech/NudeNet/releases/download/v3.4-weights/640m.onnx
RUN curl "https://drive.usercontent.google.com/download?id=1lHTrW1rmYoYnMSUlhLwqFCW61-w2hvKX&confirm=xxx" -o models/640m.onnx
COPY . .
RUN pip install --no-cache-dir -r requirements.txt
RUN pip freeze
# CMD ["gunicorn", "--bind", "0.0.0.0:5000", "--workers", "2", "app:app"]
CMD ["uvicorn", "--host", "0.0.0.0", "--port", "5000", "--workers", "1", "--log-level", "info", "app:app"]
# docker build -t fetcher_cv .
# docker run --rm -p 5000:5000 fetcher_cv

36
app_cv/Server.ipynb Normal file
View File

@@ -0,0 +1,36 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!uvicorn app:app --workers 1 --log-level info --port 5001\n",
"#!uvicorn app:app --reload --log-level debug --port 8000\n",
"#!python app.py"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "matitos_cv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

76
app_cv/app.py Normal file
View File

@@ -0,0 +1,76 @@
from fastapi import FastAPI
from nicegui import ui, events, run
import base64
import io
import numpy as np
import cv2
import traceback
import logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s', handlers=[logging.StreamHandler()])
from cv_processor import process
from pydantic import BaseModel
class Item(BaseModel):
image: str # Base64
app = FastAPI()
# Define the NiceGUI UI components
@ui.page("/")
def main_page():
async def handle_upload(e: events.UploadEventArguments) -> None:
ui.notify('Processing...')
# Read content -> image
nparr = np.frombuffer(e.content.read(), np.uint8)
img_np_bgr = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
# Async process
results = await run.io_bound(process, img_np_bgr)
# Display
with ui.dialog() as dialog:
# Encode
retval, buffer = cv2.imencode('.png', results.get("image"))
img_buffer_encoded = base64.b64encode(buffer).decode('utf-8')
img_encoded = "data:image/png;base64,{}".format(img_buffer_encoded)
content = ui.image(img_encoded).props('fit=scale-down')
dialog.open()
ui.upload(on_upload=handle_upload, auto_upload=True, on_rejected=lambda: ui.notify('Rejected!')).props('accept=image').classes('max-w-full')
ui.run_with(app, title="CV")
@app.post('/process')
def process_image(item: Item):
logging.info("POST /process")
try:
image_data = item.image
if (image_data is None):
return {"error": "No image data provided"}
# Decode base64 string
image_bytes = base64.b64decode(image_data)
image_stream = io.BytesIO(image_bytes)
# Convert bytes to NumPy array
img_array = np.frombuffer(image_stream.getvalue(), dtype=np.uint8)
# Decode image using OpenCV
img_bgr = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
# Valid image
assert(img_bgr is not None)
# Process the image
results = process(img_bgr)
# Encode processed image to base64
_, buffer = cv2.imencode('.jpg', results.get("image"), [cv2.IMWRITE_JPEG_QUALITY, 100])
processed_image_base64 = base64.b64encode(buffer).decode('utf-8')
# Update image with base64 encoded
results["image_b64"] = processed_image_base64
# Pop image (not serializable)
results.pop("image")
return results
except Exception as e:
logging.warning("Exception: {}".format(traceback.format_exc()))
return {"error": traceback.format_exc()}

125
app_cv/cv_processor.py Normal file
View File

@@ -0,0 +1,125 @@
import cv2
import numpy as np
import logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s', handlers=[logging.StreamHandler()])
# Age
from mivolo.predictor import Predictor
import argparse
# Nudity
from nudenet import NudeDetector
class CV():
def __init__(self):
args = argparse.ArgumentParser()
args.add_argument("--device", type=str, default="cpu")
args.add_argument("--checkpoint", default="models/model_imdb_cross_person_4.22_99.46.pth.tar")
args.add_argument("--detector_weights", default="models/yolov8x_person_face.pt")
args.add_argument("--with-persons", action="store_true", default=False, help="If set model will run with persons, if available")
args.add_argument("--disable-faces", action="store_true", default=False, help="If set model will use only persons if available")
args.add_argument("--draw", action="store_true", default=False, help="If set, resulted images will be drawn")
args = args.parse_args([])
# Initialize
self.predictor_age = Predictor(args)
# Initialize
self.nude_detector = NudeDetector(model_path="models/640m.onnx", inference_resolution=640)
# detector = NudeDetector(model_path="downloaded_640m.onnx path", inference_resolution=640)
# https://github.com/notAI-tech/NudeNet?tab=readme-ov-file#available-models
# All labels list
self.nudity_all_labels = [
"FEMALE_GENITALIA_COVERED",
"FACE_FEMALE",
"BUTTOCKS_EXPOSED",
"FEMALE_BREAST_EXPOSED",
"FEMALE_GENITALIA_EXPOSED",
"MALE_BREAST_EXPOSED",
"ANUS_EXPOSED",
"FEET_EXPOSED",
"BELLY_COVERED",
"FEET_COVERED",
"ARMPITS_COVERED",
"ARMPITS_EXPOSED",
"FACE_MALE",
"BELLY_EXPOSED",
"MALE_GENITALIA_EXPOSED",
"ANUS_COVERED",
"FEMALE_BREAST_COVERED",
"BUTTOCKS_COVERED",
]
# Classes of interest
self.nudity_classes_of_interest = ["BUTTOCKS_EXPOSED", "FEMALE_BREAST_EXPOSED", "FEMALE_GENITALIA_EXPOSED", "ANUS_EXPOSED", "MALE_GENITALIA_EXPOSED"]
def _censor(self, image_bgr, detections):
# Copy original image
image_bgr_censored = image_bgr.copy()
for detection in detections:
box = detection["box"]
x, y, w, h = box[0], box[1], box[2], box[3]
# Change these pixels to pure black
image_bgr_censored[y : y + h, x : x + w] = (0, 0, 0)
return image_bgr_censored
def process_image(self, image_bgr):
###################################################################
# Predict
detected_objects, out_img = self.predictor_age.recognize(image_bgr)
logging.debug("#persons: {}, #faces: {}".format(detected_objects.n_persons, detected_objects.n_faces))
# Num faces and persons detected
# detected_objects.n_faces, detected_objects.n_persons
# Association
detected_objects.associate_faces_with_persons()
# detected_objects.face_to_person_map
# {2: 1, 3: 0}
# detected_objects.ages
# [None, None, 27.18, 23.77]
age_predictions = [e for e in detected_objects.ages if e is not None]
# Crops of faces & persons
# crops = detected_objects.collect_crops(img)
any_minor_present = any([ a < 18 for a in detected_objects.ages if a is not None ])
###################################################################
###################################################################
# Predict
nude_detections = self.nude_detector.detect(image_bgr)
logging.debug("Nude detections: {}".format(nude_detections))
# Filter by classes of interest
nude_detections = [ detection for detection in nude_detections if detection["class"] in self.nudity_classes_of_interest ]
# Nude detections present?
any_nude_detection = len(nude_detections) > 0
###################################################################
###################################################################
# Censor image
censored_img_bgr = self._censor(image_bgr, nude_detections)
# Plot age predictions on censored image
output_image = detected_objects.plot(img=censored_img_bgr)
###################################################################
results = {
"any_minor_present": any_minor_present,
"any_nude_detection": any_nude_detection,
"nudity_detections": nude_detections,
"age_predictions": age_predictions,
"image": output_image,
}
return results
def process(img_bgr):
try:
logging.info("Processing image")
# Process
results = CV().process_image(img_bgr)
logging.info("Returning results")
return results
except Exception as e:
logging.warning("Error processing image: {}".format(str(e)))
return {}

23
app_cv/docker-compose.yml Normal file
View File

@@ -0,0 +1,23 @@
services:
matitos_cv:
build:
context: .
image: fetcher_app_cv
container_name: fetcher_app_cv
restart: unless-stopped
ports:
- 5000
environment:
- DEBUG_MODE=0
labels: # Reverse proxy sample
- "traefik.enable=true"
- "traefik.http.routers.cv.rule=Host(`cv.matitos.org`)"
- "traefik.http.routers.cv.entrypoints=websecure"
- "traefik.http.routers.cv.tls.certresolver=myresolvercd"
- "traefik.http.services.cv.loadbalancer.server.port=5000"
networks:
- docker_default # Reverse proxy network
networks:
docker_default:
external: true

BIN
app_cv/imgs/img_1p.jpg Normal file (binary image added, 35 KiB; not shown)

BIN
app_cv/imgs/img_nude.jpg Normal file (binary image added, 29 KiB; not shown)

7
app_cv/requirements.txt Normal file
View File

@@ -0,0 +1,7 @@
opencv-python
git+https://github.com/wildchlamydia/mivolo.git
nudenet>=3.4.2
torch==2.5
nicegui
fastapi
gunicorn

55
app_cv_face/ABC.ipynb Normal file
View File

@@ -0,0 +1,55 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Warning: Binary output can mess up your terminal. Use \"--output -\" to tell \n",
"Warning: curl to output it to your terminal anyway, or consider \"--output \n",
"Warning: <FILE>\" to save to a file.\n"
]
}
],
"source": [
"!curl https://api.missingkids.org/photographs/NCMC2049364c1.jpg"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# !pip install deepface\n",
"# !pip install tf-keras\n",
"from deepface import DeepFace"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "matitos_cv_face",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -49,7 +49,7 @@ RUN if [ "${ARCH}" = "amd64" ] ; then \
&& apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false $toolDeps \
&& rm -rf /var/lib/apt/lists/* /tmp/*
RUN pip install --no-cache-dir selenium fastapi "uvicorn[standard]"
RUN pip install --no-cache-dir selenium fastapi "uvicorn[standard]" psutil
WORKDIR /opt/app
COPY . /opt/app/

View File

@@ -1,5 +1,7 @@
from fastapi import FastAPI
from pydantic import BaseModel
from missing_kids import MissingKidsFetcher
from search import SearchFetcher
from logger import get_logger
logger = get_logger()
@@ -8,7 +10,44 @@ app = FastAPI()
@app.get("/get_missing_kids/")
def get_missing_kids(pages: int = -1):
try:
logger.info("Get missing kids, #pages={}".format(pages))
res = {"list_urls": MissingKidsFetcher().get_missing_kids_urls(first_n_pages=pages)}
except Exception as e:
logger.warning("Exception: {}".format(str(e)), exc_info=True)
res = {}
return res
class BodyVerifyMissingKid(BaseModel):
url: str
@app.post("/verify_missing_kid/")
def verify_missing_kid(data: BodyVerifyMissingKid):
try:
logger.info("Verify missing kid, URL={}".format(data.url))
res = MissingKidsFetcher().verify_missing_kid_url(data.url)
except Exception as e:
logger.warning("Exception: {}".format(str(e)), exc_info=True)
res = {}
return res
class BodyFetchSearch(BaseModel):
search: str
@app.post("/fetch_search/")
def fetch_search(data: BodyFetchSearch):
try:
# Initialize
search_fetcher, results = SearchFetcher(), {}
# Iterate
for source in search_fetcher.get_available_sources():
logger.info("Fetch based search source={} search={}".format(source, data.search))
# Fetch
results[source] = search_fetcher.search(source, data.search)
# Empty?
if (len(results[source]) == 0):
results.pop(source)
except Exception as e:
logger.warning("Exception: {}".format(str(e)), exc_info=True)
results = {}
return results

View File

@@ -8,10 +8,10 @@ logs_directory = os.getenv("PATH_LOGS_DIRECTORY", "logs")
os.makedirs(logs_directory, exist_ok=True)
logging.basicConfig(format='%(filename)s | %(levelname)s | %(asctime)s | %(message)s')
logger = logging.getLogger("app_selenium")
logger.setLevel(logging.DEBUG)
logger = logging.getLogger("selenium")
logger.setLevel(logging.INFO)
# To file log: INFO / WARNING / ERROR / CRITICAL
# To file log: DEBUG / INFO / WARNING / ERROR / CRITICAL
fh = logging.handlers.RotatingFileHandler(filename=os.path.join(logs_directory, "debug.log"), mode="a", maxBytes=10000000, backupCount=1)
fh.setFormatter(logging.Formatter('%(levelname)s | %(asctime)s | %(message)s'))
fh.setLevel(logging.DEBUG)

View File

@@ -1,27 +1,85 @@
from selenium import webdriver
from utils import get_webdriver, kill_process_tree
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import time
import os
from logger import get_logger
logger = get_logger()
def get_webdriver():
options = Options()
options.add_argument('--headless') # Optional
options.binary_location = '/opt/firefox/firefox'
service = Service('/usr/local/bin/geckodriver')
driver = webdriver.Firefox(options=options, service=service)
return driver
class MissingKidsFetcher():
def __init__(self) -> None:
pass
def verify_missing_kid_url(self, url):
def load_finished(driver):
# Find all <img> tags with src attributes. Extract src URLs
image_urls = [img.get_attribute("src") for img in driver.find_elements(By.XPATH, "//img[@src]")]
# If base64 image exists, loading finished
finished = any(["data:image/png;base64" in i for i in image_urls])
# logger.debug("Finished loading URL")
return finished
try:
# Initialize
logger.debug("Initializing driver")
driver, service = get_webdriver()
# Load URL
logger.debug("Get URL: {}".format(url))
driver.get(url)
# Wait for 404?
try:
WebDriverWait(driver, 2).until(EC.title_contains("404"))
logger.debug("WebDriverWait -> title contains 404")
except TimeoutException:
logger.debug("WebDriverWait timeout, no 404 appeared")
if ("404" in driver.title):
# Status invalid
results = {"status": "invalid"}
else:
# Check until finished loading
num_checks = 10
while (not load_finished(driver)) and (num_checks>=0):
time.sleep(1)
num_checks -= 1
# Find all <img> tags with src attributes. Extract src URLs
image_urls = [img.get_attribute("src") for img in driver.find_elements(By.XPATH, "//img[@src]")]
# Redirects to 404?
if ("missingkids.org/404" in driver.current_url) or (any(["thumb-404.png" in i for i in image_urls])):
# Status invalid
results = {"status": "invalid"}
# Redirection to valid URL? -> Duplicate
elif (driver.current_url != url):
# Redirection (duplicate)
results = {"status": "duplicate", "redirection": driver.current_url}
# Valid
elif ("Have you seen this child?" in driver.title):
# Status valid
results = {"status": "valid"}
else:
results = {"status": "unknown"}
except Exception as e:
logger.warning("Exception while verifying MissingKid URL {}\n{}".format(url, str(e)), exc_info=True)
results = {}
# Release memory
try:
driver.quit() #driver.close()
time.sleep(1)
# import atexit
# atexit.register(driver.quit) # Will always be called on exit
except Exception as e:
logger.warning("Exception while closing/quitting driver: {}".format(str(e)), exc_info=True)
kill_process_tree(service.process.pid)
logger.info("Results: {} for URL: {}".format(str(results), url))
return results
def get_missing_kids_urls(self, first_n_pages=-1):
logger.info("Get MissingKids, #pages: {}".format(first_n_pages))
# Poster URL
@@ -30,7 +88,9 @@ class MissingKidsFetcher():
set_urls = set()
try:
driver = get_webdriver()
logger.debug("Initializing driver")
driver, service = get_webdriver()
logger.debug("Get URL: {}".format(url))
# Go to URL
driver.get(url)
# Iterate
@@ -88,8 +148,12 @@ class MissingKidsFetcher():
# Release memory
try:
driver.close()
driver.quit() #driver.close()
time.sleep(1)
# import atexit
# atexit.register(driver.quit) # Will always be called on exit
except Exception as e:
logger.warning("Exception while closing driver: {}".format(str(e)), exc_info=True)
logger.warning("Exception while closing/quitting driver: {}".format(str(e)), exc_info=True)
kill_process_tree(service.process.pid)
return set_urls

115
app_selenium/search.py Normal file
View File

@@ -0,0 +1,115 @@
from utils import get_webdriver, kill_process_tree
from selenium.webdriver.common.by import By
from urllib.parse import quote
import time
from logger import get_logger
logger = get_logger()
class SearchFetcher():
def __init__(self):
pass
def get_available_sources(self, ):
return ["foxnews", "breitbart", "zerohedge"]
def search(self, source, search="child abuse"):
try:
if (source == "foxnews"):
return self._search_foxnews(search)
elif (source == "breitbart"):
return self._search_breitbart(search)
elif (source == "zerohedge"):
return self._search_zerohedge(search)
else:
logger.warning("Search not implemented for source={} search={}".format(source, search))
return []
except Exception as e:
logger.warning("Error searching for source={} search={}".format(source, search))
return []
def _search_foxnews(self, search):
url_host = "foxnews.com"
# URL search
url_unquoted = "https://www.foxnews.com/search-results/search#q={}".format(search)
url = quote(url_unquoted, safe=":/?=&#")
# Initialize
driver, service = get_webdriver()
# Load URL
driver.get(url)
time.sleep(2)
# Find the element with class "page"
page_element = driver.find_element(By.CLASS_NAME, "page")
# Find the articles
articles = page_element.find_elements(By.CLASS_NAME, "article")
# Extract URLs
urls = [ art.find_element(By.CLASS_NAME, "m").find_element(By.TAG_NAME, "a").get_attribute("href") for art in articles ]
# Remove duplicates, remove None
urls = [u for u in set(urls) if u is not None]
# Filter by URL host
urls = [u for u in urls if url_host in u]
driver.quit()
kill_process_tree(service.process.pid)
return urls
def _search_breitbart(self, search):
url_host = "breitbart.com"
# URL search
url_unquoted = "https://www.breitbart.com/search/?s={}".format(search.replace(" ", "+"))
url = quote(url_unquoted, safe=":/?=&#")
# Initialize
driver, service = get_webdriver()
# Load URL
driver.get(url)
time.sleep(4)
# Find the element with class "page"
page_element = driver.find_element(By.CLASS_NAME, "gsc-expansionArea")
# Find the articles
articles = page_element.find_elements(By.CLASS_NAME, "gs-title")
# Extract URLs
urls = [ art.get_attribute("href") for art in articles ]
# Remove duplicates, remove None
urls = [u for u in set(urls) if u is not None]
# Filter by URL host
urls = [u for u in urls if url_host in u]
driver.quit()
kill_process_tree(service.process.pid)
return urls
def _search_zerohedge(self, search):
url_host = "zerohedge.com"
# URL search
url_unquoted = "https://www.zerohedge.com/search-content?qTitleBody={}".format(search.replace(" ", "+"))
url = quote(url_unquoted, safe=":/?=&#")
# Initialize
driver, service = get_webdriver()
# Load URL
driver.get(url)
time.sleep(2)
# Find the element with class "page"
page_element = driver.find_element(By.CLASS_NAME, "main-content")
# Find the articles
articles = page_element.find_elements(By.TAG_NAME, "a")
# Extract URLs
urls = [ art.get_attribute("href") for art in articles]
# Remove duplicates, remove None
urls = [u for u in set(urls) if u is not None]
# Filter by URL host
urls = [u for u in urls if url_host in u]
driver.quit()
kill_process_tree(service.process.pid)
return urls

23
app_selenium/utils.py Normal file
View File

@@ -0,0 +1,23 @@
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
import psutil
def get_webdriver():
options = Options()
options.add_argument('--headless') # Optional
options.binary_location = '/opt/firefox/firefox'
service = Service('/usr/local/bin/geckodriver')
driver = webdriver.Firefox(options=options, service=service)
return driver, service
def kill_process_tree(pid):
try:
parent = psutil.Process(pid)
for child in parent.children(recursive=True):
child.kill()
parent.kill()
except psutil.NoSuchProcess:
pass

View File

@@ -5,6 +5,9 @@ ENV PYTHONDONTWRITEBYTECODE=1
#Prevents Python from buffering stdout and stderr
ENV PYTHONUNBUFFERED=1
# supervisor
RUN apt-get update && apt-get install -y supervisor
# User
RUN useradd -m -r appuser && \
mkdir /opt/app && \
@@ -14,10 +17,11 @@ WORKDIR /opt/app
# Copy the Django project and install dependencies
COPY requirements.txt /opt/app/
# run this command to install all dependencies
# Install dependencies
RUN pip install --no-cache-dir -r requirements.txt
COPY --chown=appuser:appuser . /opt/app/
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
RUN chmod -R 755 /opt
RUN chown -R appuser:appuser /opt
@@ -25,4 +29,4 @@ RUN chown -R appuser:appuser /opt
USER appuser
# Run Djangos server & workers
CMD ["sh", "-c", "/opt/app/initialize.sh && /opt/app/run.sh"]
CMD ["sh", "-c", "/opt/app/initialize.sh && /usr/bin/supervisord"]

View File

@@ -73,6 +73,17 @@ class Meta:
* Environment variables
* In docker-compose.yml
* Tasks
```
python manage.py dumpdata \
django_celery_beat.PeriodicTask \
django_celery_beat.IntervalSchedule \
django_celery_beat.CrontabSchedule \
django_celery_beat.SolarSchedule \
django_celery_beat.ClockedSchedule \
--indent 2 > scheduled_tasks.json
```
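To restore the exported schedule on a fresh database, the standard Django counterpart applies (a sketch; assumes the scheduled_tasks.json fixture produced above):
```
python manage.py loaddata scheduled_tasks.json
```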
* Deploy
```
# Check environments variables on .env file

View File

@@ -0,0 +1,3 @@
from .celery import app as celery_app
__all__ = ('celery_app',)

14
app_urls/core/celery.py Normal file
View File

@@ -0,0 +1,14 @@
# core/celery.py
import os
from celery import Celery
# Set default Django settings module
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
app = Celery('core')
# Load config from Django settings, namespace CELERY
app.config_from_object('django.conf:settings', namespace='CELERY')
# Auto-discover tasks from all registered Django app configs
app.autodiscover_tasks()

View File

@@ -12,15 +12,16 @@ https://docs.djangoproject.com/en/5.1/ref/settings/
from pathlib import Path
import os
# Queues and routing
from kombu import Queue
# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent
# Quick-start development settings - unsuitable for production
# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = os.getenv("DJANGO_SECRET_KEY", 'django-insecure-EtKpy7t84GvU4gBwX9z3xKPBXMS75IAV0dkzN7dXVUsMSqy6a5rjY6WNCw3CcRH5')
SECRET_KEY = os.environ.get("DJANGO_SECRET_KEY", 'django-insecure-EtKpy7t84GvU4gBwX9z3xKPBXMS75IAV0dkzN7dXVUsMSqy6a5rjY6WNCw3CcRH5')
# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = (os.environ.get('DJANGO_DEBUG') == "True")
@@ -37,7 +38,7 @@ INSTALLED_APPS = [
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
'scheduler',
'django_celery_beat',
'fetcher',
]
@@ -96,9 +97,10 @@ DATABASES = {
CACHES = {
"default": {
"BACKEND": "django_redis.cache.RedisCache",
"LOCATION": "redis://{}:{}".format(
os.environ.get("REDIS_HOST", "localhost"),
os.environ.get("REDIS_PORT", 6379)
"LOCATION": "redis://{}:{}/{}".format(
os.environ.get("REDIS_CACHE_HOST", "localhost"),
os.environ.get("REDIS_CACHE_PORT", 6379),
2 # DB for Caching
),
"OPTIONS": {
"MEMCACHE_MAX_KEY_LENGTH": 2048,
@@ -107,59 +109,23 @@ CACHES = {
}
}
'''
from scheduler.types import SchedulerConfiguration, QueueConfiguration, Broker
from typing import Dict
# Celery configuration
CELERY_BROKER_URL = 'redis://{}:{}/{}'.format(os.environ.get("REDIS_CELERY_HOST", "localhost"), os.environ.get("REDIS_CELERY_PORT", 6379), 0)
CELERY_RESULT_BACKEND = 'redis://{}:{}/{}'.format(os.environ.get("REDIS_CELERY_HOST", "localhost"), os.environ.get("REDIS_CELERY_PORT", 6379), 1)
CELERY_ACCEPT_CONTENT = ['json']
CELERY_TASK_SERIALIZER = 'json'
CELERY_RESULT_EXPIRES = 3600 # Auto clean results after 1 hour
CELERY_ENABLE_UTC = True
CELERY_TIMEZONE = "UTC"
# https://django-tasks-scheduler.readthedocs.io/en/latest/configuration/
SCHEDULER_CONFIG = SchedulerConfiguration(
DEFAULT_JOB_TIMEOUT = os.environ.get("JOB_DEFAULT_TIMEOUT", 60*30), # 30 minutes
BROKER=Broker.REDIS,
# Celery Beat scheduler (required for django-celery-beat to work)
CELERY_BEAT_SCHEDULER = 'django_celery_beat.schedulers.DatabaseScheduler'
CELERY_TASK_QUEUES = (
Queue('default'),
Queue('low'),
)
SCHEDULER_QUEUES: Dict[str, QueueConfiguration] = {
'default': QueueConfiguration(
HOST = os.environ.get("REDIS_HOST", "localhost"),
PORT = os.environ.get("REDIS_PORT", 6379),
DB = os.environ.get("REDIS_DB", 0),
),
'high': QueueConfiguration(
HOST = os.environ.get("REDIS_HOST", "localhost"),
PORT = os.environ.get("REDIS_PORT", 6379),
DB = os.environ.get("REDIS_DB", 0),
),
'low': QueueConfiguration(
HOST = os.environ.get("REDIS_HOST", "localhost"),
PORT = os.environ.get("REDIS_PORT", 6379),
DB = os.environ.get("REDIS_DB", 0),
),
}
'''
SCHEDULER_QUEUES = {
'default': {
'HOST': os.environ.get("REDIS_HOST", "localhost"),
'PORT': os.environ.get("REDIS_PORT", 6379),
'DB': os.environ.get("REDIS_DB", 0),
},
'high': {
'HOST': os.environ.get("REDIS_HOST", "localhost"),
'PORT': os.environ.get("REDIS_PORT", 6379),
'DB': os.environ.get("REDIS_DB", 0),
},
'low': {
'HOST': os.environ.get("REDIS_HOST", "localhost"),
'PORT': os.environ.get("REDIS_PORT", 6379),
'DB': os.environ.get("REDIS_DB", 0),
}
}
SCHEDULER_CONFIG = {
'DEFAULT_TIMEOUT': os.environ.get("JOB_DEFAULT_TIMEOUT", 60*30), # 30 minutes
'DEFAULT_RESULT_TTL': 60*60*12, # 12 hours
'EXECUTIONS_IN_PAGE': 20,
'SCHEDULER_INTERVAL': 10, # 10 seconds
}
# Password validation
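For reference, a minimal sketch of a task that app.autodiscover_tasks() in core/celery.py would pick up under these settings; the module and task names are hypothetical, not taken from this diff:

```python
# fetcher/tasks.py  (hypothetical module; autodiscover_tasks() scans installed apps)
from celery import shared_task

@shared_task
def process_raw_urls_batch(batch_size: int = 100) -> int:
    # Placeholder body; the project's real task implementations are not shown here.
    return batch_size

# Route an invocation onto the 'low' queue declared in CELERY_TASK_QUEUES:
# process_raw_urls_batch.apply_async(kwargs={"batch_size": 50}, queue="low")
```

A periodic schedule for such a task would then be created through django_celery_beat (DatabaseScheduler), as configured above.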

View File

@@ -19,6 +19,5 @@ from django.urls import path, include
urlpatterns = [
path('admin/', admin.site.urls),
path('scheduler/', include('scheduler.urls')),
path('', include('fetcher.urls')),
]

View File

@@ -4,8 +4,10 @@ from django.core.cache import cache
from django.db import IntegrityError
from django.utils import timezone
from datetime import timedelta
from .fetch_utils_url_processor import process_url, get_with_protocol
from .fetch_utils_url_processor import process_url, verify_missing_kid_url
from .utils import get_with_protocol
import re
import requests
import os
import traceback
from .logger import get_logger
@@ -15,7 +17,7 @@ class DB_Handler():
def __init__(self):
pass
def insert_raw_urls(self, urls, obj_source, obj_search):
try:
logger.debug("Inserting raw URLs")
# Empty?
@@ -43,7 +45,6 @@ class DB_Handler():
UrlsSourceSearch.objects.get_or_create(id_url=obj_url, id_source=obj_source, id_search=obj_search)
else:
# Add object to insert
# url_object_to_insert.append(Urls(url=url))
urls_to_insert.append(url)
### Insert URLs & (URL_id, source_id)
@@ -81,27 +82,67 @@ class DB_Handler():
except Exception as e:
logger.warning("Exception inserting raw URLs: {}\n{}".format(e, traceback.format_exc()))
def _set_status(self, obj_url, status):
# Update status if setting a new value
if (obj_url.status != status):
obj_url.status = status
obj_url.save()
def _process_single_url(self, obj_url, status_pattern_match, raise_exception_on_error, paywall_bypass=False):
def set_status(obj_url, status):
# Update status if setting a new value
if (obj_url.status != status):
obj_url.status = status
obj_url.save()
def _set_duplicate_and_insert_canonical(self, obj_url, url_canonical):
# Update status
self._set_status(obj_url, Urls.STATUS_ENUM.DUPLICATE)
# Get or create URL with canonical form
obj_url_canonical, created = Urls.objects.get_or_create(url=url_canonical)
# Get the source-search IDs associated to obj_url.id
list_url_source_search = UrlsSourceSearch.objects.filter(id_url=obj_url)
for obj_url_source_search in list_url_source_search:
# Associate same sources to url_canonical (it might already exist)
UrlsSourceSearch.objects.get_or_create(id_url=obj_url_canonical, id_source=obj_url_source_search.id_source, id_search=obj_url_source_search.id_search)
# URLs duplicate association
UrlsDuplicate.objects.get_or_create(id_url_canonical=obj_url_canonical, id_url_duplicated=obj_url)
def _process_single_url(self, obj_url, status_pattern_match, raise_exception_on_error, paywall_bypass=False, request_timeout=15):
##########################################################################
# URL pattern: missingkids.org/poster OR missingkids.org/new-poster
if ("missingkids.org" in obj_url.url) and ("poster" in obj_url.url):
try:
# Verify missing kid URL
results = verify_missing_kid_url(obj_url.url)
except Exception as e:
if (raise_exception_on_error):
# Simply raise exception, handled in a different way
raise Exception("Error processing URL, raising exception as expected")
else:
logger.debug("Error processing URL: {}\n{}\n{}".format(obj_url.url, str(e), traceback.format_exc()))
# Set status to error
self._set_status(obj_url, Urls.STATUS_ENUM.ERROR)
return
if (results.get("status") == "valid"):
self._set_status(obj_url, Urls.STATUS_ENUM.VALID)
elif (results.get("status") == "invalid"):
self._set_status(obj_url, Urls.STATUS_ENUM.INVALID)
elif (results.get("status") == "duplicate"):
self._set_duplicate_and_insert_canonical(obj_url, results.get("redirection"))
elif (results.get("status") == "unknown"):
# Nothing to do, not sure about it...
logger.info("Missing kid verification returned unknown for URL: {}".format(obj_url.url))
self._set_status(obj_url, Urls.STATUS_ENUM.UNKNOWN)
return
##########################################################################
# Found a pattern match -> Override status
if (status_pattern_match is not None):
logger.debug("Pattern match, status '{}' for input URL: {}".format(status_pattern_match, obj_url.url))
# Update status
set_status(obj_url, status_pattern_match)
self._set_status(obj_url, status_pattern_match)
##### Filter URL? -> Invalid (don't extract content)
if (status_pattern_match == "invalid"):
return
try:
# Extract URL content
dict_url_data = process_url(obj_url.url, paywall_bypass)
dict_url_data = process_url(obj_url.url, paywall_bypass, request_timeout)
except Exception as e:
if (raise_exception_on_error):
# Simply raise exception, handled in a different way
@@ -112,19 +153,9 @@ class DB_Handler():
dict_url_data = None
##### Canonical URL different? -> Duplicate
if (dict_url_data is not None) and (dict_url_data.get("url_canonical") is not None) and (dict_url_data.get("url") != dict_url_data.get("url_canonical")):
# Update status
set_status(obj_url, Urls.STATUS_ENUM.DUPLICATE)
# Get or create URL with canonical form
obj_url_canonical, created = Urls.objects.get_or_create(url=dict_url_data.get("url_canonical"))
# Get the source-search IDs associated to obj_url.id
list_url_source_search = UrlsSourceSearch.objects.filter(id_url=obj_url)
for obj_url_source_search in list_url_source_search:
# Associate same sources to url_canonical (it might already exist)
UrlsSourceSearch.objects.get_or_create(id_url=obj_url_canonical, id_source=obj_url_source_search.id_source, id_search=obj_url_source_search.id_search)
# URLs duplicate association
UrlsDuplicate.objects.get_or_create(id_url_canonical=obj_url_canonical, id_url_duplicated=obj_url)
if (dict_url_data is not None) and (dict_url_data.get("url_canonical") is not None) and (dict_url_data.get("url") != dict_url_data.get("url_canonical")):
# URL as duplicate, insert canonical URL
self._set_duplicate_and_insert_canonical(obj_url, dict_url_data.get("url_canonical"))
# Next URL
return
@@ -133,20 +164,20 @@ class DB_Handler():
# (dict_url_data is None) or (Exception while processing URL) ? -> Error status
if (dict_url_data is None):
# Update status
set_status(obj_url, Urls.STATUS_ENUM.ERROR)
self._set_status(obj_url, Urls.STATUS_ENUM.ERROR)
# Next URL
return
# Invalid? e.g. binary data
if (dict_url_data.get("override_status") == "invalid"):
# Update status
set_status(obj_url, Urls.STATUS_ENUM.INVALID)
self._set_status(obj_url, Urls.STATUS_ENUM.INVALID)
# Next URL
return
##### Valid URL
# Update status
set_status(obj_url, Urls.STATUS_ENUM.VALID)
self._set_status(obj_url, Urls.STATUS_ENUM.VALID)
try:
if (dict_url_data is not None):
@@ -244,14 +275,31 @@ class DB_Handler():
except Exception as e:
logger.warning("Exception processing error URLs: {}\n{}".format(e, traceback.format_exc()))
def process_missing_kids_urls(self, batch_size=None):
def process_missing_kids_urls(self, batch_size=None, process_status_only=None):
try:
logger.debug("Processing MissingKids URLs - batch_size={}".format(batch_size))
logger.info("Processing MissingKids URLs - batch_size={} process_status_only={}".format(batch_size, process_status_only))
if (process_status_only is None):
filter = (Q(status=Urls.STATUS_ENUM.VALID) | Q(status=Urls.STATUS_ENUM.INVALID) | Q(status=Urls.STATUS_ENUM.UNKNOWN) | Q(status=Urls.STATUS_ENUM.ERROR))
else:
if (process_status_only == "valid"):
filter = Q(status=Urls.STATUS_ENUM.VALID)
elif (process_status_only == "invalid"):
filter = Q(status=Urls.STATUS_ENUM.INVALID)
elif (process_status_only == "error"):
filter = Q(status=Urls.STATUS_ENUM.ERROR)
elif (process_status_only == "unknown"):
filter = Q(status=Urls.STATUS_ENUM.UNKNOWN)
elif (process_status_only == "raw"):
filter = Q(status=Urls.STATUS_ENUM.RAW)
elif (process_status_only == "duplicate"):
filter = Q(status=Urls.STATUS_ENUM.DUPLICATE)
else:
# Unknown status value: log it and fall back to the default combined filter
logger.info("Unknown status to filter: {}".format(process_status_only))
filter = (Q(status=Urls.STATUS_ENUM.VALID) | Q(status=Urls.STATUS_ENUM.INVALID) | Q(status=Urls.STATUS_ENUM.UNKNOWN) | Q(status=Urls.STATUS_ENUM.ERROR))
# Get batch of URLs, %missingkids.org/poster% AND (status='valid' OR status='invalid')
missingkids_urls = Urls.objects.order_by("-ts_fetch").filter(
(Q(url__contains="missingkids.org/poster") | Q(url__contains="missingkids.org/new-poster"))
&
(Q(status=Urls.STATUS_ENUM.VALID) | Q(status=Urls.STATUS_ENUM.INVALID) | Q(status=Urls.STATUS_ENUM.ERROR))
filter & (Q(url__contains="missingkids.org/poster") | Q(url__contains="missingkids.org/new-poster"))
)
# Get batch size
@@ -261,14 +309,36 @@ class DB_Handler():
# Per URL
for obj_url in missingkids_urls:
try:
# Process URL. If no exception -> Valid
self._process_single_url(obj_url, status_pattern_match=None, raise_exception_on_error=True)
SELENIUM_BASED_MISSINGKID_VERIFICATION = False
if (SELENIUM_BASED_MISSINGKID_VERIFICATION):
# Missing kids fetching endpoint, verify URL
missingkids_fetch_endpoint = os.path.join(os.getenv("SELENIUM_ENDPOINT", "http://localhost:80"), "verify_missing_kid/")
data = {"url": obj_url.url}
# POST
r = requests.post(missingkids_fetch_endpoint, json=data, timeout=120)
# Jsonify
results = r.json()
logger.debug("Missingkids Selenium results for URL {}: {}".format(obj_url.url, str(results)))
else:
# Verify
results = verify_missing_kid_url(obj_url.url)
logger.debug("Missingkids verify results for URL {}: {}".format(obj_url.url, str(results)))
if (results.get("status") == "valid"):
self._set_status(obj_url, Urls.STATUS_ENUM.VALID)
elif (results.get("status") == "invalid"):
self._set_status(obj_url, Urls.STATUS_ENUM.INVALID)
elif (results.get("status") == "duplicate"):
self._set_duplicate_and_insert_canonical(obj_url, results.get("redirection"))
elif (results.get("status") == "unknown"):
# Nothing to do, not sure about it...
logger.info("Missing kid verification returned unknown for URL: {}".format(obj_url.url))
pass
except Exception as e:
# Raised exception -> Invalid (404 error)
obj_url.status = Urls.STATUS_ENUM.INVALID
obj_url.save()
logger.warning("Unknown error processing missing kids poster for URL: {}\n{}".format(obj_url.url, str(e)))
logger.info("Verified status of #{} missingkids.org/poster URLs".format(len(missingkids_urls)))
logger.info("Verified status of #{} missingkids.org/poster / missingkids.org/new-poster URLs".format(len(missingkids_urls)))
except Exception as e:
logger.warning("Exception processing MissingKids URLs: {}\n{}".format(e, traceback.format_exc()))

View File

@@ -1,6 +1,7 @@
from .db_utils import DB_Handler
from ..models import Search, Source
from .fetch_utils_url_processor import get_with_protocol, url_host_slowdown
from .fetch_utils_url_processor import url_host_slowdown
from .utils import get_with_protocol
import newspaper
import traceback
from .logger import get_logger
@@ -17,6 +18,9 @@ class FetchParser():
url_host_clean = obj_search.search.replace("www.", "").replace("http://", "").replace("https://", "")
# Ensure URL host in URL
raw_urls = [u for u in raw_urls if url_host_clean in u]
# Clean URL part after "&quot"
raw_urls = [u.split("&quot")[0] for u in raw_urls]
return raw_urls

View File

@@ -54,6 +54,7 @@ class FetchSearcher():
for SearchInstance in ListSearchInstances:
# Sleep between requests, avoid too many requests...
time.sleep(float(os.getenv("FETCHER_BETWEEN_SEARCHES_SLEEP", 5)))
# TODO: Random proxy / VPN
SearchInstance(args).fetch_articles(db_writer, obj_search)
# TODO: https://github.com/tasos-py/Search-Engines-Scraper/tree/master

View File

@@ -1,15 +1,15 @@
import time
import feedparser
import os
from django.utils import timezone
from datetime import timedelta
from urllib.parse import unquote
from ..models import Search, Source
from .fetch_utils_gnews import decode_gnews_urls
from .logger import get_logger
logger = get_logger()
from furl import furl
from gnews import GNews
from duckduckgo_search import DDGS
from ddgs import DDGS
from GoogleNews import GoogleNews
from search_engines import Yahoo, Aol
@@ -42,6 +42,9 @@ class FetcherAbstract(ABC):
# Ensure URL host in URL
raw_urls = [u for u in raw_urls if url_host_clean in u]
# Remove URL parameters, e.g. "?param=1234&h=yes"
raw_urls = [ furl(u).remove(furl(u).args).url for u in raw_urls ]
return raw_urls
def fetch_articles(self, db_writer, obj_search):
@@ -110,7 +113,7 @@ class SearchDuckDuckGoGeneral(FetcherAbstract):
return "ddg-general {} results={}".format(self.region, self.max_results).replace("results=None", "").strip()
def _fetch_raw_urls(self, keyword_search):
try:
news = DDGS().text(keyword_search, region=self.region, timelimit=self.period, max_results=self.max_results)
urls = [e.get("href") for e in news]
except Exception as e:
@@ -206,7 +209,10 @@ class SearchGoogleGeneral(FetcherAbstract):
# Links
for l in links:
# 'link': 'https://uk.news.yahoo.com/leaving-neverland-2-michael-jackson-lawyer-channel-4-102017088.html&ved=2ahUKEwjl38eJm5aMAxVvqJUCHXgnGzwQxfQBegQICRAC&usg=AOvVaw1osa6b3o_xXfcNinMDpLoK'
set_links.add( l.get("link").split("&ved=")[0] )
url = l.get("link").split("&ved=")[0]
# https://www.foxnews.com/politics%3Fparam%3D446dd5e1 -> https://www.foxnews.com/politics?param=446dd5e1
url = unquote(url)
set_links.add(url)
# Finished?
if (num_before == len(set_links)):
break

View File

@@ -0,0 +1,42 @@
from .db_utils import DB_Handler
from ..models import Search, Source
import traceback
import requests
import os
from .logger import get_logger
logger = get_logger()
class FetchSeleniumSourceSearch():
def __init__(self) -> None:
logger.debug("Initializing Selenium Source Search")
def run(self):
try:
logger.debug("Starting FetchSeleniumSourceSearch.run()")
# Get keyword searches
list_keyword_search = Search.objects.filter(type=Search.TYPE_ENUM.KEYWORD_SEARCH)
logger.debug("Fetching news Selenium based for keyword searches: {}".format([e.search for e in list_keyword_search]))
# Run selenium search for each keyword search
for obj_search in list_keyword_search:
try:
# Selenium fetching endpoint
selenium_fetch_endpoint = os.path.join(os.getenv("SELENIUM_ENDPOINT", "http://localhost:80"), "fetch_search/")
data = {"search": obj_search.search}
# POST
r = requests.post(selenium_fetch_endpoint, json=data, timeout=900)
# Jsonify
results = r.json()
logger.debug("Selenium results for URL {}: {}".format(obj_search.search, str(results)))
for source, urls_fetched in results.items():
# Get source object
obj_source, created = Source.objects.get_or_create(source="selenium {}".format(source))
# Write to DB
DB_Handler().insert_raw_urls(urls_fetched, obj_source, obj_search)
except Exception as e:
logger.warning("Exception while fetching selenium search: {}\n{}".format(obj_search.search, str(e)))
except Exception as e:
logger.warning("Exception in FetchSeleniumSourceSearch.run(): {}\n{}".format(e, traceback.format_exc()))

View File

@@ -2,20 +2,13 @@ from django.core.cache import cache
from .logger import get_logger
logger = get_logger()
import newspaper
import requests
import time
import os
from urllib.parse import unquote
import langdetect
langdetect.DetectorFactory.seed = 0
def get_with_protocol(url):
# http:// -> https://
url = url.replace("http://", "https://")
# "" -> https://
if not (url.startswith("https://")):
url = "https://" + url
return url
def get_url_host(url):
# URL no protocol, first substring before '/'
url_host = url.replace("https://", "").replace("http://", "").split("/")[0]
@@ -38,8 +31,49 @@ def url_host_slowdown(url, url_host_slowdown_seconds):
# About to process URL host, cache time
cache.set("process_{}".format(url_host).encode("utf-8"), time.time(), timeout=60*5) # Expire after 5 minutes
def process_url(url, paywall_bypass=False):
def verify_missing_kid_url(url):
# Sleep required? To avoid too many requests error
url_host_slowdown(url, url_host_slowdown_seconds=float(os.getenv("FETCHER_URL_HOST_SLEEP", 5)))
# Request, get redirection
r = requests.get(url, allow_redirects=True)
# Redirection?
if (url != r.url):
url_redirection = r.url
return {"status": "duplicate", "redirection": url_redirection}
# Sample URL: "https://www.missingkids.org/poster/NCMC/2058896/1"
org_prefix, case_num = url.split("/")[-3], url.split("/")[-2]
# Fill details to API endpoint
base_url = "https://www.missingkids.org/bin/ncmecEndpoint?action=childDetail&orgPrefix={}&caseNum={}"
url_endpoint = base_url.format(org_prefix, case_num)
# Cache timeout missingkids.org
time.sleep(0.25)
# Request
r = requests.get(url_endpoint)
# Analyze status code and status result
if (r.status_code == 200):
r_json = r.json()
# Valid poster
if (r_json.get("status") == "success"):
return {"status": "valid"}
# Invalid poster
elif (r_json.get("status") == "error"):
return {"status": "invalid"}
else:
# ?
logger.info("Unknown json status: {} when verifying missing kid: {}".format(str(r_json), url))
return {"status": "unknown"}
else:
# Error status code
logger.info("Unknown request status: {} when verifying missing kid: {}".format(r.status_code, url))
return {"status": "unknown"}
def process_url(url, paywall_bypass=False, request_timeout=15):
if (paywall_bypass):
# TODO: Implement self-hosted instance
url_paywall_bypass_base = "https://marreta.pcdomanual.com"
@@ -51,33 +85,74 @@ def process_url(url, paywall_bypass=False):
try:
# Sleep required? To avoid too many requests error (original URL, not paywall bypassing endpoint)
url_host_slowdown(url, url_host_slowdown_seconds=float(os.getenv("FETCHER_URL_HOST_SLEEP", 5)))
# User agent
user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1"
# Process
article = newspaper.article(url_of_interest)
if ("foxnews.com" in url_of_interest) or ("zerohedge" in url_of_interest):
# Request
r = requests.get(url, headers={"User-Agent": user_agent}, timeout=request_timeout)
# Raise for error code
r.raise_for_status()
# Parse
article = newspaper.Article(url=r.url).download(input_html=r.text).parse()
else:
# Config: Fake user agent
config = newspaper.configuration.Configuration()
config.headers = {'User-Agent': user_agent}
config.request_timeout = request_timeout
# Default mode
article = newspaper.article(url_of_interest, config=config)
except newspaper.ArticleBinaryDataException:
logger.warning("ArticleException for input URL {}".format(url))
return {"override_status": "invalid"}
except newspaper.ArticleException as e:
# Too many requests or blocked for some reason
if ("Status code 403" in str(e.args)):
# TODO: cool down and retry once?, proxy/VPN, ...
logger.debug("TODO: process_url Implement code 403")
# Not found, either it doesn't exist or getting blocked...
if ("Status code 404" in str(e.args)):
# TODO: cool down and retry once?, proxy/VPN, ...
logger.debug("TODO: process_url Implement code 404")
# Too many requests? Cool down...
if ("Status code 429" in str(e.args)):
# TODO: cool down and retry once?, proxy/VPN, ...
logger.debug("TODO: process_url Implement code 429")
# Unavailable for legal reasons
if ("Status code 451" in str(e.args)):
# TODO: Bypass with VPN
logger.debug("TODO: process_url Implement code 451")
# CloudFlare protection?
if ("Website protected with Cloudflare" in str(e.args)):
logger.debug("TODO: process_url Implement bypass CloudFlare")
# PerimeterX protection?
if ("Website protected with PerimeterX" in str(e.args)):
logger.debug("TODO: process_url Implement bypass PerimeterX")
logger.debug("ArticleException for input URL {}\n{}".format(url, str(e.args)))
# Try simple request, valid response but couldn't parse article? e.g. getting blocked? -> unknown
time.sleep(0.25)
r = requests.get(url_of_interest, timeout=request_timeout)
if (r.status_code == 200):
return {"override_status": "unknown"}
else:
# Another status code still... "error" or "unknown"
return {"override_status": "unknown"}
return None
except Exception as e:
logger.warning("Exception for input URL {}\n{}".format(url, str(e)))
return None
# Not a valid URL?
if (not article.is_valid_url()):
logger.debug("Invalid URL found: {}".format(url))

View File

@@ -1,24 +1,76 @@
import ollama
import os
import requests
import json
from .logger import get_logger
logger = get_logger()
class OllamaClient():
def __init__(self):
self.client = ollama.Client(host=os.getenv("ENDPOINT_OLLAMA", "https://ollamamodel.matitos.org"))
self.host = os.getenv("ENDPOINT_OLLAMA", "https://ollamamodel.matitos.org")
self.client = ollama.Client(host=self.host)
self.options = {"temperature": 0, "seed": 13579}
def _get_default_model(self):
return "llama3.2:3b"
return os.getenv("OLLAMA_MODEL_DEFAULT", "llama3.2:3b")
def get_models(self):
models = sorted([m.model for m in self.client.list().models])
if (self._get_default_model() in models):
return [self._get_default_model()] + [m for m in models if m != self._get_default_model()]
else:
return models
try:
# Get models
models = sorted([m.model for m in self.client.list().models])
# r = requests.get( os.path.join(endpoint, "models") )
# r.json().get("models")
# Default within it?
if (self._get_default_model() in models):
return [self._get_default_model()] + [m for m in models if m != self._get_default_model()]
else:
return models
except Exception as e:
return []
def get_prompt(self):
return ("Rewrite the text below into a clear and concise summary of one paragraph maximum, presenting the key points as if they are newly written insights. "
def get_prompt(self, content):
return "Provide, in one sentence each, the what, why, who, when, where, and a detailed summary of the content below:\n\n{}".format(content)
return "First, provide a detailed summary of the content below in one paragraph. Second, specify in one sentence each the who, what, when, where and why of the story. Do not mention or reference the original text, its source, or any phrases like 'According to' or 'The text states':\n\n{}".format(content)
return "First, provide a summary of the content below in one paragraph. Second, specify the Who, What, When, Where and Why of the story:\n\n{}".format(content)
# First, provide a summary of the content below in one paragraph. Second, specify the who, what, when, where and why of the story in one sentence each. Do not mention or reference the original text, its source, or any phrases like 'According to' or 'The text states':
'''
return ("Rewrite the content below into a clear and concise summary of one paragraph maximum, presenting the key points as if they are newly written insights. "
"Do not mention or reference the original text, its source, or any phrases like 'According to' or 'The text states'. "
"Write in a natural, standalone format that feels like an original explanation. "
"Keep it brief, engaging, informative, in the style of a news article: \n"
"Keep it brief, engaging, informative, in the style of a news article:\n\n{}".format(content)
)
'''
def generate(self, model, prompt, format=None):
try:
# Generate response
response = self.client.generate(model=model, prompt=prompt, format=format, options=self.options)
# Extract response
response = response.response
# Json? -> Dict
if (format == "json"):
# Dict
response = json.loads(response)
# Force unload
r = requests.post( os.path.join(self.host, "unload_model") )
except Exception as e:
logger.warning("Exception while generating LLM response: {}".format(str(e)))
if (format == "json"):
response = {}
else:
response = None
# Text
return response
def generate_stream(self, model, prompt):
try:
# Generate response
response = self.client.generate(model=model, prompt=prompt, format="json", stream=True, options=self.options)
# Streamed chunks
for chunk in response:
yield chunk.response
# Force unload
r = requests.post( os.path.join(self.host, "unload_model") )
except Exception as e:
logger.warning("Exception while generating LLM response: {}".format(str(e)))

View File

@@ -1,6 +1,10 @@
import logging
import os
# Set to warning
logging.getLogger("urllib3").setLevel(logging.WARNING)
logging.getLogger("newspaper").setLevel(logging.WARNING)
# Get env var
logs_directory = os.getenv("PATH_LOGS_DIRECTORY", "logs")
@@ -11,7 +15,7 @@ logging.basicConfig(format='%(filename)s | %(levelname)s | %(asctime)s | %(messa
logger = logging.getLogger("fetcher")
logger.setLevel(logging.DEBUG)
# To file log: INFO / WARNING / ERROR / CRITICAL
# To file log: DEBUG / INFO / WARNING / ERROR / CRITICAL
fh = logging.handlers.RotatingFileHandler(filename=os.path.join(logs_directory, "debug.log"), mode="a", maxBytes=10000000, backupCount=1)
fh.setFormatter(logging.Formatter('%(levelname)s | %(asctime)s | %(message)s'))
fh.setLevel(logging.DEBUG)
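Modules across the repo obtain this logger via get_logger(); a minimal usage sketch, assuming get_logger() returns the "fetcher" logger configured above:
from .logger import get_logger
logger = get_logger()
# DEBUG and above end up in debug.log via the RotatingFileHandler configured above
logger.debug("fetcher started")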

View File

@@ -0,0 +1,153 @@
from django.utils import timezone
from django.utils.timezone import now, timedelta
from ..models import Urls, Source, Search, UrlContent, UrlsSourceSearch, UrlsDuplicate
from django.db.models import Count
import requests
import os
import traceback
from .logger import get_logger
logger = get_logger()
def notify_telegram_info(last_hours, channel="INFO"):
try:
start_date = timezone.now() - timedelta(hours=last_hours)
# Count the number of URLs grouped by status within the date range
urls_data_status = Urls.objects.filter(ts_fetch__gte=start_date) \
.values('status') \
.annotate(count=Count('id')) \
.order_by('status')
# Count the number of URLs grouped by source
urls_data_source = UrlsSourceSearch.objects \
.filter(id_url__ts_fetch__gte=start_date) \
.values('id_source__source') \
.annotate(count=Count('id_url')) \
.order_by('id_source__source')
# Count the number of URLs grouped by search
urls_data_search = UrlsSourceSearch.objects \
.filter(id_url__ts_fetch__gte=start_date) \
.values('id_search__search') \
.annotate(count=Count('id_url')) \
.order_by('id_search__search')
bot_token = os.environ.get("TELEGRAM_{}_BOT_TOKEN".format(channel), "")
chat_id = os.environ.get("TELEGRAM_{}_CHAT_ID".format(channel), "")
message = "During the last {} hours:\n".format(last_hours)
message += "\nURLs per status:\n"
for o in urls_data_status:
message += " {}: {}\n".format(o.get("status"), o.get("count"))
message += "\nURLs per source:\n"
for o in urls_data_source:
message += " {}: {}\n".format(o.get("id_source__source"), o.get("count"))
message += "\nURLs per search:\n"
for o in urls_data_search:
message += " {}: {}\n".format(o.get("id_search__search"), o.get("count"))
url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
params = {
"chat_id": chat_id,
"text": message
}
# POST
response = requests.post(url, params=params)
except Exception as e:
logger.info("Exception while notifying status: {}\n{}".format(str(e), traceback.format_exc()))
def notify_telegram_warning(last_hours, channel="WARNING"):
try:
# Message appending logic
message = ""
start_date = timezone.now() - timedelta(hours=last_hours)
# Count the number of URLs grouped by status within the date range
urls_data_status = Urls.objects.filter(ts_fetch__gte=start_date) \
.values('status') \
.annotate(count=Count('id')) \
.order_by('status')
# Build dictionary
urls_data_status_dict = {}
for o in urls_data_status:
# #STATUS
urls_data_status_dict[o.get("status")] = o.get("count")
# #TOTAL
urls_data_status_dict["total"] = urls_data_status_dict.get("total", 0) + o.get("count")
MINIMUM_URLS_THRESHOLD = 10
MINIMUM_PROCESSED_URLS_RATIO = 0.7
# Minimum amount of URLs
if (urls_data_status_dict.get("total") < MINIMUM_URLS_THRESHOLD):
message += "WARNING - Total #URLS during the last {} hours: {}\n".format(last_hours, urls_data_status_dict.get("total"))
message += "\nURLs per status:\n"
for o in urls_data_status:
message += " {}: {}\n".format(o.get("status"), o.get("count"))
# Minimum ratio of processed raw urls
if (urls_data_status_dict.get("total") > 0):
if (urls_data_status_dict.get("raw", 0) / urls_data_status_dict.get("total") >= MINIMUM_PROCESSED_URLS_RATIO):
message += "WARNING - Small ratio of processed raw URLs during the last {} hours: {}\n".format(last_hours, urls_data_status_dict.get("total"))
message += "\nURLs per status:\n"
for o in urls_data_status:
message += " {}: {}\n".format(o.get("status"), o.get("count"))
# Count the number of URLs grouped by source
urls_data_source = UrlsSourceSearch.objects \
.filter(id_url__ts_fetch__gte=start_date) \
.values('id_source__source') \
.annotate(count=Count('id_url')) \
.order_by('id_source__source')
MINIMUM_SOURCES = 3
if (len(urls_data_source) < MINIMUM_SOURCES):
message += "WARNING - Very few sources found URLs during the last {} hours".format(last_hours)
message += "\nURLs per source:\n"
for o in urls_data_source:
message += " {}: {}\n".format(o.get("id_source__source"), o.get("count"))
"""
# TODO: URLs per search, key should be present for cnbc.com, foxnews.com, zerohedge.com, breitbart.com, child abuse, child neglect
# Count the number of URLs grouped by search
urls_data_search = UrlsSourceSearch.objects \
.filter(id_url__ts_fetch__gte=start_date) \
.values('id_search__search') \
.annotate(count=Count('id_url')) \
.order_by('id_search__search')
message += "\nURLs per search:\n"
for o in urls_data_search:
message += " {}: {}\n".format(o.get("id_search__search"), o.get("count"))
"""
# Valid message body?
if (message != ""):
bot_token = os.environ.get("TELEGRAM_{}_BOT_TOKEN".format(channel), "")
chat_id = os.environ.get("TELEGRAM_{}_CHAT_ID".format(channel), "")
url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
params = {
"chat_id": chat_id,
"text": message
}
# POST
response = requests.post(url, params=params)
except Exception as e:
logger.info("Exception while notifying status: {}\n{}".format(str(e)), traceback.format_exc())
def notify_telegram(last_hours=12):
# INFO
notify_telegram_info(last_hours, channel="INFO")
# WARNING
notify_telegram_warning(last_hours, channel="WARNING")
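Both notifiers above end with the same Bot API call; a minimal standalone version, assuming the TELEGRAM_INFO_* variables are set as in the compose file further down:
import os
import requests

bot_token = os.environ.get("TELEGRAM_INFO_BOT_TOKEN", "")
chat_id = os.environ.get("TELEGRAM_INFO_CHAT_ID", "")
url = "https://api.telegram.org/bot{}/sendMessage".format(bot_token)
# Telegram accepts chat_id and text as query parameters
requests.post(url, params={"chat_id": chat_id, "text": "fetcher: status test"})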

View File

@@ -12,7 +12,8 @@ logger = get_logger()
class Publisher():
def __init__(self):
pass
self.admin_api_url = os.getenv("GHOST_ADMIN_API_URL")
self.admin_api_key = os.getenv("GHOST_ADMIN_API_KEY")
def _create_jwt(self, admin_api_key):
id_, secret = admin_api_key.split(':')
@@ -29,9 +30,7 @@ class Publisher():
def _create_ghost_post(self, post_data):
# Get token
jwt_token = self._create_jwt(os.getenv("GHOST_ADMIN_API_KEY"))
# Get Admin API URL
admin_api_url = os.getenv("GHOST_ADMIN_API_URL")
jwt_token = self._create_jwt(self.admin_api_key)
headers = {
'Authorization': f'Ghost {jwt_token}',
@@ -41,7 +40,7 @@ class Publisher():
post_data = {"posts": [post_data]}
response = requests.post(
os.path.join(admin_api_url, "posts"),
os.path.join(self.admin_api_url, "posts"),
json=post_data,
headers=headers,
params={"source":"html"}
@@ -53,6 +52,27 @@ class Publisher():
else:
logger.warning("Ghost - Failed to publish post: {} {}".format(response.status_code, response.text))
return None
def _published_url_id(self, url_id):
# Get token
jwt_token = self._create_jwt(self.admin_api_key)
headers = {
'Authorization': f'Ghost {jwt_token}',
'Content-Type': 'application/json'
}
# Query param filter by URL ID
params = {"filter": "tags:hash-url-id-{}".format(url_id)}
# Get posts using filter
response = requests.get(os.path.join(self.admin_api_url, "posts"), params=params, headers=headers)
# To JSON
dict_response = response.json()
if (len(dict_response.get("posts")) > 0):
return True
else:
return False
def _get_photo_url(self, query):
# TODO: Get already used photos to skip. Use DB
@@ -100,14 +120,56 @@ class Publisher():
if (url_content.valid_content is False):
logger.warning("Ghost - URL Content is not valid for URL ID: {} {}".format(url_id, url.url))
return
# URL ID already published?
if (self._published_url_id(url_id)):
logger.info("Ghost - URL ID {} already published, skipping".format(url_id))
return
model = "llama3.2:3b"
prompt = "Rewrite the text below into a clear and concise summary, presenting the key points as if they are newly written insights. Do not mention or reference the original text, its source, or any phrases like 'According to' or 'The text states'. Instead, write in a natural, standalone format that feels like an original explanation. Keep it brief, engaging, informative, in the style of a news article, and no longer than a paragraph:"
###########################################
client_llm = OllamaClient()
# Model
model = client_llm.get_models()[0]
# Prompt
prompt = client_llm.get_prompt(url_content.content)
# Generate content
generated_content_dict = client_llm.generate(model, prompt, format="json")
logger.debug("Generated content: {}".format(generated_content_dict))
ollama_msg = {"role": "user", "content": "{}\n{}".format(prompt, url_content.content)}
response = OllamaClient().client.chat(model=model, messages=[ollama_msg])
###########################################
# Get where description
generated_content_where = generated_content_dict.get("where")
# Prompt to extract address / location
prompt = 'Only answer with the location or address which can be extracted from this description: "{}"'.format(generated_content_where)
# LLM
extracted_location = client_llm.generate(model, prompt, format=None)
logger.debug("Estimated location: {}".format(extracted_location))
# OSM API
params = {
'q': extracted_location,
'format': 'json',
'addressdetails': 1,
'limit': 1
}
article_summary = response["message"]["content"]
response = requests.get('https://nominatim.openstreetmap.org/search', params=params, headers={'User-Agent': 'App'})
list_data = response.json()
if (len(list_data) > 0):
data = list_data[0]
location_url = "https://openstreetmap.org/{}/{}".format(data.get("osm_type"), data.get("osm_id"))
else:
location_url = None
###########################################
# Parse generated content
summary, five_w = "", ""
for k, v in generated_content_dict.items():
if ("summary" in k.lower()):
summary = v if type(v) is str else "\n".join(v)
else:
five_w += "{}: {}\n".format(k.capitalize(), v if type(v) is str else ". ".join(v) )
# Aggregate generated content
generated_content = "{}\n\n{}".format(summary, five_w)
################################################################################################
if (url_content.image_main_url is None) or (requests.get(url_content.image_main_url).status_code != 200):
@@ -117,15 +179,24 @@ class Publisher():
else:
photo_url = url_content.image_main_url
# HTML: Generate content
html_data = "".join([ "<p>{}</p>".format(t) for t in generated_content.split("\n") ])
# HTML: Add location if available
if (location_url is not None):
html_data += '<p><a href="{}">Estimated location</a></p>'.format(location_url)
# HTML: Add source
html_data += '<p><a href="{}">Source: {}</a></p>'.format(url.url, url_content.url_host.replace("https://", ""))
post_data = {
# "slug": "hey-short",
"title": url_content.title,
"html": "".join([ "<p>{}</p>".format(t) for t in article_summary.split("\n") ]) + '<a href="{}">Source</a>'.format(url.url),
"html": html_data,
#"meta_title": "",
#"meta_description": "",
"feature_image": photo_url,
#"feature_image_caption": "",
"status": "published",
"tags": ["#url-id-{}".format(url_id)] # Hidden tag with associated URL ID
}
# Publish post
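The location lookup above boils down to a single Nominatim query; a self-contained sketch of that step, returning the same openstreetmap.org link the post builder uses (the timeout and User-Agent string are assumptions):
import requests

def geocode(location):
    # limit=1 keeps only the best match; Nominatim requires a User-Agent header
    params = {"q": location, "format": "json", "addressdetails": 1, "limit": 1}
    r = requests.get("https://nominatim.openstreetmap.org/search",
                     params=params, headers={"User-Agent": "App"}, timeout=15)
    results = r.json()
    if not results:
        return None
    return "https://openstreetmap.org/{}/{}".format(results[0].get("osm_type"), results[0].get("osm_id"))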

View File

@@ -0,0 +1,8 @@
def get_with_protocol(url):
# http:// -> https://
url = url.replace("http://", "https://")
# "" -> https://
if not (url.startswith("https://")):
url = "https://" + url
return url
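Behavior of the helper above, for reference:
assert get_with_protocol("example.com") == "https://example.com"
assert get_with_protocol("http://example.com") == "https://example.com"
assert get_with_protocol("https://example.com") == "https://example.com"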

View File

@@ -1,142 +1,91 @@
from scheduler import job
from celery import shared_task
from .src.fetch_feed import FetchFeeds
from .src.fetch_parser import FetchParser
from .src.fetch_search import FetchSearcher
from .src.fetch_missing_kids import FetchMissingKids
from .src.fetch_selenium import FetchSeleniumSourceSearch
from .src.db_utils import DB_Handler
from .src.publisher import Publisher
from .src.notifier import notify_telegram
from .src.logger import get_logger
logger = get_logger()
@job('default')
def fetch_feeds():
task = "Fetch Feeds"
logger.info("Task triggered: {}".format(task))
FetchFeeds().run()
logger.info("Task completed: {}".format(task))
@job('default')
def fetch_parser():
task = "Fetch Parser"
logger.info("Task triggered: {}".format(task))
FetchParser().run()
logger.info("Task completed: {}".format(task))
@job('default')
def fetch_search():
task = "Fetch Search"
logger.info("Task triggered: {}".format(task))
FetchSearcher().run()
logger.info("Task completed: {}".format(task))
@job('default')
def fetch_missing_kids(number_pages=5):
task = "Fetch MissingKids"
logger.info("Task triggered: {}".format(task))
FetchMissingKids().run(number_pages)
logger.info("Task completed: {}".format(task))
@job('default')
def fetch_missing_kids_all(number_pages=-1):
task = "Fetch MissingKids"
logger.info("Task triggered: {}".format(task))
FetchMissingKids().run(number_pages)
logger.info("Task completed: {}".format(task))
@job('default')
@shared_task(queue='light')
def process_raw_urls(batch_size=100):
task = "Process raw URLs"
logger.info("Task triggered: {}".format(task))
DB_Handler().process_raw_urls(batch_size=batch_size)
logger.info("Task completed: {}".format(task))
@job('default')
@shared_task(queue='default')
def process_error_urls(batch_size=50):
task = "Process error URLs"
logger.info("Task triggered: {}".format(task))
DB_Handler().process_error_urls(batch_size=batch_size)
logger.info("Task completed: {}".format(task))
@job('default')
def process_missing_kids_urls(batch_size=50):
task = "Process Missing Kids URLs"
@shared_task(queue='light')
def fetch_feeds():
task = "Fetch Feeds"
logger.info("Task triggered: {}".format(task))
DB_Handler().process_missing_kids_urls(batch_size=batch_size)
FetchFeeds().run()
logger.info("Task completed: {}".format(task))
@job('default')
def process_missing_kids_urls_all(batch_size=None):
task = "Process Missing Kids URLs ALL"
@shared_task(queue='default')
def fetch_parser():
task = "Fetch Parser"
logger.info("Task triggered: {}".format(task))
DB_Handler().process_missing_kids_urls(batch_size=batch_size)
FetchParser().run()
logger.info("Task completed: {}".format(task))
@job('default')
def clean_old_url_content(older_than_days=60):
@shared_task(queue='default')
def fetch_search():
task = "Fetch Search"
logger.info("Task triggered: {}".format(task))
FetchSearcher().run()
logger.info("Task completed: {}".format(task))
@shared_task(queue='heavy')
def fetch_selenium_search():
task = "Fetch Selenium search"
logger.info("Task triggered: {}".format(task))
FetchSeleniumSourceSearch().run()
logger.info("Task completed: {}".format(task))
@shared_task(queue='heavy')
def fetch_missing_kids(number_pages=5):
task = "Fetch MissingKids"
logger.info("Task triggered: {}".format(task))
FetchMissingKids().run(number_pages)
logger.info("Task completed: {}".format(task))
@shared_task(queue='heavy')
def process_missing_kids_urls(batch_size=None, process_status_only=None):
task = "Process Missing Kids URLs - batch_size={} process_status_only={}".format(batch_size, process_status_only)
logger.info("Task triggered: {}".format(task))
DB_Handler().process_missing_kids_urls(batch_size=batch_size, process_status_only=process_status_only)
logger.info("Task completed: {}".format(task))
@shared_task(queue='default')
def clean_old_url_content(older_than_days=14):
task = "Clean old URL content"
logger.info("Task triggered: {}".format(task))
DB_Handler().clean_old_url_content(older_than_days=older_than_days)
logger.info("Task completed: {}".format(task))
@job('default')
def background_task(process_type: str):
logger.info("Task triggered: {}".format(process_type))
try:
if (process_type == "fetch_feeds"):
FetchFeeds().run()
elif (process_type == "fetch_parser"):
FetchParser().run()
elif (process_type == "fetch_search"):
FetchSearcher().run()
elif (process_type == "fetch_missingkids_all"):
FetchMissingKids().run(number_pages=-1)
elif ("fetch_missingkids" in process_type):
# number_pages encoded in URL
try:
number_pages = int(process_type.split("_")[-1])
except Exception as e:
number_pages = -1
FetchMissingKids().run(number_pages=number_pages)
elif ("process_" in process_type):
# Batch size encoded in URL
try:
batch_size = int(process_type.split("_")[-1])
except Exception as e:
batch_size = None
# Task type
if ("process_raw_urls" in process_type):
DB_Handler().process_raw_urls(batch_size=batch_size)
elif ("process_error_urls" in process_type):
DB_Handler().process_error_urls(batch_size=batch_size)
elif ("process_missing_kids_urls" in process_type):
DB_Handler().process_missing_kids_urls(batch_size=batch_size)
elif ("clean_old_url_content" in process_type ):
# Older than X days encoded in URL
try:
older_than_days = float(process_type.split("_")[-1])
except Exception as e:
older_than_days = None
DB_Handler().clean_old_url_content(older_than_days=older_than_days)
elif ("publish" in process_type):
# Extract URL ID
url_id = process_type.split("_")[-1]
# Publish
Publisher().publish(url_id)
else:
logger.info("Task unknown!: {}".format(process_type))
logger.info("Task completed: {}".format(process_type))
except Exception as e:
logger.error(e)
@shared_task(queue='light')
def notify_status():
task = "Notify status"
logger.info("Task triggered: {}".format(task))
notify_telegram()
logger.info("Task completed: {}".format(task))

View File

@@ -369,7 +369,7 @@ input[type="checkbox"] {
<tbody>
{% for url in urls %}
<tr>
<td><a href="./{{ url.id }}" class="btn btn-primary btn-sm" target="_blank">{{ url.id }}</a></td>
<td><a href="./{{ url.id }}" class="btn btn-primary btn-sm" target="_blank">{{ url.id }}</a> <a href="/task/publish_{{ url.id }}" target="_blank">[✍️]</a> </td>
<td><a href="{{ url.url }}" target="_blank">{{ url.url }}</a></td>
<td>
{% if url.status == 'raw' %}

View File

@@ -278,8 +278,7 @@
<!-- Input field with a default value -->
<label for="custom-input-{{ url_item.id }}">Prompt:</label>
<textarea id="custom-input-{{ url_item.id }}" class="form-control mb-2" rows="5">{{ prompt }}
{{ url_item.url }}</textarea>
<textarea id="custom-input-{{ url_item.id }}" class="form-control mb-2" rows="5">{{ prompt }}</textarea>
<div class="d-flex align-items-center">
<!-- Fetch details button -->

View File

@@ -7,8 +7,6 @@ urlpatterns = [
path('logs/database', views.log_db, name='log_db'),
path('logs/<str:log_type>', views.logs, name='logs'),
#
path('task/<str:task>', views.trigger_task, name='trigger_task'),
#
path('urls/charts/', views.charts, name='charts'),
path('urls-by-fetch-date/', views.urls_by_fetch_date, name='urls_by_fetch_date'),
path('urls-per-status/', views.urls_per_status, name='urls_per_status'),

View File

@@ -1,8 +1,8 @@
from .views_base import link_list, logs, log_db, trigger_task
from .views_base import link_list, logs, log_db #, trigger_task,
from django.core.paginator import Paginator
from django.shortcuts import render, get_object_or_404
from django.http import StreamingHttpResponse, JsonResponse, HttpResponse
from django.http import StreamingHttpResponse, JsonResponse
from django.db.models import Q, Count
from django.utils import timezone
from django.utils.timezone import now, timedelta
@@ -14,16 +14,6 @@ import json
####################################################################################################
def llm(request):
def stream_response(model, text):
msg_content = {
"role": "user",
"content": text,
}
response = OllamaClient().client.chat(model=model, messages=[msg_content], stream=True)
for chunk in response:
yield chunk["message"]["content"] # Stream each chunk of text
if request.method == 'POST':
try:
body_data = json.loads(request.body)
@@ -33,7 +23,7 @@ def llm(request):
if message is None:
return JsonResponse({'error': 'No message found in request'}, status=400)
return StreamingHttpResponse(stream_response(model, message), content_type="text/plain")
return StreamingHttpResponse(OllamaClient().generate_stream(model, message), content_type="text/plain")
except json.JSONDecodeError:
return JsonResponse({'error': 'Invalid JSON'}, status=400)
@@ -55,13 +45,18 @@ def url_detail_view(request, id):
url_content = {}
ollama = OllamaClient()
try:
# prompt_content = "{}\n{}\n{}".format(url_content.title, url_content.description, url_content.content)
prompt_content = "{}".format(url_content.content)
except Exception as e:
prompt_content = ""
context = {
'url_item': url_item,
'sources': url_sources,
'searches': url_searches,
'models': ollama.get_models(),
'prompt': ollama.get_prompt(),
'prompt': ollama.get_prompt(prompt_content),
'url_content': url_content,
'url_canonical': url_canonical,
}

View File

@@ -1,26 +1,25 @@
import os
from .tasks import background_task
from django.http import JsonResponse, HttpResponse
from django.db import connection
import os
####################################################################################################
"""
### from .tasks import background_task
def trigger_task(request, task):
# Enqueue function in "default" queue
background_task.delay(task)
return JsonResponse({"message": "Task has been enqueued!", "task": task})
"""
####################################################################################################
def link_list(request):
# Base URL path
app_url = request.build_absolute_uri()
# Tasks
links_fetch = ["fetch_feeds", "fetch_parser", "fetch_search", "fetch_missingkids_5", "fetch_missingkids_all"]
links_process = ["process_raw_urls_50", "process_error_urls_50", "process_missing_kids_urls_50", "process_missing_kids_urls_all", "clean_old_url_content_60"]
# List of links
list_links = \
[ os.path.join(app_url, "admin"), os.path.join(app_url, "urls") ] + \
[ os.path.join(app_url, "logs", log_type) for log_type in ["database", "debug", "info", "warning"] ] + \
[ os.path.join(app_url, "task", l) for l in links_fetch + links_process ]
[ os.path.join(app_url, "logs", log_type) for log_type in ["database", "debug", "info", "warning", "server", "beat", "worker_light", "worker_default", "worker_heavy"] ]
# Links tuple
links = [(l, l) for l in list_links]
@@ -32,6 +31,7 @@ def link_list(request):
return HttpResponse(html)
####################################################################################################
def logs(request, log_type):
# Capture output: python manage.py rqstats
@@ -68,4 +68,4 @@ def log_db(request):
""").fetchall()
return HttpResponse( "\n".join([str(e) for e in r]) )
####################################################################################################
####################################################################################################

View File

@@ -17,18 +17,21 @@
"cnbc.com"
],
"keyword_search": [
"child abuse"
"child abuse",
"child neglect"
]
},
"REGEX_PATTERN_STATUS_PRIORITY": [
[".*(youtube|tiktok|twitter|reddit)\\.com\\/.*", "invalid", 50],
["https:\\/\\/x.com\\/.*", "invalid", 50],
[".*cnbc\\.com\\/(video|quotes)\\/.*", "invalid", 75],
[".*foxnews\\.com\\/(video|category)\\/.*", "invalid", 75],
[".*radio.foxnews\\.com\\/.*", "invalid", 75],
[".*breitbart\\.com\\/(tag|author)\\/.*", "invalid", 75],
[".*zerohedge\\.com\\/(user)\\/.*", "invalid", 75],
[".*zerohedge\\.com\\/(economics|political|markets|)\\/.*", "valid", 50],
[".*breitbart\\.com\\/(economy|entertainment|border|crime|clips)\\/.*", "valid", 50],
[".*foxnews\\.com\\/(lifestyle|opinion|sports|world)\\/.*", "valid", 50]
[".*zerohedge\\.com\\/(user|contributors)\\/.*", "invalid", 75],
[".*zerohedge\\.com\\/(economics|political|markets|)\\/.*", "valid", 50],
[".*radio\\.foxnews\\.com\\/.*", "invalid", 75],
[".*foxnews\\.com\\/(video|category|person|books|html-sitemap)\\/.*", "invalid", 75],
[".*foxnews\\.com\\/(lifestyle|opinion|sports|world)\\/.*", "valid", 50],
[".*foxnews\\.com\\/[^\\/]+\\/?$", "invalid", 25]
]
}
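Each REGEX_PATTERN_STATUS_PRIORITY entry is a [pattern, status, priority] triple; a sketch of how such a table could be applied (the classify helper below is illustrative, not the repo's actual matcher):
import re

PATTERNS = [
    [r".*(youtube|tiktok|twitter|reddit)\.com\/.*", "invalid", 50],
    [r".*foxnews\.com\/(lifestyle|opinion|sports|world)\/.*", "valid", 50],
    [r".*foxnews\.com\/[^\/]+\/?$", "invalid", 25],
]

def classify(url):
    # Highest-priority matching rule wins; None means no rule applies
    matches = [(priority, status) for pattern, status, priority in PATTERNS if re.match(pattern, url)]
    return max(matches)[1] if matches else None

print(classify("https://www.foxnews.com/opinion/some-article"))  # -> valid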

View File

@@ -1,65 +0,0 @@
{
"SEARCH": {
"rss_feed": [
],
"url_host": [
"johnpilger.com",
"lapenseeecologique.com",
"partage-le.com",
"reflets.info",
"rezo.net",
"consortiumnews.com",
"disclose.ngo/fr",
"energieetenvironnement.com",
"global-climat.com",
"slashdot.org",
"lesamisdebartleby.wordpress.com",
"lundi.am",
"lvsl.fr",
"moderndiplomacy.eu",
"mrmondialisation.org",
"ourfiniteworld.com",
"southfront.org",
"simplicius76.substack.com",
"smoothiex12.blogspot.com",
"theintercept.com",
"wikileaks.org",
"contretemps.eu",
"indianpunchline.com",
"investigaction.net/fr",
"notechmagazine.com",
"terrestres.org",
"truthdig.com",
"tass.com",
"bastamag.net",
"counterpunch.org",
"energy-daily.com",
"fakirpresse.info",
"geopoliticalmonitor.com",
"huffingtonpost.fr",
"legrandsoir.info",
"les-crises.fr",
"liberation.fr",
"maitre-eolas.fr",
"marianne.net",
"mediapart.fr",
"metaefficient.com",
"monde-diplomatique.fr",
"paulcraigroberts.org",
"politis.fr",
"reporterre.net",
"rue89.com",
"theguardian.com/international",
"treehugger.com",
"unz.com",
"voltairenet.org",
"wsws.org"
],
"keyword_search": [
"society collapse"
]
},
"REGEX_PATTERN_STATUS_PRIORITY": [
[".*(youtube|tiktok|twitter|reddit)\\.com\\/.*", "invalid", 50]
]
}

View File

@@ -1,34 +0,0 @@
{
"SEARCH": {
"rss_feed": [
"https://api.missingkids.org/missingkids/servlet/XmlServlet?act=rss&LanguageCountry=en_US&orgPrefix=NCMC",
"https://feeds.feedburner.com/breitbart",
"https://feeds.feedburner.com/zerohedge/feed",
"https://moxie.foxnews.com/google-publisher/latest.xml",
"https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id=15837362",
"https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id=100727362"
],
"url_host": [
"missingkids.org/poster",
"missingkids.org/new-poster",
"breitbart.com",
"zerohedge.com",
"foxnews.com",
"cnbc.com"
],
"keyword_search": [
"child abuse"
]
},
"REGEX_PATTERN_STATUS_PRIORITY": [
[".*(youtube|tiktok|twitter|reddit)\\.com\\/.*", "invalid", 50],
[".*cnbc\\.com\\/(video|quotes)\\/.*", "invalid", 75],
[".*foxnews\\.com\\/(video|category)\\/.*", "invalid", 75],
[".*radio.foxnews\\.com\\/.*", "invalid", 75],
[".*breitbart\\.com\\/(tag|author)\\/.*", "invalid", 75],
[".*zerohedge\\.com\\/(user)\\/.*", "invalid", 75],
[".*zerohedge\\.com\\/(economics|political|markets|)\\/.*", "valid", 50],
[".*breitbart\\.com\\/(economy|entertainment|border|crime|clips)\\/.*", "valid", 50],
[".*foxnews\\.com\\/(lifestyle|opinion|sports|world)\\/.*", "valid", 50]
]
}

View File

@@ -29,13 +29,15 @@ def wait_connection():
connected = True
except psycopg.OperationalError as e:
print(str(e))
# Connection not ready...
# print(".", end="")
time.sleep(2)
time.sleep(15)
except Exception as e:
print(str(e))
# Connection not ready...
# print("e", end="")
time.sleep(2)
time.sleep(15)
print("DB connection ready")
@@ -57,7 +59,8 @@ def initialize_tables():
ts_fetch TIMESTAMPTZ NOT NULL DEFAULT NOW(),
status URL_STATUS NOT NULL DEFAULT 'raw' -- ,
-- status_wendy WENDY_STATUS DEFAULT NULL,
-- ts_wendy TIMESTAMPTZ DEFAULT NULL
-- ts_wendy TIMESTAMPTZ DEFAULT NULL,
-- child_abuse BOOLEAN DEFAULT NULL,
);
CREATE INDEX idx_urls_status ON urls(status);
CREATE INDEX idx_urls_ts_fetch ON urls(ts_fetch);
@@ -212,6 +215,10 @@ def initialize_data():
print(query)
cur.execute(query)
# Connect to an existing database
with psycopg.connect(connection_info) as conn:
# Open a cursor to perform database operations
with conn.cursor() as cur:
# Feeds, URL host, keyword search
for search_type, list_searches in data_json.get("SEARCH", {}).items():
for search in list_searches:

View File

@@ -6,7 +6,10 @@ else
echo "Initializating database"
python init_db.py --initialize_tables --initialize_data
python manage.py makemigrations fetcher; python manage.py migrate --fake-initial
python manage.py migrate django_celery_beat
python manage.py createsuperuser --noinput
python manage.py collectstatic --no-input
python manage.py import --filename scheduled_tasks.json
python manage.py loaddata scheduled_tasks.json
#
# python manage.py inspectdb # Debugging model
fi

View File

@@ -1,5 +1,5 @@
django==5.1
django-tasks-scheduler==3.0.1
django-celery-beat
django-redis
psycopg[binary]
gunicorn
@@ -13,8 +13,9 @@ lxml[html_clean]
googlenewsdecoder
gnews
GoogleNews
duckduckgo_search
ddgs
git+https://github.com/tasos-py/Search-Engines-Scraper.git
furl
langdetect
ollama
PyJWT
PyJWT

View File

@@ -1,8 +0,0 @@
#!/bin/bash
if [ "${DJANGO_DEBUG}" = true ] | [ "${DJANGO_DEBUG}" == "True" ]; then
echo "Running in DEBUG mode"
gunicorn core.wsgi:application --reload --log-level debug --bind 0.0.0.0:8000 --timeout 600 & python manage.py rqworker high default low
else
gunicorn core.wsgi:application --bind 0.0.0.0:8000 --timeout 600 & python manage.py rqworker high default low
fi

View File

@@ -1,212 +1,507 @@
[
{
"model": "RepeatableTaskType",
"name": "Process error URLs",
"callable": "fetcher.tasks.process_error_urls",
"callable_args": [],
"callable_kwargs": [],
"enabled": false,
"queue": "low",
"repeat": null,
"at_front": false,
"timeout": 1800,
"result_ttl": 86400,
"cron_string": null,
"scheduled_time": "2025-01-01T00:00:00+00:00",
"interval": 4,
"interval_unit": "hours",
"successful_runs": 0,
"failed_runs": 0,
"last_successful_run": null,
"last_failed_run": null
"model": "django_celery_beat.periodictask",
"pk": 1,
"fields": {
"name": "celery.backend_cleanup",
"task": "celery.backend_cleanup",
"interval": null,
"crontab": 1,
"solar": null,
"clocked": null,
"args": "[]",
"kwargs": "{}",
"queue": null,
"exchange": null,
"routing_key": null,
"headers": "{}",
"priority": null,
"expires": null,
"expire_seconds": 43200,
"one_off": false,
"start_time": null,
"enabled": true,
"last_run_at": null,
"total_run_count": 0,
"date_changed": "2025-07-17T16:07:34.609Z",
"description": ""
}
},
{
"model": "RepeatableTaskType",
"name": "Process raw URLs",
"callable": "fetcher.tasks.process_raw_urls",
"callable_args": [],
"callable_kwargs": [],
"enabled": false,
"queue": "low",
"repeat": null,
"at_front": false,
"timeout": 1800,
"result_ttl": 86400,
"cron_string": null,
"scheduled_time": "2025-01-01T00:00:00+00:00",
"interval": 10,
"interval_unit": "minutes",
"successful_runs": 0,
"failed_runs": 0,
"last_successful_run": null,
"last_failed_run": null
"model": "django_celery_beat.periodictask",
"pk": 2,
"fields": {
"name": "Process error URLs",
"task": "fetcher.tasks.process_error_urls",
"interval": 1,
"crontab": null,
"solar": null,
"clocked": null,
"args": "[]",
"kwargs": "{}",
"queue": null,
"exchange": null,
"routing_key": null,
"headers": "{}",
"priority": null,
"expires": null,
"expire_seconds": null,
"one_off": false,
"start_time": null,
"enabled": true,
"last_run_at": null,
"total_run_count": 0,
"date_changed": "2025-07-17T16:10:08.861Z",
"description": ""
}
},
{
"model": "RepeatableTaskType",
"name": "Process MissingKids URLs",
"callable": "fetcher.tasks.process_missing_kids_urls",
"callable_args": [],
"callable_kwargs": [],
"enabled": false,
"queue": "default",
"repeat": null,
"at_front": false,
"timeout": 1800,
"result_ttl": 86400,
"cron_string": null,
"scheduled_time": "2025-01-01T00:00:00+00:00",
"interval": 4,
"interval_unit": "hours",
"successful_runs": 0,
"failed_runs": 0,
"last_successful_run": null,
"last_failed_run": null
"model": "django_celery_beat.periodictask",
"pk": 3,
"fields": {
"name": "Process raw URLs",
"task": "fetcher.tasks.process_raw_urls",
"interval": 2,
"crontab": null,
"solar": null,
"clocked": null,
"args": "[]",
"kwargs": "{}",
"queue": null,
"exchange": null,
"routing_key": null,
"headers": "{}",
"priority": null,
"expires": null,
"expire_seconds": null,
"one_off": false,
"start_time": null,
"enabled": true,
"last_run_at": "2025-07-17T16:20:36.751Z",
"total_run_count": 1,
"date_changed": "2025-07-17T16:21:17.099Z",
"description": ""
}
},
{
"model": "RepeatableTaskType",
"name": "Process MissingKids URLs ALL",
"callable": "fetcher.tasks.process_missing_kids_urls_all",
"callable_args": [],
"callable_kwargs": [],
"enabled": false,
"queue": "default",
"repeat": null,
"at_front": false,
"timeout": 7200,
"result_ttl": 86400,
"cron_string": null,
"scheduled_time": "2025-01-01T00:00:00+00:00",
"interval": 1,
"interval_unit": "weeks",
"successful_runs": 0,
"failed_runs": 0,
"last_successful_run": null,
"last_failed_run": null
"model": "django_celery_beat.periodictask",
"pk": 4,
"fields": {
"name": "Process MissingKids URLs - batch=50",
"task": "fetcher.tasks.process_missing_kids_urls",
"interval": 3,
"crontab": null,
"solar": null,
"clocked": null,
"args": "[]",
"kwargs": "{\"batch_size\": 50}",
"queue": null,
"exchange": null,
"routing_key": null,
"headers": "{}",
"priority": null,
"expires": null,
"expire_seconds": null,
"one_off": false,
"start_time": null,
"enabled": true,
"last_run_at": null,
"total_run_count": 0,
"date_changed": "2025-07-17T16:12:44.533Z",
"description": ""
}
},
{
"model": "RepeatableTaskType",
"name": "Fetch Feeds",
"callable": "fetcher.tasks.fetch_feeds",
"callable_args": [],
"callable_kwargs": [],
"enabled": false,
"queue": "default",
"repeat": null,
"at_front": false,
"timeout": 1800,
"result_ttl": 86400,
"cron_string": null,
"scheduled_time": "2025-01-01T00:00:00+00:00",
"interval": 10,
"interval_unit": "minutes",
"successful_runs": 0,
"failed_runs": 0,
"last_successful_run": null,
"last_failed_run": null
"model": "django_celery_beat.periodictask",
"pk": 5,
"fields": {
"name": "Process MissingKids URLs ALL - unknown",
"task": "fetcher.tasks.process_missing_kids_urls",
"interval": 4,
"crontab": null,
"solar": null,
"clocked": null,
"args": "[]",
"kwargs": "{\"process_status_only\": \"unknown\"}",
"queue": null,
"exchange": null,
"routing_key": null,
"headers": "{}",
"priority": null,
"expires": null,
"expire_seconds": null,
"one_off": false,
"start_time": null,
"enabled": true,
"last_run_at": null,
"total_run_count": 0,
"date_changed": "2025-07-17T16:16:38.258Z",
"description": ""
}
},
{
"model": "RepeatableTaskType",
"name": "Fetch Parser",
"callable": "fetcher.tasks.fetch_parser",
"callable_args": [],
"callable_kwargs": [],
"enabled": false,
"queue": "default",
"repeat": null,
"at_front": false,
"timeout": 3600,
"result_ttl": 86400,
"cron_string": null,
"scheduled_time": "2025-01-01T00:00:00+00:00",
"interval": 1,
"interval_unit": "hours",
"successful_runs": 0,
"failed_runs": 0,
"last_successful_run": null,
"last_failed_run": null
"model": "django_celery_beat.periodictask",
"pk": 6,
"fields": {
"name": "Process MissingKids URLs ALL - valid",
"task": "fetcher.tasks.process_missing_kids_urls",
"interval": 5,
"crontab": null,
"solar": null,
"clocked": null,
"args": "[]",
"kwargs": "{\"process_status_only\": \"valid\"}",
"queue": null,
"exchange": null,
"routing_key": null,
"headers": "{}",
"priority": null,
"expires": null,
"expire_seconds": null,
"one_off": false,
"start_time": null,
"enabled": true,
"last_run_at": null,
"total_run_count": 0,
"date_changed": "2025-07-17T16:20:19.969Z",
"description": ""
}
},
{
"model": "RepeatableTaskType",
"name": "Fetch Search",
"callable": "fetcher.tasks.fetch_search",
"callable_args": [],
"callable_kwargs": [],
"enabled": false,
"queue": "default",
"repeat": null,
"at_front": false,
"timeout": 3600,
"result_ttl": 86400,
"cron_string": null,
"scheduled_time": "2025-01-01T00:00:00+00:00",
"interval": 1,
"interval_unit": "hours",
"successful_runs": 0,
"failed_runs": 0,
"last_successful_run": null,
"last_failed_run": null
"model": "django_celery_beat.periodictask",
"pk": 7,
"fields": {
"name": "Process MissingKids URLs ALL - invalid",
"task": "fetcher.tasks.process_missing_kids_urls",
"interval": 6,
"crontab": null,
"solar": null,
"clocked": null,
"args": "[]",
"kwargs": "{\"process_status_only\": \"invalid\"}",
"queue": null,
"exchange": null,
"routing_key": null,
"headers": "{}",
"priority": null,
"expires": null,
"expire_seconds": null,
"one_off": false,
"start_time": null,
"enabled": true,
"last_run_at": null,
"total_run_count": 0,
"date_changed": "2025-07-17T16:21:30.809Z",
"description": ""
}
},
{
"model": "RepeatableTaskType",
"name": "Fetch MissingKids",
"callable": "fetcher.tasks.fetch_missing_kids",
"callable_args": [],
"callable_kwargs": [],
"enabled": false,
"queue": "default",
"repeat": null,
"at_front": false,
"timeout": 1800,
"result_ttl": 86400,
"cron_string": null,
"scheduled_time": "2025-01-01T00:00:00+00:00",
"interval": 4,
"interval_unit": "hours",
"successful_runs": 0,
"failed_runs": 0,
"last_successful_run": null,
"last_failed_run": null
"model": "django_celery_beat.periodictask",
"pk": 8,
"fields": {
"name": "Fetch Feeds",
"task": "fetcher.tasks.fetch_feeds",
"interval": 2,
"crontab": null,
"solar": null,
"clocked": null,
"args": "[]",
"kwargs": "{}",
"queue": null,
"exchange": null,
"routing_key": null,
"headers": "{}",
"priority": null,
"expires": null,
"expire_seconds": null,
"one_off": false,
"start_time": null,
"enabled": true,
"last_run_at": null,
"total_run_count": 0,
"date_changed": "2025-07-17T16:22:15.615Z",
"description": ""
}
},
{
"model": "RepeatableTaskType",
"name": "Fetch MissingKids ALL",
"callable": "fetcher.tasks.fetch_missing_kids_all",
"callable_args": [],
"callable_kwargs": [],
"enabled": false,
"queue": "default",
"repeat": null,
"at_front": false,
"timeout": 7200,
"result_ttl": 86400,
"cron_string": null,
"scheduled_time": "2025-01-01T00:00:00+00:00",
"interval": 1,
"interval_unit": "weeks",
"successful_runs": 0,
"failed_runs": 0,
"last_successful_run": null,
"last_failed_run": null
"model": "django_celery_beat.periodictask",
"pk": 9,
"fields": {
"name": "Fetch Parser",
"task": "fetcher.tasks.fetch_parser",
"interval": 7,
"crontab": null,
"solar": null,
"clocked": null,
"args": "[]",
"kwargs": "{}",
"queue": null,
"exchange": null,
"routing_key": null,
"headers": "{}",
"priority": null,
"expires": null,
"expire_seconds": null,
"one_off": false,
"start_time": null,
"enabled": true,
"last_run_at": null,
"total_run_count": 0,
"date_changed": "2025-07-17T16:22:40.215Z",
"description": ""
}
},
{
"model": "RepeatableTaskType",
"name": "Clean old URL content",
"callable": "fetcher.tasks.clean_old_url_content",
"callable_args": [],
"callable_kwargs": [],
"enabled": false,
"queue": "default",
"repeat": null,
"at_front": false,
"timeout": null,
"result_ttl": 86400,
"cron_string": null,
"scheduled_time": "2025-01-01T00:00:00+00:00",
"interval": 1,
"interval_unit": "weeks",
"successful_runs": 0,
"failed_runs": 0,
"last_successful_run": null,
"last_failed_run": null
"model": "django_celery_beat.periodictask",
"pk": 10,
"fields": {
"name": "Fetch Search",
"task": "fetcher.tasks.fetch_search",
"interval": 8,
"crontab": null,
"solar": null,
"clocked": null,
"args": "[]",
"kwargs": "{}",
"queue": null,
"exchange": null,
"routing_key": null,
"headers": "{}",
"priority": null,
"expires": null,
"expire_seconds": null,
"one_off": false,
"start_time": null,
"enabled": true,
"last_run_at": null,
"total_run_count": 0,
"date_changed": "2025-07-17T16:23:00.329Z",
"description": ""
}
},
{
"model": "django_celery_beat.periodictask",
"pk": 11,
"fields": {
"name": "Fetch Selenium Search",
"task": "fetcher.tasks.fetch_selenium_search",
"interval": 3,
"crontab": null,
"solar": null,
"clocked": null,
"args": "[]",
"kwargs": "{}",
"queue": null,
"exchange": null,
"routing_key": null,
"headers": "{}",
"priority": null,
"expires": null,
"expire_seconds": null,
"one_off": false,
"start_time": null,
"enabled": true,
"last_run_at": null,
"total_run_count": 0,
"date_changed": "2025-07-17T16:24:08.315Z",
"description": ""
}
},
{
"model": "django_celery_beat.periodictask",
"pk": 12,
"fields": {
"name": "Fetch MissingKids - pages=5",
"task": "fetcher.tasks.fetch_missing_kids",
"interval": 4,
"crontab": null,
"solar": null,
"clocked": null,
"args": "[]",
"kwargs": "{\"number_pages\": 5}",
"queue": null,
"exchange": null,
"routing_key": null,
"headers": "{}",
"priority": null,
"expires": null,
"expire_seconds": null,
"one_off": false,
"start_time": null,
"enabled": true,
"last_run_at": null,
"total_run_count": 0,
"date_changed": "2025-07-17T16:25:02.494Z",
"description": ""
}
},
{
"model": "django_celery_beat.periodictask",
"pk": 13,
"fields": {
"name": "Fetch MissingKids - ALL",
"task": "fetcher.tasks.fetch_missing_kids",
"interval": 9,
"crontab": null,
"solar": null,
"clocked": null,
"args": "[]",
"kwargs": "{\"number_pages\": -1}",
"queue": null,
"exchange": null,
"routing_key": null,
"headers": "{}",
"priority": null,
"expires": null,
"expire_seconds": null,
"one_off": false,
"start_time": null,
"enabled": true,
"last_run_at": null,
"total_run_count": 0,
"date_changed": "2025-07-17T16:25:50.597Z",
"description": ""
}
},
{
"model": "django_celery_beat.periodictask",
"pk": 14,
"fields": {
"name": "Clean old URL content",
"task": "fetcher.tasks.clean_old_url_content",
"interval": 9,
"crontab": null,
"solar": null,
"clocked": null,
"args": "[]",
"kwargs": "{}",
"queue": null,
"exchange": null,
"routing_key": null,
"headers": "{}",
"priority": null,
"expires": null,
"expire_seconds": null,
"one_off": false,
"start_time": null,
"enabled": true,
"last_run_at": null,
"total_run_count": 0,
"date_changed": "2025-07-17T16:26:16.272Z",
"description": ""
}
},
{
"model": "django_celery_beat.periodictask",
"pk": 4,
"fields": {
"name": "Notify status",
"task": "fetcher.tasks.notify_status",
"interval": 4,
"crontab": null,
"solar": null,
"clocked": null,
"args": "[]",
"kwargs": "{}",
"queue": null,
"exchange": null,
"routing_key": null,
"headers": "{}",
"priority": null,
"expires": null,
"expire_seconds": null,
"one_off": false,
"start_time": null,
"enabled": true,
"last_run_at": null,
"total_run_count": 0,
"date_changed": "2025-07-17T16:12:44.533Z",
"description": ""
}
},
{
"model": "django_celery_beat.intervalschedule",
"pk": 1,
"fields": {
"every": 6,
"period": "hours"
}
},
{
"model": "django_celery_beat.intervalschedule",
"pk": 2,
"fields": {
"every": 10,
"period": "minutes"
}
},
{
"model": "django_celery_beat.intervalschedule",
"pk": 3,
"fields": {
"every": 1,
"period": "days"
}
},
{
"model": "django_celery_beat.intervalschedule",
"pk": 4,
"fields": {
"every": 12,
"period": "hours"
}
},
{
"model": "django_celery_beat.intervalschedule",
"pk": 5,
"fields": {
"every": 2,
"period": "days"
}
},
{
"model": "django_celery_beat.intervalschedule",
"pk": 6,
"fields": {
"every": 28,
"period": "days"
}
},
{
"model": "django_celery_beat.intervalschedule",
"pk": 7,
"fields": {
"every": 8,
"period": "hours"
}
},
{
"model": "django_celery_beat.intervalschedule",
"pk": 8,
"fields": {
"every": 4,
"period": "hours"
}
},
{
"model": "django_celery_beat.intervalschedule",
"pk": 9,
"fields": {
"every": 7,
"period": "days"
}
},
{
"model": "django_celery_beat.crontabschedule",
"pk": 1,
"fields": {
"minute": "0",
"hour": "4",
"day_of_month": "*",
"month_of_year": "*",
"day_of_week": "*",
"timezone": "UTC"
}
}
]
]

67
app_urls/supervisord.conf Normal file
View File

@@ -0,0 +1,67 @@
[supervisord]
nodaemon=true
[program:server]
command=gunicorn core.wsgi:application --bind 0.0.0.0:8000
directory=/opt/app
autostart=true
autorestart=true
; Unified log file
stdout_logfile=/opt/logs/server.log
stderr_logfile=/opt/logs/server.log
redirect_stderr=true
; Rotate when file reaches max size
stdout_logfile_maxbytes=20MB
stdout_logfile_backups=1
[program:beat]
command=celery -A core beat -l info --scheduler django_celery_beat.schedulers:DatabaseScheduler --logfile=/opt/logs/beat.log
directory=/opt/app
autostart=true
autorestart=true
; Unified log file
stdout_logfile=/opt/logs/beat.log
stderr_logfile=/opt/logs/beat.log
redirect_stderr=true
; Rotate when file reaches max size
stdout_logfile_maxbytes=20MB
stdout_logfile_backups=1
[program:worker_default]
command=celery -A core worker -l info --logfile=/opt/logs/worker_default.log --concurrency=1 -Q default -n default
directory=/opt/app
autostart=true
autorestart=true
; Unified log file
stdout_logfile=/opt/logs/worker_default.log
stderr_logfile=/opt/logs/worker_default.log
redirect_stderr=true
; Rotate when file reaches max size
stdout_logfile_maxbytes=20MB
stdout_logfile_backups=1
[program:worker_light]
command=celery -A core worker -l info --logfile=/opt/logs/worker_light.log --concurrency=1 -Q light -n light
directory=/opt/app
autostart=true
autorestart=true
; Unified log file
stdout_logfile=/opt/logs/worker_light.log
stderr_logfile=/opt/logs/worker_light.log
redirect_stderr=true
; Rotate when file reaches max size
stdout_logfile_maxbytes=20MB
stdout_logfile_backups=1
[program:worker_heavy]
command=celery -A core worker -l info --logfile=/opt/logs/worker_heavy.log --concurrency=1 -Q heavy -n heavy
directory=/opt/app
autostart=true
autorestart=true
; Unified log file
stdout_logfile=/opt/logs/worker_heavy.log
stderr_logfile=/opt/logs/worker_heavy.log
redirect_stderr=true
; Rotate when file reaches max size
stdout_logfile_maxbytes=20MB
stdout_logfile_backups=1

112
docker-compose-base.yml Normal file
View File

@@ -0,0 +1,112 @@
services:
fetcher_app_selenium:
image: fetcher_app_selenium
build:
context: ./app_selenium
args:
- ARCH=${ARCH} # arm64, amd64
container_name: fetcher_app_selenium
restart: unless-stopped
shm_size: 512mb
init: true # For zombie processes
environment:
- SELENIUM_SLEEP_PER_PAGE=${SELENIUM_SLEEP_PER_PAGE}
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY}
ports:
- 80
dns:
- 1.1.1.1
- 1.0.0.1
fetcher_app_urls:
image: fetcher_app_urls
build:
context: ./app_urls
container_name: fetcher_app_urls
restart: unless-stopped
environment:
# Initialization
- INITIALIZE_DB=${INITIALIZE_DB} # Related to DB persistence
- DJANGO_SUPERUSER_USERNAME=${DJANGO_SUPERUSER_USERNAME}
- DJANGO_SUPERUSER_PASSWORD=${DJANGO_SUPERUSER_PASSWORD}
- DJANGO_SUPERUSER_EMAIL=${DJANGO_SUPERUSER_EMAIL}
# Django
- DJANGO_ALLOWED_HOSTS=${DJANGO_ALLOWED_HOSTS} # host1,host2
- DJANGO_ALLOWED_ORIGINS=${DJANGO_ALLOWED_ORIGINS} # Reverse proxy
- DJANGO_SECRET_KEY=${DJANGO_SECRET_KEY}
- DJANGO_DEBUG=${DJANGO_DEBUG}
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY}
# Database
- DB_NAME=${DB_NAME}
- DB_USER=${DB_USER}
- DB_PASSWORD=${DB_PASSWORD}
- DB_HOST=${DB_HOST}
- DB_PORT=${DB_PORT}
- REDIS_CACHE_HOST=${REDIS_CACHE_HOST}
- REDIS_CACHE_PORT=${REDIS_CACHE_PORT}
- REDIS_CELERY_HOST=${REDIS_CELERY_HOST}
- REDIS_CELERY_PORT=${REDIS_CELERY_PORT}
# Job timeout: 30 min
- JOB_DEFAULT_TIMEOUT=${JOB_DEFAULT_TIMEOUT}
# Fetcher
- FETCHER_GNEWS_DECODE_SLEEP=${FETCHER_GNEWS_DECODE_SLEEP}
- FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP=${FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP}
- FETCHER_BETWEEN_SEARCHES_SLEEP=${FETCHER_BETWEEN_SEARCHES_SLEEP} # Sleep time between each search
- FETCHER_URL_HOST_SLEEP=${FETCHER_URL_HOST_SLEEP} # Sleep time between requests to same URL host
- FETCHER_LANGUAGE_DETECTION_MIN_CHAR=${FETCHER_LANGUAGE_DETECTION_MIN_CHAR} # Min amount of characters to run language detection
- FETCHER_INSERT_URL_CACHE_TIME=${FETCHER_INSERT_URL_CACHE_TIME} # Cache time: Insert raw URL
- FETCHER_ERROR_URL_CACHE_TIME=${FETCHER_ERROR_URL_CACHE_TIME} # Cache time: Error on processing URL
# Selenium
- SELENIUM_ENDPOINT=${SELENIUM_ENDPOINT}
- ENDPOINT_OLLAMA=${ENDPOINT_OLLAMA}
# Ghost
- GHOST_ADMIN_API_KEY=${GHOST_ADMIN_API_KEY}
- GHOST_ADMIN_API_URL=${GHOST_ADMIN_API_URL}
- PEXELS_API_KEY=${PEXELS_API_KEY}
- OLLAMA_MODEL_DEFAULT=${OLLAMA_MODEL_DEFAULT}
# Telegram
- TELEGRAM_INFO_BOT_TOKEN=${TELEGRAM_INFO_BOT_TOKEN}
- TELEGRAM_INFO_CHAT_ID=${TELEGRAM_INFO_CHAT_ID}
- TELEGRAM_WARNING_BOT_TOKEN=${TELEGRAM_WARNING_BOT_TOKEN}
- TELEGRAM_WARNING_CHAT_ID=${TELEGRAM_WARNING_CHAT_ID}
########################
ports:
- 8000
depends_on:
- fetcher_db
- fetcher_redis_cache
- fetcher_redis_celery
- fetcher_app_selenium
dns:
- 1.1.1.1
- 1.0.0.1
fetcher_redis_cache:
image: redis:alpine
container_name: fetcher_redis_cache
restart: unless-stopped
ports:
- 6379
fetcher_redis_celery:
image: redis:alpine
container_name: fetcher_redis_celery
restart: unless-stopped
ports:
- 6379
fetcher_db:
container_name: fetcher_db
restart: unless-stopped
fetcher_flower:
image: mher/flower
container_name: fetcher_flower
ports:
- 5555
environment:
- CELERY_BROKER_URL=redis://fetcher_redis_celery:6379/0
- FLOWER_UNAUTHENTICATED_API=true
depends_on:
- fetcher_redis_celery

View File

@@ -1,24 +1,9 @@
version: '3.9'
services:
fetcher_app_selenium:
image: fetcher_app_selenium
build:
context: ./app_selenium
args:
- ARCH=${ARCH} # arm64, amd64
container_name: fetcher_app_selenium
restart: unless-stopped
shm_size: 512mb
environment:
- SELENIUM_SLEEP_PER_PAGE=${SELENIUM_SLEEP_PER_PAGE}
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY}
ports:
- 80:80
dns:
- 1.1.1.1
- 1.0.0.1
extends:
file: docker-compose-base.yml
service: fetcher_app_selenium
deploy:
resources:
limits:
@@ -26,65 +11,11 @@ services:
memory: ${DEPLOY_RAM}
fetcher_app_urls:
image: fetcher_app_urls
build:
context: ./app_urls
container_name: fetcher_app_urls
restart: unless-stopped
environment:
# Initialization
- INITIALIZE_DB=${INITIALIZE_DB} # Related to DB persistence
- DJANGO_SUPERUSER_USERNAME=${DJANGO_SUPERUSER_USERNAME}
- DJANGO_SUPERUSER_PASSWORD=${DJANGO_SUPERUSER_PASSWORD}
- DJANGO_SUPERUSER_EMAIL=${DJANGO_SUPERUSER_EMAIL}
# Django
- DJANGO_ALLOWED_HOSTS=${DJANGO_ALLOWED_HOSTS} # host1,host2
- DJANGO_ALLOWED_ORIGINS=${DJANGO_ALLOWED_ORIGINS} # Reverse proxy
- DJANGO_SECRET_KEY=${DJANGO_SECRET_KEY}
- DJANGO_DEBUG=${DJANGO_DEBUG}
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY}
# Database
- DB_NAME=${DB_NAME}
- DB_USER=${DB_USER}
- DB_PASSWORD=${DB_PASSWORD}
- DB_HOST=${DB_HOST}
- DB_PORT=${DB_PORT}
- REDIS_HOST=${REDIS_HOST}
- REDIS_PORT=${REDIS_PORT}
# Job timeout: 30 min
- JOB_DEFAULT_TIMEOUT=${JOB_DEFAULT_TIMEOUT}
# Fetcher
- FETCHER_GNEWS_DECODE_SLEEP=${FETCHER_GNEWS_DECODE_SLEEP}
- FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP=${FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP}
- FETCHER_BETWEEN_SEARCHES_SLEEP=${FETCHER_BETWEEN_SEARCHES_SLEEP} # Sleep time between each search
- FETCHER_URL_HOST_SLEEP=${FETCHER_URL_HOST_SLEEP} # Sleep time between requests to same URL host
- FETCHER_LANGUAGE_DETECTION_MIN_CHAR=${FETCHER_LANGUAGE_DETECTION_MIN_CHAR} # Min amount of characters to run language detection
- FETCHER_INSERT_URL_CACHE_TIME=${FETCHER_INSERT_URL_CACHE_TIME} # Cache time: Insert raw URL
- FETCHER_ERROR_URL_CACHE_TIME=${FETCHER_ERROR_URL_CACHE_TIME} # Cache time: Error on processing URL
# Selenium
- SELENIUM_ENDPOINT=${SELENIUM_ENDPOINT}
- ENDPOINT_OLLAMA=${ENDPOINT_OLLAMA}
# Ghost
- GHOST_ADMIN_API_KEY=${GHOST_ADMIN_API_KEY}
- GHOST_ADMIN_API_URL=${GHOST_ADMIN_API_URL}
- PEXELS_API_KEY=${PEXELS_API_KEY}
########################
volumes: # Development mode
- ./app_urls:/opt/app
########################
ports:
- 8000:8000
depends_on:
- fetcher_db
- fetcher_redis
dns:
- 1.1.1.1
- 1.0.0.1
deploy:
resources:
limits:
cpus: '${DEPLOY_CPUS}'
memory: ${DEPLOY_RAM}
extends:
file: docker-compose-base.yml
service: fetcher_app_urls
#env_files:
# - .env.dev
#labels: # Reverse proxy sample
# - "traefik.enable=true"
# - "traefik.http.routers.fetcher.rule=Host(`urls.yourdomain.com`)"
@@ -94,11 +25,21 @@ services:
#networks:
# - default # This network
# - docker_default # Reverse proxy network
ports:
- 8005:8000
## volumes: # Development mode
## - ./app_urls:/opt/app
deploy:
resources:
limits:
cpus: '${DEPLOY_CPUS}'
memory: ${DEPLOY_RAM}
fetcher_db:
extends:
file: docker-compose-base.yml
service: fetcher_db
image: postgres:17
container_name: fetcher_db
restart: unless-stopped
# Set shared memory limit when using docker-compose
shm_size: 128mb
environment:
@@ -106,18 +47,28 @@ services:
POSTGRES_PASSWORD: ${DB_PASSWORD}
POSTGRES_USER: ${DB_USER}
POSTGRES_INITDB_ARGS: '--data-checksums'
#volumes: # Persistent DB?
# - ${PATH_DB_DATA}/postgres:/var/lib/postgresql/data
ports:
- 5432 #:5432
#volumes: # Persistent DB?
# - ./postgres:/var/lib/postgresql/data
fetcher_redis:
image: redis:alpine
container_name: fetcher_redis
restart: unless-stopped
fetcher_redis_cache:
extends:
file: docker-compose-base.yml
service: fetcher_redis_cache
ports:
- 6379 #:6379
- 6379
#networks:
# docker_default:
# external: true
fetcher_redis_celery:
extends:
file: docker-compose-base.yml
service: fetcher_redis_celery
ports:
- 6379
fetcher_flower:
extends:
file: docker-compose-base.yml
service: fetcher_flower
ports:
- 5555:5555


@@ -1,24 +1,9 @@
version: '3.9'
services:
fetcher_app_selenium:
image: fetcher_app_selenium
build:
context: ./app_selenium
args:
- ARCH=${ARCH} # arm64, amd64
container_name: fetcher_app_selenium
restart: unless-stopped
shm_size: 512mb
environment:
- SELENIUM_SLEEP_PER_PAGE=${SELENIUM_SLEEP_PER_PAGE}
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY}
ports:
- 80
dns:
- 1.1.1.1
- 1.0.0.1
extends:
file: docker-compose-base.yml
service: fetcher_app_selenium
deploy:
resources:
limits:
@@ -26,98 +11,59 @@ services:
memory: ${DEPLOY_RAM}
fetcher_app_urls:
image: fetcher_app_urls
build:
context: ./app_urls
container_name: fetcher_app_urls
restart: unless-stopped
environment:
# Initialization
- INITIALIZE_DB=${INITIALIZE_DB} # Related to DB persistence
- DJANGO_SUPERUSER_USERNAME=${DJANGO_SUPERUSER_USERNAME}
- DJANGO_SUPERUSER_PASSWORD=${DJANGO_SUPERUSER_PASSWORD}
- DJANGO_SUPERUSER_EMAIL=${DJANGO_SUPERUSER_EMAIL}
# Django
- DJANGO_ALLOWED_HOSTS=${DJANGO_ALLOWED_HOSTS} # host1,host2
- DJANGO_ALLOWED_ORIGINS=${DJANGO_ALLOWED_ORIGINS} # Reverse proxy
- DJANGO_SECRET_KEY=${DJANGO_SECRET_KEY}
- DJANGO_DEBUG=${DJANGO_DEBUG}
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY}
# Database
- DB_NAME=${DB_NAME}
- DB_USER=${DB_USER}
- DB_PASSWORD=${DB_PASSWORD}
- DB_HOST=${DB_HOST}
- DB_PORT=${DB_PORT}
- REDIS_HOST=${REDIS_HOST}
- REDIS_PORT=${REDIS_PORT}
# Job timeout: 30 min
- JOB_DEFAULT_TIMEOUT=${JOB_DEFAULT_TIMEOUT}
# Fetcher
- FETCHER_GNEWS_DECODE_SLEEP=${FETCHER_GNEWS_DECODE_SLEEP}
- FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP=${FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP}
- FETCHER_BETWEEN_SEARCHES_SLEEP=${FETCHER_BETWEEN_SEARCHES_SLEEP} # Sleep time between each search
- FETCHER_URL_HOST_SLEEP=${FETCHER_URL_HOST_SLEEP} # Sleep time between requests to same URL host
- FETCHER_LANGUAGE_DETECTION_MIN_CHAR=${FETCHER_LANGUAGE_DETECTION_MIN_CHAR} # Min amount of characters to run language detection
- FETCHER_INSERT_URL_CACHE_TIME=${FETCHER_INSERT_URL_CACHE_TIME} # Cache time: Insert raw URL
- FETCHER_ERROR_URL_CACHE_TIME=${FETCHER_ERROR_URL_CACHE_TIME} # Cache time: Error on processing URL
# Selenium
- SELENIUM_ENDPOINT=${SELENIUM_ENDPOINT}
- ENDPOINT_OLLAMA=${ENDPOINT_OLLAMA}
# Ghost
- GHOST_ADMIN_API_KEY=${GHOST_ADMIN_API_KEY}
- GHOST_ADMIN_API_URL=${GHOST_ADMIN_API_URL}
- PEXELS_API_KEY=${PEXELS_API_KEY}
########################
#volumes: # Development mode
# - ./app_urls:/opt/app
########################
extends:
file: docker-compose-base.yml
service: fetcher_app_urls
ports:
- 8000 # :8000
depends_on:
- fetcher_db
- fetcher_redis
dns:
- 1.1.1.1
- 1.0.0.1
- 8000:8000
deploy:
resources:
limits:
cpus: '${DEPLOY_CPUS}'
memory: ${DEPLOY_RAM}
labels: # Reverse proxy sample
- "traefik.enable=true"
- "traefik.http.routers.fetcher.rule=Host(`${REVERSE_PROXY_URL}`)"
- "traefik.http.routers.fetcher.entrypoints=websecure"
- "traefik.http.routers.fetcher.tls.certresolver=myresolvercd"
- "traefik.http.services.fetcher.loadbalancer.server.port=8000"
networks:
- default # This network
- docker_default # Reverse proxy network
fetcher_db:
image: postgres:17
container_name: fetcher_db
extends:
file: docker-compose-base.yml
service: fetcher_db
image: alpine:latest
restart: unless-stopped
# Set shared memory limit when using docker-compose
shm_size: 128mb
environment:
POSTGRES_DB: ${DB_NAME}
POSTGRES_PASSWORD: ${DB_PASSWORD}
POSTGRES_USER: ${DB_USER}
POSTGRES_INITDB_ARGS: '--data-checksums'
volumes: # Persistent DB?
- ${PATH_DB_DATA}/postgres:/var/lib/postgresql/data
deploy:
resources:
limits:
memory: 256M
volumes:
# REQUIREMENT: Add the fetcher's SSH public key to the DB machine's .ssh/authorized_keys
- ~/.ssh:/root/.ssh:ro
ports:
- 5432 #:5432
- 15885:15885
- 5432:5432
command:
- sh
- -c
- |
apk add --update openssh autossh
# Monitor status on port 15885
autossh -M 15885 -N -L 0.0.0.0:5432:127.0.0.1:5432 ${REMOTE_USERNAME}@${REMOTE_HOST}
# autossh -M 15885 -N -o 'GatewayPorts yes' -L 0.0.0.0:5432:127.0.0.1:5432 ${REMOTE_USERNAME}@${REMOTE_HOST}
fetcher_redis:
image: redis:alpine
container_name: fetcher_redis
restart: unless-stopped
fetcher_redis_cache:
extends:
file: docker-compose-base.yml
service: fetcher_redis_cache
ports:
- 6379 #:6379
- 6379
networks:
docker_default:
external: true
fetcher_redis_celery:
extends:
file: docker-compose-base.yml
service: fetcher_redis_celery
ports:
- 6379
fetcher_flower:
extends:
file: docker-compose-base.yml
service: fetcher_flower
ports:
- 5555:5555
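In this override, fetcher_db is no longer Postgres itself: an Alpine container installs autossh and forwards local port 5432 to the remote database over SSH, with autossh monitoring on port 15885. A small illustrative check, not part of the repo, that the forwarded port is reachable from a sibling container before the app connects; the host and port come from the compose file, the helper itself is assumed.

import socket

def tunnel_is_up(host: str = "fetcher_db", port: int = 5432, timeout: float = 3.0) -> bool:
    """Return True if a TCP connection to the SSH-forwarded Postgres port succeeds."""
    try:
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except OSError:
        return False

if __name__ == "__main__":
    print("fetcher_db tunnel reachable:", tunnel_is_up())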

utils/DB-Dev.ipynb (new file, 79 lines)

@@ -0,0 +1,79 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#!pip install python-dotenv\n",
"from dotenv import load_dotenv\n",
"\n",
"# Specify the path to your .env file (optional if in the current dir)\n",
"load_dotenv(dotenv_path=\".env\", override=True)\n",
"\n",
"import os\n",
"import psycopg\n",
"from sshtunnel import SSHTunnelForwarder\n",
"\n",
"if (os.environ.get(\"SSH_TUNNEL_BASED\") == \"true\"):\n",
" print(\"SSH tunnel: True\")\n",
"else:\n",
" print(\"SSH tunnel: False\")\n",
"\n",
"connect_info = \"host={} port={} user={} password={} dbname={}\".format(os.environ.get(\"DB_HOST\"), os.environ.get(\"DB_PORT\"), os.environ.get(\"DB_USER\"), os.environ.get(\"DB_PASSWORD\"), os.environ.get(\"DB_NAME\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"if (os.environ.get(\"SSH_TUNNEL_BASED\") == \"true\"):\n",
" ssh_tunnel = SSHTunnelForwarder(\n",
" (os.environ.get(\"REMOTE_HOST\"), int(os.environ.get(\"REMOTE_SSH_PORT\"))), \n",
" ssh_username=os.environ.get(\"REMOTE_USERNAME\"), ssh_password=os.environ.get(\"REMOTE_PASSWORD\"), \n",
" remote_bind_address=('localhost', int(os.environ.get(\"REMOTE_PORT\"))), local_bind_address=('localhost', int(os.environ.get(\"DB_PORT\"))) \n",
" )\n",
" ssh_tunnel.start()\n",
"\n",
"try:\n",
" with psycopg.connect(connect_info) as conn:\n",
" if True:\n",
" for t in conn.execute(\"\"\"\n",
" SELECT * from URLS WHERE id IN (SELECT id_url FROM URLS_SOURCE_SEARCH INNER JOIN SEARCH ON URLS_SOURCE_SEARCH.id_search = SEARCH.id WHERE SEARCH.search LIKE '%child abuse%') LIMIT 5;\n",
" \"\"\").fetchall():\n",
" print(t)\n",
" \n",
"except Exception as e:\n",
" print(\"Err:\", str(e))\n",
"\n",
"if (os.environ.get(\"SSH_TUNNEL_BASED\") == \"true\"):\n",
" ssh_tunnel.stop()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "matitos_urls",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
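The notebook interpolates the LIKE pattern directly into the SQL string. A hedged alternative sketch of the same query using psycopg's parameter binding; the connection string and table names are taken from the notebook, only the parameterisation is a suggested variation.

import os
import psycopg

connect_info = "host={} port={} user={} password={} dbname={}".format(
    os.environ.get("DB_HOST"), os.environ.get("DB_PORT"), os.environ.get("DB_USER"),
    os.environ.get("DB_PASSWORD"), os.environ.get("DB_NAME"))

query = """
    SELECT * FROM URLS
    WHERE id IN (
        SELECT id_url FROM URLS_SOURCE_SEARCH
        INNER JOIN SEARCH ON URLS_SOURCE_SEARCH.id_search = SEARCH.id
        WHERE SEARCH.search LIKE %s
    ) LIMIT 5;
"""

with psycopg.connect(connect_info) as conn:
    for row in conn.execute(query, ("%child abuse%",)):  # pattern bound as a parameter
        print(row)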

utils/Ghost-Posts.ipynb (new file, 164 lines)

@@ -0,0 +1,164 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import time\n",
"import jwt\n",
"import requests\n",
"from datetime import datetime, timedelta, timezone\n",
"\n",
"admin_api_url = \"\" # .env -> GHOST_ADMIN_API_URL\n",
"admin_api_key = \"\" # .env -> GHOST_ADMIN_API_KEY\n",
"\n",
"def _create_jwt(admin_api_key):\n",
" id_, secret = admin_api_key.split(':')\n",
" iat = int(time.time())\n",
" exp = iat + 5 * 60 # 5 minutes\n",
" header = {'alg': 'HS256', 'kid': id_}\n",
" payload = {\n",
" 'iat': iat,\n",
" 'exp': exp,\n",
" 'aud': '/v5/admin/' # Adjust depending on your Ghost version\n",
" }\n",
" token = jwt.encode(payload, bytes.fromhex(secret), algorithm='HS256', headers=header)\n",
" return token\n",
"\n",
"# Get token\n",
"jwt_token = _create_jwt(os.getenv(\"GHOST_ADMIN_API_KEY\"))\n",
"\n",
"headers = {\n",
" 'Authorization': f'Ghost {jwt_token}',\n",
" 'Content-Type': 'application/json'\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"DELETE_ALL_POSTS = False\n",
"\n",
"if DELETE_ALL_POSTS:\n",
" while (True):\n",
" # GET /admin/posts/\n",
" response = requests.get(os.path.join(admin_api_url, \"posts\"), headers=headers)\n",
" dict_response = response.json()\n",
"\n",
" if (len(dict_response.get(\"posts\")) == 0):\n",
" break\n",
"\n",
" # Iterate posts\n",
" for p in dict_response.get(\"posts\"):\n",
" # Post ID\n",
" post_id = p.get(\"id\")\n",
"\n",
" # DELETE /admin/posts/{id}/\n",
" r = requests.delete(os.path.join(admin_api_url, \"posts\", \"{}\".format(post_id)), headers=headers)\n",
" print(\"Post:\", post_id, \"Status:\", r.status_code, r.text)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"PUBLISH_SAMPLE = False\n",
"\n",
"def _create_ghost_post(jwt_token, admin_api_url, post_data):\n",
" # Get Admin API URL\n",
" admin_api_url = os.getenv(\"GHOST_ADMIN_API_URL\")\n",
"\n",
" headers = {\n",
" 'Authorization': f'Ghost {jwt_token}',\n",
" 'Content-Type': 'application/json'\n",
" }\n",
" \n",
" post_data = {\"posts\": [post_data]}\n",
"\n",
" response = requests.post(\n",
" os.path.join(admin_api_url, \"posts\"),\n",
" json=post_data,\n",
" headers=headers,\n",
" params={\"source\":\"html\"}\n",
" )\n",
"\n",
" if response.status_code == 201:\n",
" print(\"Ghost post published successfully\")\n",
" return response.json()\n",
" else:\n",
" print(\"Ghost - Failed to publish post: {} {}\".format(response.status_code, response.text))\n",
" return None\n",
"\n",
"if (PUBLISH_SAMPLE):\n",
" url_id = 150\n",
"\n",
" post_data = {\n",
" # \"slug\": \"hey-short\",\n",
" \"title\": \"Hey there, sample title\",\n",
" \"html\": \"<p>Hey there!</p>\",\n",
" # \"feature_image\": photo_url,\n",
" # \"feature_image_caption\": \"\",\n",
" \"status\": \"published\",\n",
" \"tags\": [\"#url-id-{}\".format(url_id)]\n",
" }\n",
"\n",
" # Publish post\n",
" payload = _create_ghost_post(jwt_token, admin_api_url, post_data)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Filter by post title\n",
"post_title = \"Funds raised for legal action over failure to stop grooming gangs\"\n",
"# Filter by published date\n",
"iso_time = (datetime.now(timezone.utc) - timedelta(hours=48)).strftime('%Y-%m-%dT%H:%M:%S') + 'Z'\n",
"# Parameter for filter\n",
"params = {\"filter\": \"title:'{}'+published_at:>{}\".format(post_title, iso_time)}\n",
"\n",
"# Filter by URL ID\n",
"url_id = 150\n",
"# Parameter for filter\n",
"params = {\"filter\": \"tags:hash-url-id-{}\".format(url_id)}\n",
"\n",
"# Get posts using filter\n",
"response = requests.get(os.path.join(admin_api_url, \"posts\"), params=params, headers=headers)\n",
"dict_response = response.json()\n",
"\n",
"len(dict_response.get(\"posts\"))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "matitos_urls",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
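One detail worth flagging in the notebook above: posts are published with the private tag "#url-id-<n>", but the Admin API filter refers to that tag as "hash-url-id-<n>". A short sketch wrapping that lookup; the URL layout, headers and filter syntax follow the notebook, while the helper name is new and illustrative.

import os
import requests

def find_posts_by_url_id(admin_api_url, headers, url_id):
    """Ghost slugs a leading '#' in tag names as 'hash-', so '#url-id-150' is filtered as 'hash-url-id-150'."""
    params = {"filter": "tags:hash-url-id-{}".format(url_id)}
    response = requests.get(os.path.join(admin_api_url, "posts"), params=params, headers=headers)
    response.raise_for_status()
    return response.json().get("posts", [])

# posts = find_posts_by_url_id(admin_api_url, headers, 150)  # headers carry the Ghost JWT built above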

utils/Newspapers.ipynb (new file, 215 lines)

@@ -0,0 +1,215 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"url = \"https://onlinenewspapers.com/index.shtml\""
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'newspaper/0.9.3.1'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"\"\"\n",
"import newspaper\n",
"\n",
"newspaper.Config().__dict__\n",
"\n",
" 'requests_params': {'timeout': 7,\n",
" 'proxies': {},\n",
" 'headers': {'User-Agent': 'newspaper/0.9.3.1'}},\n",
"\"\"\"\n",
"import newspaper\n",
"newspaper.Config().browser_user_agent"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\"\"\"\n",
" url (str): The url of the source (news website) to build. For example,\n",
" `https://www.cnn.com`.\n",
" dry (bool): If true, the source object will be constructed but not\n",
" downloaded or parsed.\n",
" only_homepage (bool): If true, the source object will only parse\n",
" the homepage of the source.\n",
" only_in_path (bool): If true, the source object will only\n",
" parse the articles that are in the same path as the source's\n",
" homepage. You can scrape a specific category this way.\n",
" Defaults to False.\n",
" input_html (str): The HTML of the source to parse. Use this to pass cached\n",
" HTML to the source object.\n",
" config (Configuration): A configuration object to use for the source.\n",
" kwargs: Any other keyword arguments to pass to the Source constructor.\n",
" If you omit the config object, you can add any configuration\n",
" options here.\n",
"\"\"\"\n",
"\n",
"url = \"https://www.lanacion.com.ar/deportes/\"\n",
"\n",
"newspaper_built = newspaper.build(url, only_in_path=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"newspaper_built.__dict__"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"newspaper_built.article_urls()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"\n",
"url = \"https://www.lanacion.com.ar/\"\n",
"#url = \"https://www.lanacion.com.ar/deportes/\"\n",
"newspaper_built = newspaper.build(url)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\"\"\"\n",
" url (str): The url of the source (news website) to build. For example,\n",
" `https://www.cnn.com`.\n",
" dry (bool): If true, the source object will be constructed but not\n",
" downloaded or parsed.\n",
" only_homepage (bool): If true, the source object will only parse\n",
" the homepage of the source.\n",
" only_in_path (bool): If true, the source object will only\n",
" parse the articles that are in the same path as the source's\n",
" homepage. You can scrape a specific category this way.\n",
" Defaults to False.\n",
" input_html (str): The HTML of the source to parse. Use this to pass cached\n",
" HTML to the source object.\n",
" config (Configuration): A configuration object to use for the source.\n",
" kwargs: Any other keyword arguments to pass to the Source constructor.\n",
" If you omit the config object, you can add any configuration\n",
" options here.\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cat = newspaper_built.categories[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"newspaper_built.categories_to_articles()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"newspaper_built.category_urls()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
" 'https://www.lanacion.com.ar/tema/futbol-argentino-tid57505/',\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"categories = newspaper_built.category_urls()\n",
"url_of_interest = \"https://www.lanacion.com.ar/sabado/todo-para-ellos-nid21042025/\"\n",
"\n",
"potential_categories = []\n",
"\n",
"for c in categories:\n",
" if (c in url_of_interest):\n",
" print(c, url_of_interest)\n",
" potential_categories.append(c)\n",
"\n",
"# Get longest length category"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"newspaper_built.article_urls()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "matitos_urls",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
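The last exploratory cell stops at the comment "# Get longest length category". A minimal sketch of that final step, choosing the longest matching category URL as the most specific one; the helper name and the sample category list are illustrative, only the matching logic follows the notebook.

def most_specific_category(category_urls, article_url):
    """Return the longest category URL contained in the article URL, or None."""
    matches = [c for c in category_urls if c in article_url]
    return max(matches, key=len) if matches else None

# Illustrative values in the spirit of the notebook's lanacion.com.ar example:
categories = ["https://www.lanacion.com.ar/", "https://www.lanacion.com.ar/sabado/"]
print(most_specific_category(categories, "https://www.lanacion.com.ar/sabado/todo-para-ellos-nid21042025/"))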


@@ -11,6 +11,8 @@
"from urllib.parse import urljoin\n",
"import pandas as pd\n",
"import os\n",
"import json\n",
"import csv\n",
"\n",
"headers = {\"User-Agent\": \"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36\"}"
]
@@ -68,6 +70,154 @@
" # websites.append(href)\n",
" return href\n",
"\n",
"def get_num_students_per_zipcode(soup):\n",
" list_zipcode_students_percentage = []\n",
"\n",
" h3_tag = soup.find(\"h3\", string=\"In welk postcodegebied wonen de leerlingen van deze school?\")\n",
" if h3_tag:\n",
" dialog = h3_tag.find_parent(\"dialog\")\n",
"\n",
" if dialog:\n",
" # print(dialog.prettify())\n",
" table = dialog.find(\"table\")\n",
" if table:\n",
" rows = table.find_all(\"tr\")\n",
" for row in rows:\n",
" cells = row.find_all([\"th\", \"td\"])\n",
" row_data = [cell.get_text(strip=True) for cell in cells]\n",
" zipcode, num_students, percentage = row_data\n",
" list_zipcode_students_percentage.append( (zipcode, num_students, percentage) )\n",
" \n",
" return list_zipcode_students_percentage\n",
"\n",
"def get_num_students_trend(soup):\n",
" # Step 1: Locate the <aantal-leerlingen-trend-line-chart> tag\n",
" trend_chart_tag = soup.find(\"aantal-leerlingen-trend-line-chart\")\n",
"\n",
" if trend_chart_tag:\n",
" # Step 2: Extract the 'leerlingen-trend-data' attribute\n",
" trend_data_attr = trend_chart_tag.get(\"leerlingen-trend-data\")\n",
" \n",
" if trend_data_attr:\n",
" # Step 3: Parse the JSON string into a Python object\n",
" trend_data = json.loads(trend_data_attr)\n",
" #print(\"Extracted leerlingen-trend-data:\")\n",
" #print(json.dumps(trend_data, indent=4)) # Pretty-print the JSON data\n",
" return [ (e.get(\"key\"), e.get(\"aantal\") ) for e in trend_data]\n",
"\n",
"def get_num_students_per_age_and_group(soup):\n",
" num_students_per_group, num_students_per_age = [], []\n",
" ############################################################################\n",
" # Step 1: Locate the <aantal-leerlingen-leeftijd-bar-chart> tag\n",
" chart_tag = soup.find('aantal-leerlingen-leeftijd-bar-chart', attrs={'aantal-per-leeftijd': True})\n",
" # Step 2: Extract the 'aantal-per-leeftijd' attribute\n",
" raw_data = chart_tag['aantal-per-leeftijd']\n",
"\n",
" # Step 3: Parse the JSON data\n",
" try:\n",
" data = json.loads(raw_data)\n",
" # Step 4: Print the extracted data\n",
" # print(\"Aantal per Leeftijd:\")\n",
" for entry in data:\n",
" age = entry['key']\n",
" num_students = entry['aantal']\n",
" # school_data[\"num_students_age_{}\".format(age)] = num_students\n",
" num_students_per_age.append( (age, num_students) )\n",
" # print(f\"Age {entry['key']}: {entry['aantal']} leerlingen\")\n",
" except json.JSONDecodeError as e:\n",
" print(f\"Failed to parse JSON data: {e}\")\n",
"\n",
" ############################################################################\n",
" # Step 1: Locate the <aantal-leerlingen-leerjaar-bar-chart> tag\n",
" chart_tag = soup.find('aantal-leerlingen-leerjaar-bar-chart', attrs={'aantal-per-leerjaar': True})\n",
"\n",
" if not chart_tag:\n",
" print(\"Could not find the 'aantal per leerjaar' section.\")\n",
" else:\n",
" # Step 2: Extract the 'aantal-per-leerjaar' attribute\n",
" raw_data = chart_tag['aantal-per-leerjaar']\n",
" \n",
" # Step 3: Parse the JSON data\n",
" try:\n",
" data = json.loads(raw_data)\n",
" # Step 4: Print the extracted data\n",
" # print(\"Aantal per Leerjaar:\")\n",
" for entry in data:\n",
" group = entry['key']\n",
" num_students = entry['aantal']\n",
" # school_data[\"num_students_group_{}\".format(group)] = num_students\n",
" num_students_per_group.append( (group, num_students) )\n",
" # print(f\"Groep {entry['key']}: {entry['aantal']} leerlingen\")\n",
" except json.JSONDecodeError as e:\n",
" print(f\"Failed to parse JSON data: {e}\")\n",
" ############################################################################\n",
" return num_students_per_group, num_students_per_age\n",
"\n",
"\n",
"def update_school_data(school_url, school_data):\n",
" try:\n",
" # Process school (request contact details)\n",
" response = requests.get(os.path.join(school_url, \"contact/#inhoud\"), headers=headers)\n",
" response.raise_for_status() # Raise an exception for HTTP errors\n",
" # Parse the HTML content using BeautifulSoup\n",
" soup_school = BeautifulSoup(response.text, 'html.parser')\n",
"\n",
" # School details\n",
" school_details = soup_school.find(class_=\"school-details\")\n",
" for category_idx, li_detail in enumerate(school_details.find_all(\"li\")):\n",
" data = li_detail.find('span', class_='infotip-term')['data-dfn']\n",
" text = li_detail.get_text(strip=True)\n",
" # Set data\n",
" school_data[\"category_{}\".format(category_idx)] = text\n",
" school_data[\"category_{}_description\".format(category_idx)] = data\n",
" \n",
" school_address = soup_school.find(class_=\"school-adres\").get_text(strip=True)\n",
" school_postcode_city = soup_school.find(class_=\"school-postcode-woonplaats\").get_text(strip=True)\n",
" school_postcode = \"\".join(school_postcode_city.split(\" \")[:2])\n",
" school_city = \" \".join(school_postcode_city.split(\" \")[2:])\n",
"\n",
" school_data[\"city\"] = school_city\n",
" school_data[\"postcode\"] = school_postcode\n",
" school_data[\"address\"] = school_address\n",
"\n",
" try:\n",
" school_data[\"website\"] = find_website(soup_school) # soup_school.find(class_=\"button button-primary\").get('href')\n",
" except Exception as e:\n",
" pass\n",
" try:\n",
" school_data[\"phone\"] = soup_school.find('a', href=lambda href: href and href.startswith('tel:')).text\n",
" except Exception as e:\n",
" pass\n",
" try:\n",
" school_data[\"email\"] = extract_emails(soup_school)\n",
" except Exception as e:\n",
" pass\n",
"\n",
" # Process school main site\n",
" response = requests.get(os.path.join(school_url), headers=headers)\n",
" response.raise_for_status() # Raise an exception for HTTP errors\n",
" # Parse the HTML content using BeautifulSoup\n",
" soup_school = BeautifulSoup(response.text, 'html.parser')\n",
"\n",
" try:\n",
" school_data[\"students_per_zipcode\"] = get_num_students_per_zipcode(soup_school)\n",
" except Exception as e:\n",
" pass\n",
" try:\n",
" school_data[\"students_per_year_trend\"] = get_num_students_trend(soup_school)\n",
" except Exception as e:\n",
" pass\n",
"\n",
" if (school_data.get(\"category\").lower() == \"basisscholen\"):\n",
" try:\n",
" num_students_per_group, num_students_per_age = get_num_students_per_age_and_group(soup_school)\n",
" school_data[\"num_students_per_group\"] = num_students_per_group if len(num_students_per_group)>0 else None\n",
" school_data[\"num_students_per_age\"] = num_students_per_age if len(num_students_per_age)>0 else None\n",
" except Exception as e:\n",
" pass\n",
" \n",
" except Exception as e:\n",
" print(school_url, str(e))\n",
"\n",
"def main():\n",
" list_urls = [\n",
@@ -128,54 +278,26 @@
" \"url\": school_url,\n",
" }\n",
"\n",
" try:\n",
" # Process school (request contact details)\n",
" response = requests.get(os.path.join(school_url, \"contact/#inhoud\"), headers=headers)\n",
" response.raise_for_status() # Raise an exception for HTTP errors\n",
"\n",
" # Parse the HTML content using BeautifulSoup\n",
" soup_school = BeautifulSoup(response.text, 'html.parser')\n",
"\n",
" # School details\n",
" school_details = soup_school.find(class_=\"school-details\")\n",
" for category_idx, li_detail in enumerate(school_details.find_all(\"li\")):\n",
" data = li_detail.find('span', class_='infotip-term')['data-dfn']\n",
" text = li_detail.get_text(strip=True)\n",
" # Set data\n",
" school_data[\"category_{}\".format(category_idx)] = text\n",
" school_data[\"category_{}_description\".format(category_idx)] = data\n",
" \n",
" school_address = soup_school.find(class_=\"school-adres\").get_text(strip=True)\n",
" school_postcode_city = soup_school.find(class_=\"school-postcode-woonplaats\").get_text(strip=True)\n",
" school_postcode = \"\".join(school_postcode_city.split(\" \")[:2])\n",
" school_city = \" \".join(school_postcode_city.split(\" \")[2:])\n",
"\n",
" school_data[\"city\"] = school_city\n",
" school_data[\"postcode\"] = school_postcode\n",
" school_data[\"address\"] = school_address\n",
"\n",
" try:\n",
" school_data[\"website\"] = find_website(soup_school) # soup_school.find(class_=\"button button-primary\").get('href')\n",
" except Exception as e:\n",
" pass\n",
" try:\n",
" school_data[\"phone\"] = soup_school.find('a', href=lambda href: href and href.startswith('tel:')).text\n",
" except Exception as e:\n",
" pass\n",
" try:\n",
" school_data[\"email\"] = extract_emails(soup_school)\n",
" except Exception as e:\n",
" pass\n",
" \n",
" except Exception as e:\n",
" print(school_url, str(e))\n",
" # assert False\n",
" update_school_data(school_url, school_data)\n",
"\n",
" list_school_data_dicts.append(school_data)\n",
"\n",
" df = pd.DataFrame(list_school_data_dicts)\n",
" df.to_csv(\"scholenopdekaart.csv\")\n",
" # Save per processed school to track progress\n",
" df = pd.DataFrame(list_school_data_dicts)\n",
" df.to_csv(\"scholenopdekaart_tmp.csv\", encoding=\"utf-8\", quoting=csv.QUOTE_ALL)\n",
"\n",
" df = pd.DataFrame(list_school_data_dicts)\n",
" df.to_csv(\"scholenopdekaart.csv\", encoding=\"utf-8\", quoting=csv.QUOTE_ALL)\n",
" # Without extra columns\n",
" df.drop(columns=[\"students_per_zipcode\", \"students_per_year_trend\", \"num_students_per_group\", \"num_students_per_age\"]).to_csv(\"scholenopdekaart_.csv\", encoding=\"utf-8\", quoting=csv.QUOTE_ALL)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\"\"\" # Issues with URL:\n",
"https://scholenopdekaart.nl/middelbare-scholen/grave/1900/merletcollege-grave/\n",
"https://scholenopdekaart.nl/middelbare-scholen/lent/4386/citadel-college-locatie-griftdijk/\n",
@@ -211,15 +333,7 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"'''\n",
"school_url = \"https://scholenopdekaart.nl/basisscholen/aalden/9661/christelijke-basisschool-de-schutse/\"\n",
"response = requests.get(os.path.join(school_url, \"contact/#inhoud\"), headers=headers)\n",
"# Parse the HTML content using BeautifulSoup\n",
"soup_school = BeautifulSoup(response.text, 'html.parser')\n",
"soup_school\n",
"'''"
]
"source": []
},
{
"cell_type": "code",
@@ -229,8 +343,9 @@
"source": [
"import pandas as pd\n",
"\n",
"df = pd.read_csv(\"scholenopdekaart.csv\", index_col=0)\n",
"df.loc[0, \"category_3\"]"
"df = pd.read_csv(\"~/Downloads/scholenopdekaart.csv\", index_col=0)\n",
"\n",
"df.head()"
]
},
{
@@ -238,82 +353,102 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"from bs4 import BeautifulSoup\n",
"def to_dict(row):\n",
" # Empty?\n",
" if (pd.isna(row)):\n",
" return {}\n",
" # Evaluate, to dict\n",
" dict_data = dict(eval(row))\n",
" # Remove None values\n",
" for k in list(dict_data.keys()):\n",
" if dict_data[k] is None:\n",
" del dict_data[k]\n",
" # Prefix\n",
" return {f\"{column}_{k}\": v for k, v in dict_data.items()}\n",
"\n",
"# Step 1: Fetch the webpage\n",
"url = \"https://scholenopdekaart.nl/basisscholen/aagtekerke/25963/jhr-willem-versluijsschool/\"\n",
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36\"\n",
"}\n",
"response = requests.get(url, headers=headers)\n",
"\n",
"# Check if the request was successful\n",
"if response.status_code != 200:\n",
" print(f\"Failed to retrieve the page. Status code: {response.status_code}\")\n",
" exit()\n",
"\n",
"# Step 2: Parse the HTML content\n",
"soup = BeautifulSoup(response.text, 'html.parser')"
"for column in [\"students_per_year_trend\", \"num_students_per_group\", \"num_students_per_age\"]:\n",
" print(column)\n",
" # Convert the list of tuples into a dictionary per row\n",
" df_dicts = df[column].apply(to_dict)\n",
" # Expand into separate columns\n",
" df_expanded = pd.json_normalize(df_dicts)\n",
" # Sort\n",
" df_expanded = df_expanded[sorted(df_expanded.columns)]\n",
" # Combine with original columns\n",
" df = pd.concat([df.drop(columns=[column]), df_expanded], axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Aantal per Leerjaar:\n",
"Groep 1: 29 leerlingen\n",
"Groep 2: 28 leerlingen\n",
"Groep 3: 30 leerlingen\n",
"Groep 4: 25 leerlingen\n",
"Groep 5: 19 leerlingen\n",
"Groep 6: 26 leerlingen\n",
"Groep 7: 22 leerlingen\n",
"Groep 8: 20 leerlingen\n"
]
}
],
"outputs": [],
"source": [
"import json\n",
"def to_dict(row):\n",
" # Empty?\n",
" if (pd.isna(row)):\n",
" return {}\n",
" # Evaluate, to dict\n",
" data = eval(row)\n",
" # Remove first useless data\n",
" data = data[1:]\n",
"\n",
"# Step 1: Locate the <aantal-leerlingen-leerjaar-bar-chart> tag\n",
"chart_tag = soup.find('aantal-leerlingen-leerjaar-bar-chart', attrs={'aantal-per-leerjaar': True})\n",
" # Generate dict\n",
" dict_data = {}\n",
" for (zipcode, num, percentage) in data:\n",
" dict_data[f\"num_students_zipcode_{zipcode}\"] = num\n",
" dict_data[f\"percentage_students_zipcode_{zipcode}\"] = percentage\n",
"\n",
"if not chart_tag:\n",
" print(\"Could not find the 'aantal per leerjaar' section.\")\n",
"else:\n",
" # Step 2: Extract the 'aantal-per-leerjaar' attribute\n",
" raw_data = chart_tag['aantal-per-leerjaar']\n",
" \n",
" # Step 3: Parse the JSON data\n",
" try:\n",
" data = json.loads(raw_data)\n",
" \n",
" # Step 4: Print the extracted data\n",
" print(\"Aantal per Leerjaar:\")\n",
" for entry in data:\n",
" print(f\"Groep {entry['key']}: {entry['aantal']} leerlingen\")\n",
" except json.JSONDecodeError as e:\n",
" print(f\"Failed to parse JSON data: {e}\")"
" # Remove None values\n",
" for k in list(dict_data.keys()):\n",
" if dict_data[k] is None:\n",
" del dict_data[k]\n",
" return dict_data\n",
"\n",
"for column in [\"students_per_zipcode\"]:\n",
" print(column)\n",
" # Convert the list of tuples into a dictionary per row\n",
" df_dicts = df[column].apply(to_dict)\n",
" # Expand into separate columns\n",
" df_expanded = pd.json_normalize(df_dicts)\n",
" # Sort\n",
" df_expanded = df_expanded[sorted(df_expanded.columns)]\n",
" # Combine with original columns\n",
" df = pd.concat([df.drop(columns=[column]), df_expanded], axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.to_csv(\"schools_nl.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"list(df.columns)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "matitos_urls",
"display_name": "fetcher",
"language": "python",
"name": "python3"
},
@@ -327,7 +462,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
"version": "3.12.11"
}
},
"nbformat": 4,

utils/Summary.ipynb (new file, 182 lines)

@@ -0,0 +1,182 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# docker exec -it ollama_npu bash\n",
"# rkllama pull\n",
"#\n",
"# c01zaut/Llama-3.2-3B-Instruct-rk3588-1.1.4\n",
"# Llama-3.2-3B-Instruct-rk3588-w8a8-opt-0-hybrid-ratio-0.0.rkllm\n",
"# Llama-3.2-3B-Instruct-rk3588-w8a8-opt-0-hybrid-ratio-0.5.rkllm\n",
"# Llama-3.2-3B-Instruct-rk3588-w8a8-opt-1-hybrid-ratio-0.0.rkllm\n",
"# Llama-3.2-3B-Instruct-rk3588-w8a8-opt-1-hybrid-ratio-0.5.rkllm\n",
"# Llama-3.2-3B-Instruct-rk3588-w8a8_g512-opt-1-hybrid-ratio-0.5.rkllm\n",
"#\n",
"# c01zaut/Qwen2.5-3B-Instruct-RK3588-1.1.4\n",
"# Qwen2.5-3B-Instruct-rk3588-w8a8-opt-1-hybrid-ratio-0.0.rkllm\n",
"# Qwen2.5-3B-Instruct-rk3588-w8a8-opt-1-hybrid-ratio-1.0.rkllm\n",
"# Qwen2.5-3B-Instruct-rk3588-w8a8_g256-opt-1-hybrid-ratio-1.0.rkllm\n",
"#"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"import ollama\n",
"import os\n",
"import requests\n",
"import json\n",
"from pprint import pprint\n",
"\n",
"# endpoint = \"https://ollamamodelnpu.matitos.org\"\n",
"endpoint = \"https://ollamamodel.matitos.org\"\n",
"model = \"qwen3:0.6b\"\n",
"model = \"qwen3:1.7b\"\n",
"client = ollama.Client(endpoint)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"r = requests.post( os.path.join(endpoint, \"unload_model\") )\n",
"r.status_code, r.json()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"r = requests.get( os.path.join(endpoint, \"models\") )\n",
"r.json().get(\"models\"), [ m.model for m in client.list().get(\"models\") ]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model = \"llama3-instruct:3b\"\n",
"model = \"qwen2.5-instruct:3b\""
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"article_content = \"Kevin Sutherland's message to Rowan Lumsden told of his agony at what he believed were malicious rumours about his life. The best friend of tragic Kevin Sutherland has revealed a heartbreaking message sent in the last hours of his life. Rowan Lumsden, 35, says Kevins death would have been avoided if his request for anonymity in the Scottish Child Abuse Inquiry had been accepted. Mum-of-one Rowan told how her friend sent a 17-minute voice message that culminated as he stood on the Forth Road Bridge, where he is thought to have plunged to his death on December 19. The Daily Record has told how Kevin, 33, had ticked a box to say he approved of his testimony of historic abuse that he suffered to be published online. Kevins family later revealed an email sent to the inquiry, in which he begged for his real name to be redacted, suggesting he may take his own life if he was not given that protection. His appeal was dismissed by SCAI chair Lady Smith. Rowan told how Kevin left a harrowing final message, telling of his agony at what he believed to be malicious rumours that plagued his life. Rowan said: “I was asleep when the messages came in and it was devastating to hear his voice, knowing where he was and what was going to happen. I just wish I could have helped. “Kevin was pushed to the limit and he was so troubled about what people were saying about him. “He lived in fear his testimony would be used by people to make him out to be at fault or misconstrued and he bitterly regretted his decision to allow it to be made public. “I have no doubt that he would be alive today if he was allowed to to retract his on story from the record.” Rowan, 35, said Lady Smiths decision was wrong “in so many ways”. She said: “He begged her to let him be anonymous and he said that he would take his life if she refused. “But she said, No. I cannot see any way that can be explained away. He just needed the time it took to get the right interventions to turn his mental health and his life around. “Lady Smith was the top person in the inquiry. She knew she was dealing with a hugely vulnerable person as all victims are. She knew that he was having suicidal thoughts.” Kevin suffered trauma, including sexual abuse, in his childhood. In his final message to Rowan, in the hours before his suspected death, Kevin didnt refer directly to the SCAI inquiry but stated: “Its just coming from the ­absolute f****** heart and I just cannot cope with this life any more. “Its just been so f****** unbelievably brutal. I kind of feel like, whats the point? People have got their preconceived ideas and malicious gossip has served such a toxic contribution to this final decision that Ive made. “Thats me on the bridge. End of the road, eh? End of the road to all the liars and doubters and gossip mongrels.” Kevins sister Melanie Watson, who recently revealed the text of Kevins final appeal for anonymity, said she was aware of his final messages to friends. She added: “He was very fixated with the fear that people would make false assumptions about him, based on reading his testimony on Google.” The inquirys handling of Kevin is now part of an independent inquiry. An SCAI spokesperson said: “SCAI has commissioned an independent review to consider all aspects of its interactions with Kevin.”\"\n",
"article_content = \"Child services visited a Bronx apartment while a 4-year-old girl was trapped inside with the corpses of her troubled mom and brother but walked away after knocking, neighbors said. Lisa Cotton, 38, and her 8-year-old son, Nazir Millien, 8, had been dead for at least two weeks before relatives found them and the toddler inside the house of horrors Friday, one day after reps for the Administration for Childrens Services dropped the ball, neighbor Sabrina Coleson said. “They didnt do st,” Coleson said Sunday. “They were here ringing peoples bells the day before the wellness check. They were here, but they didnt do st. “One rang my bell and asked if I had any concerns for upstairs. And then a man opened his door and started yelling,” she said. “Lisa was a very cool girl. I never saw her son with her, only the girl. Its terrible.” Concerned relatives finally checked on the family on Friday and found the 4-year-old, Promise, alone, starving and in horrid condition on her mothers bed — as bugs crawled over her dead family. Cottons father, Hubert, 71, had sent his oldest granddaughter to check the apartment at East 231st Street — with the woman grabbing her young sibling and fleeing the putrid home to call police. ACS wasnt the only city agency to leave Promise trapped in hellish conditions — neighbors said cops were also called to the apartment on Tuesday but left after not sensing the stench reported by others. Hubert Cotton said the toddler survived by “feeding herself with chocolate.” Law enforcement sources said Lisa Cotton had a history of erratic behavior, and had a pending ACS case for alleged child neglect before she was found dead. She was arrested in 2021 on child abandonment charges after police said she was caught swinging her then-infant daughter around in a stroller and lighting a wig on fire on White Plains Road, sources said. When cops arrived she was allegedly walking away, leaving Promise behind. The outcome of the case was not available because the file is sealed. One neighbor said the mom had “episodes” in the past. Sources said police believe Lisa Cotton, who suffered from asthma, may have died from cardiac arrest, while her son, who was born prematurely and had a feeding tube, may have starved to death. A spokesperson for ACS declined to comment on the case on Sunday other than to say the agency is “investigating this tragedy with the NYPD.”\"\n",
"\n",
"# prompt = \"Rewrite the content below into a clear and concise summary of one paragraph maximum, presenting the key points as if they are newly written insights. Do not mention or reference the original text, its source, or any phrases like 'According to' or 'The text states'. Write in a natural, standalone format that feels like an original explanation. Keep it brief, engaging, informative, in the style of a news article:\\n\\n{}\".format(article_content)\n",
"# prompt = \"Provide a summary of the content below, presenting the key points as if they are newly written insights. Write in a natural, standalone format that feels like an original explanation. Do not mention or reference the original text, its source, or any phrases like 'According to' or 'The text states'. Keep it brief, engaging, informative, in the style of a news article, and in one single paragraph:\\n\\n{}\".format(article_content)\n",
"# prompt = \"Provide a summary of the content below, writing in a natural and standalone format that feels like an original explanation. Do not mention or reference the original text, its source, or any phrases like 'According to' or 'The text states'. Keep it brief, engaging, informative, in the style of a news article, and in one single paragraph:\\n\\n{}\".format(article_content)\n",
"\n",
"# in one sentence each\n",
"prompt = \"First, provide a summary of the content below in one paragraph. Second, specify the Who, What, When, Where and Why of the story:\\n\\n{}\".format(article_content)\n",
"# prompt = \"Provide the 5W (Who, What, When, Where, Why) and a detailed summary of the content below:\\n\\n{}\".format(article_content)\n",
"# Only answer with the location or address which can be extracted from this description\n",
"\n",
"prompt = \"Provide, in one sentence each, the who, what, when, where, why, and a detailed summary of the content below:\\n\\n{}\".format(article_content)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{}\n"
]
}
],
"source": [
"options = {\"temperature\": 0, \"seed\": 51029}\n",
"resp = client.generate(model=model, prompt=prompt, format=\"json\", options=options)\n",
"r = requests.post( os.path.join(endpoint, \"unload_model\") )\n",
"\n",
"response_dict = json.loads(resp.response)\n",
"pprint(response_dict)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'{\\n\\n\\n}'"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"resp.response"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"<think>\\nOkay, let's tackle this query. The user wants a one-sentence summary for each element: who, what, when, where, why, and a detailed summary.\\n\\nFirst, the main event is the child services visiting a Bronx apartment with a 4-year-old trapped, but the neighbors say they knocked out the corpses. So for the first sentence, I need to include who (child services), what (visited the apartment), when (Friday), where (the apartment), why (neighbors said they didn't do it), and a summary. \\n\\nThen, for the second part, the user might want more details. Let me check the content. The summary needs to include the specific details like the family members, the days they were found dead, the agencies involved, and the outcomes. Also, mention the sources like ACS and the neighbors' statements. I need to make sure each sentence is concise and covers all the points without being too lengthy. Let me structure each sentence to fit the required format.\\n</think>\\n\\n**Who:** Child services in the Bronx, **What:** Visited an apartment containing a 4-year-old trapped with a dead mom and brother, **When:** Friday, **Where:** East 231st Street, **Why:** Neighbors reported the agencys actions were inadequate, **Summary:** Child services visited a Bronx apartment with a 4-year-old trapped and dead, neighbors say they knocked out the corpses, and the incident is attributed to the agencys failure to address the situation, with the family surviving by feeding themselves and the case being sealed.\""
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#resp = client.generate(model=model, prompt=prompt, format=\"json\")\n",
"resp = client.generate(model=model, prompt=prompt)\n",
"resp.response"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "matitos_urls",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
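The last two cells show the trade-off with qwen3 here: forcing format="json" yields an empty object, while the plain call returns a <think>...</think> reasoning block followed by the usable answer. An illustrative post-processing step, not part of the repo, that strips the reasoning block before using the text.

import re

def strip_think_block(text: str) -> str:
    """Remove a leading <think>...</think> section as emitted by qwen3-style models."""
    return re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()

# summary = strip_think_block(resp.response)  # resp as returned by client.generate(...) above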