LLM refactor: switch Ollama to the NPU-based endpoint; publisher now queries the LLM with a JSON-format prompt
.env:
@@ -40,9 +40,6 @@ FETCHER_ERROR_URL_CACHE_TIME=172800
 
 # Selenium
 SELENIUM_ENDPOINT=http://fetcher_app_selenium:80
-ENDPOINT_OLLAMA=https://ollamamodel.matitos.org
-
-# APP: Selenium
 ARCH=amd64 # arm64, amd64
 SELENIUM_SLEEP_PER_PAGE=4
 PATH_LOGS_DIRECTORY=/opt/logs
@@ -55,3 +52,6 @@ DEPLOY_RAM=4G
 GHOST_ADMIN_API_URL=https://news.matitos.org/ghost/api/admin/
 GHOST_ADMIN_API_KEY=67fffe1b8a57a80001ecec5b:59f580020c196f92e05e208d288702082f8edad6366e2b2c8940b54e41cc355a
 PEXELS_API_KEY=Y6clJkY32eihf34ukX4JsINYu9lzxh3xDdNq2HMAmGwXp0a0tt6vr6S9
+# Ollama
+ENDPOINT_OLLAMA=https://ollamamodelnpu.matitos.org
+OLLAMA_MODEL_DEFAULT=qwen2.5-instruct:3b
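Both new variables are consumed by the Ollama client wrapper (next diff) via os.getenv, falling back to the previous defaults when unset:

    import os
    host = os.getenv("ENDPOINT_OLLAMA", "https://ollamamodel.matitos.org")
    model = os.getenv("OLLAMA_MODEL_DEFAULT", "llama3.2:3b")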
Ollama client:
@@ -1,24 +1,75 @@
 import ollama
 import os
+import requests
+import json
+from .logger import get_logger
+
+logger = get_logger()
 
 class OllamaClient():
     def __init__(self):
-        self.client = ollama.Client(host=os.getenv("ENDPOINT_OLLAMA", "https://ollamamodel.matitos.org"))
+        self.host = os.getenv("ENDPOINT_OLLAMA", "https://ollamamodel.matitos.org")
+        self.client = ollama.Client(host=self.host)
 
     def _get_default_model(self):
-        return "llama3.2:3b"
+        return os.getenv("OLLAMA_MODEL_DEFAULT", "llama3.2:3b")
 
     def get_models(self):
-        models = sorted([m.model for m in self.client.list().models])
-        if (self._get_default_model() in models):
-            return [self._get_default_model()] + [m for m in models if m != self._get_default_model()]
-        else:
-            return models
+        try:
+            # Get models
+            models = sorted([m.model for m in self.client.list().models])
+            # r = requests.get( os.path.join(endpoint, "models") )
+            # r.json().get("models")
+
+            # Default within it?
+            if (self._get_default_model() in models):
+                return [self._get_default_model()] + [m for m in models if m != self._get_default_model()]
+            else:
+                return models
+        except Exception as e:
+            return []
 
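A minimal usage sketch of the reworked listing (hypothetical call site; when the endpoint is unreachable, get_models now swallows the exception and returns an empty list instead of raising):

    client = OllamaClient()
    models = client.get_models()   # default model first when present, remainder sorted
    if not models:
        # endpoint unreachable or no models pulled
        ...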
-    def get_prompt(self):
-        return ("Rewrite the text below into a clear and concise summary of one paragraph maximum, presenting the key points as if they are newly written insights. "
+    def get_prompt(self, content):
+        return "Provide, in one sentence each, the who, what, when, where, why, and a detailed summary of the content below:\n\n{}".format(content)
+        return "First, provide a detailed summary of the content below in one paragraph. Second, specify in one sentence each the who, what, when, where and why of the story. Do not mention or reference the original text, its source, or any phrases like 'According to' or 'The text states':\n\n{}".format(content)
+        return "First, provide a summary of the content below in one paragraph. Second, specify the Who, What, When, Where and Why of the story:\n\n{}".format(content)
+        # First, provide a summary of the content below in one paragraph. Second, specify the who, what, when, where and why of the story in one sentence each. Do not mention or reference the original text, its source, or any phrases like 'According to' or 'The text states':
+        '''
+        return ("Rewrite the content below into a clear and concise summary of one paragraph maximum, presenting the key points as if they are newly written insights. "
                 "Do not mention or reference the original text, its source, or any phrases like 'According to' or 'The text states'. "
                 "Write in a natural, standalone format that feels like an original explanation. "
-                "Keep it brief, engaging, informative, in the style of a news article: \n"
+                "Keep it brief, engaging, informative, in the style of a news article:\n\n{}".format(content)
         )
+        '''
 
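With this prompt and format="json" (see generate below), the model is expected to answer with one key per question plus a summary. An illustrative response shape follows; the exact keys depend on the model, and the publisher later relies only on "where" and on some key containing "summary":

    {
        "who": "...",
        "what": "...",
        "when": "...",
        "where": "...",
        "why": "...",
        "summary": "..."
    }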
+
+    def generate(self, model, prompt, format=None):
+        try:
+            # Generate response
+            response = self.client.generate(model=model, prompt=prompt, format=format)
+            # Extract response
+            response = response.response
+            # Json? -> Dict
+            if (format == "json"):
+                # Dict
+                response = json.loads(response)
+            # Force unload
+            r = requests.post( os.path.join(self.host, "unload_model") )
+        except Exception as e:
+            logger.warning("Exception while generating LLM response: {}".format(str(e)))
+            if (format == "json"):
+                response = {}
+            else:
+                response = None
+        # Text
+        return response
+
+    def generate_stream(self, model, prompt):
+        try:
+            # Generate response
+            response = self.client.generate(model=model, prompt=prompt, format="json", stream=True)
+            # Streamed chunks
+            for chunk in response:
+                yield chunk.response
+            # Force unload
+            r = requests.post( os.path.join(self.host, "unload_model") )
+        except Exception as e:
+            logger.warning("Exception while generating LLM response: {}".format(str(e)))
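Sketch of the new call path (article_text is a placeholder; format="json" and stream=True are regular ollama.Client.generate options, while the unload_model POST appears to target a custom route on the NPU endpoint rather than a stock Ollama API):

    client = OllamaClient()
    model = client.get_models()[0]
    result = client.generate(model, client.get_prompt(article_text), format="json")
    # result: dict on success, {} on failure; with format=None it is a str or None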
Publisher:
@@ -100,14 +100,51 @@ class Publisher():
         if (url_content.valid_content is False):
             logger.warning("Ghost - URL Content is not valid for URL ID: {} {}".format(url_id, url.url))
             return
 
-        model = "llama3.2:3b"
-        prompt = "Rewrite the text below into a clear and concise summary, presenting the key points as if they are newly written insights. Do not mention or reference the original text, its source, or any phrases like 'According to' or 'The text states'. Instead, write in a natural, standalone format that feels like an original explanation. Keep it brief, engaging, informative, in the style of a news article, and no longer than a paragraph:"
-
-        ollama_msg = {"role": "user", "content": "{}\n{}".format(prompt, url_content.content)}
-        response = OllamaClient().client.chat(model=model, messages=[ollama_msg])
-
-        article_summary = response["message"]["content"]
+        ###########################################
+        client_llm = OllamaClient()
+        # Model
+        model = client_llm.get_models()[0]
+        # Prompt
+        prompt = client_llm.get_prompt(url_content.content)
+        # Generate content
+        generated_content_dict = client_llm.generate(model, prompt, format="json")
+        logger.debug("Generated content: {}".format(generated_content_dict))
+
+        ###########################################
+        # Get where description
+        generated_content_where = generated_content_dict.get("where")
+        # Prompt to extract address / location
+        prompt = 'Only answer with the location or address which can be extracted from this description: "{}"'.format(generated_content_where)
+        # LLM
+        extracted_location = client_llm.generate(model, prompt, format=None)
+        logger.debug("Estimated location: {}".format(extracted_location))
+        # OSM API
+        params = {
+            'q': extracted_location,
+            'format': 'json',
+            'addressdetails': 1,
+            'limit': 1
+        }
+        response = requests.get('https://nominatim.openstreetmap.org/search', params=params, headers={'User-Agent': 'App'})
+        list_data = response.json()
+        if (len(list_data) > 0):
+            data = list_data[0]
+            location_url = "https://openstreetmap.org/{}/{}".format(data.get("osm_type"), data.get("osm_id"))
+        else:
+            location_url = None
+        ###########################################
+
+        # Parse generated content
+        summary, five_w = "", ""
+        for k, v in generated_content_dict.items():
+            if ("summary" in k.lower()):
+                summary = v
+            else:
+                five_w += "{}: {}\n".format(k.capitalize(), v)
+        # Aggregate generated content
+        generated_content = "{}\n\n{}".format(summary, five_w)
 
         ################################################################################################
         if (url_content.image_main_url is None) or (requests.get(url_content.image_main_url).status_code != 200):
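For reference, Nominatim's /search returns a JSON list whose entries include osm_type and osm_id, the only fields the code above uses to build the OpenStreetMap link:

    # Illustrative (abridged) Nominatim result:
    # [{"osm_type": "way", "osm_id": 123456, "lat": "...", "lon": "...", "display_name": "Berlin, Germany"}]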
@@ -117,10 +154,18 @@ class Publisher():
         else:
             photo_url = url_content.image_main_url
 
+        # HTML: Generate content
+        html_data = "".join([ "<p>{}</p>".format(t) for t in generated_content.split("\n") ])
+        # HTML: Add location if available
+        if (location_url is not None):
+            html_data += '<p><a href="{}">Estimated location</a></p>'.format(location_url)
+        # HTML: Add source
+        html_data += '<p><a href="{}">Source</a></p>'.format(url.url)
+
         post_data = {
             # "slug": "hey-short",
             "title": url_content.title,
-            "html": "".join([ "<p>{}</p>".format(t) for t in article_summary.split("\n") ]) + '<a href="{}">Source</a>'.format(url.url),
+            "html": html_data,
             #"meta_title": "",
             #"meta_description": "",
             "feature_image": photo_url,
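Given the aggregation above, generated_content has the form "summary\n\nWho: ...\nWhat: ...\n", so html_data becomes a flat sequence of <p> tags (blank lines yield empty <p></p> elements), for example:

    # generated_content = "One-paragraph summary.\n\nWho: ...\nWhat: ...\n"
    # html_data        -> "<p>One-paragraph summary.</p><p></p><p>Who: ...</p><p>What: ...</p><p></p>"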
URL detail template:
@@ -278,8 +278,7 @@
 
     <!-- Input field with a default value -->
     <label for="custom-input-{{ url_item.id }}">Prompt:</label>
-    <textarea id="custom-input-{{ url_item.id }}" class="form-control mb-2" rows="5">{{ prompt }}
-{{ url_item.url }}</textarea>
+    <textarea id="custom-input-{{ url_item.id }}" class="form-control mb-2" rows="5">{{ prompt }}</textarea>
 
     <div class="d-flex align-items-center">
         <!-- Fetch details button -->
Django views:
@@ -14,16 +14,6 @@ import json
 ####################################################################################################
 
 def llm(request):
-
-    def stream_response(model, text):
-        msg_content = {
-            "role": "user",
-            "content": text,
-        }
-        response = OllamaClient().client.chat(model=model, messages=[msg_content], stream=True)
-        for chunk in response:
-            yield chunk["message"]["content"] # Stream each chunk of text
-
     if request.method == 'POST':
         try:
             body_data = json.loads(request.body)
@@ -33,7 +23,7 @@ def llm(request):
             if message is None:
                 return JsonResponse({'error': 'No message found in request'}, status=400)
 
-            return StreamingHttpResponse(stream_response(model, message), content_type="text/plain")
+            return StreamingHttpResponse(OllamaClient().generate_stream(model, message), content_type="text/plain")
         except json.JSONDecodeError:
             return JsonResponse({'error': 'Invalid JSON'}, status=400)
 
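A quick manual check of the streaming endpoint (the /llm URL path and the JSON keys here are assumptions inferred from this view, not confirmed routes):

    import requests

    r = requests.post("http://localhost:8000/llm",
                      json={"model": "llama3.2:3b", "message": "Some article text"},
                      stream=True)
    for chunk in r.iter_content(chunk_size=None, decode_unicode=True):
        print(chunk, end="")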
@@ -55,13 +45,15 @@ def url_detail_view(request, id):
     url_content = {}
 
     ollama = OllamaClient()
+    # prompt_content = "{}\n{}\n{}".format(url_content.title, url_content.description, url_content.content)
+    prompt_content = "{}".format(url_content.content)
 
     context = {
         'url_item': url_item,
         'sources': url_sources,
         'searches': url_searches,
         'models': ollama.get_models(),
-        'prompt': ollama.get_prompt(),
+        'prompt': ollama.get_prompt(prompt_content),
         'url_content': url_content,
         'url_canonical': url_canonical,
     }
docker-compose (development):
@@ -68,6 +68,7 @@ services:
       - GHOST_ADMIN_API_KEY=${GHOST_ADMIN_API_KEY}
       - GHOST_ADMIN_API_URL=${GHOST_ADMIN_API_URL}
       - PEXELS_API_KEY=${PEXELS_API_KEY}
+      - OLLAMA_MODEL_DEFAULT=${OLLAMA_MODEL_DEFAULT}
     ########################
     volumes: # Development mode
       - ./app_urls:/opt/app
docker-compose (deployment):
@@ -68,6 +68,7 @@ services:
       - GHOST_ADMIN_API_KEY=${GHOST_ADMIN_API_KEY}
       - GHOST_ADMIN_API_URL=${GHOST_ADMIN_API_URL}
       - PEXELS_API_KEY=${PEXELS_API_KEY}
+      - OLLAMA_MODEL_DEFAULT=${OLLAMA_MODEL_DEFAULT}
     ########################
     #volumes: # Development mode
     #  - ./app_urls:/opt/app