LLM refactor: NPU Ollama-based client; publisher updated to query the LLM for JSON output

This commit is contained in:
Luciano Gervasoni
2025-04-23 16:35:50 +02:00
parent 8ea3ec1bda
commit e5c574ba33
7 changed files with 123 additions and 34 deletions

View File

@@ -100,14 +100,51 @@ class Publisher():
if (url_content.valid_content is False):
logger.warning("Ghost - URL Content is not valid for URL ID: {} {}".format(url_id, url.url))
return
###########################################
client_llm = OllamaClient()
# Model
model = client_llm.get_models()[0]
# Prompt
prompt = client_llm.get_prompt(url_content.content)
# Generate content
generated_content_dict = client_llm.generate(model, prompt, format="json")
logger.debug("Generated content: {}".format(generated_content_dict))
model = "llama3.2:3b"
prompt = "Rewrite the text below into a clear and concise summary, presenting the key points as if they are newly written insights. Do not mention or reference the original text, its source, or any phrases like 'According to' or 'The text states'. Instead, write in a natural, standalone format that feels like an original explanation. Keep it brief, engaging, informative, in the style of a news article, and no longer than a paragraph:"
###########################################
# Get where description
generated_content_where = generated_content_dict.get("where")
# Prompt to extract address / location
prompt = 'Only answer with the location or address which can be extracted from this description: "{}"'.format(generated_content_where)
# LLM
extracted_location = client_llm.generate(model, prompt, format=None)
logger.debug("Estimated location: {}".format(extracted_location))
# OSM API
params = {
'q': extracted_location,
'format': 'json',
'addressdetails': 1,
'limit': 1
}
ollama_msg = {"role": "user", "content": "{}\n{}".format(prompt, url_content.content)}
response = OllamaClient().client.chat(model=model, messages=[ollama_msg])
response = requests.get('https://nominatim.openstreetmap.org/search', params=params, headers={'User-Agent': 'App'})
list_data = response.json()
if (len(list_data) > 0):
data = list_data[0]
location_url = "https://openstreetmap.org/{}/{}".format(data.get("osm_type"), data.get("osm_id"))
else:
location_url = None
###########################################
article_summary = response["message"]["content"]
# Parse generated content
summary, five_w = "", ""
for k, v in generated_content_dict.items():
if ("summary" in k.lower()):
summary = v
else:
five_w += "{}: {}\n".format(k.capitalize(), v)
# Aggregate generated content
generated_content = "{}\n\n{}".format(summary, five_w)
################################################################################################
if (url_content.image_main_url is None) or (requests.get(url_content.image_main_url).status_code != 200):
@@ -117,10 +154,18 @@ class Publisher():
else:
photo_url = url_content.image_main_url
# HTML: Generate content
html_data = "".join([ "<p>{}</p>".format(t) for t in generated_content.split("\n") ])
# HTML: Add location if available
if (location_url is not None):
html_data += '<p><a href="{}">Estimated location</a></p>'.format(location_url)
# HTML: Add source
html_data += '<p><a href="{}">Source</a></p>'.format(url.url)
post_data = {
# "slug": "hey-short",
"title": url_content.title,
"html": "".join([ "<p>{}</p>".format(t) for t in article_summary.split("\n") ]) + '<a href="{}">Source</a>'.format(url.url),
"html": html_data,
#"meta_title": "",
#"meta_description": "",
"feature_image": photo_url,