Publisher task
This commit is contained in:
149
app_urls/fetcher/src/publisher.py
Normal file
149
app_urls/fetcher/src/publisher.py
Normal file
@@ -0,0 +1,149 @@
|
||||
import time
|
||||
import jwt
|
||||
import os
|
||||
import requests
|
||||
import random
|
||||
from .llm import OllamaClient
|
||||
from ..models import Urls, UrlContent
|
||||
|
||||
from .logger import get_logger
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
class Publisher:
    """Summarise stored URL content with an LLM and publish it as a Ghost post.

    Configuration is read from environment variables at call time:

    * ``GHOST_ADMIN_API_KEY`` -- Ghost Admin API key, ``<id>:<hex secret>``.
    * ``GHOST_ADMIN_API_URL`` -- base URL of the Ghost Admin API.
    * ``PEXELS_API_KEY``      -- key used when a fallback image is needed.
    """

    # Seconds to wait on any outbound HTTP call before giving up, so a stalled
    # remote host cannot block the publishing task forever.
    REQUEST_TIMEOUT = 30
    # Lifetime of the Ghost JWT, in seconds (5 minutes).
    JWT_LIFETIME = 5 * 60
    # Adjust depending on your Ghost version.
    GHOST_API_AUDIENCE = "/v5/admin/"
    # Ollama model used to write the summary.
    LLM_MODEL = "llama3.2:3b"
    # Instruction prepended to the article content when asking for a summary.
    SUMMARY_PROMPT = (
        "Rewrite the text below into a clear and concise summary, presenting "
        "the key points as if they are newly written insights. Do not mention "
        "or reference the original text, its source, or any phrases like "
        "'According to' or 'The text states'. Instead, write in a natural, "
        "standalone format that feels like an original explanation. Keep it "
        "brief, engaging, informative, in the style of a news article, and no "
        "longer than a paragraph:"
    )
    # Pexels search used when the article has no usable main image.
    FALLBACK_PHOTO_QUERY = "Mountain landscape"

    def __init__(self):
        pass

    def _create_jwt(self, admin_api_key):
        """Build the short-lived JWT that authenticates Ghost Admin API calls.

        Args:
            admin_api_key: Ghost Admin API key in the form ``<id>:<hex secret>``.

        Returns:
            The encoded token as a ``str``.

        Raises:
            ValueError: If the key is missing, is not of the form
                ``<id>:<secret>``, or the secret is not valid hexadecimal.
        """
        if not admin_api_key or admin_api_key.count(":") != 1:
            raise ValueError("Ghost Admin API key must have the form '<id>:<secret>'")

        id_, secret = admin_api_key.split(":")
        iat = int(time.time())
        header = {"alg": "HS256", "kid": id_}
        payload = {
            "iat": iat,
            "exp": iat + self.JWT_LIFETIME,
            "aud": self.GHOST_API_AUDIENCE,
        }
        token = jwt.encode(payload, bytes.fromhex(secret), algorithm="HS256", headers=header)
        # Older PyJWT releases return bytes; normalise so the Authorization
        # header never ends up as "Ghost b'...'".
        if isinstance(token, bytes):
            token = token.decode("ascii")
        return token

    @staticmethod
    def _posts_endpoint(admin_api_url):
        """Return the posts endpoint for the given Admin API base URL."""
        # Plain string handling instead of os.path.join: URLs always use "/",
        # whereas os.path.join uses the platform's path separator.
        return "{}/posts".format(admin_api_url.rstrip("/"))

    def _create_ghost_post(self, post_data):
        """Create a post through the Ghost Admin API.

        Args:
            post_data: Dict describing a single post (title, html, ...).

        Returns:
            The decoded JSON response when Ghost answers 201, otherwise
            ``None`` (missing configuration, network failure or any other
            status code). Failures are logged as warnings.
        """
        admin_api_key = os.getenv("GHOST_ADMIN_API_KEY")
        admin_api_url = os.getenv("GHOST_ADMIN_API_URL")
        if not admin_api_key or not admin_api_url:
            logger.warning("Ghost - Missing GHOST_ADMIN_API_KEY or GHOST_ADMIN_API_URL")
            return None

        try:
            jwt_token = self._create_jwt(admin_api_key)
        except ValueError as e:
            logger.warning("Ghost - Invalid admin API key: {}".format(str(e)))
            return None

        headers = {
            "Authorization": f"Ghost {jwt_token}",
            "Content-Type": "application/json",
        }

        try:
            response = requests.post(
                self._posts_endpoint(admin_api_url),
                json={"posts": [post_data]},
                headers=headers,
                # Tell Ghost that the "html" field is the content source.
                params={"source": "html"},
                timeout=self.REQUEST_TIMEOUT,
            )
        except requests.RequestException as e:
            logger.warning("Ghost - Failed to publish post: {}".format(str(e)))
            return None

        if response.status_code == 201:
            logger.info("Ghost post published successfully")
            return response.json()

        logger.warning("Ghost - Failed to publish post: {} {}".format(response.status_code, response.text))
        return None

    def _get_photo_url(self, query):
        """Pick a random landscape photo URL from a Pexels search.

        Args:
            query: Free-text search query.

        Returns:
            URL of the "landscape" rendition of a randomly chosen photo from
            the first results page, or ``None`` when the search fails or
            yields no photos.
        """
        # TODO: Get already used photos to skip. Use DB (compare photo "id"),
        # and follow the response's "next_page" once a page is exhausted.
        try:
            r = requests.get(
                "https://api.pexels.com/v1/search",
                # Let requests encode the query so spaces and "&" are safe.
                params={"query": query},
                headers={"Authorization": os.getenv("PEXELS_API_KEY")},
                timeout=self.REQUEST_TIMEOUT,
            )
            r.raise_for_status()
            list_photos = r.json().get("photos", [])
            if not list_photos:
                logger.warning("Pexels returned no photos for query: {}".format(query))
                return None
            # TODO: IMPROVE...
            return random.choice(list_photos).get("src", {}).get("landscape")
        except (requests.RequestException, ValueError, AttributeError, TypeError) as e:
            # Best effort: a missing feature image must not abort publishing.
            logger.warning("Something went wrong while fetching image from Pexels: {}".format(str(e)))
            return None

    def _is_image_reachable(self, image_url):
        """Return True when *image_url* is set and answers with HTTP 200."""
        if not image_url:
            return False
        try:
            # stream=True fetches only the status line and headers; the image
            # body is not downloaded just to validate the link.
            with requests.get(image_url, stream=True, timeout=self.REQUEST_TIMEOUT) as response:
                return response.status_code == 200
        except requests.RequestException:
            return False

    @staticmethod
    def _render_html(summary, source_url):
        """Render the summary as HTML paragraphs followed by a source link.

        Each non-blank line of *summary* becomes one ``<p>`` element. Both the
        text and the URL are HTML-escaped because they originate from scraped
        pages and LLM output.
        """
        from html import escape

        paragraphs = (line.strip() for line in summary.splitlines())
        body = "".join("<p>{}</p>".format(escape(p, quote=False)) for p in paragraphs if p)
        return body + '<a href="{}">Source</a>'.format(escape(source_url))

    def _summarize(self, content):
        """Ask the configured Ollama model for a one-paragraph summary."""
        ollama_msg = {"role": "user", "content": "{}\n{}".format(self.SUMMARY_PROMPT, content)}
        response = OllamaClient().client.chat(model=self.LLM_MODEL, messages=[ollama_msg])
        return response["message"]["content"]

    def publish(self, url_id):
        """Summarise the content stored for *url_id* and publish it to Ghost.

        Nothing is published (a warning is logged and the method returns)
        when the URL or its content row is missing, or when the content is
        flagged as not valid.

        Args:
            url_id: Primary key used to look up both ``Urls`` and
                ``UrlContent``.
        """
        logger.info("Publishing URL ID {}".format(url_id))

        url = Urls.objects.filter(pk=url_id).first()
        if url is None:
            # Checked first: every later step needs url.url.
            logger.warning("Ghost - URL not found for URL ID: {}".format(url_id))
            return

        url_content = UrlContent.objects.filter(pk=url_id).first()
        if url_content is None:
            logger.warning("Ghost - URL Content is NULL for URL ID: {} {}".format(url_id, url.url))
            return
        if url_content.valid_content is False:
            logger.warning("Ghost - URL Content is not valid for URL ID: {} {}".format(url_id, url.url))
            return

        article_summary = self._summarize(url_content.content)

        if self._is_image_reachable(url_content.image_main_url):
            photo_url = url_content.image_main_url
        else:
            # Invalid main image -> Search for one
            photo_url = self._get_photo_url(self.FALLBACK_PHOTO_QUERY)

        # Optional Ghost fields currently left unset: slug, meta_title,
        # meta_description, feature_image_caption.
        post_data = {
            "title": url_content.title,
            "html": self._render_html(article_summary, url.url),
            "feature_image": photo_url,
            "status": "published",
        }

        # Publish post
        payload = self._create_ghost_post(post_data)
        logger.debug("Ghost payload: {}".format(str(payload)))
|
||||
Reference in New Issue
Block a user