import html
import os
import random
import time

import jwt
import requests

from .llm import OllamaClient
from ..models import Urls, UrlContent
from .logger import get_logger

logger = get_logger()

# Upper bound (seconds) for every outbound HTTP call made by this module.
# Without a timeout, `requests` waits indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 30


class Publisher:
    """Publish LLM-generated summaries of stored URL content to a Ghost blog.

    Configuration is read from the environment:
      * GHOST_ADMIN_API_URL - base URL of the Ghost Admin API.
      * GHOST_ADMIN_API_KEY - Admin API key in "<id>:<hex secret>" form.
      * PEXELS_API_KEY      - read lazily, only when a fallback image is needed.
    """

    def __init__(self):
        self.admin_api_url = os.getenv("GHOST_ADMIN_API_URL")
        self.admin_api_key = os.getenv("GHOST_ADMIN_API_KEY")

    def _create_jwt(self, admin_api_key):
        """Return a short-lived HS256 token for the Ghost Admin API.

        Args:
            admin_api_key: Key in "<id>:<hex secret>" form.

        Returns:
            The encoded JWT, valid for 5 minutes from now.

        Raises:
            ValueError: If the key is not exactly "<id>:<secret>" or the
                secret is not valid hexadecimal.
        """
        id_, secret = admin_api_key.split(':')
        iat = int(time.time())
        exp = iat + 5 * 60  # 5 minutes
        header = {'alg': 'HS256', 'kid': id_}
        payload = {
            'iat': iat,
            'exp': exp,
            'aud': '/v5/admin/',  # Adjust depending on your Ghost version
        }
        return jwt.encode(payload, bytes.fromhex(secret), algorithm='HS256', headers=header)

    def _auth_headers(self):
        """Return request headers carrying a freshly signed Ghost token."""
        jwt_token = self._create_jwt(self.admin_api_key)
        return {
            'Authorization': f'Ghost {jwt_token}',
            'Content-Type': 'application/json',
        }

    def _posts_endpoint(self):
        """Return the Admin API "posts" URL.

        Built with plain string handling: os.path.join would insert a
        backslash on Windows, which is wrong for URLs.
        """
        return "{}/posts".format(self.admin_api_url.rstrip("/"))

    def _create_ghost_post(self, post_data):
        """Create a post on Ghost.

        Args:
            post_data: Dict describing a single post (title, html, tags, ...).

        Returns:
            The decoded JSON response when Ghost answers 201, otherwise None
            (the failure is logged as a warning).
        """
        response = requests.post(
            self._posts_endpoint(),
            json={"posts": [post_data]},
            headers=self._auth_headers(),
            # Tells Ghost that the "html" field is the content source.
            params={"source": "html"},
            timeout=REQUEST_TIMEOUT,
        )

        if response.status_code == 201:
            logger.info("Ghost post published successfully")
            return response.json()

        logger.warning("Ghost - Failed to publish post: {} {}".format(response.status_code, response.text))
        return None

    def _published_url_id(self, url_id):
        """Return True if a Ghost post tagged with this URL ID already exists.

        publish() tags every post with "#url-id-<id>"; the filter below looks
        it up as "hash-url-id-<id>" (presumably the slug Ghost assigns to
        '#'-prefixed tags - confirm against the Ghost version in use).
        """
        params = {"filter": "tags:hash-url-id-{}".format(url_id)}
        response = requests.get(
            self._posts_endpoint(),
            params=params,
            headers=self._auth_headers(),
            timeout=REQUEST_TIMEOUT,
        )
        # Deliberately no default for a missing "posts" key: an error response
        # must not be read as "not published yet", otherwise publish() could
        # create a duplicate post.
        posts = response.json().get("posts")
        return len(posts) > 0

    def _get_photo_url(self, query):
        """Return the landscape URL of a random Pexels photo matching `query`.

        Best effort: returns None (and logs a warning) when the request
        fails, the response is malformed, or no photo matches.
        """
        # TODO: Get already used photos to skip. Use DB
        # (iterate the photos, compare photo["id"] against the DB and follow
        # the response's "next_page" once a page is exhausted).
        try:
            response = requests.get(
                "https://api.pexels.com/v1/search",
                # Passed as params so the query is URL-encoded properly.
                params={"query": query},
                headers={"Authorization": os.getenv("PEXELS_API_KEY")},
                timeout=REQUEST_TIMEOUT,
            )
            list_photos = response.json().get("photos", [])
            if not list_photos:
                logger.warning("No Pexels photos found for query: {}".format(query))
                return None
            # TODO: IMPROVE...
            return random.choice(list_photos).get("src").get("landscape")
        except Exception as e:
            # Broad on purpose: a missing image must never block publishing.
            logger.warning("Something went wrong while fetching image from Pexels: {}".format(str(e)))
            return None

    def _estimate_location_url(self, client_llm, model, where_description):
        """Return an OpenStreetMap URL for the place described, or None.

        The LLM reduces the free-text description to a location/address,
        which is then geocoded through the Nominatim search API. Geocoding
        failures are logged and reported as None.
        """
        if not where_description:
            return None

        prompt = 'Only answer with the location or address which can be extracted from this description: "{}"'.format(where_description)
        extracted_location = client_llm.generate(model, prompt, format=None)
        logger.debug("Estimated location: {}".format(extracted_location))
        if not extracted_location:
            return None

        params = {
            'q': extracted_location,
            'format': 'json',
            'addressdetails': 1,
            'limit': 1,
        }
        try:
            response = requests.get(
                'https://nominatim.openstreetmap.org/search',
                params=params,
                headers={'User-Agent': 'App'},
                timeout=REQUEST_TIMEOUT,
            )
            list_data = response.json()
        except (requests.RequestException, ValueError) as e:
            logger.warning("Something went wrong while fetching location from OSM: {}".format(str(e)))
            return None

        # Only a non-empty list of matches is usable.
        if not isinstance(list_data, list) or not list_data:
            return None
        data = list_data[0]
        return "https://openstreetmap.org/{}/{}".format(data.get("osm_type"), data.get("osm_id"))

    @staticmethod
    def _as_text(value, separator):
        """Render a generated field as text, joining list items with `separator`."""
        if isinstance(value, str):
            return value
        if isinstance(value, (list, tuple)):
            return separator.join(str(item) for item in value)
        return str(value)

    @staticmethod
    def _format_generated_content(generated_content_dict):
        """Turn the LLM's JSON answer into "<summary>\\n\\n<Key: value lines>".

        Any key containing "summary" (case-insensitive) provides the summary;
        every other key becomes one "Key: value" line.
        """
        summary = ""
        detail_lines = []
        for key, value in generated_content_dict.items():
            if "summary" in key.lower():
                summary = Publisher._as_text(value, "\n")
            else:
                detail_lines.append("{}: {}\n".format(key.capitalize(), Publisher._as_text(value, ". ")))
        return "{}\n\n{}".format(summary, "".join(detail_lines))

    @staticmethod
    def _build_post_html(generated_content, location_url, source_url, source_host):
        """Build the post body: one paragraph per line, then location and source links.

        All interpolated values are HTML-escaped because they originate from
        an LLM and from scraped pages.
        """
        # NOTE(review): the markup and link texts below were reconstructed;
        # the original templates were empty strings that discarded their
        # format arguments. Confirm the intended wording.
        parts = ["<p>{}</p>".format(html.escape(line)) for line in generated_content.split("\n")]
        if location_url is not None:
            parts.append('<p><a href="{}">Estimated location</a></p>'.format(html.escape(location_url)))
        parts.append('<p><a href="{}">Source: {}</a></p>'.format(html.escape(source_url), html.escape(source_host)))
        return "".join(parts)

    def _select_feature_image(self, image_url):
        """Return `image_url` if it answers HTTP 200, else a fallback Pexels photo URL (or None)."""
        if image_url is not None:
            try:
                if requests.get(image_url, timeout=REQUEST_TIMEOUT).status_code == 200:
                    return image_url
            except requests.RequestException as e:
                logger.warning("Ghost - Main image not reachable: {} {}".format(image_url, str(e)))

        # Invalid main image -> Search for one
        photo_query = "Mountain landscape"
        return self._get_photo_url(photo_query)

    def publish(self, url_id):
        """Generate a summary for the stored URL content and publish it on Ghost.

        Nothing is published when the URL or its content is missing, when the
        content is flagged as not valid, or when a post for this URL ID
        already exists.

        Args:
            url_id: Primary key shared by the Urls and UrlContent rows.

        Returns:
            None. The outcome is reported through the logger.
        """
        logger.info("Publishing URL ID {}".format(url_id))

        url_content = UrlContent.objects.filter(pk=url_id).first()
        url = Urls.objects.filter(pk=url_id).first()

        # `url.url` is needed for the logs below and for the source link.
        if url is None:
            logger.warning("Ghost - URL is NULL for URL ID: {}".format(url_id))
            return
        if url_content is None:
            logger.warning("Ghost - URL Content is NULL for URL ID: {} {}".format(url_id, url.url))
            return
        if url_content.valid_content is False:
            logger.warning("Ghost - URL Content is not valid for URL ID: {} {}".format(url_id, url.url))
            return

        # URL ID already published?
        if self._published_url_id(url_id):
            logger.info("Ghost - URL ID {} already published, skipping".format(url_id))
            return

        # Generate the structured summary with the first available model.
        client_llm = OllamaClient()
        model = client_llm.get_models()[0]
        prompt = client_llm.get_prompt(url_content.content)
        generated_content_dict = client_llm.generate(model, prompt, format="json")
        logger.debug("Generated content: {}".format(generated_content_dict))

        # Optional enrichment: map link derived from the "where" description.
        location_url = self._estimate_location_url(client_llm, model, generated_content_dict.get("where"))

        generated_content = self._format_generated_content(generated_content_dict)
        photo_url = self._select_feature_image(url_content.image_main_url)
        html_data = self._build_post_html(
            generated_content,
            location_url,
            url.url,
            (url_content.url_host or "").replace("https://", ""),
        )

        # Optional Ghost fields currently not set: slug, meta_title,
        # meta_description, feature_image_caption.
        post_data = {
            "title": url_content.title,
            "html": html_data,
            "feature_image": photo_url,
            "status": "published",
            "tags": ["#url-id-{}".format(url_id)],  # Hidden tag with associated URL ID
        }

        # Publish post
        payload = self._create_ghost_post(post_data)
        logger.debug("Ghost payload: {}".format(str(payload)))

        # Disabled debug response, kept for reference:
        # # Return a response (you can customize this as needed)
        # return HttpResponse(f"""URL ID: {id_url}
        # URL: {url.url}
        # Title: {url_content.title}
        # Description: {url_content.description}
        # Content: {url_content.content}
        # Valid content: {url_content.valid_content}
        # Language: {url_content.language}
        # Main image: {url_content.image_main_url}
        # Generated summary: {article_summary}
        # """)