Publish with hidden tag, don't publish if url id already processed
This commit is contained in:
@@ -12,7 +12,8 @@ logger = get_logger()
|
|||||||
|
|
||||||
class Publisher():
|
class Publisher():
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
pass
|
self.admin_api_url = os.getenv("GHOST_ADMIN_API_URL")
|
||||||
|
self.admin_api_key = os.getenv("GHOST_ADMIN_API_KEY")
|
||||||
|
|
||||||
def _create_jwt(self, admin_api_key):
|
def _create_jwt(self, admin_api_key):
|
||||||
id_, secret = admin_api_key.split(':')
|
id_, secret = admin_api_key.split(':')
|
||||||
@@ -29,9 +30,7 @@ class Publisher():
|
|||||||
|
|
||||||
def _create_ghost_post(self, post_data):
|
def _create_ghost_post(self, post_data):
|
||||||
# Get token
|
# Get token
|
||||||
jwt_token = self._create_jwt(os.getenv("GHOST_ADMIN_API_KEY"))
|
jwt_token = self._create_jwt(self.admin_api_key)
|
||||||
# Get Admin API URL
|
|
||||||
admin_api_url = os.getenv("GHOST_ADMIN_API_URL")
|
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
'Authorization': f'Ghost {jwt_token}',
|
'Authorization': f'Ghost {jwt_token}',
|
||||||
@@ -41,7 +40,7 @@ class Publisher():
|
|||||||
post_data = {"posts": [post_data]}
|
post_data = {"posts": [post_data]}
|
||||||
|
|
||||||
response = requests.post(
|
response = requests.post(
|
||||||
os.path.join(admin_api_url, "posts"),
|
os.path.join(self.admin_api_url, "posts"),
|
||||||
json=post_data,
|
json=post_data,
|
||||||
headers=headers,
|
headers=headers,
|
||||||
params={"source":"html"}
|
params={"source":"html"}
|
||||||
@@ -53,6 +52,27 @@ class Publisher():
|
|||||||
else:
|
else:
|
||||||
logger.warning("Ghost - Failed to publish post: {} {}".format(response.status_code, response.text))
|
logger.warning("Ghost - Failed to publish post: {} {}".format(response.status_code, response.text))
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def _published_url_id(self, url_id):
    """Check whether a Ghost post tagged with the given URL ID already exists.

    Queries the Ghost Admin API "posts" endpoint, filtering by the tag slug
    ``hash-url-id-<url_id>``.
    NOTE(review): this slug is presumably how Ghost exposes the hidden
    ``#url-id-<url_id>`` tag attached at publish time -- confirm against
    the Ghost internal-tag documentation.

    Args:
        url_id: Identifier of the URL, interpolated into the tag filter.

    Returns:
        True if the API returns at least one matching post. False if it
        returns none, or if the lookup fails (non-success HTTP status or a
        payload without "posts"); failures are logged as warnings.
    """
    # Get token
    jwt_token = self._create_jwt(self.admin_api_key)

    headers = {
        'Authorization': f'Ghost {jwt_token}',
        'Content-Type': 'application/json'
    }

    # Query param filter by URL ID
    params = {"filter": "tags:hash-url-id-{}".format(url_id)}

    # Build the endpoint with plain string handling: os.path.join is meant
    # for filesystem paths and would insert a backslash on Windows.
    posts_endpoint = "{}/posts".format(self.admin_api_url.rstrip("/"))

    # Get posts using filter
    response = requests.get(posts_endpoint, params=params, headers=headers)

    if not response.ok:
        # Error payloads carry no "posts" key; report the failure instead of
        # crashing on it further down.
        logger.warning("Ghost - Failed to check published URL ID {}: {} {}".format(url_id, response.status_code, response.text))
        return False

    # To JSON
    dict_response = response.json()

    # A missing, null or empty "posts" entry all mean "nothing published yet"
    return bool(dict_response.get("posts"))
|
||||||
|
|
||||||
def _get_photo_url(self, query):
|
def _get_photo_url(self, query):
|
||||||
# TODO: Get already used photos to skip. Use DB
|
# TODO: Get already used photos to skip. Use DB
|
||||||
@@ -101,6 +121,11 @@ class Publisher():
|
|||||||
logger.warning("Ghost - URL Content is not valid for URL ID: {} {}".format(url_id, url.url))
|
logger.warning("Ghost - URL Content is not valid for URL ID: {} {}".format(url_id, url.url))
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# URL ID already published?
|
||||||
|
if (self._published_url_id(url_id)):
|
||||||
|
logger.info("Ghost - URL ID {} already published, skipping".format(url_id))
|
||||||
|
return
|
||||||
|
|
||||||
###########################################
|
###########################################
|
||||||
client_llm = OllamaClient()
|
client_llm = OllamaClient()
|
||||||
# Model
|
# Model
|
||||||
@@ -160,7 +185,7 @@ class Publisher():
|
|||||||
if (location_url is not None):
|
if (location_url is not None):
|
||||||
html_data += '<p><a href="{}">Estimated location</a></p>'.format(location_url)
|
html_data += '<p><a href="{}">Estimated location</a></p>'.format(location_url)
|
||||||
# HTML: Add source
|
# HTML: Add source
|
||||||
html_data += '<p><a href="{}">Source</a></p>'.format(url.url)
|
html_data += '<p><a href="{}">Source: {}</a></p>'.format(url.url, url_content.url_host.replace("https://", ""))
|
||||||
|
|
||||||
post_data = {
|
post_data = {
|
||||||
# "slug": "hey-short",
|
# "slug": "hey-short",
|
||||||
@@ -171,6 +196,7 @@ class Publisher():
|
|||||||
"feature_image": photo_url,
|
"feature_image": photo_url,
|
||||||
#"feature_image_caption": "",
|
#"feature_image_caption": "",
|
||||||
"status": "published",
|
"status": "published",
|
||||||
|
"tags": ["#url-id-{}".format(url_id)] # Hidden tag with associated URL ID
|
||||||
}
|
}
|
||||||
|
|
||||||
# Publish post
|
# Publish post
|
||||||
|
|||||||
20
utils/Newspapers.ipynb
Normal file
20
utils/Newspapers.ipynb
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"url = \"https://onlinenewspapers.com/index.shtml\""
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"language_info": {
|
||||||
|
"name": "python"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user