Files
matitos_news/app_urls/fetcher/src/fetch_utils.py

39 lines
1.7 KiB
Python

import os
import time
from django.core.cache import cache
from .logger import get_logger
logger = get_logger()
from googlenewsdecoder import gnewsdecoder
def decode_gnews_urls(encoded_urls, interval=float(os.getenv("FETCHER_GNEWS_DECODE_SLEEP", 2))):
    """Decode news.google.com URLs, using the Django cache to skip repeat work.

    Each URL is first looked up in the cache under ``gnews_decode_<url>``.
    On a miss it is passed to ``gnewsdecoder``; a successful result is
    appended to the output and cached for 12 hours. URLs that fail to decode
    (bad status, 429 response, or any raised exception) are logged and
    skipped, so the returned list may be shorter than ``encoded_urls``.

    Args:
        encoded_urls: Iterable of encoded news.google.com URLs.
        interval: Forwarded to ``gnewsdecoder`` as its ``interval`` argument
            (interval time used to avoid being blocked). The default is read
            from the ``FETCHER_GNEWS_DECODE_SLEEP`` environment variable
            (2 when unset) once, when this function is defined.

    Returns:
        List of decoded URLs for the inputs that were either cached or
        decoded successfully, in input order.
    """
    logger.debug("Decoding gnews URLs")
    list_decoded_urls = []

    for url in encoded_urls:
        cache_key = "gnews_decode_{}".format(url)

        # Already cached?
        decoded_url = cache.get(cache_key)
        if decoded_url is not None:
            logger.debug("Already cached decoded URL: {} -> {}".format(url, decoded_url))
            list_decoded_urls.append(decoded_url)
            continue

        try:
            # Decode URL, with interval time to avoid block
            decoded_url_dict = gnewsdecoder(url, interval=interval)

            if decoded_url_dict.get("status"):
                decoded_url = decoded_url_dict["decoded_url"]
                list_decoded_urls.append(decoded_url)
                # Cache decoded URL for 12 hours
                cache.set(cache_key, decoded_url, timeout=60 * 60 * 12)
                continue

            # A failed result may carry no "message" at all; guard the
            # substring test so a missing message is reported as a bad
            # status instead of raising TypeError.
            message = decoded_url_dict.get("message")
            if message and "429 Client Error: Too Many Requests for url" in message:
                # NOTE: no extra back-off is applied here; the URL is skipped.
                logger.debug("Decoding news.google.com - 429 Too many requests: {}".format(message))
            else:
                logger.info("Decoding news.google.com - Bad status for URL {}\n{}".format(url, message))
        except Exception as e:
            # Best effort: one failing URL must not abort the whole batch,
            # but the cause is logged so the failure can be diagnosed.
            logger.warning("Error decoding news.google.com, URL: {}\n{}".format(url, e))

    return list_decoded_urls