39 lines
1.7 KiB
Python
39 lines
1.7 KiB
Python
import os
|
|
import time
|
|
from django.core.cache import cache
|
|
from .logger import get_logger
|
|
# Module-level logger, obtained from the package-local `get_logger` helper.
logger = get_logger()
|
|
from googlenewsdecoder import gnewsdecoder
|
|
|
|
|
|
def decode_gnews_urls(encoded_urls, interval=float(os.getenv("FETCHER_GNEWS_DECODE_SLEEP", 2))):
    """Decode Google News (news.google.com) URLs into their target URLs.

    Each URL is first looked up in the Django cache. On a cache miss it is
    decoded with ``gnewsdecoder`` and, if decoding succeeds, the result is
    cached for 12 hours. URLs that cannot be decoded are logged and skipped,
    so the returned list may be shorter than ``encoded_urls``.

    Args:
        encoded_urls: Iterable of encoded Google News URLs.
        interval: Value forwarded to ``gnewsdecoder`` as ``interval`` (used to
            avoid being blocked). The default is evaluated once, when the
            function is defined, from the ``FETCHER_GNEWS_DECODE_SLEEP``
            environment variable, falling back to 2.

    Returns:
        list: Decoded URLs, in input order, for every URL that was found in
        the cache or decoded successfully.
    """
    logger.debug("Decoding gnews URLs")

    cache_timeout = 60 * 60 * 12  # 12 hours (Django cache timeouts are in seconds)
    list_decoded_urls = []

    for url in encoded_urls:
        # Build the key once so the lookup and the store can never diverge.
        cache_key = "gnews_decode_{}".format(url)

        # Already cached?
        decoded_url = cache.get(cache_key)
        if decoded_url is not None:
            logger.debug("Already cached decoded URL: {} -> {}".format(url, decoded_url))
            list_decoded_urls.append(decoded_url)
            continue

        try:
            # Decode URL, with interval time to avoid block
            decoded_url_dict = gnewsdecoder(url, interval=interval)
            message = decoded_url_dict.get("message")

            if decoded_url_dict.get("status"):
                decoded_url = decoded_url_dict["decoded_url"]
                list_decoded_urls.append(decoded_url)
                # Cache decoded URL
                cache.set(cache_key, decoded_url, timeout=cache_timeout)
            # str() guards against a missing (None) or non-string message,
            # which would otherwise raise TypeError on the `in` test.
            elif "429 Client Error: Too Many Requests for url" in str(message):
                # NOTE: no back-off is applied here; the loop simply moves on
                # to the next URL.
                logger.debug("Decoding news.google.com - 429 Too many requests: {}".format(message))
            else:
                logger.info("Decoding news.google.com - Bad status for URL {}\n{}".format(url, message))
        except Exception as e:
            # Best-effort: a failure on one URL must not abort the whole batch,
            # but the cause is logged so it can be diagnosed.
            logger.warning("Error decoding news.google.com, URL: {}\n{}".format(url, e))

    return list_decoded_urls