import os
import time

from django.core.cache import cache
from googlenewsdecoder import gnewsdecoder

from .logger import get_logger

logger = get_logger()

# How long a successfully decoded URL stays in the cache (12 hours, in seconds).
GNEWS_DECODE_CACHE_TIMEOUT = 60 * 60 * 12

# Substring of the decoder's error message that identifies a rate-limit reply.
_TOO_MANY_REQUESTS_MARKER = "429 Client Error: Too Many Requests for url"


def decode_gnews_urls(encoded_urls, interval=float(os.getenv("FETCHER_GNEWS_DECODE_SLEEP", 2))):
    """Decode a batch of encoded news.google.com URLs.

    Each URL is first looked up in the Django cache under the key
    ``gnews_decode_<url>``. On a miss it is passed to ``gnewsdecoder`` and,
    when the decoder reports success, the result is cached for
    ``GNEWS_DECODE_CACHE_TIMEOUT`` seconds.

    Decoding is best-effort: URLs that cannot be decoded (bad status, rate
    limiting, or any exception raised while decoding/caching) are logged and
    skipped, so the returned list may be shorter than ``encoded_urls``.

    Args:
        encoded_urls: Iterable of encoded URLs to decode.
        interval: Value forwarded to ``gnewsdecoder`` as ``interval``
            (intended to space out requests and avoid being blocked).
            The default is read once, at import time, from the
            ``FETCHER_GNEWS_DECODE_SLEEP`` environment variable (fallback 2).

    Returns:
        list: Decoded URLs, in input order, for the URLs that were found in
        the cache or decoded successfully.
    """
    logger.debug("Decoding gnews URLs")

    list_decoded_urls = []
    for url in encoded_urls:
        cache_key = "gnews_decode_{}".format(url)

        # Already cached?
        decoded_url = cache.get(cache_key)
        if decoded_url is not None:
            logger.debug("Already cached decoded URL: {} -> {}".format(url, decoded_url))
            list_decoded_urls.append(decoded_url)
            continue

        try:
            # Decode URL, with interval time to avoid block
            decoded_url_dict = gnewsdecoder(url, interval=interval)

            if decoded_url_dict.get("status"):
                decoded_url = decoded_url_dict["decoded_url"]
                list_decoded_urls.append(decoded_url)
                cache.set(cache_key, decoded_url, timeout=GNEWS_DECODE_CACHE_TIMEOUT)
                continue

            # Failure path. The message may be absent, so guard the substring
            # test instead of letting a TypeError be reported as a generic
            # decoding error by the handler below.
            message = decoded_url_dict.get("message")
            if _TOO_MANY_REQUESTS_MARKER in str(message or ""):
                # NOTE: no back-off is applied here; the remaining URLs are
                # still attempted with the regular ``interval``.
                logger.debug("Decoding news.google.com - 429 Too many requests: {}".format(message))
            else:
                logger.info("Decoding news.google.com - Bad status for URL {}\n{}".format(url, message))
        except Exception as e:
            # Deliberately broad: one bad URL must not abort the whole batch.
            # Include the exception so the failure cause is not lost.
            logger.warning(
                "Error decoding news.google.com, URL: {}\n{}: {}".format(url, type(e).__name__, e)
            )

    return list_decoded_urls