from .db_utils import DB_Handler
from ..models import Feed

import feedparser
import dateutil.parser
import traceback

from .logger import get_logger

logger = get_logger()


class FetchFeeds():

    def __init__(self) -> None:
        logger.debug("Initializing News feed")

    def run(self):
        try:
            logger.debug("Starting NewsFeed.run()")

            # Get the RSS feed URLs registered in the database
            list_url_feeds = list(Feed.objects.values_list('rss_feed', flat=True))
            logger.debug("Fetching news from feeds: {}".format(list_url_feeds))

            # Process each RSS feed
            for url_feed in list_url_feeds:
                # Initialize per-feed accumulators
                urls_fetched, urls_publish_date = [], []

                # Fetch and parse the feed
                feeds = feedparser.parse(url_feed)

                # Iterate over the feed entries
                for f in feeds.get("entries", []):
                    # Get the entry URL
                    url = f.get("link", None)

                    # Skip entries without a link
                    if url is not None:
                        # Prefer the pre-parsed publish date; otherwise fall back
                        # to parsing the raw "published" string
                        publish_date_parsed = f.get("published_parsed")
                        if publish_date_parsed is None:
                            publish_date = f.get("published", None)
                            if publish_date is not None:
                                publish_date_parsed = dateutil.parser.parse(publish_date)

                        # Collect publish date (not passed to the DB handler below) and URL
                        urls_publish_date.append(publish_date_parsed)
                        urls_fetched.append(url)

                # Label the fetching source
                source = "feed {}".format(url_feed)

                # Write the fetched URLs to the DB
                DB_Handler().insert_raw_urls(urls_fetched, source)

        except Exception as e:
            logger.warning("Exception in NewsFeed.run(): {}\n{}".format(e, traceback.format_exc()))
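

# Usage sketch (assumption): this class is typically driven from outside the
# module, e.g. a Django management command or a scheduled job. The command
# name and scheduler are hypothetical and not defined in this repository;
# only FetchFeeds().run() comes from this module.
#
#     from .fetch_feeds import FetchFeeds
#
#     def handle(self, *args, **options):
#         # Fetch all registered RSS feeds once and persist the raw URLs
#         FetchFeeds().run()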