from .db_utils import DB_Handler

import feedparser
# BUG FIX: `import dateutil` alone does not load the `parser` submodule;
# `dateutil.parser.parse(...)` would raise AttributeError at runtime.
import dateutil.parser

from .logger import get_logger

logger = get_logger()


class FetchFeeds:
    """Fetch article URLs from the RSS feeds registered in the database
    and write them back to the database in batches, one batch per feed."""

    def __init__(self, db_handler: DB_Handler) -> None:
        """Store the database handler used to read feed URLs and write results.

        Args:
            db_handler: handler providing `_get_feed_urls()` and `write_batch()`.
        """
        logger.debug("Initializing News feed")
        self.db_handler = db_handler

    def run(self) -> None:
        """Fetch every configured feed, collect entry URLs, and persist them.

        All exceptions are caught at this boundary and logged as a warning so
        that a broken feed or network error does not crash the caller; note
        this aborts the whole run, including feeds not yet processed.
        """
        try:
            logger.debug("Starting NewsFeed.run()")
            # Feed URLs come from the database.
            list_url_feeds = self.db_handler._get_feed_urls()
            logger.debug("Fetching news from feeds: {}".format(str(list_url_feeds)))

            for url_feed in list_url_feeds:
                urls_fetched, urls_publish_date = [], []
                feeds = feedparser.parse(url_feed)

                for f in feeds.get("entries", []):
                    url = f.get("link", None)
                    if url is None:
                        # Entry without a link — nothing to store.
                        continue

                    # Prefer the date feedparser already parsed; fall back to
                    # parsing the raw "published" string ourselves.
                    publish_date_parsed = f.get("published_parsed")
                    if publish_date_parsed is None:
                        publish_date = f.get("published", None)
                        if publish_date is not None:
                            publish_date_parsed = dateutil.parser.parse(publish_date)

                    urls_publish_date.append(publish_date_parsed)
                    urls_fetched.append(url)

                # NOTE(review): urls_publish_date is collected but never
                # persisted — write_batch only receives the URLs. Confirm
                # whether publish dates should be stored as well.
                source = "feed {}".format(url_feed)
                self.db_handler.write_batch(urls_fetched, source)
        except Exception as e:
            logger.warning("Exception in NewsFeed.run(): {}".format(str(e)))