from .db_utils import DB_Handler
from ..models import Search, Source

import feedparser
# BUGFIX: `import dateutil` alone does not import the `parser` submodule;
# dateutil.parser.parse(...) below would raise AttributeError without this.
import dateutil.parser
import traceback

from .logger import get_logger

logger = get_logger()


class FetchFeeds():
    """Fetch RSS-feed searches and persist the discovered entry URLs."""

    def __init__(self) -> None:
        logger.debug("Initializing Fetcher Feeds")

    def run(self):
        """Fetch every RSS-feed Search, collect entry URLs, and write them to the DB.

        Best-effort boundary: any exception is caught and logged (with traceback)
        so a broken feed or DB error cannot crash the caller.
        """
        try:
            logger.debug("Starting FetchFeeds.run()")
            # Get (or lazily create) the Source row representing the "feeds" origin
            obj_source, created = Source.objects.get_or_create(source="feeds")
            # All searches configured as RSS feeds
            list_obj_search_feeds = Search.objects.filter(type=Search.TYPE_ENUM.RSS_FEED)
            logger.debug("Fetching from feeds: {}".format([e.search for e in list_obj_search_feeds]))
            # Process each configured feed URL
            for obj_search in list_obj_search_feeds:
                # Per-feed accumulators
                urls_fetched, urls_publish_date = [], []
                # obj_search.search holds the feed URL
                feeds = feedparser.parse(obj_search.search)
                # Walk the feed entries
                for f in feeds.get("entries", []):
                    url = f.get("link", None)
                    if url is not None:
                        # Prefer feedparser's pre-parsed publish date; fall back to
                        # parsing the raw "published" string when absent.
                        publish_date_parsed = f.get("published_parsed")
                        if publish_date_parsed is None:
                            publish_date = f.get("published", None)
                            if publish_date is not None:
                                publish_date_parsed = dateutil.parser.parse(publish_date)
                        # May be None when the entry carries no date at all
                        urls_publish_date.append(publish_date_parsed)
                        urls_fetched.append(url)
                # Write to DB.
                # NOTE(review): urls_publish_date is collected but never passed to
                # insert_raw_urls — confirm whether publish dates should be persisted.
                DB_Handler().insert_raw_urls(urls_fetched, obj_source, obj_search)
        except Exception as e:
            logger.warning("Exception in FetchFeeds.run(): {}\n{}".format(e, traceback.format_exc()))