Files
matitos_news/app_urls/api/src/fetch_feed.py
2025-03-12 17:56:40 +01:00

51 lines
1.9 KiB
Python

from .db_utils import DB_Handler
from ..models import Feed
import feedparser
import dateutil
import traceback
from .logger import get_logger
logger = get_logger()
class FetchFeeds():
    """Collect article URLs from the registered RSS feeds and store them.

    The feed URLs are read from the ``rss_feed`` field of ``Feed``; every
    entry link found in a feed is handed to ``DB_Handler.insert_raw_urls``
    together with a ``"feed <url>"`` source label.
    """

    def __init__(self) -> None:
        logger.debug("Initializing News feed")

    @staticmethod
    def _get_publish_date(entry):
        """Return the publish date of a feed entry, or None if unavailable.

        Prefers the pre-parsed ``published_parsed`` value; otherwise tries to
        parse the raw ``published`` string with dateutil.
        NOTE(review): the two paths yield different types (whatever
        feedparser stores in ``published_parsed`` vs. a ``datetime`` from
        dateutil) -- normalise before this value is persisted anywhere.
        """
        # Explicit submodule import: a bare `import dateutil` does not
        # guarantee that `dateutil.parser` has been loaded.
        from dateutil import parser as date_parser

        publish_date_parsed = entry.get("published_parsed")
        if publish_date_parsed is not None:
            return publish_date_parsed

        publish_date = entry.get("published", None)
        if publish_date is None:
            return None

        try:
            return date_parser.parse(publish_date)
        except (ValueError, OverflowError):
            # A malformed date must not discard the URL (or the whole feed).
            logger.debug("Unparseable publish date: {}".format(publish_date))
            return None

    def _process_feed(self, url_feed):
        """Fetch one RSS feed and write the entry URLs it contains to the DB."""
        # Initialize
        urls_fetched, urls_publish_date = [], []
        # Fetch feeds
        feeds = feedparser.parse(url_feed)
        # Parse
        for entry in feeds.get("entries", []):
            # Get URL
            url = entry.get("link", None)
            # Entries without a link cannot be stored
            if url is None:
                continue
            # Published date
            # NOTE(review): collected but not passed to insert_raw_urls below;
            # kept so it is available once the DB layer accepts it.
            urls_publish_date.append(self._get_publish_date(entry))
            # URL
            urls_fetched.append(url)
        # URL fetching source
        source = "feed {}".format(url_feed)
        # Write to DB
        DB_Handler().insert_raw_urls(urls_fetched, source)

    def run(self):
        """Fetch every registered feed and store the URLs found.

        Never raises: failures are logged as warnings. A failure in one feed
        is isolated so the remaining feeds are still processed.
        """
        try:
            logger.debug("Starting NewsFeed.run()")
            # Get feeds
            list_url_feeds = list(Feed.objects.values_list('rss_feed', flat=True))
            logger.debug("Fetching news from feeds: {}".format(list_url_feeds))
        except Exception as e:
            logger.warning("Exception in NewsFeed.run(): {}\n{}".format(e, traceback.format_exc()))
            return

        # Process via RSS feeds
        for url_feed in list_url_feeds:
            try:
                self._process_feed(url_feed)
            except Exception as e:
                # Keep going: one broken feed must not block the others.
                logger.warning("Exception in NewsFeed.run() for feed {}: {}\n{}".format(url_feed, e, traceback.format_exc()))