import traceback

import dateutil
import dateutil.parser
import feedparser

from ..models import Search, Source
from .db_utils import DB_Handler
from .logger import get_logger
|
|
logger = get_logger()
|
|
|
|
class FetchFeeds():
|
|
def __init__(self) -> None:
|
|
logger.debug("Initializing Fetcher Feeds")
|
|
|
|
def run(self):
|
|
try:
|
|
logger.debug("Starting FetchFeeds.run()")
|
|
|
|
# Get source object
|
|
obj_source, created = Source.objects.get_or_create(source="feeds")
|
|
|
|
# Get feeds objects
|
|
list_obj_search_feeds = Search.objects.filter(type=Search.TYPE_ENUM.RSS_FEED)
|
|
logger.debug("Fetching from feeds: {}".format([e.search for e in list_obj_search_feeds]))
|
|
|
|
# Process via RSS feeds
|
|
for obj_search in list_obj_search_feeds:
|
|
# Initialize
|
|
urls_fetched, urls_publish_date = [], []
|
|
# Fetch feeds
|
|
feeds = feedparser.parse(obj_search.search)
|
|
# Parse
|
|
for f in feeds.get("entries", []):
|
|
# Get URL
|
|
url = f.get("link", None)
|
|
# Process?
|
|
if (url is not None):
|
|
# Available publish date?
|
|
publish_date_parsed = f.get("published_parsed")
|
|
if (publish_date_parsed is None):
|
|
publish_date = f.get("published", None)
|
|
if (publish_date is not None):
|
|
publish_date_parsed = dateutil.parser.parse(publish_date)
|
|
|
|
# Published date
|
|
urls_publish_date.append(publish_date_parsed)
|
|
# URL
|
|
urls_fetched.append(url)
|
|
|
|
# Write to DB
|
|
DB_Handler().insert_raw_urls(urls_fetched, obj_source, obj_search)
|
|
except Exception as e:
|
|
logger.warning("Exception in FetchFeeds.run(): {}\n{}".format(e, traceback.format_exc()))
|