Urls source search, cleaning code
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
from .db_utils import DB_Handler
|
||||
from ..models import Search
|
||||
from ..models import Search, Source
|
||||
import feedparser
|
||||
import dateutil
|
||||
import traceback
|
||||
@@ -14,16 +14,19 @@ class FetchFeeds():
|
||||
try:
|
||||
logger.debug("Starting FetchFeeds.run()")
|
||||
|
||||
# Get feeds
|
||||
list_url_feeds = list(Search.objects.filter(type=Search.TYPE_ENUM.RSS_FEED).values_list('search', flat=True))
|
||||
logger.debug("Fetching from feeds: {}".format(list_url_feeds))
|
||||
# Get source object
|
||||
obj_source, created = Source.objects.get_or_create(source="feeds")
|
||||
|
||||
# Get feeds objects
|
||||
list_obj_search_feeds = Search.objects.filter(type=Search.TYPE_ENUM.RSS_FEED)
|
||||
logger.debug("Fetching from feeds: {}".format([e.search for e in list_obj_search_feeds]))
|
||||
|
||||
# Process via RSS feeds
|
||||
for url_feed in list_url_feeds:
|
||||
for obj_search in list_obj_search_feeds:
|
||||
# Initialize
|
||||
urls_fetched, urls_publish_date = [], []
|
||||
# Fetch feeds
|
||||
feeds = feedparser.parse(url_feed)
|
||||
feeds = feedparser.parse(obj_search.search)
|
||||
# Parse
|
||||
for f in feeds.get("entries", []):
|
||||
# Get URL
|
||||
@@ -41,10 +44,8 @@ class FetchFeeds():
|
||||
urls_publish_date.append(publish_date_parsed)
|
||||
# URL
|
||||
urls_fetched.append(url)
|
||||
|
||||
# URL fetching source
|
||||
source = "feed {}".format(url_feed)
|
||||
|
||||
# Write to DB
|
||||
DB_Handler().insert_raw_urls(urls_fetched, source)
|
||||
DB_Handler().insert_raw_urls(urls_fetched, obj_source, obj_search)
|
||||
except Exception as e:
|
||||
logger.warning("Exception in FetchFeeds.run(): {}\n{}".format(e, traceback.format_exc()))
|
||||
|
||||
Reference in New Issue
Block a user