diff --git a/app_urls/fetcher/src/fetch_search_instances.py b/app_urls/fetcher/src/fetch_search_instances.py index 5c41e22..b33bd81 100644 --- a/app_urls/fetcher/src/fetch_search_instances.py +++ b/app_urls/fetcher/src/fetch_search_instances.py @@ -1,6 +1,7 @@ import time import feedparser import os +from urllib.parse import unquote from ..models import Search, Source from .fetch_utils_gnews import decode_gnews_urls from .logger import get_logger @@ -208,7 +209,10 @@ class SearchGoogleGeneral(FetcherAbstract): # Links for l in links: # 'link': 'https://uk.news.yahoo.com/leaving-neverland-2-michael-jackson-lawyer-channel-4-102017088.html&ved=2ahUKEwjl38eJm5aMAxVvqJUCHXgnGzwQxfQBegQICRAC&usg=AOvVaw1osa6b3o_xXfcNinMDpLoK' - set_links.add( l.get("link").split("&ved=")[0] ) + url = l.get("link").split("&ved=")[0] + # https://www.foxnews.com/politics%3Fparam%3D446dd5e1 -> https://www.foxnews.com/politics?param=446dd5e1 + url = unquote(url) + set_links.add(url) # Finished? if (num_before == len(set_links)): break