Unquote URLs returned by Google general search
This commit is contained in:
@@ -1,6 +1,7 @@
|
|||||||
import time
|
import time
|
||||||
import feedparser
|
import feedparser
|
||||||
import os
|
import os
|
||||||
|
from urllib.parse import unquote
|
||||||
from ..models import Search, Source
|
from ..models import Search, Source
|
||||||
from .fetch_utils_gnews import decode_gnews_urls
|
from .fetch_utils_gnews import decode_gnews_urls
|
||||||
from .logger import get_logger
|
from .logger import get_logger
|
||||||
@@ -208,7 +209,10 @@ class SearchGoogleGeneral(FetcherAbstract):
|
|||||||
# Links
|
# Links
|
||||||
for l in links:
|
for l in links:
|
||||||
# 'link': 'https://uk.news.yahoo.com/leaving-neverland-2-michael-jackson-lawyer-channel-4-102017088.html&ved=2ahUKEwjl38eJm5aMAxVvqJUCHXgnGzwQxfQBegQICRAC&usg=AOvVaw1osa6b3o_xXfcNinMDpLoK'
|
# 'link': 'https://uk.news.yahoo.com/leaving-neverland-2-michael-jackson-lawyer-channel-4-102017088.html&ved=2ahUKEwjl38eJm5aMAxVvqJUCHXgnGzwQxfQBegQICRAC&usg=AOvVaw1osa6b3o_xXfcNinMDpLoK'
|
||||||
set_links.add( l.get("link").split("&ved=")[0] )
|
url = l.get("link").split("&ved=")[0]
|
||||||
|
# Decode percent-escapes left in the link, e.g. https://www.foxnews.com/politics%3Fparam%3D446dd5e1 -> https://www.foxnews.com/politics?param=446dd5e1
|
||||||
|
url = unquote(url)
|
||||||
|
set_links.add(url)
|
||||||
# Finished?
|
# Finished?
|
||||||
if (num_before == len(set_links)):
|
if (num_before == len(set_links)):
|
||||||
break
|
break
|
||||||
|
|||||||
Reference in New Issue
Block a user