Unquote (percent-decode) result URLs in Google general search

This commit is contained in:
Luciano Gervasoni
2025-07-03 13:52:18 +02:00
parent 969e08e84a
commit 80f40e1a74

View File

@@ -1,6 +1,7 @@
import time
import feedparser
import os
from urllib.parse import unquote
from ..models import Search, Source
from .fetch_utils_gnews import decode_gnews_urls
from .logger import get_logger
@@ -208,7 +209,10 @@ class SearchGoogleGeneral(FetcherAbstract):
# Links
for l in links:
# 'link': 'https://uk.news.yahoo.com/leaving-neverland-2-michael-jackson-lawyer-channel-4-102017088.html&ved=2ahUKEwjl38eJm5aMAxVvqJUCHXgnGzwQxfQBegQICRAC&usg=AOvVaw1osa6b3o_xXfcNinMDpLoK'
set_links.add( l.get("link").split("&ved=")[0] )
url = l.get("link").split("&ved=")[0]
# https://www.foxnews.com/politics%3Fparam%3D446dd5e1 -> https://www.foxnews.com/politics?param=446dd5e1
url = unquote(url)
set_links.add(url)
# Finished?
if (num_before == len(set_links)):
break