Urls source search, cleaning code

This commit is contained in:
Luciano Gervasoni
2025-03-20 17:19:52 +01:00
parent 05e17266f1
commit f84c7729f8
13 changed files with 241 additions and 300 deletions

View File

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 33,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -14,25 +14,16 @@
},
{
"cell_type": "code",
"execution_count": 34,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Searching Bing \n",
" \r"
]
}
],
"outputs": [],
"source": [
"results = engine.search('news: \"child abuse\"', pages=2)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -41,18 +32,9 @@
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Searching Brave \n",
" \r"
]
}
],
"outputs": [],
"source": [
"query = 'news: child abuse'\n",
"r = engine.search(query, pages=2)"
@@ -60,20 +42,9 @@
},
{
"cell_type": "code",
"execution_count": 44,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'_results': []}"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"r.__dict__"
]
@@ -87,20 +58,9 @@
},
{
"cell_type": "code",
"execution_count": 46,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"newspaper.exceptions.ArticleBinaryDataException"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"import newspaper\n",
"newspaper.ArticleBinaryDataException"