Working fetch search, refactoring DB towards source search
This commit is contained in:
@@ -2,15 +2,80 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 33,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import newspaper\n",
|
||||
"url = \"http://www.missingkids.org/poster/NCMC/2045193/1\"\n",
|
||||
"#url = \"https://www.missingkids.org/new-poster/NCMC/2045193/1\"\n",
|
||||
"# !pip install git+https://github.com/tasos-py/Search-Engines-Scraper.git\n",
|
||||
"import search_engines\n",
|
||||
"\n",
|
||||
"art = newspaper.article(url)"
|
||||
"engine = search_engines.Bing()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 34,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Searching Bing \n",
|
||||
" \r"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = engine.search('news: \"child abuse\"', pages=2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 40,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"engine = search_engines.search_engines_dict[\"brave\"]()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Searching Brave \n",
|
||||
" \r"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = 'news: child abuse'\n",
|
||||
"r = engine.search(query, pages=2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 44,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'_results': []}"
|
||||
]
|
||||
},
|
||||
"execution_count": 44,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"r.__dict__"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -18,8 +83,57 @@
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 46,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"newspaper.exceptions.ArticleBinaryDataException"
|
||||
]
|
||||
},
|
||||
"execution_count": 46,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"art.__dict__"
|
||||
"import newspaper\n",
|
||||
"newspaper.ArticleBinaryDataException"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"'''\n",
|
||||
"import newspaper\n",
|
||||
"\n",
|
||||
"url = 'https://www.missingkids.org/poster/USVA/VA25-0820/1'\n",
|
||||
"art_1 = newspaper.article(url)\n",
|
||||
"url = 'https://www.missingkids.org/poster/NCMC/2045193/1'\n",
|
||||
"art_2 = newspaper.article(url)\n",
|
||||
"'''"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -44,15 +158,8 @@
|
||||
"l = client.list()\n",
|
||||
"list_models = [m.get(\"model\") for m in l.model_dump().get(\"models\")]\n",
|
||||
"\n",
|
||||
"list_models"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(list_models)\n",
|
||||
"\n",
|
||||
"for m in list_models:\n",
|
||||
" context_key = [ k for k in client.show(m).model_dump().get(\"modelinfo\").keys() if \"context_length\" in k]\n",
|
||||
" if (len(context_key) != 1):\n",
|
||||
|
||||
Reference in New Issue
Block a user