364 lines
9.2 KiB
Plaintext
364 lines
9.2 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# !pip install git+https://github.com/tasos-py/Search-Engines-Scraper.git\n",
|
|
"import search_engines\n",
|
|
"\n",
|
|
"engine = search_engines.Bing()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"results = engine.search('news: \"child abuse\"', pages=2)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"engine = search_engines.search_engines_dict[\"brave\"]()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"query = 'news: child abuse'\n",
|
|
"r = engine.search(query, pages=2)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"r.__dict__"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import newspaper\n",
|
|
"newspaper.ArticleBinaryDataException"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Disabled example: download two missingkids.org posters with newspaper.\n",
"# Written as '#' comments instead of a bare triple-quoted string, because a string\n",
"# literal that is the last expression of a cell is echoed back as the cell's output.\n",
"# import newspaper\n",
"#\n",
"# url = 'https://www.missingkids.org/poster/USVA/VA25-0820/1'\n",
"# art_1 = newspaper.article(url)\n",
"# url = 'https://www.missingkids.org/poster/NCMC/2045193/1'\n",
"# art_2 = newspaper.article(url)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import ollama\n",
"\n",
"#model = \"llama3.2:1b\"\n",
"client = ollama.Client(\n",
"    host='https://ollamamodel.matitos.org',\n",
")\n",
"\n",
"# Ask the server which models it has and keep just their names.\n",
"available = client.list()\n",
"list_models = [entry.get(\"model\") for entry in available.model_dump().get(\"models\")]\n",
"\n",
"print(list_models)\n",
"\n",
"# For each model, print the value stored under its single \"context_length\" model-info key.\n",
"for model_name in list_models:\n",
"    # Query the server once per model and reuse the result for both the key search and the lookup.\n",
"    model_info = client.show(model_name).model_dump().get(\"modelinfo\") or {}\n",
"    context_keys = [key for key in model_info if \"context_length\" in key]\n",
"    if len(context_keys) != 1:\n",
"        # No match (or an ambiguous one): say which model and which keys, then skip it,\n",
"        # because indexing context_keys[0] on an empty list raises IndexError.\n",
"        print(\"Problem!!!\", model_name, context_keys)\n",
"        continue\n",
"    print(model_name, model_info.get(context_keys[0]))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Single-turn chat: send one user message to the chosen model and print the reply text.\n",
"# Relies on `client` created in the Ollama listing cell.\n",
"model = \"falcon3:1b\"\n",
"text = \"...\"\n",
"\n",
"msg_content = dict(role=\"user\", content=text)\n",
"response = client.chat(messages=[msg_content], model=model, stream=False)\n",
"print(response[\"message\"][\"content\"])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import requests\n",
"import cv2\n",
"import base64\n",
"import numpy as np\n",
"\n",
"endpoint = \"http://192.168.2.64:12343/image\"\n",
"\n",
"# Candidate prompts; only the uncommented assignment is sent.\n",
"#prompt = \"Majestic mountain landscape with snow-capped peaks, autumn foliage in vibrant reds and oranges, a turquoise river winding through a valley, crisp and serene atmosphere, ultra-realistic style.\"\n",
"prompt = \"A group of kids happily playing in a joy environment\"\n",
"#prompt = \"A bitcoin behaving like a king, surrounded by small alternative coins. Detailed, geometric style\"\n",
"\n",
"# Request body. Named `payload` so the standard-library module name `json` is not shadowed.\n",
"payload = {\n",
"    \"prompt\": prompt,\n",
"    \"num_inference_steps\": 10,\n",
"    \"size\": \"512x512\",\n",
"    \"seed\": 123456,\n",
"}\n",
"\n",
"# Same prompt and seed at increasing step counts; one image file is written per step count.\n",
"for inf_step in [1, 4, 10, 20, 25, 30, 35, 40, 45, 50, 60, 70, 80, 90, 100]:\n",
"    payload[\"num_inference_steps\"] = inf_step\n",
"\n",
"    %time r = requests.post(endpoint, json=payload)\n",
"    print(\"Status code\", r.status_code)\n",
"    if not r.ok:\n",
"        # An error body is not image data; do not try to decode it.\n",
"        print(\"Request failed for {} steps, skipping\".format(inf_step))\n",
"        continue\n",
"\n",
"    # Image: the response text is base64-decoded and handed to OpenCV as an encoded image buffer.\n",
"    png_as_np = np.frombuffer(base64.b64decode(r.text), dtype=np.uint8)\n",
"    image_bgr = cv2.imdecode(png_as_np, cv2.IMREAD_COLOR)\n",
"    if image_bgr is None:\n",
"        # imdecode signals undecodable data with None, which imwrite cannot save.\n",
"        print(\"Could not decode image for {} steps, skipping\".format(inf_step))\n",
"        continue\n",
"\n",
"    cv2.imwrite(\"sample_img_{}.png\".format(inf_step), image_bgr)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# !pip install trafilatura\n",
"import trafilatura\n",
"from pprint import pprint\n",
"\n",
"# Candidate URLs; only the uncommented assignment is fetched.\n",
"# url = \"https://www.foxnews.com/us/utah-mommy-blogger-ruby-franke-power-public-image-allowed-child-abuse-go-unchecked-expert\"\n",
"# url = \"https://www.missingkids.org/poster/USVA/VA25-0820/1\"\n",
"url = \"https://www.bloomberg.com/news/articles/2025-03-12/eu-launches-metals-tariff-retaliation-on-26-billion-of-us-goods\"\n",
"\n",
"# Fetch\n",
"doc = trafilatura.fetch_url(url)\n",
"# fetch_url returns None when the download fails; stop here with a clear message\n",
"# instead of letting a later extraction cell fail on a missing document.\n",
"if doc is None:\n",
"    raise RuntimeError(\"trafilatura could not download {}\".format(url))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Content & metadata\n",
|
|
"metadata = trafilatura.extract_metadata(doc)\n",
|
|
"content = trafilatura.extract(doc)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"pprint(metadata.as_dict())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(content)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# !pip install newspaper4k\n",
"# !pip install langdetect\n",
"import newspaper\n",
"import langdetect\n",
"# Fixed seed so langdetect gives the same answer on every run.\n",
"langdetect.DetectorFactory.seed = 0\n",
"\n",
"# Candidate URLs; only the uncommented assignment is downloaded.\n",
"# url = \"https://www.missingkids.org/poster/USVA/VA25-0820/1\"\n",
"#url = \"https://www.waff.com/2025/03/11/colbert-heights-high-school-employee-arrested-child-abuse/\"\n",
"#url = \"https://www.bloomberg.com/news/articles/2025-03-12/eu-launches-metals-tariff-retaliation-on-26-billion-of-us-goods\"\n",
"#url = \"https://apnews.com/article/canada-trump-us-tariffs-steel-2517a6a2baf0596cb1a43d3a7d1e7939\"\n",
"url = \"https://www.foxnews.com/us/utah-mommy-blogger-ruby-franke-power-public-image-allowed-child-abuse-go-unchecked-expert\"\n",
"#url = \"https://www.ft.com/content/6d7c6915-4ceb-43fc-9896-590036b12a87\"\n",
"#url = \"https://www.lanacion.com.ar/politica/milei-en-bahia-blanca-un-viaje-sorpresa-para-frenar-las-criticas-y-mostrar-cercania-nid12032025/\"\n",
"#url = \"https://www.missingkids.org/poster/NCMC/2043547/1\"\n",
"\n",
"# Start from None so a failed download cannot leave `article` undefined\n",
"# (NameError below) or pointing at the result of an earlier run.\n",
"article = None\n",
"try:\n",
"    article = newspaper.article(url)\n",
"except newspaper.ArticleException as e:\n",
"    print(\"ArticleException: {}\".format(str(e)))\n",
"except Exception as e:\n",
"    print(\"Err: {}\".format(str(e)))\n",
"\n",
"# url_photo = set([i for i in article.images if \"api.missingkids.org/photographs\" in i])\n",
"# article.is_valid_url(), article.is_parsed, article.is_media_news(), article.is_valid_body()\n",
"# Show the metadata only when the download succeeded; the error was already printed otherwise.\n",
"article.meta_data if article is not None else None"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# !pip install news-please\n",
"from newsplease import NewsPlease\n",
"\n",
"# Candidate URLs; only the uncommented assignment is downloaded.\n",
"# url = \"https://variety.com/2025/film/news/gene-hackman-death-suspicious-gas-leak-search-warrant-1236322610/\"\n",
"# url = \"https://www.bbc.com/news/articles/cewkkkvkzn9o\"\n",
"url = \"https://www.foxnews.com/us/utah-mommy-blogger-ruby-franke-power-public-image-allowed-child-abuse-go-unchecked-expert\"\n",
"article = NewsPlease.from_url(url)\n",
"# NOTE(review): from_url appears to hand back None (or an empty result) when the page\n",
"# cannot be downloaded or parsed - confirm against the news-please source.\n",
"if not article:\n",
"    raise RuntimeError(\"news-please returned no article for {}\".format(url))\n",
"print(article.title)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(article.maintext)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "matitos",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.9"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|