{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Scratchpad: article extraction, Ollama and image-generation endpoints\n",
    "\n",
    "Each section below is independent and keeps its own imports, so a missing optional package only breaks that section.\n",
    "\n",
    "- **newspaper / trafilatura / news-please**: fetch a URL and inspect the extracted article.\n",
    "- **Ollama**: list the models served by the configured host and send a single chat message.\n",
    "- **Image endpoint**: POST the same prompt with an increasing number of inference steps and save each result as a PNG.\n",
    "\n",
    "All sections need network access to the hosts configured in their cells."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## newspaper: quick fetch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import newspaper\n",
    "\n",
    "url = \"http://www.missingkids.org/poster/NCMC/2045193/1\"\n",
    "#url = \"https://www.missingkids.org/new-poster/NCMC/2045193/1\"\n",
    "\n",
    "art = newspaper.article(url)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dump every attribute of the article object for inspection.\n",
    "vars(art)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Ollama: available models, context lengths, single chat call"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import ollama\n",
    "\n",
    "#model = \"llama3.2:1b\"\n",
    "client = ollama.Client(\n",
    "    host = 'https://ollamamodel.matitos.org',\n",
    ")\n",
    "models_response = client.list()\n",
    "# `or []` keeps the comprehension valid if the dump carries no \"models\" entry.\n",
    "list_models = [m.get(\"model\") for m in (models_response.model_dump().get(\"models\") or [])]\n",
    "\n",
    "list_models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print, for every model, the value stored under its \"*context_length*\" model-info key.\n",
    "for m in list_models:\n",
    "    # One show() round-trip per model; the result is reused for key lookup and value.\n",
    "    model_info = client.show(m).model_dump().get(\"modelinfo\") or {}\n",
    "    context_keys = [k for k in model_info if \"context_length\" in k]\n",
    "    if len(context_keys) != 1:\n",
    "        print(\"Problem!!!\")\n",
    "    # With zero matching keys there is nothing to index: report None instead of raising IndexError.\n",
    "    context_length = model_info.get(context_keys[0]) if context_keys else None\n",
    "    print(m, context_length)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "text = \"...\"\n",
    "model = \"falcon3:1b\"\n",
    "\n",
    "msg_content = {\n",
    "    \"role\": \"user\",\n",
    "    \"content\": text,\n",
    "}\n",
    "response = client.chat(model=model, messages=[msg_content], stream=False)\n",
    "print(response[\"message\"][\"content\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Image endpoint: sweep over the number of inference steps\n",
    "\n",
    "Writes one `sample_img_<steps>.png` per step count into the working directory."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import base64\n",
    "\n",
    "import cv2\n",
    "import numpy as np\n",
    "import requests\n",
    "\n",
    "endpoint = \"http://192.168.2.64:12343/image\"\n",
    "# Upper bound per request so an unresponsive server cannot hang the kernel.\n",
    "# NOTE(review): 600 s is a guess; tune to what the largest step count really needs.\n",
    "REQUEST_TIMEOUT_S = 600\n",
    "INFERENCE_STEPS = [1, 4, 10, 20, 25, 30, 35, 40, 45, 50, 60, 70, 80, 90, 100]\n",
    "\n",
    "#prompt = \"Majestic mountain landscape with snow-capped peaks, autumn foliage in vibrant reds and oranges, a turquoise river winding through a valley, crisp and serene atmosphere, ultra-realistic style.\"\n",
    "prompt = \"A group of kids happily playing in a joy environment\"\n",
    "#prompt = \"A bitcoin behaving like a king, surrounded by small alternative coins. Detailed, geometric style\"\n",
    "\n",
    "for inf_step in INFERENCE_STEPS:\n",
    "    # Fresh dict per request (not named `json`, which would shadow the stdlib module name).\n",
    "    payload = {\n",
    "        \"prompt\": prompt,\n",
    "        \"num_inference_steps\": inf_step,\n",
    "        \"size\": \"512x512\",\n",
    "        \"seed\": 123456,\n",
    "    }\n",
    "\n",
    "    %time r = requests.post(endpoint, json=payload, timeout=REQUEST_TIMEOUT_S)\n",
    "    print(\"Status code\", r.status_code)\n",
    "    if not r.ok:\n",
    "        # An error body is not an image; skip instead of crashing in the decode/write below.\n",
    "        print(\"Skipping {} steps: request failed\".format(inf_step))\n",
    "        continue\n",
    "\n",
    "    # Image: the response text is decoded as base64 and handed to OpenCV.\n",
    "    png_as_np = np.frombuffer(base64.b64decode(r.text), dtype=np.uint8)\n",
    "    image_bgr = cv2.imdecode(png_as_np, cv2.IMREAD_COLOR) if png_as_np.size else None\n",
    "    if image_bgr is None:\n",
    "        print(\"Skipping {} steps: response is not a decodable image\".format(inf_step))\n",
    "        continue\n",
    "\n",
    "    cv2.imwrite(\"sample_img_{}.png\".format(inf_step), image_bgr)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## trafilatura"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# !pip install trafilatura\n",
    "import trafilatura\n",
    "from pprint import pprint\n",
    "\n",
    "# url = \"https://www.foxnews.com/us/utah-mommy-blogger-ruby-franke-power-public-image-allowed-child-abuse-go-unchecked-expert\"\n",
    "# url = \"https://www.missingkids.org/poster/USVA/VA25-0820/1\"\n",
    "url = \"https://www.bloomberg.com/news/articles/2025-03-12/eu-launches-metals-tariff-retaliation-on-26-billion-of-us-goods\"\n",
    "\n",
    "# Fetch\n",
    "doc = trafilatura.fetch_url(url)\n",
    "if doc is None:\n",
    "    print(\"Download failed for {}\".format(url))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Content & metadata (both stay None when the download above failed)\n",
    "metadata = trafilatura.extract_metadata(doc) if doc is not None else None\n",
    "content = trafilatura.extract(doc) if doc is not None else None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pprint(metadata.as_dict() if metadata is not None else None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(content)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## newspaper4k + langdetect"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# !pip install newspaper4k\n",
    "# !pip install langdetect\n",
    "import newspaper\n",
    "import langdetect\n",
    "langdetect.DetectorFactory.seed = 0\n",
    "\n",
    "# url = \"https://www.missingkids.org/poster/USVA/VA25-0820/1\"\n",
    "#url = \"https://www.waff.com/2025/03/11/colbert-heights-high-school-employee-arrested-child-abuse/\"\n",
    "#url = \"https://www.bloomberg.com/news/articles/2025-03-12/eu-launches-metals-tariff-retaliation-on-26-billion-of-us-goods\"\n",
    "#url = \"https://apnews.com/article/canada-trump-us-tariffs-steel-2517a6a2baf0596cb1a43d3a7d1e7939\"\n",
    "url = \"https://www.foxnews.com/us/utah-mommy-blogger-ruby-franke-power-public-image-allowed-child-abuse-go-unchecked-expert\"\n",
    "#url = \"https://www.ft.com/content/6d7c6915-4ceb-43fc-9896-590036b12a87\"\n",
    "#url = \"https://www.lanacion.com.ar/politica/milei-en-bahia-blanca-un-viaje-sorpresa-para-frenar-las-criticas-y-mostrar-cercania-nid12032025/\"\n",
    "#url = \"https://www.missingkids.org/poster/NCMC/2043547/1\"\n",
    "\n",
    "# Reset first so a failed download cannot fall through to a stale `article` from an earlier run.\n",
    "article = None\n",
    "try:\n",
    "    article = newspaper.article(url)\n",
    "except newspaper.ArticleException as e:\n",
    "    print(\"ArticleException: {}\".format(str(e)))\n",
    "except Exception as e:\n",
    "    print(\"Err: {}\".format(str(e)))\n",
    "\n",
    "# url_photo = set([i for i in article.images if \"api.missingkids.org/photographs\" in i])\n",
    "# article.is_valid_url(), article.is_parsed, article.is_media_news(), article.is_valid_body()\n",
    "article.meta_data if article is not None else None"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## news-please"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# !pip install news-please\n",
    "from newsplease import NewsPlease\n",
    "\n",
    "#url = \"https://variety.com/2025/film/news/gene-hackman-death-suspicious-gas-leak-search-warrant-1236322610/\"\n",
    "#url = \"https://www.bbc.com/news/articles/cewkkkvkzn9o\"\n",
    "url = \"https://www.foxnews.com/us/utah-mommy-blogger-ruby-franke-power-public-image-allowed-child-abuse-go-unchecked-expert\"\n",
    "\n",
    "# Separate name from the newspaper `article` above: the two objects expose different attributes.\n",
    "newsplease_article = NewsPlease.from_url(url)\n",
    "# NOTE(review): from_url appears to return an empty result when the download fails - confirm for the installed version.\n",
    "if not newsplease_article:\n",
    "    print(\"news-please returned no article for {}\".format(url))\n",
    "else:\n",
    "    print(newsplease_article.title)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if newsplease_article:\n",
    "    print(newsplease_article.maintext)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "matitos",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}