diff --git a/app_urls/fetcher/src/notifier.py b/app_urls/fetcher/src/notifier.py index 86d245a..8d92292 100644 --- a/app_urls/fetcher/src/notifier.py +++ b/app_urls/fetcher/src/notifier.py @@ -4,6 +4,7 @@ from ..models import Urls, Source, Search, UrlContent, UrlsSourceSearch, UrlsDup from django.db.models import Count import requests import os +import traceback from .logger import get_logger logger = get_logger() @@ -57,7 +58,7 @@ def notify_telegram_info(last_hours, channel="INFO"): # POST response = requests.post(url, params=params) except Exception as e: - logger.info("Exception while notifying status: {}".format(str(e))) + logger.info("Exception while notifying status: {}\n{}".format(str(e), traceback.format_exc())) def notify_telegram_warning(last_hours, channel="WARNING"): @@ -142,7 +143,7 @@ def notify_telegram_warning(last_hours, channel="WARNING"): # POST response = requests.post(url, params=params) except Exception as e: - logger.info("Exception while notifying status: {}".format(str(e))) + logger.info("Exception while notifying status: {}\n{}".format(str(e), traceback.format_exc())) def notify_telegram(last_hours=12): diff --git a/utils/Schools-NL.ipynb b/utils/Schools-NL.ipynb index 823fbdb..8f66aa2 100644 --- a/utils/Schools-NL.ipynb +++ b/utils/Schools-NL.ipynb @@ -14,7 +14,6 @@ "import json\n", "import csv\n", "\n", - "\n", "headers = {\"User-Agent\": \"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36\"}" ] }, @@ -329,13 +328,22 @@ " main()" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "df = pd.read_csv(\"scholenopdekaart.csv\", index_col=0)\n", + "import pandas as pd\n", + "\n", + "df = pd.read_csv(\"~/Downloads/scholenopdekaart.csv\", index_col=0)\n", "\n", "df.head()" ] @@ -346,13 +354,101 @@ "metadata": {}, "outputs": [], 
"source": [ - "df.tail()" + "def to_dict(row):\n", + " # Missing value according to pd.isna: nothing to expand\n", + " if (pd.isna(row)):\n", + " return {}\n", + " # Parse the cell text into a dict - NOTE(review): eval runs whatever the CSV cell contains; ast.literal_eval is the safer choice if cells hold plain literals\n", + " dict_data = dict(eval(row))\n", + " # Drop keys whose value is None\n", + " for k in list(dict_data.keys()):\n", + " if dict_data[k] is None:\n", + " del dict_data[k]\n", + " # Prefix every key with the column name (column is the loop variable of the for loop below, not a parameter)\n", + " return {f\"{column}_{k}\": v for k, v in dict_data.items()}\n", + "\n", + "for column in [\"students_per_year_trend\", \"num_students_per_group\", \"num_students_per_age\"]:\n", + " print(column)\n", + " # Convert the list of tuples into a dictionary per row\n", + " df_dicts = df[column].apply(to_dict)\n", + " # Expand into separate columns\n", + " df_expanded = pd.json_normalize(df_dicts)\n", + " # Order the new columns alphabetically by name\n", + " df_expanded = df_expanded[sorted(df_expanded.columns)]\n", + " # Combine with original columns\n", + " df = pd.concat([df.drop(columns=[column]), df_expanded], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def to_dict(row):\n", + " # Missing value according to pd.isna: nothing to expand\n", + " if (pd.isna(row)):\n", + " return {}\n", + " # Parse the cell text - NOTE(review): eval runs whatever the CSV cell contains; ast.literal_eval is the safer choice if cells hold plain literals\n", + " data = eval(row)\n", + " # Skip the first entry, which is not used here\n", + " data = data[1:]\n", + "\n", + " # Build two keys (count and percentage) per (zipcode, num, percentage) tuple\n", + " dict_data = {}\n", + " for (zipcode, num, percentage) in data:\n", + " dict_data[f\"num_students_zipcode_{zipcode}\"] = num\n", + " dict_data[f\"percentage_students_zipcode_{zipcode}\"] = percentage\n", + "\n", + " # Drop keys whose value is None\n", + " for k in list(dict_data.keys()):\n", + " if dict_data[k] is None:\n", + " del dict_data[k]\n", + " return dict_data\n", + "\n", + "for column in [\"students_per_zipcode\"]:\n", + " print(column)\n", + " # Convert the list of tuples into a dictionary per row\n", + " df_dicts = df[column].apply(to_dict)\n", + " # Expand into separate columns\n", + " df_expanded = pd.json_normalize(df_dicts)\n", + " # Order the new columns alphabetically by name\n", + " df_expanded = df_expanded[sorted(df_expanded.columns)]\n", + " # Combine with 
original columns\n", + " df = pd.concat([df.drop(columns=[column]), df_expanded], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.to_csv(\"schools_nl.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "list(df.columns)" ] } ], "metadata": { "kernelspec": { - "display_name": "matitos_urls", + "display_name": "fetcher", "language": "python", "name": "python3" }, @@ -366,7 +462,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.9" + "version": "3.12.11" } }, "nbformat": 4,