Schools NL: expand tuple columns into separate columns; log traceback on notify errors

2025-10-14 11:33:17 +02:00
parent f44b784715
commit 7924857fe5
2 changed files with 104 additions and 7 deletions
--- a/app_urls/fetcher/src/notifier.py
+++ b/app_urls/fetcher/src/notifier.py
@@ -4,6 +4,7 @@ from ..models import Urls, Source, Search, UrlContent, UrlsSourceSearch, UrlsDup
 from django.db.models import Count
 import requests
 import os
+import traceback
 from .logger import get_logger
 logger = get_logger()

@@ -57,7 +58,7 @@ def notify_telegram_info(last_hours, channel="INFO"):
        # POST
        response = requests.post(url, params=params)
    except Exception as e:
-        logger.info("Exception while notifying status: {}".format(str(e)))
+        logger.info("Exception while notifying status: {}\n{}".format(str(e), traceback.format_exc()))


 def notify_telegram_warning(last_hours, channel="WARNING"):
@@ -142,7 +143,7 @@ def notify_telegram_warning(last_hours, channel="WARNING"):
            # POST
            response = requests.post(url, params=params)
    except Exception as e:
-        logger.info("Exception while notifying status: {}".format(str(e)))
+        logger.info("Exception while notifying status: {}\n{}".format(str(e), traceback.format_exc()))  # both placeholders are filled inside format()


 def notify_telegram(last_hours=12):
--- a/utils/Schools-NL.ipynb
+++ b/utils/Schools-NL.ipynb
@@ -14,7 +14,6 @@
    "import json\n",
    "import csv\n",
    "\n",
-    "\n",
    "headers = {\"User-Agent\": \"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36\"}"
   ]
  },
@@ -329,13 +328,22 @@
    "    main()"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
-    "df = pd.read_csv(\"scholenopdekaart.csv\", index_col=0)\n",
+    "import pandas as pd\n",
+    "\n",
+    "df = pd.read_csv(\"~/Downloads/scholenopdekaart.csv\", index_col=0)\n",
    "\n",
    "df.head()"
   ]
@@ -346,13 +354,101 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "df.tail()"
+    "def to_dict(row):\n",
+    "    # Empty?\n",
+    "    if (pd.isna(row)):\n",
+    "        return {}\n",
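+    "    # Evaluate the cell's string into Python data, to dict (NOTE: eval() executes text read from the CSV; only use on trusted files)\n",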
+    "    dict_data = dict(eval(row))\n",
+    "    # Remove None values\n",
+    "    for k in list(dict_data.keys()):\n",
+    "        if dict_data[k] is None:\n",
+    "            del dict_data[k]\n",
+    "    # Prefix each key with the current column name (column is the loop variable of the for-loop below)\n",
+    "    return {f\"{column}_{k}\": v for k, v in dict_data.items()}\n",
+    "\n",
+    "for column in [\"students_per_year_trend\", \"num_students_per_group\", \"num_students_per_age\"]:\n",
+    "    print(column)\n",
+    "    # Convert the list of tuples into a dictionary per row\n",
+    "    df_dicts = df[column].apply(to_dict)\n",
+    "    # Expand into separate columns\n",
+    "    df_expanded = pd.json_normalize(df_dicts)\n",
+    "    # Sort\n",
+    "    df_expanded = df_expanded[sorted(df_expanded.columns)]\n",
+    "    # Combine with original columns\n",
+    "    df = pd.concat([df.drop(columns=[column]), df_expanded], axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def to_dict(row):\n",
+    "    # Empty?\n",
+    "    if (pd.isna(row)):\n",
+    "        return {}\n",
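+    "    # Evaluate the cell's string into Python data (NOTE: eval() executes text read from the CSV; only use on trusted files)\n",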
+    "    data = eval(row)\n",
+    "    # Remove first useless data\n",
+    "    data = data[1:]\n",
+    "\n",
+    "    # Generate dict\n",
+    "    dict_data = {}\n",
+    "    for (zipcode, num, percentage) in data:\n",
+    "        dict_data[f\"num_students_zipcode_{zipcode}\"] = num\n",
+    "        dict_data[f\"percentage_students_zipcode_{zipcode}\"] = percentage\n",
+    "\n",
+    "    # Remove None values\n",
+    "    for k in list(dict_data.keys()):\n",
+    "        if dict_data[k] is None:\n",
+    "            del dict_data[k]\n",
+    "    return dict_data\n",
+    "\n",
+    "for column in [\"students_per_zipcode\"]:\n",
+    "    print(column)\n",
+    "    # Convert the list of tuples into a dictionary per row\n",
+    "    df_dicts = df[column].apply(to_dict)\n",
+    "    # Expand into separate columns\n",
+    "    df_expanded = pd.json_normalize(df_dicts)\n",
+    "    # Sort\n",
+    "    df_expanded = df_expanded[sorted(df_expanded.columns)]\n",
+    "    # Combine with original columns\n",
+    "    df = pd.concat([df.drop(columns=[column]), df_expanded], axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.to_csv(\"schools_nl.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "list(df.columns)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
-   "display_name": "matitos_urls",
+   "display_name": "fetcher",
   "language": "python",
   "name": "python3"
  },
@@ -366,7 +462,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.12.9"
+   "version": "3.12.11"
  }
 },
 "nbformat": 4,