Schools NL tuples, traceback on notify err
This commit is contained in:
@@ -14,7 +14,6 @@
|
||||
"import json\n",
|
||||
"import csv\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"headers = {\"User-Agent\": \"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36\"}"
|
||||
]
|
||||
},
|
||||
@@ -329,13 +328,22 @@
|
||||
" main()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df = pd.read_csv(\"scholenopdekaart.csv\", index_col=0)\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"df = pd.read_csv(\"~/Downloads/scholenopdekaart.csv\", index_col=0)\n",
|
||||
"\n",
|
||||
"df.head()"
|
||||
]
|
||||
@@ -346,13 +354,101 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df.tail()"
|
||||
"def to_dict(row):\n",
|
||||
" # Empty?\n",
|
||||
" if (pd.isna(row)):\n",
|
||||
" return {}\n",
|
||||
" # Evaluate, to dict\n",
|
||||
" dict_data = dict(eval(row))\n",
|
||||
" # Remove None values\n",
|
||||
" for k in list(dict_data.keys()):\n",
|
||||
" if dict_data[k] is None:\n",
|
||||
" del dict_data[k]\n",
|
||||
" # Prefix\n",
|
||||
" return {f\"{column}_{k}\": v for k, v in dict_data.items()}\n",
|
||||
"\n",
|
||||
"for column in [\"students_per_year_trend\", \"num_students_per_group\", \"num_students_per_age\"]:\n",
|
||||
" print(column)\n",
|
||||
" # Convert the list of tuples into a dictionary per row\n",
|
||||
" df_dicts = df[column].apply(to_dict)\n",
|
||||
" # Expand into separate columns\n",
|
||||
" df_expanded = pd.json_normalize(df_dicts)\n",
|
||||
" # Sort\n",
|
||||
" df_expanded = df_expanded[sorted(df_expanded.columns)]\n",
|
||||
" # Combine with original columns\n",
|
||||
" df = pd.concat([df.drop(columns=[column]), df_expanded], axis=1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def to_dict(row):\n",
|
||||
" # Empty?\n",
|
||||
" if (pd.isna(row)):\n",
|
||||
" return {}\n",
|
||||
" # Evaluate, to dict\n",
|
||||
" data = eval(row)\n",
|
||||
" # Remove first useless data\n",
|
||||
" data = data[1:]\n",
|
||||
"\n",
|
||||
" # Generate dict\n",
|
||||
" dict_data = {}\n",
|
||||
" for (zipcode, num, percentage) in data:\n",
|
||||
" dict_data[f\"num_students_zipcode_{zipcode}\"] = num\n",
|
||||
" dict_data[f\"percentage_students_zipcode_{zipcode}\"] = percentage\n",
|
||||
"\n",
|
||||
" # Remove None values\n",
|
||||
" for k in list(dict_data.keys()):\n",
|
||||
" if dict_data[k] is None:\n",
|
||||
" del dict_data[k]\n",
|
||||
" return dict_data\n",
|
||||
"\n",
|
||||
"for column in [\"students_per_zipcode\"]:\n",
|
||||
" print(column)\n",
|
||||
" # Convert the list of tuples into a dictionary per row\n",
|
||||
" df_dicts = df[column].apply(to_dict)\n",
|
||||
" # Expand into separate columns\n",
|
||||
" df_expanded = pd.json_normalize(df_dicts)\n",
|
||||
" # Sort\n",
|
||||
" df_expanded = df_expanded[sorted(df_expanded.columns)]\n",
|
||||
" # Combine with original columns\n",
|
||||
" df = pd.concat([df.drop(columns=[column]), df_expanded], axis=1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df.to_csv(\"schools_nl.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"list(df.columns)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "matitos_urls",
|
||||
"display_name": "fetcher",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -366,7 +462,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.9"
|
||||
"version": "3.12.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
Reference in New Issue
Block a user