feat([2fd88f42]): integrate real PLZ geocoordinate dataset

2026-02-04 12:47:46 +00:00
parent 571c125e9f
commit 9ca50cbb0e
2 changed files with 8332 additions and 26 deletions
--- a/heatmap-tool/backend/main.py
+++ b/heatmap-tool/backend/main.py
@@ -16,21 +16,24 @@ app.add_middleware(
    allow_headers=["*"],  # Allows all headers
 )
-# --- In-memory Storage ---
+# --- In-memory Storage & Data Loading ---
 df_storage = None
 plz_column_name = None
 plz_geocoord_df = None
-# --- Dummy Geocoding Data (IMPORTANT: TO BE REPLACED) ---
+@app.on_event("startup")
-# This is a tiny subset of German postal codes for demonstration purposes only.
+def load_plz_data():
-# A real implementation MUST load a comprehensive PLZ dataset from a file 
+    global plz_geocoord_df
-# (e.g., a CSV or GeoJSON file) for the application to be useful.
+    try:
-PLZ_COORDINATES = {
+        print("--- Loading PLZ geocoordinates dataset... ---")
-    "10115": {"lat": 52.53, "lon": 13.38}, # Berlin
+        df = pd.read_csv("plz_geocoord.csv", dtype={'plz': str})
-    "20095": {"lat": 53.55, "lon": 9.99},  # Hamburg
+        df['plz'] = df['plz'].str.zfill(5)
-    "80331": {"lat": 48.13, "lon": 11.57}, # Munich
+        plz_geocoord_df = df.set_index('plz')
-    "50667": {"lat": 50.93, "lon": 6.95},  # Cologne
+        print(f"--- Successfully loaded {len(plz_geocoord_df)} PLZ coordinates. ---")
-    "60311": {"lat": 50.11, "lon": 8.68},  # Frankfurt
+    except FileNotFoundError:
-}
+        print("--- FATAL ERROR: plz_geocoord.csv not found. Geocoding will not work. ---")
+        # In a real app, you might want to exit or handle this more gracefully
+        plz_geocoord_df = pd.DataFrame()
 # --- Pydantic Models ---
 class FilterRequest(BaseModel):
@@ -88,18 +91,20 @@ async def upload_file(file: UploadFile = File(...)):
@app.post("/api/heatmap")
 async def get_heatmap_data(request: FilterRequest):
-    global df_storage, plz_column_name
+    global df_storage, plz_column_name, plz_geocoord_df
    print(f"--- Received request to /api/heatmap with filters: {request.filters} ---")
    if df_storage is None:
        print("ERROR: No data in df_storage. File must be uploaded first.")
        raise HTTPException(status_code=404, detail="No data available. Please upload a file first.")
    if plz_geocoord_df.empty:
         raise HTTPException(status_code=500, detail="Geocoding data is not available on the server.")
    try:
        filtered_df = df_storage.copy()
        # Apply filters from the request
        for column, values in request.filters.items():
-            if values: # Only filter if there are values selected
+            if values:
                filtered_df = filtered_df[filtered_df[column].isin(values)]
        if filtered_df.empty:
@@ -109,18 +114,20 @@ async def get_heatmap_data(request: FilterRequest):
        plz_counts = filtered_df.groupby(plz_column_name).size().reset_index(name='count')
        # --- Geocoding Step ---
-        # In a real app, this would be a merge/join with a proper geo dataset
+        # Merge the aggregated counts with the geocoding dataframe
-        heatmap_data = []
+        merged_df = pd.merge(
-        for _, row in plz_counts.iterrows():
+            plz_counts,
-            plz = row[plz_column_name]
+            plz_geocoord_df,
-            coords = PLZ_COORDINATES.get(plz)
+            left_on=plz_column_name,
-            if coords:
+            right_index=True,
-                heatmap_data.append({
+            how='inner'
-                    "plz": plz,
+        )
-                    "lat": coords["lat"],
+        
-                    "lon": coords["lon"],
+        # Rename columns to match frontend expectations ('lon' and 'lat')
-                    "count": row["count"]
+        merged_df.rename(columns={'x': 'lon', 'y': 'lat'}, inplace=True)
-                })
+        
+        # Convert to the required JSON format
+        heatmap_data = merged_df[['plz', 'lat', 'lon', 'count']].to_dict(orient='records')
        print(f"Generated heatmap data with {len(heatmap_data)} PLZ points.")
        return heatmap_data
--- a/heatmap-tool/backend/plz_geocoord.csv
+++ b/heatmap-tool/backend/plz_geocoord.csv