feat([2fd88f42]): integrate real PLZ geocoordinate dataset

This commit is contained in:
2026-02-04 12:47:46 +00:00
parent 571c125e9f
commit 9ca50cbb0e
2 changed files with 8332 additions and 26 deletions

View File

@@ -16,21 +16,24 @@ app.add_middleware(
allow_headers=["*"], # Allows all headers
)
# --- In-memory Storage ---
# --- In-memory Storage & Data Loading ---
df_storage = None
plz_column_name = None
plz_geocoord_df = None
# --- Dummy Geocoding Data (IMPORTANT: TO BE REPLACED) ---
# This is a tiny subset of German postal codes for demonstration purposes only.
# A real implementation MUST load a comprehensive PLZ dataset from a file
# (e.g., a CSV or GeoJSON file) for the application to be useful.
PLZ_COORDINATES = {
"10115": {"lat": 52.53, "lon": 13.38}, # Berlin
"20095": {"lat": 53.55, "lon": 9.99}, # Hamburg
"80331": {"lat": 48.13, "lon": 11.57}, # Munich
"50667": {"lat": 50.93, "lon": 6.95}, # Cologne
"60311": {"lat": 50.11, "lon": 8.68}, # Frankfurt
}
@app.on_event("startup")
def load_plz_data():
    """Load the PLZ -> coordinate lookup table into ``plz_geocoord_df``.

    Reads ``plz_geocoord.csv`` (a relative path, so it is resolved against
    the process's current working directory), keeps the ``plz`` column as
    text, left-pads it to five characters and uses it as the index of the
    resulting DataFrame.

    The function never raises for a missing or unusable dataset. In those
    cases it stores an empty DataFrame in ``plz_geocoord_df`` so that code
    checking ``plz_geocoord_df.empty`` can report the problem instead of
    hitting ``None`` or crashing the application start-up.
    """
    global plz_geocoord_df

    # Start from the "no data" state; it is only replaced on full success.
    plz_geocoord_df = pd.DataFrame()

    print("--- Loading PLZ geocoordinates dataset... ---")
    try:
        df = pd.read_csv("plz_geocoord.csv", dtype={'plz': str})
        # Rows without a PLZ cannot be looked up, and NaN index labels would
        # match NaN keys during a merge, so drop them up front.
        df = df.dropna(subset=['plz'])
        # Restore leading zeros that may have been lost upstream ("1067" -> "01067").
        df['plz'] = df['plz'].str.zfill(5)
        # Keep one row per PLZ so an index-based join stays one-to-one.
        df = df.drop_duplicates(subset='plz', keep='first')
        loaded = df.set_index('plz')
    except FileNotFoundError:
        print("--- FATAL ERROR: plz_geocoord.csv not found. Geocoding will not work. ---")
        return
    except (KeyError, ValueError) as exc:
        # KeyError: the file has no 'plz' column.
        # ValueError: covers pandas' EmptyDataError / ParserError (both derive
        # from ValueError) as well as decoding problems.
        print(f"--- FATAL ERROR: plz_geocoord.csv could not be parsed ({exc!r}). Geocoding will not work. ---")
        return

    plz_geocoord_df = loaded
    print(f"--- Successfully loaded {len(plz_geocoord_df)} PLZ coordinates. ---")
# --- Pydantic Models ---
class FilterRequest(BaseModel):
@@ -88,18 +91,20 @@ async def upload_file(file: UploadFile = File(...)):
@app.post("/api/heatmap")
async def get_heatmap_data(request: FilterRequest):
global df_storage, plz_column_name
global df_storage, plz_column_name, plz_geocoord_df
print(f"--- Received request to /api/heatmap with filters: {request.filters} ---")
if df_storage is None:
print("ERROR: No data in df_storage. File must be uploaded first.")
raise HTTPException(status_code=404, detail="No data available. Please upload a file first.")
if plz_geocoord_df.empty:
raise HTTPException(status_code=500, detail="Geocoding data is not available on the server.")
try:
filtered_df = df_storage.copy()
# Apply filters from the request
for column, values in request.filters.items():
if values: # Only filter if there are values selected
if values:
filtered_df = filtered_df[filtered_df[column].isin(values)]
if filtered_df.empty:
@@ -109,18 +114,20 @@ async def get_heatmap_data(request: FilterRequest):
plz_counts = filtered_df.groupby(plz_column_name).size().reset_index(name='count')
# --- Geocoding Step ---
# In a real app, this would be a merge/join with a proper geo dataset
heatmap_data = []
for _, row in plz_counts.iterrows():
plz = row[plz_column_name]
coords = PLZ_COORDINATES.get(plz)
if coords:
heatmap_data.append({
"plz": plz,
"lat": coords["lat"],
"lon": coords["lon"],
"count": row["count"]
})
# Merge the aggregated counts with the geocoding dataframe.
# The left key is the uploaded file's PLZ column (whatever it is called);
# the right key is the index of the geocoding dataframe. An inner join
# drops PLZ values that have no known coordinates.
merged_df = pd.merge(
    plz_counts,
    plz_geocoord_df,
    left_on=plz_column_name,
    right_index=True,
    how='inner'
)
# Rename columns to match frontend expectations ('plz', 'lon' and 'lat').
# The key column keeps the uploaded file's own name after the merge, so it
# must be mapped to 'plz' explicitly; otherwise the column selection below
# raises KeyError whenever that name is not literally 'plz'.
# NOTE(review): assumes the geocoding CSV exposes longitude/latitude as
# 'x'/'y' - confirm against the dataset header.
merged_df = merged_df.rename(
    columns={plz_column_name: 'plz', 'x': 'lon', 'y': 'lat'}
)
# Convert to the required JSON format: one record per PLZ.
heatmap_data = merged_df[['plz', 'lat', 'lon', 'count']].to_dict(orient='records')
print(f"Generated heatmap data with {len(heatmap_data)} PLZ points.")
return heatmap_data

File diff suppressed because it is too large Load Diff