feat([2fd88f42]): integrate real PLZ geocoordinate dataset
This commit is contained in:
@@ -16,21 +16,24 @@ app.add_middleware(
|
|||||||
allow_headers=["*"], # Allows all headers
|
allow_headers=["*"], # Allows all headers
|
||||||
)
|
)
|
||||||
|
|
||||||
# --- In-memory Storage ---
|
# --- In-memory Storage & Data Loading ---
|
||||||
df_storage = None
|
df_storage = None
|
||||||
plz_column_name = None
|
plz_column_name = None
|
||||||
|
plz_geocoord_df = None
|
||||||
|
|
||||||
# --- Dummy Geocoding Data (IMPORTANT: TO BE REPLACED) ---
|
@app.on_event("startup")
|
||||||
# This is a tiny subset of German postal codes for demonstration purposes only.
|
def load_plz_data():
|
||||||
# A real implementation MUST load a comprehensive PLZ dataset from a file
|
global plz_geocoord_df
|
||||||
# (e.g., a CSV or GeoJSON file) for the application to be useful.
|
try:
|
||||||
PLZ_COORDINATES = {
|
print("--- Loading PLZ geocoordinates dataset... ---")
|
||||||
"10115": {"lat": 52.53, "lon": 13.38}, # Berlin
|
df = pd.read_csv("plz_geocoord.csv", dtype={'plz': str})
|
||||||
"20095": {"lat": 53.55, "lon": 9.99}, # Hamburg
|
df['plz'] = df['plz'].str.zfill(5)
|
||||||
"80331": {"lat": 48.13, "lon": 11.57}, # Munich
|
plz_geocoord_df = df.set_index('plz')
|
||||||
"50667": {"lat": 50.93, "lon": 6.95}, # Cologne
|
print(f"--- Successfully loaded {len(plz_geocoord_df)} PLZ coordinates. ---")
|
||||||
"60311": {"lat": 50.11, "lon": 8.68}, # Frankfurt
|
except FileNotFoundError:
|
||||||
}
|
print("--- FATAL ERROR: plz_geocoord.csv not found. Geocoding will not work. ---")
|
||||||
|
# In a real app, you might want to exit or handle this more gracefully
|
||||||
|
plz_geocoord_df = pd.DataFrame()
|
||||||
|
|
||||||
# --- Pydantic Models ---
|
# --- Pydantic Models ---
|
||||||
class FilterRequest(BaseModel):
|
class FilterRequest(BaseModel):
|
||||||
@@ -88,18 +91,20 @@ async def upload_file(file: UploadFile = File(...)):
|
|||||||
|
|
||||||
@app.post("/api/heatmap")
|
@app.post("/api/heatmap")
|
||||||
async def get_heatmap_data(request: FilterRequest):
|
async def get_heatmap_data(request: FilterRequest):
|
||||||
global df_storage, plz_column_name
|
global df_storage, plz_column_name, plz_geocoord_df
|
||||||
print(f"--- Received request to /api/heatmap with filters: {request.filters} ---")
|
print(f"--- Received request to /api/heatmap with filters: {request.filters} ---")
|
||||||
if df_storage is None:
|
if df_storage is None:
|
||||||
print("ERROR: No data in df_storage. File must be uploaded first.")
|
print("ERROR: No data in df_storage. File must be uploaded first.")
|
||||||
raise HTTPException(status_code=404, detail="No data available. Please upload a file first.")
|
raise HTTPException(status_code=404, detail="No data available. Please upload a file first.")
|
||||||
|
if plz_geocoord_df.empty:
|
||||||
|
raise HTTPException(status_code=500, detail="Geocoding data is not available on the server.")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
filtered_df = df_storage.copy()
|
filtered_df = df_storage.copy()
|
||||||
|
|
||||||
# Apply filters from the request
|
# Apply filters from the request
|
||||||
for column, values in request.filters.items():
|
for column, values in request.filters.items():
|
||||||
if values: # Only filter if there are values selected
|
if values:
|
||||||
filtered_df = filtered_df[filtered_df[column].isin(values)]
|
filtered_df = filtered_df[filtered_df[column].isin(values)]
|
||||||
|
|
||||||
if filtered_df.empty:
|
if filtered_df.empty:
|
||||||
@@ -109,18 +114,20 @@ async def get_heatmap_data(request: FilterRequest):
|
|||||||
plz_counts = filtered_df.groupby(plz_column_name).size().reset_index(name='count')
|
plz_counts = filtered_df.groupby(plz_column_name).size().reset_index(name='count')
|
||||||
|
|
||||||
# --- Geocoding Step ---
|
# --- Geocoding Step ---
|
||||||
# In a real app, this would be a merge/join with a proper geo dataset
|
# Merge the aggregated counts with the geocoding dataframe
|
||||||
heatmap_data = []
|
merged_df = pd.merge(
|
||||||
for _, row in plz_counts.iterrows():
|
plz_counts,
|
||||||
plz = row[plz_column_name]
|
plz_geocoord_df,
|
||||||
coords = PLZ_COORDINATES.get(plz)
|
left_on=plz_column_name,
|
||||||
if coords:
|
right_index=True,
|
||||||
heatmap_data.append({
|
how='inner'
|
||||||
"plz": plz,
|
)
|
||||||
"lat": coords["lat"],
|
|
||||||
"lon": coords["lon"],
|
# Rename columns to match frontend expectations ('lon' and 'lat')
|
||||||
"count": row["count"]
|
merged_df.rename(columns={'x': 'lon', 'y': 'lat'}, inplace=True)
|
||||||
})
|
|
||||||
|
# Convert to the required JSON format
|
||||||
|
heatmap_data = merged_df[['plz', 'lat', 'lon', 'count']].to_dict(orient='records')
|
||||||
|
|
||||||
print(f"Generated heatmap data with {len(heatmap_data)} PLZ points.")
|
print(f"Generated heatmap data with {len(heatmap_data)} PLZ points.")
|
||||||
return heatmap_data
|
return heatmap_data
|
||||||
|
|||||||
8299
heatmap-tool/backend/plz_geocoord.csv
Normal file
8299
heatmap-tool/backend/plz_geocoord.csv
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user