# Heatmap Tool backend — FastAPI service for uploading Excel data,
# filtering it, and serving geocoded per-PLZ heatmap points.
from fastapi import FastAPI, File, UploadFile, HTTPException
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
import pandas as pd
|
|
import io
|
|
from pydantic import BaseModel
|
|
from typing import Dict, List
|
|
|
|
# FastAPI application instance serving the heatmap tool's HTTP API.
app = FastAPI()

# Configure CORS
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive; fine for local development, but confirm before deploying.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allows all origins
    allow_credentials=True,
    allow_methods=["*"],  # Allows all methods
    allow_headers=["*"],  # Allows all headers
)

# --- In-memory Storage & Data Loading ---
# Most recently uploaded Excel sheet as a DataFrame (all values read as
# strings, NaNs replaced with 'N/A'); None until a file is uploaded.
df_storage = None
# Name of the column in df_storage holding postal codes; set on upload
# (auto-detected) or via /api/set-plz-column.
plz_column_name = None
# PLZ -> coordinates lookup table indexed by zero-padded 5-digit PLZ string,
# with columns 'y' (latitude) and 'x' (longitude); loaded at startup.
plz_geocoord_df = None
|
|
|
|
@app.on_event("startup")
def load_plz_data():
    """Load the PLZ -> coordinate lookup table once at application startup.

    Populates the module-level ``plz_geocoord_df`` with a DataFrame indexed
    by zero-padded 5-digit PLZ strings.  On any failure an empty DataFrame
    is stored instead, so request handlers can detect that geocoding is
    unavailable without crashing.
    """
    global plz_geocoord_df
    try:
        print("--- Loading PLZ geocoordinates dataset... ---")
        # The CSV ships with a malformed header row, so read it as-is and
        # then overwrite the column names by their known positional order:
        # PLZ, latitude ('y'), longitude ('x').
        coords = pd.read_csv("plz_geocoord.csv", dtype=str)
        coords.columns = ['plz', 'y', 'x']
        # Canonicalise PLZ values to zero-padded 5-digit strings and use
        # them as the lookup index.
        coords['plz'] = coords['plz'].str.zfill(5)
        plz_geocoord_df = coords.set_index('plz')
        print(f"--- Successfully loaded {len(plz_geocoord_df)} PLZ coordinates. ---")
    except FileNotFoundError:
        print("--- FATAL ERROR: plz_geocoord.csv not found. Geocoding will not work. ---")
        plz_geocoord_df = pd.DataFrame()
    except Exception as e:
        print(f"--- FATAL ERROR loading plz_geocoord.csv: {e} ---")
        plz_geocoord_df = pd.DataFrame()
|
|
|
|
# --- Pydantic Models ---
|
|
class TooltipColumnConfig(BaseModel):
    """One tooltip entry: a source column and whether it is shown."""

    # Client-side identifier; not read by the server beyond deserialization.
    id: str
    # Column name in the uploaded DataFrame this entry refers to.
    name: str
    # When False, the column keeps its position in the tooltip ordering but
    # is excluded from the per-PLZ attribute summaries.
    visible: bool
|
|
|
|
class FilterRequest(BaseModel):
    """Payload for /api/heatmap: column filters plus tooltip configuration."""

    # Maps column name -> accepted values; an empty list means the column
    # is not filtered.
    filters: Dict[str, List[str]]
    # Ordered tooltip entries; when empty, the heatmap endpoint falls back
    # to summarising every non-PLZ column.
    tooltip_config: List[TooltipColumnConfig] = []
|
|
|
|
class PlzColumnRequest(BaseModel):
    """Payload for /api/set-plz-column: the user's chosen PLZ column name."""

    # Must match a column of the uploaded DataFrame.
    plz_column: str
|
|
|
|
# --- API Endpoints ---
|
|
@app.get("/ ")
|
|
def read_root():
|
|
return {"message": "Heatmap Tool Backend"}
|
|
|
|
@app.post("/api/upload")
|
|
async def upload_file(file: UploadFile = File(...)):
|
|
global df_storage, plz_column_name
|
|
print(f"--- Received request to /api/upload for file: {file.filename} ---")
|
|
if not file.filename.endswith('.xlsx'):
|
|
raise HTTPException(status_code=400, detail="Invalid file format. Please upload an .xlsx file.")
|
|
|
|
try:
|
|
contents = await file.read()
|
|
df = pd.read_excel(io.BytesIO(contents), dtype=str) # Read all as string to be safe
|
|
df.fillna('N/A', inplace=True)
|
|
df_storage = df # Store dataframe temporarily
|
|
|
|
# --- PLZ Column Detection ---
|
|
temp_plz_col = None
|
|
for col in df.columns:
|
|
if 'plz' in col.lower():
|
|
temp_plz_col = col
|
|
break
|
|
|
|
if not temp_plz_col:
|
|
print("PLZ column not found automatically. Asking user for selection.")
|
|
return {"plz_column_needed": True, "columns": list(df.columns)}
|
|
|
|
# If we found a column, proceed as before
|
|
plz_column_name = temp_plz_col
|
|
df[plz_column_name] = df[plz_column_name].str.strip().str.zfill(5)
|
|
df_storage = df # Update storage with normalized PLZ
|
|
|
|
filters = {}
|
|
for col in df.columns:
|
|
if col != plz_column_name:
|
|
unique_values = df[col].unique().tolist()
|
|
filters[col] = sorted(unique_values)
|
|
|
|
print(f"Successfully processed file. Found PLZ column: '{plz_column_name}'.")
|
|
return {"plz_column_needed": False, "filters": filters, "plz_column": plz_column_name}
|
|
|
|
except Exception as e:
|
|
print(f"ERROR processing file: {e}")
|
|
raise HTTPException(status_code=500, detail=f"An error occurred while processing the file: {e}")
|
|
|
|
|
|
@app.post("/api/set-plz-column")
|
|
async def set_plz_column(request: PlzColumnRequest):
|
|
global df_storage, plz_column_name
|
|
print(f"--- Received request to set PLZ column to: {request.plz_column} ---")
|
|
if df_storage is None:
|
|
raise HTTPException(status_code=400, detail="No data available. Please upload a file first.")
|
|
|
|
plz_column_name = request.plz_column
|
|
if plz_column_name not in df_storage.columns:
|
|
raise HTTPException(status_code=400, detail=f"Column '{plz_column_name}' not found in the uploaded file.")
|
|
|
|
# Normalize PLZ data
|
|
df_storage[plz_column_name] = df_storage[plz_column_name].str.strip().str.zfill(5)
|
|
|
|
# --- Dynamic Filter Detection ---
|
|
filters = {}
|
|
for col in df_storage.columns:
|
|
if col != plz_column_name:
|
|
unique_values = df_storage[col].unique().tolist()
|
|
filters[col] = sorted(unique_values)
|
|
|
|
print(f"Successfully set PLZ column. Detected {len(filters)} filterable columns.")
|
|
return {"plz_column_needed": False, "filters": filters, "plz_column": plz_column_name}
|
|
|
|
|
|
@app.post("/api/heatmap")
|
|
async def get_heatmap_data(request: FilterRequest):
|
|
global df_storage, plz_column_name, plz_geocoord_df
|
|
print(f"--- Received request to /api/heatmap with filters: {request.filters} ---")
|
|
if df_storage is None:
|
|
print("ERROR: No data in df_storage. File must be uploaded first.")
|
|
raise HTTPException(status_code=404, detail="No data available. Please upload a file first.")
|
|
if plz_geocoord_df.empty:
|
|
raise HTTPException(status_code=500, detail="Geocoding data is not available on the server.")
|
|
|
|
try:
|
|
filtered_df = df_storage.copy()
|
|
|
|
# Apply filters from the request
|
|
for column, values in request.filters.items():
|
|
if values:
|
|
filtered_df = filtered_df[filtered_df[column].isin(values)]
|
|
|
|
if filtered_df.empty:
|
|
return []
|
|
|
|
# Aggregate data by PLZ, and also collect attribute summaries
|
|
plz_grouped = filtered_df.groupby(plz_column_name)
|
|
plz_counts = plz_grouped.size().reset_index(name='count')
|
|
|
|
# Collect unique attributes for each PLZ based on tooltip_config
|
|
attribute_summaries = {}
|
|
if request.tooltip_config:
|
|
visible_columns = [col.name for col in request.tooltip_config if col.visible]
|
|
ordered_columns = [col.name for col in request.tooltip_config]
|
|
else:
|
|
# Fallback if no config is provided
|
|
visible_columns = [col for col in filtered_df.columns if col != plz_column_name]
|
|
ordered_columns = visible_columns
|
|
|
|
for plz_val, group in plz_grouped:
|
|
summary = {}
|
|
for col_name in visible_columns:
|
|
if col_name in group:
|
|
unique_attrs = group[col_name].unique().tolist()
|
|
summary[col_name] = unique_attrs[:3]
|
|
attribute_summaries[plz_val] = summary
|
|
|
|
# Convert summaries to a DataFrame for merging
|
|
summary_df = pd.DataFrame.from_dict(attribute_summaries, orient='index')
|
|
summary_df.index.name = plz_column_name
|
|
|
|
# --- Geocoding Step ---
|
|
# Merge the aggregated counts with the geocoding dataframe
|
|
merged_df = pd.merge(
|
|
plz_counts,
|
|
plz_geocoord_df,
|
|
left_on=plz_column_name,
|
|
right_index=True,
|
|
how='inner'
|
|
)
|
|
|
|
# Merge with attribute summaries
|
|
merged_df = pd.merge(
|
|
merged_df,
|
|
summary_df,
|
|
left_on=plz_column_name,
|
|
right_index=True,
|
|
how='left'
|
|
)
|
|
|
|
# Rename columns to match frontend expectations ('lon' and 'lat')
|
|
merged_df.rename(columns={'x': 'lon', 'y': 'lat'}, inplace=True)
|
|
|
|
# Also rename the original PLZ column to the consistent name 'plz'
|
|
merged_df.rename(columns={plz_column_name: 'plz'}, inplace=True)
|
|
|
|
# Convert to the required JSON format, including all remaining columns (which are the attributes)
|
|
# We'll dynamically collect attribute columns for output
|
|
output_columns = ['plz', 'lat', 'lon', 'count']
|
|
for col in merged_df.columns:
|
|
if col not in output_columns and col != plz_column_name: # Ensure we don't duplicate PLZ or coords
|
|
output_columns.append(col)
|
|
|
|
heatmap_data = merged_df[output_columns].to_dict(orient='records')
|
|
|
|
# The frontend expects 'attributes_summary' as a single field, so let's restructure for that
|
|
# For each record, pick out the attributes that are not 'plz', 'lat', 'lon', 'count'
|
|
final_heatmap_data = []
|
|
for record in heatmap_data:
|
|
# Order the attributes based on tooltip_config
|
|
ordered_attrs = {
|
|
col_name: record.get(col_name)
|
|
for col_name in ordered_columns
|
|
if col_name in record and record.get(col_name) is not None
|
|
}
|
|
final_heatmap_data.append({
|
|
"plz": record['plz'],
|
|
"lat": record['lat'],
|
|
"lon": record['lon'],
|
|
"count": record['count'],
|
|
"attributes_summary": ordered_attrs
|
|
})
|
|
|
|
print(f"Generated heatmap data with {len(final_heatmap_data)} PLZ points, respecting tooltip config.")
|
|
return final_heatmap_data
|
|
|
|
except Exception as e:
|
|
print(f"ERROR generating heatmap: {e}")
|
|
raise HTTPException(status_code=500, detail=f"An error occurred while generating heatmap data: {e}")
|