fix(ce): Resolve database schema mismatch and restore docs

- Fixed a critical database schema mismatch in the company-explorer by forcing a database re-initialization with a new database file. This ensures the application code is in sync with the database schema.
- Documented the schema mismatch incident and its resolution in MIGRATION_PLAN.md.

- Restored and enhanced BUILDER_APPS_MIGRATION.md by using the git history to recover extensive, valuable content that had been accidentally deleted. The guide once again includes detailed troubleshooting steps and code templates for common migration pitfalls.
This commit is contained in:
2026-01-15 15:54:45 +00:00
parent 4fcbbe3723
commit 4a336f6374
13 changed files with 724 additions and 555 deletions

View File

@@ -9,7 +9,7 @@ from functools import wraps
from typing import Optional, Union, List
from thefuzz import fuzz
# Versuche neue Google GenAI Lib (v1.0+)
# Try new Google GenAI Lib (v1.0+)
try:
from google import genai
from google.genai import types
@@ -17,7 +17,7 @@ try:
except ImportError:
HAS_NEW_GENAI = False
# Fallback auf alte Lib
# Fallback to old Lib
try:
import google.generativeai as old_genai
HAS_OLD_GENAI = True
@@ -100,22 +100,33 @@ def simple_normalize_url(url: str) -> str:
return "k.A."
def normalize_company_name(name: str) -> str:
    """
    Normalize a company name for robust comparison.

    The name is lower-cased, common legal forms and generic business words
    are removed, digit groups separated by whitespace are joined, remaining
    punctuation is stripped and runs of whitespace are collapsed.

    Example: "11 88 0 Solutions" -> "11880" ("solutions" is itself one of
    the removed words).

    Args:
        name: Raw company name. Falsy values (None, "") are accepted.

    Returns:
        The normalized name, or "" when the input is falsy.
    """
    if not name:
        return ""

    name = name.lower()

    # Patterns are applied in order, so compound forms must come before
    # their parts ("gmbh", "co", "kg"), otherwise they can never match.
    legal_forms = [
        r'\bgmbh & co\. kg\b',
        # No leading \b here: "&" is usually preceded by a space, and \b
        # would require a word character directly in front of it.
        r'&\s*co\b',
        r'\bgmbh\b', r'\bag\b', r'\bkg\b', r'\bohg\b', r'\bug\b', r'\bltd\b',
        r'\bllc\b', r'\binc\b', r'\bcorp\b', r'\bco\b',
        # Abbreviations ending in "." cannot be closed with \b: after a dot
        # \b only matches when a word character follows, so "e.v." at the
        # end of a name was never removed. Use a negative lookahead instead.
        r'\be\.v\.(?!\w)', r'\bs\.a\.(?!\w)',
        r'\bsa\b', r'\bse\b', r'\bgesellschaft\b', r'\bgp\b', r'\blp\b',
        r'\bservice\b', r'\bservices\b', r'\bgroup\b', r'\bsolutions\b',
        r'\bsysteme\b', r'\bhandel\b', r'\bmarketing\b', r'\btechnology\b',
        r'\binternational\b', r'\bholding\b', r'\bverwaltung\b',
        r'\bfoundation\b',
    ]
    for form in legal_forms:
        name = re.sub(form, '', name)

    # Condense numbers: "11 88 0" -> "11880". Lookarounds keep the digits
    # out of the match so adjacent gaps ("1 2 3") are all closed in one pass.
    name = re.sub(r'(?<=\d)\s+(?=\d)', '', name)

    # Remove special characters (\w already covers letters and digits),
    # then collapse whitespace.
    name = re.sub(r'[^\w\s]', '', name)
    name = re.sub(r'\s+', ' ', name).strip()
    return name
@@ -136,11 +147,14 @@ def extract_numeric_value(raw_value: str, is_umsatz: bool = False) -> str:
# Simple multiplier handling
multiplier = 1.0
if 'mrd' in raw_value or 'billion' in raw_value or 'bn' in raw_value:
multiplier = 1000.0 if is_umsatz else 1000000000.0
multiplier = 1000.0 # Standardize to Millions for revenue, Billions for absolute numbers
if not is_umsatz: multiplier = 1000000000.0
elif 'mio' in raw_value or 'million' in raw_value or 'mn' in raw_value:
multiplier = 1.0 if is_umsatz else 1000000.0
multiplier = 1.0 # Already in Millions for revenue
if not is_umsatz: multiplier = 1000000.0
elif 'tsd' in raw_value or 'thousand' in raw_value:
multiplier = 0.001 if is_umsatz else 1000.0
multiplier = 0.001 # Thousands converted to millions for revenue
if not is_umsatz: multiplier = 1000.0
# Extract number candidates
# Regex for "1.000,50" or "1,000.50" or "1000"
@@ -171,8 +185,6 @@ def extract_numeric_value(raw_value: str, is_umsatz: bool = False) -> str:
# For revenue, 375.6 vs 1.000 is tricky.
# But usually revenue in millions is small numbers with decimals (250.5).
# Large integers usually mean thousands.
# Let's assume dot is decimal for revenue unless context implies otherwise,
# but for "375.6" it works. For "1.000" it becomes 1.0.
# Let's keep dot as decimal for revenue by default unless we detect multiple dots
if num_str.count('.') > 1:
num_str = num_str.replace('.', '')
@@ -284,4 +296,4 @@ def call_gemini(
logger.error(f"Error with google-generativeai lib: {e}")
raise e
raise ImportError("No Google GenAI library installed (neither google-genai nor google-generativeai).")
raise ImportError("No Google GenAI library installed (neither google-genai nor google-generativeai).")