[30e88f42] ✦ In dieser Sitzung haben wir den End-to-End-Test der SuperOffice-Schnittstelle erfolgreich von der automatisierten Simulation bis zum produktiven Live-Lauf mit Echtdaten abgeschlossen.

✦ In dieser Sitzung haben wir den End-to-End-Test der SuperOffice-Schnittstelle erfolgreich von der automatisierten Simulation bis zum produktiven Live-Lauf
  mit Echtdaten abgeschlossen.
This commit is contained in:
2026-02-22 08:20:28 +00:00
parent 32332c092d
commit 11d2bc03bf
20 changed files with 732 additions and 70 deletions

View File

@@ -220,8 +220,49 @@ AUSGABE: NUR den fertigen Satz.
logger.error(f"Opener Error: {e}")
return None
def _sync_company_address_data(self, db: Session, company: Company):
    """Fill missing address and VAT fields on *company* from its latest website scrape.

    Looks up the most recent ``EnrichmentData`` row with
    ``source_type="website_scrape"`` for the company. If its content is a dict
    holding an ``"impressum"`` dict, city, street, zip (``"zip"`` or ``"plz"``),
    country code and VAT ID are copied onto the company.

    Existing values are never overwritten, with one exception: a country of
    ``"DE"`` may be replaced by the scraped country code.
    NOTE(review): presumably "DE" is a default placeholder value -- confirm.

    The session is committed (and the update logged) only when at least one
    field actually changed value.

    Args:
        db: Database session used for the lookup and the commit.
        company: Company record that is updated in place.
    """
    from ..database import EnrichmentData

    enrichment = (
        db.query(EnrichmentData)
        .filter_by(company_id=company.id, source_type="website_scrape")
        .order_by(EnrichmentData.created_at.desc())
        .first()
    )
    if not enrichment:
        return

    content = enrichment.content
    # A non-dict payload (e.g. a raw string) could satisfy an `in` check via
    # substring match and then blow up on key access, so require a dict.
    if not isinstance(content, dict):
        return

    imp = content.get("impressum")
    if not imp or not isinstance(imp, dict):
        return

    changed = False

    # Fields that are only filled when the company has no value yet:
    # (company attribute, scraped value)
    fill_if_empty = (
        ("city", imp.get("city")),
        ("street", imp.get("street")),
        ("zip_code", imp.get("zip") or imp.get("plz")),
        ("crm_vat", imp.get("vat_id")),
    )
    for attr, value in fill_if_empty:
        if value and not getattr(company, attr):
            setattr(company, attr, value)
            changed = True

    # Country: fill when empty or still "DE", but only count it as a change
    # when the value really differs -- otherwise every run with a "DE"
    # impressum would trigger a needless commit and a misleading log line.
    country_code = imp.get("country_code")
    if (
        country_code
        and (not company.country or company.country == "DE")
        and company.country != country_code
    ):
        company.country = country_code
        changed = True

    if changed:
        db.commit()
        logger.info(f"Updated Address/VAT from Impressum for {company.name}: City={company.city}, VAT={company.crm_vat}")
def classify_company_potential(self, company: Company, db: Session) -> Company:
logger.info(f"--- Starting FULL Analysis v3.0 for {company.name} ---")
# Ensure metadata is synced from scrape
self._sync_company_address_data(db, company)
industries = self._load_industry_definitions(db)
website_content, _ = self._get_website_content_and_url(db, company)
if not website_content or len(website_content) < 100: