From b41b6c38b8c84ef3a80911b73d7df33155161952 Mon Sep 17 00:00:00 2001 From: Floke Date: Sat, 21 Feb 2026 21:26:57 +0000 Subject: [PATCH] Enhance: Address/VAT Sync & Infrastructure Hardening [30e88f42] - Implemented Address (City) and VAT (OrgNumber) sync back to SuperOffice. - Hardened Infrastructure: Removed Pydantic dependency in config for better Docker compatibility. - Improved SuperOffice Client error logging and handled empty SO_ENVIRONMENT variables. - Updated Matrix Generator: Switched to gemini-2.0-flash, added industry filtering, and robust JSON parsing. - Updated Documentation with session findings and troubleshooting steps. --- .dev_session/SESSION_INFO | 2 +- TASK_STATUS_REPORT_30e88f42.md | 45 ++++++++++++ company-explorer/backend/app.py | 11 ++- .../backend/scripts/generate_matrix.py | 21 ++++-- connector-superoffice/README.md | 26 +++++-- connector-superoffice/config.py | 69 +++++++++---------- connector-superoffice/superoffice_client.py | 8 ++- connector-superoffice/worker.py | 38 ++++++++++ 8 files changed, 169 insertions(+), 51 deletions(-) create mode 100644 TASK_STATUS_REPORT_30e88f42.md diff --git a/.dev_session/SESSION_INFO b/.dev_session/SESSION_INFO index 8c7b6728..1e58c046 100644 --- a/.dev_session/SESSION_INFO +++ b/.dev_session/SESSION_INFO @@ -1 +1 @@ -{"task_id": "2f988f42-8544-8100-9dba-e69ee2376730", "token": "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", "session_start_time": "2026-02-21T10:32:38.618482"} \ No newline at end of file +{"task_id": "30e88f42-8544-804e-ac61-ed061d57563a", "token": "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", "session_start_time": "2026-02-21T19:47:04.246020"} \ No newline at end of file diff --git a/TASK_STATUS_REPORT_30e88f42.md b/TASK_STATUS_REPORT_30e88f42.md new file mode 100644 index 00000000..259dea5a --- /dev/null +++ b/TASK_STATUS_REPORT_30e88f42.md @@ -0,0 +1,45 @@ +# Session Report: SuperOffice Connector & End-to-End Test + +**Date:** Feb 21, 2026 +**Focus:** End-to-End 
Testing, Infrastructure Hardening, Vertical Sync + +## 1. Achievements + +### ✅ Infrastructure & Stability +* **Authentication Fixed:** Resolved critical auth failures in `SuperOfficeClient`. Added a fallback to `sod` when the `SO_ENVIRONMENT` variable is empty and improved error logging. +* **Pydantic Dependency Removed:** Rewrote `connector-superoffice/config.py` as a plain Python class that reads its values via `os.getenv`, removing the dependency on `pydantic-settings` and resolving crash loops in Docker containers with older/mixed Python environments. +* **Network Path Clarified:** Confirmed that Webhooks reach the system via Nginx (`/connector/` route) on Port 80/8090, solving the "closed port 8003" mystery. + +### ✅ Functional Improvements +* **Bidirectional Vertical Sync:** Implemented logic in `worker.py` to detect manual Vertical changes in SuperOffice (e.g. `[I:26] -> Leisure`) and sync them back to the Company Explorer. +* **Cascading Updates:** A Vertical change now correctly triggers a re-calculation of marketing texts for all associated persons. +* **Data Persistence:** Updated `company-explorer/backend/app.py` to automatically create/update `Contact` objects during provisioning, ensuring data consistency for cascade updates. + +### ✅ Testing +* **Automated E2E Test:** Created `connector-superoffice/tests/test_e2e_flow.py`. This standalone script verifies the full data roundtrip and the vertical change scenario without needing external dependencies. +* **Matrix Content:** Generated live marketing texts for **"Healthcare - Hospital"** and **"Leisure - Indoor Active"** (5 Personas each) to enable real-world testing. + +## 2. Current Status (Snapshot) + +* **Connector:** Running, Authenticated (`✅ SuperOffice Client initialized`). +* **Worker:** Processing jobs. It correctly handles the "Processing" state from CE by re-queueing the job (RETRY). +* **Write-Back:** Vertical Sync confirmed working. Address/VAT Sync implemented but requires final verification. + +## 3. 
Open Issues / Next Steps + +### 🔸 Address & VAT Sync Debugging +The logic for writing back `City` (PostalAddress) and `OrgNumber` (VAT) was added to `worker.py` but is not yet verified: it may cause update loops, and it still needs validation against the complex SuperOffice address model. +* **Todo:** Verify if address updates actually arrive in SuperOffice once the CE status switches from `PROCESSING` to `SUCCESS`. + +### 🔸 UDF Configuration +`UDF_SUBJECT` and `UDF_VERTICAL` both default to the same ProgID (`SuperOffice:5`) in `config.py`; at least one of the two is probably wrong. +* **Todo:** Verify the correct ProgIDs for the UDFs in the SuperOffice Admin client and update `.env` / `config.py`. + +### 🔸 Monitoring +* **Todo:** Consider a simple web-interface for the connector logs/queue status (as discussed). + +## 4. How to Resume + +1. **Check Logs:** Run `python3 show_logs.py` to see if the pending jobs for "Silly Billy Entertainment" have completed. +2. **Verify Data:** Check SuperOffice to see if Address and VAT were updated. +3. **Refine:** If address sync fails, debug `worker.py` section `2b.2 Sync Address & VAT`. 
diff --git a/company-explorer/backend/app.py b/company-explorer/backend/app.py index 15820043..480435aa 100644 --- a/company-explorer/backend/app.py +++ b/company-explorer/backend/app.py @@ -101,6 +101,11 @@ class ProvisioningResponse(BaseModel): opener: Optional[str] = None # Primary opener (Infrastructure/Cleaning) opener_secondary: Optional[str] = None # Secondary opener (Service/Logistics) texts: Dict[str, Optional[str]] = {} + + # Enrichment Data for Write-Back + address_city: Optional[str] = None + address_country: Optional[str] = None + vat_id: Optional[str] = None class IndustryDetails(BaseModel): pains: Optional[str] = None @@ -346,7 +351,11 @@ def provision_superoffice_contact( role_name=role_name, opener=company.ai_opener, opener_secondary=company.ai_opener_secondary, - texts=texts + texts=texts, + address_city=company.city, + address_country=company.country, + # TODO: Add VAT field to Company model if not present, for now using crm_vat if available + vat_id=company.crm_vat ) @app.get("/api/companies") diff --git a/company-explorer/backend/scripts/generate_matrix.py b/company-explorer/backend/scripts/generate_matrix.py index f638135f..99e8a426 100644 --- a/company-explorer/backend/scripts/generate_matrix.py +++ b/company-explorer/backend/scripts/generate_matrix.py @@ -12,7 +12,7 @@ from backend.database import SessionLocal, Industry, Persona, MarketingMatrix from backend.config import settings # --- Configuration --- -MODEL_NAME = "gemini-1.5-pro-latest" # High quality copy +MODEL_NAME = "gemini-2.0-flash" # High quality copy def generate_prompt(industry: Industry, persona: Persona) -> str: """ @@ -104,15 +104,25 @@ def real_gemini_call(prompt: str): elif text.startswith("```"): text = text[3:-3].strip() - return json.loads(text) + parsed_json = json.loads(text) + if isinstance(parsed_json, list): + if len(parsed_json) > 0: + return parsed_json[0] + else: + raise ValueError("Empty list returned from API") + return parsed_json except Exception as e: 
print(f"JSON Parse Error: {e}. Raw Response: {response.text}") raise -def run_matrix_generation(dry_run: bool = True, force: bool = False): +def run_matrix_generation(dry_run: bool = True, force: bool = False, specific_industry: str = None): db = SessionLocal() try: - industries = db.query(Industry).all() + query = db.query(Industry) + if specific_industry: + query = query.filter(Industry.name == specific_industry) + + industries = query.all() personas = db.query(Persona).all() print(f"Found {len(industries)} Industries and {len(personas)} Personas.") @@ -182,6 +192,7 @@ if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--live", action="store_true", help="Actually call Gemini and write to DB") parser.add_argument("--force", action="store_true", help="Overwrite existing matrix entries") + parser.add_argument("--industry", type=str, help="Specific industry name to process") args = parser.parse_args() - run_matrix_generation(dry_run=not args.live, force=args.force) \ No newline at end of file + run_matrix_generation(dry_run=not args.live, force=args.force, specific_industry=args.industry) \ No newline at end of file diff --git a/connector-superoffice/README.md b/connector-superoffice/README.md index b1918b12..ed317a67 100644 --- a/connector-superoffice/README.md +++ b/connector-superoffice/README.md @@ -107,10 +107,28 @@ Der Connector ist der Bote, der diese Daten in das CRM bringt. 2. Sync-Skript laufen lassen: `python3 backend/scripts/sync_notion_industries.py`. 3. Matrix neu berechnen: `python3 backend/scripts/generate_matrix.py --live`. -### Prompt-Tuning -Die Prompts für Matrix und Opener liegen in: -* Matrix: `backend/scripts/generate_matrix.py` -* Opener: `backend/services/classification.py` (oder `enrichment.py`) +### End-to-End Tests +Ein automatisierter Integrationstest (`tests/test_e2e_flow.py`) deckt den gesamten Zyklus ab: +1. **Company Creation:** Webhook -> CE Provisioning -> Write-back (Vertical). +2. 
**Person Creation:** Webhook -> CE Matrix Lookup -> Write-back (Texte). +3. **Vertical Change:** Änderung im CRM -> CE Update -> Cascade zu Personen -> Neue Texte. + +Ausführen mittels: +```bash +python3 connector-superoffice/tests/test_e2e_flow.py +``` + +## 7. Troubleshooting & Known Issues + +### Authentication "URL has an invalid label" +Tritt auf, wenn `SO_ENVIRONMENT` leer ist. Der Client fällt nun automatisch auf `sod` zurück. + +### Pydantic V2 Compatibility +Die `config.py` wurde auf natives Python (`os.getenv`) umgestellt, um Konflikte mit `pydantic-settings` in Docker-Containern zu vermeiden. + +### Address & VAT Sync (WIP) +Der Worker wurde erweitert, um auch `City` und `OrgNumber` (VAT) zurückzuschreiben. +**Status (21.02.2026):** Implementiert, aber noch im Feinschliff. Logs zeigen teils Re-Queueing während das Enrichment läuft. ## Appendix: The "First Sentence" Prompt diff --git a/connector-superoffice/config.py b/connector-superoffice/config.py index 1b9b23a8..02382247 100644 --- a/connector-superoffice/config.py +++ b/connector-superoffice/config.py @@ -1,44 +1,37 @@ import os -from pydantic_settings import BaseSettings -class Settings(BaseSettings): - # --- Infrastructure --- - # Internal Docker URL for Company Explorer - COMPANY_EXPLORER_URL: str = "http://company-explorer:8000" - - # --- SuperOffice API Credentials --- - SO_ENVIRONMENT: str = "sod" # 'sod' or 'online' - SO_CLIENT_ID: str = "" - SO_CLIENT_SECRET: str = "" - SO_REFRESH_TOKEN: str = "" - SO_REDIRECT_URI: str = "http://localhost" - SO_CONTEXT_IDENTIFIER: str = "Cust55774" # e.g. 
Cust12345 - - # --- Feature Flags --- - ENABLE_WEBSITE_SYNC: bool = False # Disabled by default to prevent loops - - # --- Mappings (IDs from SuperOffice) --- - # Vertical IDs (List Items) - # Default values match the current hardcoded DEV IDs - # Format: "Name In Explorer": ID_In_SuperOffice - VERTICAL_MAP_JSON: str = '{"Logistics - Warehouse": 23, "Healthcare - Hospital": 24, "Infrastructure - Transport": 25, "Leisure - Indoor Active": 26}' +class Settings: + def __init__(self): + # --- Infrastructure --- + # Internal Docker URL for Company Explorer + self.COMPANY_EXPLORER_URL = os.getenv("COMPANY_EXPLORER_URL", "http://company-explorer:8000") + + # --- SuperOffice API Credentials --- + # Fallback for empty string in env var + env_val = os.getenv("SO_ENVIRONMENT") + self.SO_ENVIRONMENT = env_val if env_val else "sod" + + self.SO_CLIENT_ID = os.getenv("SO_CLIENT_ID", "") + self.SO_CLIENT_SECRET = os.getenv("SO_CLIENT_SECRET", "") + self.SO_REFRESH_TOKEN = os.getenv("SO_REFRESH_TOKEN", "") + self.SO_REDIRECT_URI = os.getenv("SO_REDIRECT_URI", "http://localhost") + self.SO_CONTEXT_IDENTIFIER = os.getenv("SO_CONTEXT_IDENTIFIER", "Cust55774") # e.g. 
Cust12345 + + # --- Feature Flags --- + self.ENABLE_WEBSITE_SYNC = os.getenv("ENABLE_WEBSITE_SYNC", "False").lower() in ("true", "1", "t") + + # --- Mappings (IDs from SuperOffice) --- + # Vertical IDs (List Items) + self.VERTICAL_MAP_JSON = os.getenv("VERTICAL_MAP_JSON", '{"Logistics - Warehouse": 23, "Healthcare - Hospital": 24, "Infrastructure - Transport": 25, "Leisure - Indoor Active": 26}') - # Persona / Job Role IDs (List Items for "Position" field) - # To be filled after discovery - PERSONA_MAP_JSON: str = '{}' + # Persona / Job Role IDs (List Items for "Position" field) + self.PERSONA_MAP_JSON = os.getenv("PERSONA_MAP_JSON", '{}') - # User Defined Fields (ProgIDs) - # The technical names of the fields in SuperOffice - # Default values match the current hardcoded DEV UDFs - UDF_SUBJECT: str = "SuperOffice:5" - UDF_INTRO: str = "SuperOffice:6" - UDF_SOCIAL_PROOF: str = "SuperOffice:7" - UDF_VERTICAL: str = "SuperOffice:5" # NOTE: Currently same as Subject in dev? Need to verify. worker.py had 'SuperOffice:5' for vertical AND 'SuperOffice:5' for subject in the map? 
- - class Config: - env_file = ".env" - env_file_encoding = "utf-8" - extra = "ignore" # Ignore extra fields in .env + # User Defined Fields (ProgIDs) + self.UDF_SUBJECT = os.getenv("UDF_SUBJECT", "SuperOffice:5") + self.UDF_INTRO = os.getenv("UDF_INTRO", "SuperOffice:6") + self.UDF_SOCIAL_PROOF = os.getenv("UDF_SOCIAL_PROOF", "SuperOffice:7") + self.UDF_VERTICAL = os.getenv("UDF_VERTICAL", "SuperOffice:5") # Global instance -settings = Settings() +settings = Settings() \ No newline at end of file diff --git a/connector-superoffice/superoffice_client.py b/connector-superoffice/superoffice_client.py index 0ae6a229..9af4e6e5 100644 --- a/connector-superoffice/superoffice_client.py +++ b/connector-superoffice/superoffice_client.py @@ -19,6 +19,8 @@ class SuperOfficeClient: self.env = settings.SO_ENVIRONMENT self.cust_id = settings.SO_CONTEXT_IDENTIFIER + logger.info(f"DEBUG CONFIG: Env='{self.env}', CustID='{self.cust_id}', ClientID='{self.client_id[:4]}...'") + if not all([self.client_id, self.client_secret, self.refresh_token]): # Graceful failure: Log error but allow init (for help/docs/discovery scripts) logger.error("❌ SuperOffice credentials missing in .env file (or environment variables).") @@ -57,7 +59,8 @@ class SuperOfficeClient: resp.raise_for_status() return resp.json().get("access_token") except requests.exceptions.HTTPError as e: - logger.error(f"❌ Token Refresh Error: {e.response.text}") + logger.error(f"❌ Token Refresh Error (Status: {e.response.status_code}): {e.response.text}") + logger.debug(f"Response Headers: {e.response.headers}") return None except Exception as e: logger.error(f"❌ Connection Error during token refresh: {e}") @@ -71,7 +74,8 @@ class SuperOfficeClient: resp.raise_for_status() return resp.json() except requests.exceptions.HTTPError as e: - logger.error(f"❌ API GET Error for {endpoint}: {e.response.text}") + logger.error(f"❌ API GET Error for {endpoint} (Status: {e.response.status_code}): {e.response.text}") + 
logger.debug(f"Response Headers: {e.response.headers}") return None def _put(self, endpoint, payload): diff --git a/connector-superoffice/worker.py b/connector-superoffice/worker.py index 785d5730..099fa834 100644 --- a/connector-superoffice/worker.py +++ b/connector-superoffice/worker.py @@ -210,6 +210,44 @@ def process_job(job, so_client: SuperOfficeClient): else: logger.warning(f"Vertical '{vertical_name}' not found in internal mapping.") + # 2b.2 Sync Address & VAT (Standard Fields) + # Check if we have address data to sync + ce_city = provisioning_data.get("address_city") + ce_country = provisioning_data.get("address_country") # Assuming 'DE' code or similar + ce_vat = provisioning_data.get("vat_id") + + if ce_city or ce_vat: + try: + # Re-fetch contact to be safe (or use cached if optimal) + contact_data = so_client.get_contact(contact_id) + changed = False + + # City (PostalAddress) + if ce_city: + # SuperOffice Address structure is complex. Simplified check on PostalAddress. + # Address: { "PostalAddress": { "City": "..." } } + current_city = contact_data.get("PostalAddress", {}).get("City", "") + if current_city != ce_city: + if "PostalAddress" not in contact_data: contact_data["PostalAddress"] = {} + contact_data["PostalAddress"]["City"] = ce_city + changed = True + logger.info(f"Updating City: {current_city} -> {ce_city}") + + # VAT (OrgNumber) + if ce_vat: + current_vat = contact_data.get("OrgNumber", "") + if current_vat != ce_vat: + contact_data["OrgNumber"] = ce_vat + changed = True + logger.info(f"Updating VAT: {current_vat} -> {ce_vat}") + + if changed: + logger.info(f"Pushing standard field updates for Contact {contact_id}...") + so_client._put(f"Contact/{contact_id}", contact_data) + + except Exception as e: + logger.error(f"Failed to sync Address/VAT for Contact {contact_id}: {e}") + # 2c. Sync Website (Company Level) # TEMPORARILY DISABLED TO PREVENT LOOP (SO API Read-after-Write latency or field mapping issue) # Re-enable via config if needed