diff --git a/lead-engine/enrich.py b/lead-engine/enrich.py index 84673608..053fbea3 100644 --- a/lead-engine/enrich.py +++ b/lead-engine/enrich.py @@ -75,9 +75,73 @@ def enrich_contact_role(lead): return role +def sync_single_lead(lead_id): + """ + Verarbeitet einen einzelnen Lead: Rolle suchen, CE-Sync, Analyse triggern. + """ + conn = sqlite3.connect(DB_PATH) + conn.row_factory = sqlite3.Row + c = conn.cursor() + c.execute('SELECT * FROM leads WHERE id = ?', (lead_id,)) + lead = c.fetchone() + conn.close() + + if not lead: + return {"status": "error", "message": "Lead not found"} + + lead_dict = dict(lead) + company_name = lead_dict['company_name'] + print(f"\n--- Manually Syncing Lead ID: {lead_id}, Company: '{company_name}' ---") + + # 1. Contact Enrichment (Role Lookup) + role = enrich_contact_role(lead_dict) + + # 2. Prepare Contact Info + meta = {} + if lead_dict.get('lead_metadata'): + try: meta = json.loads(lead_dict['lead_metadata']) + except: pass + + # Smarter name splitting if meta is empty (for repaired leads) + full_name = lead_dict.get('contact_name', '') + first_name = meta.get('contact_first') + last_name = meta.get('contact_last') + + if not first_name and full_name: + parts = full_name.strip().split(' ') + if len(parts) > 1: + first_name = parts[0] + last_name = ' '.join(parts[1:]) + else: + last_name = full_name + first_name = '' + + contact_info = { + "first_name": first_name, + "last_name": last_name, + "email": lead_dict['email'], + "job_title": meta.get('role', role), + "role": None, # Set to None so CE can use its RoleMappingService + "is_primary": True + } + + # 3. CE Workflow + result = handle_company_workflow(company_name, contact_info=contact_info) + + # 4. Save results + enrichment_data = { + "sync_status": result.get("status"), + "ce_id": result.get("data", {}).get("id") if result.get("data") else None, + "message": result.get("message", "Manual sync successful"), + "ce_data": result.get("data") + } + + update_lead_enrichment(lead_id, enrichment_data, status='synced') + return result + def run_sync(): """ - Haupt-Synchronisationsprozess. + Haupt-Synchronisationsprozess (Batch). Holt alle neuen Leads und stößt den Company Explorer Workflow für jeden an. """ # Hole nur die Leads, die wirklich neu sind und noch nicht verarbeitet wurden diff --git a/lead-engine/repair_leads_v2.py b/lead-engine/repair_leads_v2.py new file mode 100644 index 00000000..532afdf8 --- /dev/null +++ b/lead-engine/repair_leads_v2.py @@ -0,0 +1,40 @@ +import sqlite3 +import json +import re +import os +import sys + +# Add path to import db +sys.path.append(os.path.dirname(__file__)) +from db import get_leads, update_lead_metadata + +def parse_names(html_body): + data = {} + # Extract Vorname and Nachname from HTML if possible + v_match = re.search(r'>\s*Vorname:\s*

.*?]*>(.*?)

', html_body, re.DOTALL | re.IGNORECASE) + n_match = re.search(r'>\s*Nachname:\s*

.*?]*>(.*?)

', html_body, re.DOTALL | re.IGNORECASE) + + if v_match: data['contact_first'] = re.sub(r'<[^>]+>', '', v_match.group(1)).strip() + if n_match: data['contact_last'] = re.sub(r'<[^>]+>', '', n_match.group(1)).strip() + return data + +def repair_names(): + leads = get_leads() + count = 0 + for lead in leads: + meta = json.loads(lead['lead_metadata']) if lead['lead_metadata'] else {} + + # Only repair if names are missing in meta + if not meta.get('contact_first'): + raw_body = lead.get('raw_body', '') + if raw_body: + name_data = parse_names(raw_body) + if name_data: + meta.update(name_data) + update_lead_metadata(lead['id'], meta) + print(f"Fixed names for {lead['company_name']}: {name_data}") + count += 1 + print(f"Finished. Repaired {count} lead names.") + +if __name__ == "__main__": + repair_names()