Files
Brancheneinstufung2/lead-engine/repair_leads_v2.py

41 lines
1.4 KiB
Python

import sqlite3
import json
import re
import os
import sys
# Add path to import db
sys.path.append(os.path.dirname(__file__))
from db import get_leads, update_lead_metadata
def parse_names(html_body):
    """Extract the contact's first and last name from a lead's HTML body.

    The body is expected to contain a paragraph ending in ``Vorname:`` /
    ``Nachname:`` (matched case-insensitively), followed later by a ``<p>``
    element holding the value. The first ``<p>`` after each label is taken
    as the value.

    Args:
        html_body: Raw HTML of the lead message. ``None``, non-string and
            empty values are tolerated and yield an empty result.

    Returns:
        A dict with ``'contact_first'`` and/or ``'contact_last'``. A key is
        present only when a non-empty value was found, so the result is
        truthy only if there is something to store.
    """
    # Function-scope import keeps this block self-contained.
    from html import unescape

    data = {}
    if not isinstance(html_body, str) or not html_body:
        return data

    fields = (('Vorname', 'contact_first'), ('Nachname', 'contact_last'))
    for label, key in fields:
        pattern = r'>\s*' + label + r':\s*</p>.*?<p[^>]*>(.*?)</p>'
        match = re.search(pattern, html_body, re.DOTALL | re.IGNORECASE)
        if match is None:
            continue
        # Strip inline markup first, then decode entities, so an escaped
        # "&lt;...&gt;" in the value is not mistaken for a tag.
        text = re.sub(r'<[^>]+>', '', match.group(1))
        value = unescape(text).strip()
        # An empty paragraph (or one holding only &nbsp;) is not a name.
        if value:
            data[key] = value
    return data
def repair_names():
    """Backfill missing contact names in lead metadata from the raw HTML body.

    Iterates over every lead returned by ``get_leads()``. For each lead whose
    metadata has no ``contact_first``, the names are parsed out of
    ``raw_body`` with ``parse_names`` and merged into the metadata, which is
    then handed to ``update_lead_metadata(lead['id'], meta)``.

    Leads whose ``lead_metadata`` cannot be decoded into a JSON object are
    skipped with a message instead of aborting the whole run; their stored
    metadata is left untouched.

    Prints one line per repaired lead and a final summary. Returns ``None``.
    """
    repaired = 0
    for lead in get_leads():
        stored = lead['lead_metadata']
        try:
            meta = json.loads(stored) if stored else {}
        except (TypeError, ValueError) as exc:
            # json.JSONDecodeError is a ValueError subclass.
            print(f"Skipping lead {lead['id']}: unreadable lead_metadata ({exc})")
            continue
        if not isinstance(meta, dict):
            print(f"Skipping lead {lead['id']}: lead_metadata is not a JSON object")
            continue

        # Only repair if the first name is missing in the metadata.
        if meta.get('contact_first'):
            continue

        raw_body = lead.get('raw_body', '')
        if not raw_body:
            continue

        # Drop empty values so a blank field is neither written nor counted.
        name_data = {key: value for key, value in parse_names(raw_body).items() if value}
        if not name_data:
            continue

        meta.update(name_data)
        update_lead_metadata(lead['id'], meta)
        print(f"Fixed names for {lead['company_name']}: {name_data}")
        repaired += 1

    print(f"Finished. Repaired {repaired} lead names.")
# Run the repair only when executed as a script, not when imported.
if __name__ == "__main__":
    repair_names()