41 lines
1.4 KiB
Python
41 lines
1.4 KiB
Python
import sqlite3
|
|
import json
|
|
import re
|
|
import os
|
|
import sys
|
|
|
|
# Put this script's own directory on sys.path so the local `db` module can be imported
|
|
sys.path.append(os.path.dirname(__file__))
|
|
from db import get_leads, update_lead_metadata
|
|
|
|
def parse_names(html_body):
    """Extract a contact's first and last name from a lead's HTML body.

    The markup is searched for label/value paragraph pairs such as
    ``<p>Vorname:</p><p>Max</p>`` and ``<p>Nachname:</p><p>Mustermann</p>``.
    Labels are matched case-insensitively; the value is the content of the
    first ``<p>`` element that follows the label's closing ``</p>``.

    Args:
        html_body: HTML markup to search. May be empty or ``None``.

    Returns:
        dict: ``contact_first`` and/or ``contact_last`` for each name that
        was found and is non-empty after cleanup. Empty when nothing usable
        was found, so callers can rely on the dict's truthiness.
    """
    import html  # function-scope import keeps the file's import block untouched

    if not html_body:
        return {}

    data = {}
    for key, label in (('contact_first', 'Vorname'), ('contact_last', 'Nachname')):
        match = re.search(
            r'>\s*' + label + r':\s*</p>.*?<p[^>]*>(.*?)</p>',
            html_body,
            re.DOTALL | re.IGNORECASE,
        )
        if not match:
            continue
        # Strip nested tags first, then decode entities (e.g. "&uuml;" -> "ü"),
        # so an escaped "&lt;" is never mistaken for the start of a tag.
        value = html.unescape(re.sub(r'<[^>]+>', '', match.group(1))).strip()
        # Skip blank values: an entry like {'contact_first': ''} would make the
        # result truthy even though no name was actually recovered.
        if value:
            data[key] = value
    return data
|
|
|
|
def repair_names():
    """Backfill missing contact names in lead metadata from the stored HTML.

    Walks every lead returned by ``get_leads()``. A lead is processed only
    when its metadata has no (or an empty) ``contact_first`` entry and it has
    a non-empty ``raw_body``. Whatever ``parse_names`` extracts from that body
    is merged into the metadata and passed to ``update_lead_metadata``.

    Prints one line per repaired lead and a final summary with the total.
    """
    repaired = 0

    for lead in get_leads():
        stored = lead['lead_metadata']
        meta = json.loads(stored) if stored else {}

        # Leads that already carry a first name are left untouched.
        if meta.get('contact_first'):
            continue

        body = lead.get('raw_body', '')
        if not body:
            continue

        name_data = parse_names(body)
        if not name_data:
            continue

        meta.update(name_data)
        update_lead_metadata(lead['id'], meta)
        print(f"Fixed names for {lead['company_name']}: {name_data}")
        repaired += 1

    print(f"Finished. Repaired {repaired} lead names.")
|
|
|
|
if __name__ == "__main__":
    # Run the name repair only when this file is executed as a script,
    # not when it is imported as a module.
    repair_names()
|