"""One-off repair script: back-fill missing lead metadata from stored raw HTML.

For every lead whose ``lead_metadata`` is missing or empty, the stored
``raw_body`` is re-parsed and the extracted fields are written back through
``db.update_lead_metadata``.
"""

import sqlite3
import json
import re
import os
import sys

# Add path to import db
sys.path.append(os.path.dirname(__file__))
from db import get_leads, update_lead_metadata, init_db


def parse_tradingtwins_html_local(html_body):
    """Extract known fields from a Tradingtwins HTML body.

    The parsing logic is a local copy so this script does not depend on the
    ingest module.

    Args:
        html_body: Raw HTML of the lead e-mail. ``None`` or an empty string
            yields an empty result.

    Returns:
        A dict with any of the keys ``purpose``, ``area``, ``zip`` and
        ``city`` whose label was found; values are plain text with inline
        tags removed.
    """
    data = {}
    if not html_body:
        return data

    # German label in the e-mail -> metadata key stored on the lead.
    field_map = {
        'Einsatzzweck': 'purpose',
        'Reinigungs-Fläche': 'area',
        'PLZ': 'zip',
        'Stadt': 'city'
    }

    for label, key in field_map.items():
        # Matches "<label>:" closing one paragraph, then captures the content
        # of the next <p> element (the value cell).
        # NOTE(review): the tag names in this literal were reconstructed from
        # a copy whose HTML tags had been stripped -- confirm against a real
        # Tradingtwins e-mail body.
        pattern = fr'>\s*{re.escape(label)}:\s*</p>.*?<p[^>]*>(.*?)</p>'
        match = re.search(pattern, html_body, re.DOTALL | re.IGNORECASE)
        if match:
            raw_val = match.group(1).strip()
            # Drop any inline markup (e.g. <strong>, <a>) inside the value.
            clean_val = re.sub(r'<[^>]+>', '', raw_val).strip()
            data[key] = clean_val
    return data


def repair_database():
    """Re-extract metadata for all leads that currently have none.

    Initializes the database first, then walks every lead returned by
    ``get_leads()``. Leads with no ``raw_body``, or whose body yields no
    fields, are reported and left untouched so the final count reflects
    only leads that actually received metadata.
    """
    print("Initializing DB (migrating schema if needed)...")
    init_db()
    leads = get_leads()
    print(f"Found {len(leads)} leads to check.")

    count = 0
    for lead in leads:
        # Check if metadata is missing or empty
        current_meta = lead.get('lead_metadata')
        if current_meta and current_meta != '{}' and current_meta != 'null':
            continue

        print(f"Repairing Lead {lead['id']} ({lead['company_name']})...")
        raw_body = lead.get('raw_body', '')
        if not raw_body:
            print(" -> No raw body found.")
            continue

        extracted = parse_tradingtwins_html_local(raw_body)
        if not extracted:
            # Nothing recognised: do not write empty metadata or count it.
            print(" -> No fields could be extracted.")
            continue

        update_lead_metadata(lead['id'], extracted)
        print(f" -> Extracted: {extracted}")
        count += 1

    print(f"Repaired {count} leads.")


if __name__ == "__main__":
    repair_database()