60 lines
1.8 KiB
Python
60 lines
1.8 KiB
Python
import sqlite3
|
|
import json
|
|
import re
|
|
import os
|
|
import sys
|
|
|
|
# Add path to import db
|
|
sys.path.append(os.path.dirname(__file__))
|
|
from db import get_leads, update_lead_metadata, init_db
|
|
|
|
def parse_tradingtwins_html_local(html_body):
    """
    Extract data from the Tradingtwins HTML table structure.

    Copied logic to ensure independence.

    For every known label the function searches for an element whose text
    is ``<label>:`` immediately followed by ``</p>``, and takes the content
    of the next ``<p ...>`` element as the value. Markup nested inside the
    value is removed and HTML entities (``&amp;``, ``&nbsp;``, ...) are
    decoded, so the result is plain text.

    Args:
        html_body: HTML text to search. ``None`` or an empty string yields
            an empty result instead of raising.

    Returns:
        A dict with any of the keys ``'purpose'``, ``'area'``, ``'zip'`` and
        ``'city'``. Labels that do not occur in ``html_body`` are omitted.
    """
    # Function-scope import keeps this block self-contained; the parameter is
    # named ``html_body``, so the module name ``html`` is not shadowed.
    import html

    data = {}
    if not html_body:
        return data

    field_map = {
        'Einsatzzweck': 'purpose',
        'Reinigungs-Fläche': 'area',
        'PLZ': 'zip',
        'Stadt': 'city',
    }
    # DOTALL lets the lazy ``.*?`` cross line breaks between label and value.
    flags = re.DOTALL | re.IGNORECASE

    for label, key in field_map.items():
        pattern = fr'>\s*{re.escape(label)}:\s*</p>.*?<p[^>]*>(.*?)</p>'
        match = re.search(pattern, html_body, flags)
        if not match:
            continue

        # Remove nested tags first, then decode entities: an escaped "&lt;b&gt;"
        # is literal text and must not be mistaken for markup.
        without_tags = re.sub(r'<[^>]+>', '', match.group(1))
        # strip() runs after decoding so that a decoded &nbsp; at the edges
        # is trimmed as well.
        data[key] = html.unescape(without_tags).strip()

    return data
|
|
|
|
def repair_database():
    """
    Backfill missing lead metadata from each lead's stored raw body.

    Calls ``init_db()`` first, then walks every lead returned by
    ``get_leads()``. A lead is treated as needing repair when its
    ``lead_metadata`` is falsy or equal to ``'{}'`` or ``'null'``. For such a
    lead the ``raw_body`` is parsed with ``parse_tradingtwins_html_local``
    and, if at least one field was extracted, the result is stored through
    ``update_lead_metadata``.

    Progress messages and the final number of repaired leads are printed to
    stdout. The function returns nothing.
    """
    print("Initializing DB (migrating schema if needed)...")
    init_db()

    leads = get_leads()
    print(f"Found {len(leads)} leads to check.")

    count = 0
    for lead in leads:
        # Skip leads that already carry non-empty metadata.
        current_meta = lead.get('lead_metadata')
        if current_meta and current_meta not in ('{}', 'null'):
            continue

        print(f"Repairing Lead {lead['id']} ({lead['company_name']})...")

        raw_body = lead.get('raw_body', '')
        if not raw_body:
            print(" -> No raw body found.")
            continue

        extracted = parse_tradingtwins_html_local(raw_body)
        if not extracted:
            # Writing an empty dict would leave the lead exactly as "empty"
            # as before, so it must not be stored or counted as repaired.
            print(" -> No fields could be extracted.")
            continue

        update_lead_metadata(lead['id'], extracted)
        print(f" -> Extracted: {extracted}")
        count += 1

    print(f"Repaired {count} leads.")


# Run the repair only when executed as a script, not when imported.
if __name__ == "__main__":
    repair_database()
|