[31388f42] Final session polish: Refined UI, improved ingest parsing, and completed documentation

This commit is contained in:
2026-03-02 15:10:12 +00:00
parent aa38c555d8
commit ee2dfd5b00
10 changed files with 171 additions and 224 deletions

View File

@@ -8,22 +8,40 @@ from enrich import run_sync, refresh_ce_data, sync_single_lead
from generate_reply import generate_email_draft
def clean_html_to_text(html_content):
    """Extract the Tradingtwins lead data from an HTML email as plain text.

    Processing steps:

    1. Drop ``<head>`` and ``<style>`` sections entirely.
    2. If a ``Datum:`` label is present, keep only the span from that label
       up to the "Kennen Sie schon Ihr persönliches Konto" footer (or to the
       end of the document when the footer does not follow the label).
    3. Flatten table markup so each label/value pair ends up on one line.
    4. Turn ``<br>`` / ``</p>`` into newlines and strip all remaining tags.
    5. Decode HTML entities.
    6. Drop empty lines and surrounding whitespace.

    Args:
        html_content: Raw HTML body of the email. ``None`` or an empty
            string is accepted.

    Returns:
        The cleaned text, one stripped non-empty line per row, or ``""``
        when there is no input.
    """
    # Function-scope import keeps this block self-contained.
    import html

    if not html_content:
        return ""

    # DOTALL matters for every tag pattern below: mail clients frequently
    # wrap long attribute lists, so a single tag can span several lines.
    tag_flags = re.DOTALL | re.IGNORECASE

    # 1. Strip head and style (``\b`` avoids matching e.g. ``<header>``).
    clean = re.sub(r'<head\b[^>]*>.*?</head>', '', html_content, flags=tag_flags)
    clean = re.sub(r'<style\b[^>]*>.*?</style>', '', clean, flags=tag_flags)

    # 2. Extract the core data block, starting at the first 'Datum:' label.
    start_match = re.search(r'Datum:', clean, re.IGNORECASE)
    if start_match:
        start_pos = start_match.start()
        # Look for the footer only *after* the label; a footer occurring
        # earlier in the document would otherwise produce an empty slice.
        end_match = re.compile(
            r'Kennen Sie schon Ihr persönliches Konto', re.IGNORECASE
        ).search(clean, start_pos)
        end_pos = end_match.start() if end_match else len(clean)
        clean = clean[start_pos:end_pos]

    # 3. Table structure: adjacent cells are joined by a space and each row
    #    ends with a newline, so a label and its value share one line.
    clean = re.sub(r'</td>\s*<td\b[^>]*>', ' ', clean, flags=tag_flags)
    clean = re.sub(r'</tr>', '\n', clean, flags=re.IGNORECASE)

    # 4. Standard cleanup: explicit line breaks, then every remaining tag.
    clean = re.sub(r'<br\s*/?>', '\n', clean, flags=re.IGNORECASE)
    clean = re.sub(r'</p>', '\n', clean, flags=re.IGNORECASE)
    clean = re.sub(r'<[^>]*>', '', clean)

    # 5. Entity decoding in a single pass. Chained ``str.replace`` calls
    #    double-decode input such as ``&amp;gt;`` and miss most entities
    #    (``&lt;``, ``&uuml;``, numeric references, ...).
    #    ``&nbsp;`` decodes to U+00A0, which is normalised to a plain space.
    clean = html.unescape(clean).replace('\xa0', ' ')

    # 6. Final polish: remove empty lines and leading/trailing whitespace.
    lines = [line.strip() for line in clean.split('\n') if line.strip()]
    return '\n'.join(lines)
# Configure the Streamlit page: browser tab title and wide layout.
st.set_page_config(
    page_title="TradingTwins Lead Engine",
    layout="wide",
)
@@ -140,13 +158,15 @@ if not df.empty:
if meta.get('is_low_quality'):
st.warning("⚠️ **Low Quality Lead detected** (Free-mail or missing company).")
# --- SECTION 1: LEAD INFO (2 Columns) ---
st.markdown("### 📋 Lead Data")
c1, c2 = st.columns(2)
# --- SECTION 1: LEAD INFO & INTELLIGENCE ---
col_lead, col_intel = st.columns(2)
with c1:
with col_lead:
st.markdown("### 📋 Lead Data")
st.write(f"**Salutation:** {meta.get('salutation', '-')}")
st.write(f"**Contact:** {row['contact_name']}")
st.write(f"**Email:** {row['email']}")
st.write(f"**Phone:** {meta.get('phone', row.get('phone', '-'))}")
role = meta.get('role')
if role:
@@ -158,58 +178,56 @@ if not df.empty:
found_role = enrich_contact_role(row)
if found_role: st.success(f"Found: {found_role}"); st.rerun()
else: st.error("No role found.")
with c2:
st.write(f"**Area:** {meta.get('area', '-')}")
st.write(f"**Purpose:** {meta.get('purpose', '-')}")
st.write(f"**Functions:** {meta.get('cleaning_functions', '-')}")
st.write(f"**Location:** {meta.get('zip', '')} {meta.get('city', '')}")
with st.expander("Original Body Preview"):
st.text(clean_html_to_text(row['raw_body']))
if st.checkbox("Show HTML", key=f"raw_{row['id']}"):
st.code(row['raw_body'], language="html")
st.divider()
# --- SECTION 2: INTELLIGENCE (CE) ---
st.markdown("### 🔍 Intelligence (CE)")
enrichment = json.loads(row['enrichment_data']) if row['enrichment_data'] else {}
ce_id = enrichment.get('ce_id')
if ce_id:
st.success(f"✅ Linked to Company Explorer (ID: {ce_id})")
ce_data = enrichment.get('ce_data', {})
with col_intel:
st.markdown("### 🔍 Intelligence (CE)")
enrichment = json.loads(row['enrichment_data']) if row['enrichment_data'] else {}
ce_id = enrichment.get('ce_id')
vertical = ce_data.get('industry_ai') or ce_data.get('vertical')
summary = ce_data.get('research_dossier') or ce_data.get('summary')
intel_col1, intel_col2 = st.columns([1, 2])
with intel_col1:
if ce_id:
st.success(f"✅ Linked to Company Explorer (ID: {ce_id})")
ce_data = enrichment.get('ce_data', {})
vertical = ce_data.get('industry_ai') or ce_data.get('vertical')
summary = ce_data.get('research_dossier') or ce_data.get('summary')
if vertical and vertical != 'None':
st.info(f"**Industry:** {vertical}")
else:
st.warning("Industry Analysis pending...")
if summary:
with st.expander("Show AI Research Dossier", expanded=True):
st.write(summary)
if st.button("🔄 Refresh CE Data", key=f"refresh_{row['id']}"):
with st.spinner("Fetching..."):
refresh_ce_data(row['id'], ce_id)
st.rerun()
with intel_col2:
if summary:
with st.expander("Show AI Research Dossier", expanded=True):
st.write(summary)
else:
st.warning("⚠️ Not synced with Company Explorer yet")
if st.button("🚀 Sync to Company Explorer", key=f"sync_single_{row['id']}"):
with st.spinner("Syncing..."):
sync_single_lead(row['id'])
st.rerun()
else:
st.warning("⚠️ Not synced with Company Explorer yet")
if st.button("🚀 Sync to Company Explorer", key=f"sync_single_{row['id']}"):
with st.spinner("Syncing..."):
sync_single_lead(row['id'])
st.rerun()
st.divider()
# --- SECTION 3: RESPONSE DRAFT ---
st.markdown("### ✉️ Response Draft")
# --- SECTION 2: ORIGINAL EMAIL ---
with st.expander("✉️ View Original Email Content"):
st.text(clean_html_to_text(row['raw_body']))
if st.checkbox("Show Raw HTML", key=f"raw_{row['id']}"):
st.code(row['raw_body'], language="html")
st.divider()
# --- SECTION 3: RESPONSE DRAFT (Full Width) ---
st.markdown("### 📝 Response Draft")
if row['status'] != 'new' and ce_id:
if st.button("✨ Generate Expert Reply", key=f"gen_{row['id']}", type="primary"):
with st.spinner("Writing email..."):