[31388f42] Final session polish: Refined UI, improved ingest parsing, and completed documentation
This commit is contained in:
@@ -8,22 +8,40 @@ from enrich import run_sync, refresh_ce_data, sync_single_lead
|
||||
from generate_reply import generate_email_draft
|
||||
|
||||
def clean_html_to_text(html_content):
|
||||
"""Simple helper to convert HTML email body to readable plain text."""
|
||||
"""Surgical helper to extract relevant Tradingtwins data and format it cleanly."""
|
||||
if not html_content:
|
||||
return ""
|
||||
# Remove head and style tags entirely
|
||||
|
||||
# 1. Strip head and style
|
||||
clean = re.sub(r'<head.*?>.*?</head>', '', html_content, flags=re.DOTALL | re.IGNORECASE)
|
||||
clean = re.sub(r'<style.*?>.*?</style>', '', clean, flags=re.DOTALL | re.IGNORECASE)
|
||||
# Replace <br> and </p> with newlines
|
||||
|
||||
# 2. Extract the core data block (from 'Datum:' until the matchmaking plug)
|
||||
# We look for the first 'Datum:' label
|
||||
start_match = re.search(r'Datum:', clean, re.IGNORECASE)
|
||||
end_match = re.search(r'Kennen Sie schon Ihr persönliches Konto', clean, re.IGNORECASE)
|
||||
|
||||
if start_match:
|
||||
start_pos = start_match.start()
|
||||
end_pos = end_match.start() if end_match else len(clean)
|
||||
clean = clean[start_pos:end_pos]
|
||||
|
||||
# 3. Format Table Structure: </td><td> should be a space/tab, </tr> a newline
|
||||
# This prevents the "Label on one line, value on next" issue
|
||||
clean = re.sub(r'</td>\s*<td.*?>', ' ', clean, flags=re.IGNORECASE)
|
||||
clean = re.sub(r'</tr>', '\n', clean, flags=re.IGNORECASE)
|
||||
|
||||
# 4. Standard Cleanup
|
||||
clean = re.sub(r'<br\s*/?>', '\n', clean, flags=re.IGNORECASE)
|
||||
clean = re.sub(r'</p>', '\n', clean, flags=re.IGNORECASE)
|
||||
# Remove all other tags
|
||||
clean = re.sub(r'<.*?>', '', clean)
|
||||
# Decode some common entities
|
||||
clean = clean.replace('&nbsp;', ' ').replace('&amp;', '&').replace('&quot;', '"')
|
||||
# Cleanup multiple newlines
|
||||
clean = re.sub(r'\n\s*\n+', '\n\n', clean).strip()
|
||||
return clean
|
||||
|
||||
# 5. Entity Decoding
|
||||
clean = clean.replace('&nbsp;', ' ').replace('&amp;', '&').replace('&quot;', '"').replace('&gt;', '>')
|
||||
|
||||
# 6. Final Polish: remove empty lines and leading/trailing whitespace
|
||||
lines = [line.strip() for line in clean.split('\n') if line.strip()]
|
||||
return '\n'.join(lines)
|
||||
|
||||
st.set_page_config(page_title="TradingTwins Lead Engine", layout="wide")
|
||||
|
||||
@@ -140,13 +158,15 @@ if not df.empty:
|
||||
if meta.get('is_low_quality'):
|
||||
st.warning("⚠️ **Low Quality Lead detected** (Free-mail or missing company).")
|
||||
|
||||
# --- SECTION 1: LEAD INFO (2 Columns) ---
|
||||
st.markdown("### 📋 Lead Data")
|
||||
c1, c2 = st.columns(2)
|
||||
# --- SECTION 1: LEAD INFO & INTELLIGENCE ---
|
||||
col_lead, col_intel = st.columns(2)
|
||||
|
||||
with c1:
|
||||
with col_lead:
|
||||
st.markdown("### 📋 Lead Data")
|
||||
st.write(f"**Salutation:** {meta.get('salutation', '-')}")
|
||||
st.write(f"**Contact:** {row['contact_name']}")
|
||||
st.write(f"**Email:** {row['email']}")
|
||||
st.write(f"**Phone:** {meta.get('phone', row.get('phone', '-'))}")
|
||||
|
||||
role = meta.get('role')
|
||||
if role:
|
||||
@@ -158,58 +178,56 @@ if not df.empty:
|
||||
found_role = enrich_contact_role(row)
|
||||
if found_role: st.success(f"Found: {found_role}"); st.rerun()
|
||||
else: st.error("No role found.")
|
||||
|
||||
with c2:
|
||||
|
||||
st.write(f"**Area:** {meta.get('area', '-')}")
|
||||
st.write(f"**Purpose:** {meta.get('purpose', '-')}")
|
||||
st.write(f"**Functions:** {meta.get('cleaning_functions', '-')}")
|
||||
st.write(f"**Location:** {meta.get('zip', '')} {meta.get('city', '')}")
|
||||
|
||||
with st.expander("Original Body Preview"):
|
||||
st.text(clean_html_to_text(row['raw_body']))
|
||||
if st.checkbox("Show HTML", key=f"raw_{row['id']}"):
|
||||
st.code(row['raw_body'], language="html")
|
||||
|
||||
st.divider()
|
||||
|
||||
# --- SECTION 2: INTELLIGENCE (CE) ---
|
||||
st.markdown("### 🔍 Intelligence (CE)")
|
||||
enrichment = json.loads(row['enrichment_data']) if row['enrichment_data'] else {}
|
||||
ce_id = enrichment.get('ce_id')
|
||||
|
||||
if ce_id:
|
||||
st.success(f"✅ Linked to Company Explorer (ID: {ce_id})")
|
||||
ce_data = enrichment.get('ce_data', {})
|
||||
with col_intel:
|
||||
st.markdown("### 🔍 Intelligence (CE)")
|
||||
enrichment = json.loads(row['enrichment_data']) if row['enrichment_data'] else {}
|
||||
ce_id = enrichment.get('ce_id')
|
||||
|
||||
vertical = ce_data.get('industry_ai') or ce_data.get('vertical')
|
||||
summary = ce_data.get('research_dossier') or ce_data.get('summary')
|
||||
|
||||
intel_col1, intel_col2 = st.columns([1, 2])
|
||||
with intel_col1:
|
||||
if ce_id:
|
||||
st.success(f"✅ Linked to Company Explorer (ID: {ce_id})")
|
||||
ce_data = enrichment.get('ce_data', {})
|
||||
|
||||
vertical = ce_data.get('industry_ai') or ce_data.get('vertical')
|
||||
summary = ce_data.get('research_dossier') or ce_data.get('summary')
|
||||
|
||||
if vertical and vertical != 'None':
|
||||
st.info(f"**Industry:** {vertical}")
|
||||
else:
|
||||
st.warning("Industry Analysis pending...")
|
||||
|
||||
if summary:
|
||||
with st.expander("Show AI Research Dossier", expanded=True):
|
||||
st.write(summary)
|
||||
|
||||
if st.button("🔄 Refresh CE Data", key=f"refresh_{row['id']}"):
|
||||
with st.spinner("Fetching..."):
|
||||
refresh_ce_data(row['id'], ce_id)
|
||||
st.rerun()
|
||||
|
||||
with intel_col2:
|
||||
if summary:
|
||||
with st.expander("Show AI Research Dossier", expanded=True):
|
||||
st.write(summary)
|
||||
else:
|
||||
st.warning("⚠️ Not synced with Company Explorer yet")
|
||||
if st.button("🚀 Sync to Company Explorer", key=f"sync_single_{row['id']}"):
|
||||
with st.spinner("Syncing..."):
|
||||
sync_single_lead(row['id'])
|
||||
st.rerun()
|
||||
else:
|
||||
st.warning("⚠️ Not synced with Company Explorer yet")
|
||||
if st.button("🚀 Sync to Company Explorer", key=f"sync_single_{row['id']}"):
|
||||
with st.spinner("Syncing..."):
|
||||
sync_single_lead(row['id'])
|
||||
st.rerun()
|
||||
|
||||
st.divider()
|
||||
|
||||
# --- SECTION 3: RESPONSE DRAFT ---
|
||||
st.markdown("### ✉️ Response Draft")
|
||||
# --- SECTION 2: ORIGINAL EMAIL ---
|
||||
with st.expander("✉️ View Original Email Content"):
|
||||
st.text(clean_html_to_text(row['raw_body']))
|
||||
if st.checkbox("Show Raw HTML", key=f"raw_{row['id']}"):
|
||||
st.code(row['raw_body'], language="html")
|
||||
|
||||
st.divider()
|
||||
|
||||
# --- SECTION 3: RESPONSE DRAFT (Full Width) ---
|
||||
st.markdown("### 📝 Response Draft")
|
||||
if row['status'] != 'new' and ce_id:
|
||||
if st.button("✨ Generate Expert Reply", key=f"gen_{row['id']}", type="primary"):
|
||||
with st.spinner("Writing email..."):
|
||||
|
||||
Reference in New Issue
Block a user