[31388f42] Feature: Integrate Roboplanet Contact Forms into Lead Engine

This commit integrates the Roboplanet website contact form submissions into the Lead Engine, allowing them to be processed alongside TradingTwins leads.

Key changes:

- **Database Schema Update (db.py):** Added a new `source` column to the `leads` table for tracking lead origin (TradingTwins or Website-Formular). Includes a migration check to safely add the column.
- **Improved HTML Parsing (ingest.py):** Refined the `parse_roboplanet_form` function to accurately extract data from the specific HTML structure of Roboplanet contact form emails.
- **Enhanced Ingestion Logic (trading_twins_ingest.py):**
  - Renamed `fetch_tradingtwins_emails` to `fetch_new_leads_emails` and updated it to fetch emails from both lead sources.
  - Modified `process_leads` to dynamically select the correct parser based on email subject.
  - Ensured the `source` field is correctly populated and `is_low_quality` checks are applied for both lead types.
- **UI Enhancement (app.py):** Updated the Streamlit UI to visually distinguish lead types with icons and improved the "Low Quality Lead" warning message.

This feature enables a unified processing pipeline for different lead sources and provides better visibility in the Lead Engine dashboard.
2026-03-02 19:19:01 +00:00
parent 04013920ee
commit efaa43858d
4 changed files with 94 additions and 45 deletions
--- a/lead-engine/ingest.py
+++ b/lead-engine/ingest.py
@@ -28,38 +28,44 @@ def parse_tradingtwins_email(body):
    data['raw_body'] = body
    return data

-def parse_roboplanet_form(body):
+def parse_roboplanet_form(html_body):
    """
    Parses the Roboplanet website contact form (HTML format).
-    Example: <b>Vorname:</b> BÄKO <br><b>Nachname:</b> eG <br><b>Email:</b> Alexander.Grau@baeko-hr.de ...
+    Example: <b>Vorname:</b> Gordana <br><b>Nachname:</b> Dumitrovic <br>...
    """
    data = {}
    
-    # Helper to strip HTML tags if needed, but we'll use regex on the content
-    patterns = {
-        'contact_first': r'Vorname:</b>\s*(.*?)\s*<br>',
-        'contact_last': r'Nachname:</b>\s*(.*?)\s*<br>',
-        'email': r'Email:</b>\s*(.*?)\s*<br>',
-        'phone': r'Telefon:</b>\s*(.*?)\s*<br>',
-        'company': r'Firma:</b>\s*(.*?)\s*<br>',
-        'zip': r'PLZ:</b>\s*(.*?)\s*<br>',
-        'message': r'Nachricht:</b>\s*(.*?)\s*(?:<br>|--|$)'
+    # Map label names in HTML to our keys
+    field_map = {
+        'Vorname': 'contact_first', 
+        'Nachname': 'contact_last', 
+        'Email': 'email',
+        'Telefon': 'phone',
+        'Firma': 'company',
+        'PLZ': 'zip',
+        'Nachricht': 'message'
    }
    
-    for key, pattern in patterns.items():
-        # Use re.DOTALL for message if it spans lines, but usually it's one block
-        match = re.search(pattern, body, re.IGNORECASE | re.DOTALL)
+    for label, key in field_map.items():
+        # Pattern: <b>Label:</b> Value <br>
+        pattern = fr'<b>{re.escape(label)}:</b>\s*(.*?)\s*<br>'
+        match = re.search(pattern, html_body, re.DOTALL | re.IGNORECASE)
        if match:
-            # Clean HTML tags from the captured value if any
-            val = re.sub(r'<.*?>', '', match.group(1)).strip()
-            data[key] = val
+            raw_val = match.group(1).strip()
+            clean_val = re.sub(r'<[^>]+>', '', raw_val).strip() # Clean any leftover HTML tags
+            data[key] = clean_val
            
-    # Combine names
-    if 'contact_first' in data and 'contact_last' in data:
+    # Composite fields
+    if data.get('contact_first') and data.get('contact_last'):
        data['contact'] = f"{data['contact_first']} {data['contact_last']}"
-    
+
    # For Roboplanet forms, we use the timestamp as ID or a hash if missing
-    data['raw_body'] = body
+    # We need to ensure 'id' is present for db.py compatibility
+    if not data.get('source_id'):
+         data['source_id'] = f"rp_unknown_{int(datetime.now().timestamp())}"
+    data['id'] = data['source_id']
+    
+    data['raw_body'] = html_body
    return data

 def ingest_mock_leads():