From a8715c8dce4f3f25008fb8a08986999683fdb7db Mon Sep 17 00:00:00 2001
From: Floke Label: Value
Nachname: eG
Email: Alexander.Grau@baeko-hr.de ...
+ """
+ data = {}
+
+ # Helper to strip HTML tags if needed, but we'll use regex on the content
+ patterns = {
+ 'contact_first': r'Vorname:\s*(.*?)\s*
',
+ 'contact_last': r'Nachname:\s*(.*?)\s*
',
+ 'email': r'Email:\s*(.*?)\s*
',
+ 'phone': r'Telefon:\s*(.*?)\s*
',
+ 'company': r'Firma:\s*(.*?)\s*
',
+ 'zip': r'PLZ:\s*(.*?)\s*
',
+ 'message': r'Nachricht:\s*(.*?)\s*(?:
|--|$)'
+ }
+
+ for key, pattern in patterns.items():
+ # Use re.DOTALL for message if it spans lines, but usually it's one block
+ match = re.search(pattern, body, re.IGNORECASE | re.DOTALL)
+ if match:
+ # Clean HTML tags from the captured value if any
+ val = re.sub(r'<.*?>', '', match.group(1)).strip()
+ data[key] = val
+
+ # Combine names
+ if 'contact_first' in data and 'contact_last' in data:
+ data['contact'] = f"{data['contact_first']} {data['contact_last']}"
+
+ # For Roboplanet forms, we use the timestamp as ID or a hash if missing
+ data['raw_body'] = body
+ return data
+
def ingest_mock_leads():
# Mock data from the session context
leads = [
diff --git a/lead-engine/trading_twins_ingest.py b/lead-engine/trading_twins_ingest.py
new file mode 100644
index 00000000..6e6e5987
--- /dev/null
+++ b/lead-engine/trading_twins_ingest.py
@@ -0,0 +1,143 @@
+import os
+import sys
+import re
+import logging
+import requests
+import json
+from datetime import datetime
+from dotenv import load_dotenv
+
+# Ensure we can import from root directory
+# (appends the parent directory of this file to sys.path so the import below can resolve there).
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+# Import from root modules
+# Prefer the real workflow handler; if the module cannot be imported, define a local stand-in.
+try:
+ from company_explorer_connector import handle_company_workflow
+except ImportError:
+ # Fallback/Mock for testing if run in isolation without full env
+ # The stand-in contacts nothing: it returns a fixed "mock" status that echoes
+ # the given company name together with a placeholder id.
+ def handle_company_workflow(company_name):
+ return {"status": "mock", "data": {"name": company_name, "id": "mock-id"}}
+
+# Configuration
+# Load variables from a .env file; override=True lets .env values replace
+# variables that are already set in the process environment.
+load_dotenv(override=True)
+# Credentials used for the OAuth2 token request; each is None when its variable is unset.
+CLIENT_ID = os.getenv("INFO_Application_ID")
+TENANT_ID = os.getenv("INFO_Tenant_ID")
+CLIENT_SECRET = os.getenv("INFO_Secret")
+# Mailbox address that is inserted into the Graph messages URL.
+USER_EMAIL = "info@robo-planet.de"
+
+# Setup logging
+# Root logging at INFO with timestamp, level and message; `logger` is this module's logger.
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+
+def get_access_token():
+ url = f"https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/token"
+ data = {
+ "client_id": CLIENT_ID,
+ "scope": "https://graph.microsoft.com/.default",
+ "client_secret": CLIENT_SECRET,
+ "grant_type": "client_credentials"
+ }
+ response = requests.post(url, data=data)
+ response.raise_for_status()
+ return response.json().get("access_token")
+
+def fetch_tradingtwins_emails(token, limit=20):
+ url = f"https://graph.microsoft.com/v1.0/users/{USER_EMAIL}/messages"
+ headers = {
+ "Authorization": f"Bearer {token}",
+ "Content-Type": "application/json"
+ }
+ # Filter for Tradingtwins subject
+ params = {
+ "$top": limit,
+ "$select": "id,subject,receivedDateTime,body",
+ "$orderby": "receivedDateTime desc"
+ }
+ response = requests.get(url, headers=headers, params=params)
+ if response.status_code != 200:
+ logger.error(f"Graph API Error: {response.status_code} - {response.text}")
+ return []
+
+ all_msgs = response.json().get("value", [])
+ # Filter strictly for the subject pattern we saw
+ return [m for m in all_msgs if "Neue Anfrage zum Thema Roboter" in m.get('subject', '')]
+
+def parse_tradingtwins_html(html_body):
+ """
+ Extracts data from the Tradingtwins HTML table structure.
+ Pattern:
]*> -> Finds the start of the value paragraph + # (.*?) -> Captures the value + #
-> Ends at closing paragraph tag + pattern = fr'>\s*{re.escape(label)}:\s*.*?]*>(.*?)
' + + match = re.search(pattern, html_body, re.DOTALL | re.IGNORECASE) + if match: + # Clean up the value (remove HTML tags inside if any, though usually plain text) + raw_val = match.group(1).strip() + # Remove any link tags if present (e.g. for email/phone) + clean_val = re.sub(r'<[^>]+>', '', raw_val).strip() + data[key] = clean_val + + # Composite fields + if data.get('first_name') and data.get('last_name'): + data['contact_name'] = f"{data['first_name']} {data['last_name']}" + + return data + +def process_leads(): + try: + token = get_access_token() + emails = fetch_tradingtwins_emails(token) + logger.info(f"Found {len(emails)} Tradingtwins emails.") + + for email in emails: + body = email.get('body', {}).get('content', '') + lead_data = parse_tradingtwins_html(body) + + company_name = lead_data.get('company') + if not company_name or company_name == '-': + # Fallback if company is empty (sometimes happens with private persons) + # Use contact name as company name + company_name = lead_data.get('contact_name') + + if not company_name: + logger.warning(f"Skipping email {email['id']}: No company or contact name found.") + continue + + logger.info(f"Processing Lead: {company_name} (ID: {lead_data.get('lead_id')})") + + # Trigger Company Explorer Workflow + # Note: In a real scenario, we might want to check if we already processed this message ID + # to avoid duplicates. For now, we rely on the Company Explorer's deduplication. 
+ logger.info(f" -> Triggering Company Explorer for '{company_name}'...") + result = handle_company_workflow(company_name) + + logger.info(f" -> Result: {result.get('status')} (ID: {result.get('data', {}).get('id')})") + + except Exception as e: + logger.error(f"Error in process_leads: {e}") + +if __name__ == "__main__": + process_leads() diff --git a/trading_twins_tool.py b/trading_twins_tool.py index 773cc18b..1f4df58a 100644 --- a/trading_twins_tool.py +++ b/trading_twins_tool.py @@ -1,6 +1,16 @@ import json import time import os +import sys + +# Ensure we can import from lead-engine +sys.path.append(os.path.join(os.path.dirname(__file__), 'lead-engine')) +try: + from trading_twins_ingest import process_leads +except ImportError: + print("Warning: Could not import trading_twins_ingest from lead-engine. Email ingestion disabled.") + process_leads = None + from company_explorer_connector import handle_company_workflow def run_trading_twins_process(target_company_name: str): @@ -46,6 +56,14 @@ def run_trading_twins_process(target_company_name: str): print(f"Trading Twins Analyse für {target_company_name} abgeschlossen.") print(f"{'='*50}\n") +def run_email_ingest(): + """Starts the automated email ingestion process for Tradingtwins leads.""" + if process_leads: + print("\nStarting automated email ingestion via Microsoft Graph...") + process_leads() + print("Email ingestion completed.") + else: + print("Error: Email ingestion module not available.") if __name__ == "__main__": # Simulieren der Umgebungsvariablen für diesen Testlauf, falls nicht gesetzt @@ -54,26 +72,28 @@ if __name__ == "__main__": if "COMPANY_EXPLORER_API_PASSWORD" not in os.environ: os.environ["COMPANY_EXPLORER_API_PASSWORD"] = "gemini" - # Testfall 1: Ein Unternehmen, das wahrscheinlich bereits existiert - # Da 'Robo-Planet GmbH' bei den vorherigen Läufen erstellt wurde, sollte es jetzt gefunden werden. 
- run_trading_twins_process("Robo-Planet GmbH") - - # Kurze Pause zwischen den Testläufen - time.sleep(5) - - # Testfall 1b: Ein bekanntes, real existierendes Unternehmen - run_trading_twins_process("Klinikum Landkreis Erding") - - # Kurze Pause zwischen den Testläufen - time.sleep(5) - - # Testfall 2: Ein neues, eindeutiges Unternehmen - new_unique_company_name = f"Trading Twins New Target {int(time.time())}" - run_trading_twins_process(new_unique_company_name) - - # Kurze Pause - time.sleep(5) + print("Trading Twins Tool - Main Menu") + print("1. Process specific company name") + print("2. Ingest leads from Email (info@robo-planet.de)") + print("3. Run demo sequence (Robo-Planet, Erding, etc.)") - # Testfall 3: Ein weiteres neues Unternehmen, um die Erstellung zu prüfen - another_new_company_name = f"Another Demo Corp {int(time.time())}" - run_trading_twins_process(another_new_company_name) + choice = input("\nSelect option (1-3): ").strip() + + if choice == "1": + name = input("Enter company name: ").strip() + if name: + run_trading_twins_process(name) + elif choice == "2": + run_email_ingest() + elif choice == "3": + # Testfall 1: Ein Unternehmen, das wahrscheinlich bereits existiert + run_trading_twins_process("Robo-Planet GmbH") + time.sleep(2) + # Testfall 1b: Ein bekanntes, real existierendes Unternehmen + run_trading_twins_process("Klinikum Landkreis Erding") + time.sleep(2) + # Testfall 2: Ein neues, eindeutiges Unternehmen + new_unique_company_name = f"Trading Twins New Target {int(time.time())}" + run_trading_twins_process(new_unique_company_name) + else: + print("Invalid choice.")