"""Import Tradingtwins lead e-mails via Microsoft Graph.

Reads the most recent messages of the ``USER_EMAIL`` mailbox, extracts the
lead fields from the HTML body of every Tradingtwins inquiry and passes the
company (or contact) name to ``handle_company_workflow``.
"""
import html
import json
import logging
import os
import re
import sys
from datetime import datetime

import requests
from dotenv import load_dotenv

# Ensure we can import from root directory
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

# Import from root modules
try:
    from company_explorer_connector import handle_company_workflow
except ImportError:
    # Fallback/Mock for testing if run in isolation without full env
    def handle_company_workflow(company_name):
        return {"status": "mock", "data": {"name": company_name, "id": "mock-id"}}

# Configuration
load_dotenv(override=True)
CLIENT_ID = os.getenv("INFO_Application_ID")
TENANT_ID = os.getenv("INFO_Tenant_ID")
CLIENT_SECRET = os.getenv("INFO_Secret")
USER_EMAIL = "info@robo-planet.de"

# Seconds to wait for any single HTTP call; without a timeout ``requests``
# blocks forever on an unresponsive server.
REQUEST_TIMEOUT = 30

# Subject fragment that identifies a Tradingtwins inquiry mail.
SUBJECT_MARKER = "Neue Anfrage zum Thema Roboter"

# Maps the label text found in the mail body to the key used in the result.
FIELD_MAP = {
    'Firma': 'company',
    'Vorname': 'first_name',
    'Nachname': 'last_name',
    'E-Mail': 'email',
    'Rufnummer': 'phone',
    'Einsatzzweck': 'purpose',
    'Reinigungs-Fläche': 'area',
    'PLZ': 'zip',
    'Stadt': 'city',
    'Lead-ID': 'lead_id',
}

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


def get_access_token():
    """Request an app-only Microsoft Graph token (client-credentials grant).

    Returns:
        The ``access_token`` string from the token response, or ``None`` when
        the response JSON carries no such key.

    Raises:
        requests.RequestException: on network failure, timeout, or a non-2xx
            status (via ``raise_for_status``).
    """
    url = f"https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/token"
    data = {
        "client_id": CLIENT_ID,
        "scope": "https://graph.microsoft.com/.default",
        "client_secret": CLIENT_SECRET,
        "grant_type": "client_credentials",
    }
    response = requests.post(url, data=data, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.json().get("access_token")


def fetch_tradingtwins_emails(token, limit=20):
    """Return the newest mailbox messages whose subject marks a Tradingtwins lead.

    Args:
        token: Bearer token for Microsoft Graph.
        limit: Number of most recent messages to request (``$top``). The
            subject filter is applied afterwards, so the result may contain
            fewer entries than ``limit``.

    Returns:
        A list of message dicts (``id``, ``subject``, ``receivedDateTime``,
        ``body``); an empty list when Graph answers with a non-200 status.
    """
    url = f"https://graph.microsoft.com/v1.0/users/{USER_EMAIL}/messages"
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }
    # No server-side $filter is sent: we fetch the newest messages and match
    # the subject locally below.
    params = {
        "$top": limit,
        "$select": "id,subject,receivedDateTime,body",
        "$orderby": "receivedDateTime desc",
    }
    response = requests.get(url, headers=headers, params=params, timeout=REQUEST_TIMEOUT)
    if response.status_code != 200:
        logger.error("Graph API Error: %s - %s", response.status_code, response.text)
        return []

    all_msgs = response.json().get("value", [])
    # ``subject`` may be present but null, hence ``or ''`` instead of a
    # ``.get`` default.
    return [m for m in all_msgs if SUBJECT_MARKER in (m.get('subject') or '')]


def parse_tradingtwins_html(html_body):
    """Extract the lead fields from the HTML body of a Tradingtwins mail.

    For every label in ``FIELD_MAP`` the body is searched for ``Label:``
    directly after a closing ``>``; the content of the next ``<p ...>``
    element is taken as the value.

    NOTE(review): the pattern below was reconstructed from the explanatory
    comment next to it (value paragraph start / closing paragraph tag) —
    confirm against a real Tradingtwins mail.

    Args:
        html_body: HTML source of the message; ``None`` or an empty string
            yields an empty result.

    Returns:
        A dict with the keys from ``FIELD_MAP`` that were found, plus
        ``contact_name`` when both first and last name are present.
    """
    data = {}
    if not html_body:
        return data

    for label, key in FIELD_MAP.items():
        # Regex explanation:
        #   >\s*{label}:\s*  -> label text right after a tag, ending with colon
        #   .*?              -> non-greedy skip over cell closing/opening markup
        #   <p[^>]*>         -> start of the value paragraph
        #   (.*?)            -> the value itself
        #   </p>             -> closing paragraph tag
        pattern = fr'>\s*{re.escape(label)}:\s*.*?<p[^>]*>(.*?)</p>'
        match = re.search(pattern, html_body, re.DOTALL | re.IGNORECASE)
        if not match:
            continue
        # Strip nested markup (e.g. mailto:/tel: links), then decode entities
        # so that "A &amp; B" becomes "A & B". Order matters: decoding first
        # could turn "&lt;x&gt;" into something the tag stripper removes.
        without_tags = re.sub(r'<[^>]+>', '', match.group(1))
        data[key] = html.unescape(without_tags).strip()

    # Composite fields
    if data.get('first_name') and data.get('last_name'):
        data['contact_name'] = f"{data['first_name']} {data['last_name']}"

    return data


def process_leads():
    """Fetch Tradingtwins mails and trigger the Company Explorer per lead.

    Mails without a usable company or contact name are skipped with a
    warning. A failure while handling one lead is logged and does not stop
    the remaining leads; a failure while authenticating or fetching is logged
    and ends the run. Nothing is returned or raised.
    """
    try:
        token = get_access_token()
        emails = fetch_tradingtwins_emails(token)
    except Exception:
        logger.exception("Error in process_leads")
        return

    logger.info("Found %d Tradingtwins emails.", len(emails))

    for message in emails:
        try:
            body = (message.get('body') or {}).get('content') or ''
            lead_data = parse_tradingtwins_html(body)

            company_name = lead_data.get('company')
            if not company_name or company_name == '-':
                # Fallback if company is empty (sometimes happens with private
                # persons): use the contact name as company name.
                company_name = lead_data.get('contact_name')

            if not company_name:
                logger.warning(
                    "Skipping email %s: No company or contact name found.",
                    message.get('id'),
                )
                continue

            logger.info("Processing Lead: %s (ID: %s)", company_name, lead_data.get('lead_id'))

            # Trigger Company Explorer Workflow
            # Note: processed message IDs are not tracked here; duplicates are
            # left to the Company Explorer's own deduplication.
            logger.info(" -> Triggering Company Explorer for '%s'...", company_name)
            result = handle_company_workflow(company_name)
            result_data = result.get('data') or {}
            logger.info(" -> Result: %s (ID: %s)", result.get('status'), result_data.get('id'))
        except Exception:
            # Keep going: one broken mail must not block the other leads.
            logger.exception("Error in process_leads for email %s", message.get('id'))


if __name__ == "__main__":
    process_leads()