"""Look up a person's job title from Google search snippets.

Added as ``lead-engine/lookup_role.py``.  The flow is:

1. ``lookup_person_role`` queries SerpAPI for "<name> <company> Position Job".
2. ``extract_role_with_llm`` hands the organic results to Gemini and asks
   for the job title only.

On import the module loads ``../.env`` (relative to this file) and reads the
``SERP_API`` variable into ``SERP_API_KEY``.
"""
import json
import os
import re

import requests
from dotenv import load_dotenv

# Upper bound (seconds) for every outbound HTTP call. Without a timeout
# `requests` waits forever on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 15

GEMINI_URL = (
    "https://generativelanguage.googleapis.com/v1beta/models/"
    "gemini-2.0-flash:generateContent"
)

# Load env from root
env_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '.env'))
load_dotenv(dotenv_path=env_path, override=True)

SERP_API_KEY = os.getenv("SERP_API")


def _read_env_value(path, key):
    """Return the value of ``key`` from a dotenv-style file, or None.

    Only lines that start with ``key=`` are considered; when the key occurs
    several times the last occurrence wins. Surrounding whitespace and
    quotes are stripped from the value.
    """
    prefix = f"{key}="
    value = None
    try:
        with open(path, 'r', encoding='utf-8') as f:
            for line in f:
                if line.startswith(prefix):
                    # partition keeps everything after the FIRST '=', so
                    # values that themselves contain '=' are not truncated.
                    _, _, raw = line.partition('=')
                    value = raw.strip().strip('"\'')
    except (OSError, UnicodeDecodeError):
        # Best effort: a missing or unreadable file just means "no value"
        # (or whatever was found before the read failed).
        pass
    return value


if not SERP_API_KEY:
    print(f"DEBUG: Failed to load SERP_API from {env_path}")
    # Fallback: try reading the file directly if load_dotenv found nothing.
    SERP_API_KEY = _read_env_value(env_path, "SERP_API") or None
    if SERP_API_KEY:
        print("DEBUG: Loaded key via manual parsing.")


# --- Helper: Get Gemini Key ---
def get_gemini_key():
    """Return the Gemini API key, or None when none is configured.

    The key is read from ``gemini_api_key.txt`` in the parent directory of
    this file. If that file is missing, unreadable or empty, the
    ``GEMINI_API_KEY`` environment variable is used instead.
    """
    # abspath mirrors how env_path is built, so the lookup does not depend
    # on the current working directory when __file__ is relative.
    root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    key_path = os.path.join(root_dir, 'gemini_api_key.txt')
    try:
        with open(key_path, 'r', encoding='utf-8') as f:
            key = f.read().strip()
    except (OSError, UnicodeDecodeError):
        key = ""
    return key or os.getenv("GEMINI_API_KEY")


def extract_role_with_llm(name, company, search_results):
    """Use Gemini to identify the job title from search snippets.

    Args:
        name: Person whose role is wanted.
        company: Company the person is associated with.
        search_results: Iterable of dicts; the ``title`` and ``snippet``
            entries of each are passed to the model as context.

    Returns:
        The role string returned by the model (trailing dots removed), or
        None when no API key is available, the request fails, the response
        cannot be parsed, or the model answers "Unbekannt" / nothing.
    """
    api_key = get_gemini_key()
    if not api_key:
        return None

    context = "\n".join(
        f"- {r.get('title')}: {r.get('snippet')}" for r in search_results
    )

    prompt = f"""
    Analyze these Google Search results to identify the professional role of "{name}" at "{company}".

    SEARCH RESULTS:
    {context}

    TASK:
    Extract the exact Job Title / Role. Look for terms like "Geschäftsführer", "CEO", "CFO", "Leiter", "Head of", "Manager", "Inhaber", "Arzt".

    RULES:
    1. If multiple roles appear (e.g. "CFO & CEO"), pick the most senior one current role.
    2. Return ONLY the role string. No full sentences.
    3. If absolutely no role is mentioned in the snippets, return "Unbekannt".

    Example Input: "Georg Stahl ... CFO at KLEMM..."
    Example Output: CFO
    """

    # The key travels in a header rather than the query string so it can
    # never show up in URLs embedded in error messages.
    headers = {'Content-Type': 'application/json', 'x-goog-api-key': api_key}
    payload = {"contents": [{"parts": [{"text": prompt}]}]}

    try:
        response = requests.post(
            GEMINI_URL,
            headers=headers,
            json=payload,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as e:
        print(f"Gemini request failed: {e}")
        return None

    if response.status_code != 200:
        print(f"Gemini request failed with HTTP {response.status_code}")
        return None

    try:
        role = response.json()['candidates'][0]['content']['parts'][0]['text'].strip()
    except (ValueError, KeyError, IndexError, TypeError, AttributeError) as e:
        # ValueError covers a non-JSON body; the rest cover an unexpected
        # response shape (e.g. no candidates).
        print(f"Gemini response could not be parsed: {e!r}")
        return None

    # Cleanup: remove punctuation at the end
    role = role.rstrip('.')
    if not role or "Unbekannt" in role:
        return None
    return role


def lookup_person_role(name, company):
    """Search for a person's role via SerpAPI and extract it using the LLM.

    Args:
        name: Person to look up.
        company: Company the person is associated with.

    Returns:
        The role string produced by ``extract_role_with_llm``, or None when
        the SerpAPI key is missing, the search fails, there are no organic
        results, or no role could be extracted.
    """
    if not SERP_API_KEY:
        print("Error: SERP_API key not found in .env")
        return None

    # Broad query to find role/position
    query = f'{name} {company} Position Job'

    params = {
        "engine": "google",
        "q": query,
        "api_key": SERP_API_KEY,
        "num": 5,
        "hl": "de",  # Force German UI
        "gl": "de",  # Force German Location
    }

    try:
        response = requests.get(
            "https://serpapi.com/search",
            params=params,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        data = response.json()

        organic_results = data.get("organic_results", [])
        if not organic_results:
            return None

        # Delegate extraction to LLM
        return extract_role_with_llm(name, company, organic_results)

    except Exception as e:
        # Best-effort boundary: report and return None instead of crashing.
        # requests puts the full request URL (including api_key=...) into
        # its error messages, so redact the key before printing.
        message = str(e).replace(SERP_API_KEY, "***")
        print(f"SerpAPI lookup failed: {message}")
        return None


if __name__ == "__main__":
    # Test cases
    print(f"Markus Drees: {lookup_person_role('Markus Drees', 'Ärztehaus Rünthe')}")
    print(f"Georg Stahl: {lookup_person_role('Georg Stahl', 'Klemm Bohrtechnik GmbH')}")