"""Look up a person's job title from Google search snippets.

Search results are fetched through SerpAPI and the job title is extracted
from the result titles/snippets with the Gemini API.
"""

import json
import os
import re

import requests
from dotenv import load_dotenv

# Try loading .env only if the file exists (local dev); otherwise rely on the
# environment provided by Docker.
env_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '.env'))
if os.path.exists(env_path):
    load_dotenv(dotenv_path=env_path, override=True)

SERP_API_KEY = os.getenv("SERP_API")
if not SERP_API_KEY:
    print("DEBUG: SERP_API not found in environment.")

# Seconds to wait for any single HTTP call. Without an explicit timeout,
# requests waits forever on a stalled connection.
REQUEST_TIMEOUT = 15


# --- Helper: Get Gemini Key ---
def get_gemini_key():
    """Return the Gemini API key, or None if none is configured.

    The key is read from the first non-empty ``gemini_api_key.txt`` found in
    the candidate locations below; if no file yields a key, the
    ``GEMINI_API_KEY`` environment variable is used.
    """
    script_dir = os.path.dirname(__file__)
    candidates = [
        "gemini_api_key.txt",  # Current dir
        "/app/gemini_api_key.txt",  # Docker default
        os.path.join(script_dir, "gemini_api_key.txt"),  # Script dir
        os.path.join(os.path.dirname(script_dir), 'gemini_api_key.txt'),  # Parent dir
    ]
    for path in candidates:
        try:
            # utf-8-sig transparently drops a BOM that some editors prepend.
            with open(path, 'r', encoding='utf-8-sig') as f:
                key = f.read().strip()
        except (OSError, UnicodeDecodeError):
            # Missing or unreadable candidate: try the next location.
            continue
        if key:
            return key
    return os.getenv("GEMINI_API_KEY")


def extract_role_with_llm(name, company, search_results):
    """Use Gemini to identify a job title from search snippets.

    Args:
        name: Full name of the person.
        company: Company the person is associated with.
        search_results: Iterable of dicts; the ``title`` and ``snippet``
            entries of each dict are passed to the model as context.

    Returns:
        The extracted role string, or None when no API key is available,
        the API call fails, or the model reports no role ("Unbekannt").
    """
    api_key = get_gemini_key()
    if not api_key:
        return None

    context = "\n".join(
        f"- {r.get('title')}: {r.get('snippet')}" for r in search_results
    )

    prompt = f"""
    Analyze these Google Search results to identify the professional role of "{name}" at "{company}".

    SEARCH RESULTS:
    {context}

    TASK:
    Extract the professional Job Title / Role.
    Look for:
    - Management: "Geschäftsführer", "Vorstand", "CFO", "Mitglied der Klinikleitung"
    - Department Heads: "Leiter", "Bereichsleitung", "Head of", "Pflegedienstleitung"
    - Specialized: "Arzt", "Ingenieur", "Einkäufer"

    RULES:
    1. Extract the most specific and senior current role.
    2. Return ONLY the role string (e.g. "Bereichsleitung Patientenmanagement").
    3. Maximum length: 60 characters.
    4. If no role is found, return "Unbekannt".
    """

    url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"
    # The key travels in a header rather than the query string so that it
    # cannot end up in exception messages, which embed the request URL.
    headers = {'Content-Type': 'application/json', 'x-goog-api-key': api_key}
    payload = {"contents": [{"parts": [{"text": prompt}]}]}

    # Deliberately best-effort: any network or response-shape problem is
    # logged and reported to the caller as "no role found".
    try:
        response = requests.post(
            url, headers=headers, json=payload, timeout=REQUEST_TIMEOUT
        )
        if response.status_code != 200:
            print(f"DEBUG: Gemini API Error {response.status_code}: {response.text}")
            return None

        role = response.json()['candidates'][0]['content']['parts'][0]['text'].strip()
        # Remove markdown formatting if any
        role = role.replace('**', '').replace('"', '').rstrip('.').strip()
        if not role or "Unbekannt" in role:
            return None
        return role
    except Exception as e:
        print(f"DEBUG: Gemini API Exception: {e}")
    return None


def lookup_person_role(name, company):
    """Search for a person's role via SerpAPI and extract it with the LLM.

    Queries are tried from most to least specific; searching stops as soon
    as at least three organic results have been collected.

    Args:
        name: Full name of the person.
        company: Company the person is associated with.

    Returns:
        The role string produced by ``extract_role_with_llm``, or None when
        the SerpAPI key is missing, no results are found, or extraction fails.
    """
    if not SERP_API_KEY:
        print("Error: SERP_API key not found in .env")
        return None

    # Ordered from highly specific to broad.
    queries = [
        f'site:linkedin.com "{name}" "{company}"',
        f'"{name}" "{company}" position',
        f'{name} {company}',
    ]

    all_results = []
    for query in queries:
        params = {
            "engine": "google",
            "q": query,
            "api_key": SERP_API_KEY,
            "num": 3,
            "hl": "de",
            "gl": "de",
        }
        # Best-effort per query: a failed search is logged and the next,
        # broader query is tried.
        try:
            response = requests.get(
                "https://serpapi.com/search", params=params, timeout=REQUEST_TIMEOUT
            )
            response.raise_for_status()
            results = response.json().get("organic_results", [])
        except Exception as e:
            # requests embeds the full URL (including api_key) in its error
            # messages; redact the key before logging.
            message = str(e).replace(SERP_API_KEY, "***")
            print(f"SerpAPI lookup failed for query '{query}': {message}")
            continue

        if results:
            all_results.extend(results)
        # If we have good results, we don't necessarily need more searches
        if len(all_results) >= 3:
            break

    if not all_results:
        return None

    # Delegate extraction to LLM with the best results found
    return extract_role_with_llm(name, company, all_results)


if __name__ == "__main__":
    # Test cases
    examples = [
        ('Markus Drees', 'Ärztehaus Rünthe'),
        ('Georg Stahl', 'Klemm Bohrtechnik GmbH'),
        ('Steve Trüby', 'RehaKlinikum Bad Säckingen GmbH'),
    ]
    for person, employer in examples:
        print(f"{person}: {lookup_person_role(person, employer)}")