From d08899a7a9f95f990ef8beff72660f362b2cb8e2 Mon Sep 17 00:00:00 2001 From: Floke Date: Mon, 2 Mar 2026 09:52:51 +0000 Subject: [PATCH] [31388f42] Implement hierarchical search strategy for more robust role discovery --- lead-engine/lookup_role.py | 78 ++++++++++++++++++++++---------------- 1 file changed, 46 insertions(+), 32 deletions(-) diff --git a/lead-engine/lookup_role.py b/lead-engine/lookup_role.py index 49c36d3e..d3c6743a 100644 --- a/lead-engine/lookup_role.py +++ b/lead-engine/lookup_role.py @@ -48,15 +48,17 @@ def extract_role_with_llm(name, company, search_results): {context} TASK: - Extract the exact Job Title / Role. Look for terms like "Geschäftsführer", "CEO", "CFO", "Leiter", "Head of", "Manager", "Inhaber", "Arzt". + Extract the professional Job Title / Role. + Look for: + - Management: "Geschäftsführer", "Vorstand", "CFO", "Mitglied der Klinikleitung" + - Department Heads: "Leiter", "Bereichsleitung", "Head of", "Pflegedienstleitung" + - Specialized: "Arzt", "Ingenieur", "Einkäufer" RULES: - 1. If multiple roles appear (e.g. "CFO & CEO"), pick the most senior one current role. - 2. Return ONLY the role string. No full sentences. - 3. If absolutely no role is mentioned in the snippets, return "Unbekannt". - - Example Input: "Georg Stahl ... CFO at KLEMM..." - Example Output: CFO + 1. Extract the most specific and senior current role. + 2. Return ONLY the role string (e.g. "Bereichsleitung Patientenmanagement"). + 3. Maximum length: 60 characters. + 4. If no role is found, return "Unbekannt". """ url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={api_key}" @@ -64,8 +66,8 @@ def extract_role_with_llm(name, company, search_results): response = requests.post(url, headers={'Content-Type': 'application/json'}, json={"contents": [{"parts": [{"text": prompt}]}]}) if response.status_code == 200: role = response.json()['candidates'][0]['content']['parts'][0]['text'].strip() - # Cleanup: remove punctuation at the end - role = role.rstrip('.') + # Remove markdown formatting if any + role = role.replace('**', '').replace('"', '').rstrip('.') return None if "Unbekannt" in role else role else: print(f"DEBUG: Gemini API Error {response.status_code}: {response.text}") @@ -76,40 +78,52 @@ def extract_role_with_llm(name, company, search_results): def lookup_person_role(name, company): """ Searches for a person's role via SerpAPI and extracts it using LLM. + Uses a multi-step search strategy to find the best snippets. """ if not SERP_API_KEY: print("Error: SERP_API key not found in .env") return None - # Broad query to find role/position - query = f'{name} {company} Position Job' + # Step 1: Highly specific search + queries = [ + f'site:linkedin.com "{name}" "{company}"', + f'"{name}" "{company}" position', + f'{name} {company}' + ] - params = { - "engine": "google", - "q": query, - "api_key": SERP_API_KEY, - "num": 5, - "hl": "de", # Force German UI - "gl": "de" # Force German Location - } + all_results = [] + for query in queries: + params = { + "engine": "google", + "q": query, + "api_key": SERP_API_KEY, + "num": 3, + "hl": "de", + "gl": "de" + } - try: - response = requests.get("https://serpapi.com/search", params=params) - response.raise_for_status() - data = response.json() - - organic_results = data.get("organic_results", []) - if not organic_results: - return None + try: + response = requests.get("https://serpapi.com/search", params=params) + response.raise_for_status() + data = response.json() + + results = data.get("organic_results", []) + if results: + all_results.extend(results) + # If we have good results, we don't necessarily need more searches + if len(all_results) >= 3: + break + except Exception as e: + print(f"SerpAPI lookup failed for query '{query}': {e}") - # Delegate extraction to LLM - return extract_role_with_llm(name, company, organic_results) - - except Exception as e: - print(f"SerpAPI lookup failed: {e}") + if not all_results: return None + # Delegate extraction to LLM with the best results found + return extract_role_with_llm(name, company, all_results) + if __name__ == "__main__": # Test cases print(f"Markus Drees: {lookup_person_role('Markus Drees', 'Ärztehaus Rünthe')}") print(f"Georg Stahl: {lookup_person_role('Georg Stahl', 'Klemm Bohrtechnik GmbH')}") + print(f"Steve Trüby: {lookup_person_role('Steve Trüby', 'RehaKlinikum Bad Säckingen GmbH')}")