130 lines
4.5 KiB
Python
130 lines
4.5 KiB
Python
import os
|
|
import requests
|
|
import re
|
|
from dotenv import load_dotenv
|
|
|
|
# Local development: if a .env file sits one directory above this module, load
# it (override=True is passed so its values are applied even when a variable is
# already set). When no such file exists (e.g. inside Docker), the process
# environment is used as-is.
env_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '.env'))
if os.path.exists(env_path):
    load_dotenv(dotenv_path=env_path, override=True)

# SerpAPI credential; read once at import time and used by lookup_person_role().
SERP_API_KEY = os.environ.get("SERP_API")

if not SERP_API_KEY:
    # Import-time hint only; the module still loads without the key.
    print("DEBUG: SERP_API not found in environment.")
|
|
|
|
import json
|
|
|
|
# --- Helper: Get Gemini Key ---
|
|
def get_gemini_key():
    """Return the Gemini API key from a key file or the environment.

    Candidate ``gemini_api_key.txt`` locations are tried in order: the current
    working directory, ``/app`` (Docker default), this script's directory and
    its parent directory. The first file that can be read and holds a
    non-blank key wins. If no file yields a key, the ``GEMINI_API_KEY``
    environment variable is used instead.

    Returns:
        The key with surrounding whitespace stripped, or ``None`` when no key
        file is usable and ``GEMINI_API_KEY`` is not set.
    """
    script_dir = os.path.dirname(__file__)
    candidates = [
        "gemini_api_key.txt",  # Current dir
        "/app/gemini_api_key.txt",  # Docker default
        os.path.join(script_dir, "gemini_api_key.txt"),  # Script dir
        os.path.join(os.path.dirname(script_dir), "gemini_api_key.txt"),  # Parent dir
    ]

    for path in candidates:
        # Open directly instead of exists()+open(): avoids the race between
        # the check and the read, and a missing file is simply skipped.
        try:
            with open(path, "r", encoding="utf-8") as f:
                key = f.read().strip()
        except FileNotFoundError:
            continue
        except (OSError, UnicodeDecodeError) as e:
            # Best effort: report the unreadable file and try the next one.
            print(f"DEBUG: Could not read Gemini key file '{path}': {e}")
            continue

        # A blank file is not a key; keep looking rather than returning "".
        if key:
            return key

    return os.getenv("GEMINI_API_KEY")
|
|
|
|
def extract_role_with_llm(name, company, search_results):
    """Use Gemini to identify a person's job title from search snippets.

    Args:
        name: Person's name; inserted verbatim into the prompt.
        company: Company name; inserted verbatim into the prompt.
        search_results: Iterable of dicts. The ``title`` and ``snippet`` keys
            of each entry are read (via ``.get``) to build the prompt context.

    Returns:
        The role text returned by the model with ``**`` and ``"`` removed and
        trailing periods stripped. ``None`` when no API key is available, the
        HTTP call fails or returns a non-200 status, the response cannot be
        parsed, or the answer is empty or contains "Unbekannt".
    """
    api_key = get_gemini_key()
    if not api_key:
        return None

    context = "\n".join(
        f"- {r.get('title')}: {r.get('snippet')}" for r in search_results
    )

    # Content lines start at column 0 so the prompt carries no indentation.
    prompt = f"""
Analyze these Google Search results to identify the professional role of "{name}" at "{company}".

SEARCH RESULTS:
{context}

TASK:
Extract the professional Job Title / Role.
Look for:
- Management: "Geschäftsführer", "Vorstand", "CFO", "Mitglied der Klinikleitung"
- Department Heads: "Leiter", "Bereichsleitung", "Head of", "Pflegedienstleitung"
- Specialized: "Arzt", "Ingenieur", "Einkäufer"

RULES:
1. Extract the most specific and senior current role.
2. Return ONLY the role string (e.g. "Bereichsleitung Patientenmanagement").
3. Maximum length: 60 characters.
4. If no role is found, return "Unbekannt".
"""

    url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"
    headers = {
        'Content-Type': 'application/json',
        # Pass the key as a header instead of a ?key= query parameter so it
        # cannot show up in the exception text printed below.
        'x-goog-api-key': api_key,
    }
    payload = {"contents": [{"parts": [{"text": prompt}]}]}

    try:
        # Without a timeout, requests waits indefinitely on a stalled connection.
        response = requests.post(url, headers=headers, json=payload, timeout=30)
        if response.status_code == 200:
            role = response.json()['candidates'][0]['content']['parts'][0]['text'].strip()
            # Remove markdown formatting if any
            role = role.replace('**', '').replace('"', '').rstrip('.')
            # Treat an empty answer like "not found" rather than returning "".
            if not role or "Unbekannt" in role:
                return None
            return role
        print(f"DEBUG: Gemini API Error {response.status_code}: {response.text}")
    except Exception as e:
        # Best-effort boundary: network errors and unexpected response shapes
        # (KeyError/IndexError while unpacking) all end up as "no role".
        print(f"DEBUG: Gemini API Exception: {e}")
    return None
|
|
|
|
def lookup_person_role(name, company):
    """Search for a person's role via SerpAPI and extract it with the LLM.

    Up to three Google queries are issued, from most to least specific
    (LinkedIn-restricted, quoted name/company plus "position", plain
    name/company). Organic results are accumulated across queries and the
    search stops as soon as at least three have been collected.

    Args:
        name: Person's name used in the search queries.
        company: Company name used in the search queries.

    Returns:
        Whatever ``extract_role_with_llm`` returns for the collected results,
        or ``None`` when ``SERP_API_KEY`` is not set or no query produced any
        organic results.
    """
    if not SERP_API_KEY:
        print("Error: SERP_API key not found in .env")
        return None

    # Ordered from highly specific to broad.
    queries = [
        f'site:linkedin.com "{name}" "{company}"',
        f'"{name}" "{company}" position',
        f'{name} {company}',
    ]

    all_results = []
    for query in queries:
        params = {
            "engine": "google",
            "q": query,
            "api_key": SERP_API_KEY,
            "num": 3,
            "hl": "de",
            "gl": "de",
        }

        try:
            # Without a timeout, one stalled SerpAPI call would block the
            # whole lookup; on timeout the next query is still attempted.
            response = requests.get(
                "https://serpapi.com/search", params=params, timeout=30
            )
            response.raise_for_status()
            data = response.json()

            results = data.get("organic_results", [])
            if results:
                all_results.extend(results)

            # If we have good results, we don't necessarily need more searches
            if len(all_results) >= 3:
                break
        except Exception as e:
            # Best effort: a failed query is reported and the next one is tried.
            print(f"SerpAPI lookup failed for query '{query}': {e}")

    if not all_results:
        return None

    # Delegate extraction to LLM with the best results found
    return extract_role_with_llm(name, company, all_results)
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: look up a few known (person, company) pairs and
    # print "<person>: <role>" for each. Performs live API calls.
    test_cases = [
        ('Markus Drees', 'Ärztehaus Rünthe'),
        ('Georg Stahl', 'Klemm Bohrtechnik GmbH'),
        ('Steve Trüby', 'RehaKlinikum Bad Säckingen GmbH'),
    ]
    for person, employer in test_cases:
        print(f"{person}: {lookup_person_role(person, employer)}")
|