Files
Brancheneinstufung2/lead-engine/lookup_role.py

116 lines
3.9 KiB
Python

import os
import requests
import re
from dotenv import load_dotenv
# Load variables from a `.env` file located one directory above this script
# when that file exists (local development); otherwise rely on the variables
# already present in the process environment (e.g. injected by Docker).
env_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '.env'))
if os.path.exists(env_path):
    # override=True: values from the .env file take precedence over variables
    # that are already set in the environment.
    load_dotenv(dotenv_path=env_path, override=True)

# SerpAPI key, read from the SERP_API environment variable. When it is
# missing or empty, lookup_person_role() returns None without searching.
SERP_API_KEY = os.getenv("SERP_API")
if not SERP_API_KEY:
    # Plain string: the message has no placeholders, so no f-string is needed.
    print("DEBUG: SERP_API not found in environment.")
import json
# --- Helper: Get Gemini Key ---
def get_gemini_key():
    """Return the Gemini API key from a key file or the environment.

    The following locations are tried in order, and the first file that
    yields a non-empty key wins:

    1. ``gemini_api_key.txt`` in the current working directory
    2. ``/app/gemini_api_key.txt`` (Docker default)
    3. ``gemini_api_key.txt`` next to this script
    4. ``gemini_api_key.txt`` in the parent directory of this script

    If no file provides a key, the ``GEMINI_API_KEY`` environment variable
    is used.

    Returns:
        The key with surrounding whitespace removed, or whatever
        ``os.getenv("GEMINI_API_KEY")`` returns (``None`` when unset).
    """
    script_dir = os.path.dirname(__file__)
    candidates = [
        "gemini_api_key.txt",  # Current dir
        "/app/gemini_api_key.txt",  # Docker default
        os.path.join(script_dir, "gemini_api_key.txt"),  # Script dir
        os.path.join(os.path.dirname(script_dir), 'gemini_api_key.txt'),  # Parent dir
    ]
    for path in candidates:
        try:
            # utf-8-sig transparently drops a leading BOM, which would
            # otherwise become part of the key.
            with open(path, 'r', encoding='utf-8-sig') as f:
                key = f.read().strip()
        except FileNotFoundError:
            # Expected for most candidates; just try the next location.
            continue
        except (OSError, UnicodeError) as e:
            # Unreadable file (permissions, directory, bad encoding):
            # stay best-effort, but report it instead of hiding it.
            print(f"DEBUG: Could not read Gemini key file {path}: {e}")
            continue
        if key:
            return key
        # An empty key file must not block the remaining fallbacks.
    return os.getenv("GEMINI_API_KEY")
def extract_role_with_llm(name, company, search_results, timeout=30):
    """Use Gemini to identify a person's job title from search snippets.

    Args:
        name: Name of the person whose role is wanted.
        company: Company the person is associated with.
        search_results: Iterable of dicts; the ``title`` and ``snippet`` of
            each item are listed in the prompt.
        timeout: Seconds to wait for the Gemini API before giving up.

    Returns:
        The role string returned by the model with trailing periods removed,
        or ``None`` when no API key is available, the request fails, the
        response cannot be parsed, or the model reports no role
        ("Unbekannt" or an empty answer).
    """
    api_key = get_gemini_key()
    if not api_key:
        return None

    # Missing fields become empty strings instead of the literal text "None".
    context = "\n".join(
        f"- {r.get('title') or ''}: {r.get('snippet') or ''}"
        for r in search_results
    )
    prompt = f"""
Analyze these Google Search results to identify the professional role of "{name}" at "{company}".
SEARCH RESULTS:
{context}
TASK:
Extract the exact Job Title / Role. Look for terms like "Geschäftsführer", "CEO", "CFO", "Leiter", "Head of", "Manager", "Inhaber", "Arzt".
RULES:
1. If multiple roles appear (e.g. "CFO & CEO"), pick the most senior one current role.
2. Return ONLY the role string. No full sentences.
3. If absolutely no role is mentioned in the snippets, return "Unbekannt".
Example Input: "Georg Stahl ... CFO at KLEMM..."
Example Output: CFO
"""
    url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"
    headers = {
        'Content-Type': 'application/json',
        # The key travels in a header rather than in `?key=...` so that it
        # does not show up in URLs embedded in the exception text printed below.
        'x-goog-api-key': api_key,
    }
    payload = {"contents": [{"parts": [{"text": prompt}]}]}

    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    except requests.RequestException as e:
        print(f"DEBUG: Gemini API Exception: {e}")
        return None

    if response.status_code != 200:
        print(f"DEBUG: Gemini API Error {response.status_code}: {response.text}")
        return None

    try:
        role = response.json()['candidates'][0]['content']['parts'][0]['text'].strip()
    except (KeyError, IndexError, TypeError, AttributeError, ValueError) as e:
        # Unexpected response shape or a body that is not valid JSON.
        print(f"DEBUG: Gemini API Exception: {e}")
        return None

    # Cleanup: remove punctuation at the end
    role = role.rstrip('.').strip()
    if not role or "unbekannt" in role.casefold():
        return None
    return role
def lookup_person_role(name, company, timeout=30):
    """Search for a person's role via SerpAPI and extract it using the LLM.

    Args:
        name: Name of the person to look up.
        company: Company the person is associated with.
        timeout: Seconds to wait for SerpAPI before giving up.

    Returns:
        The role returned by ``extract_role_with_llm`` for the organic search
        results, or ``None`` when the SerpAPI key is missing, the search
        fails, or it yields no organic results.
    """
    if not SERP_API_KEY:
        print("Error: SERP_API key not found in .env")
        return None

    # Broad query to find role/position
    query = f'{name} {company} Position Job'
    params = {
        "engine": "google",
        "q": query,
        "api_key": SERP_API_KEY,
        "num": 5,
        "hl": "de",  # Force German UI
        "gl": "de",  # Force German Location
    }

    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get("https://serpapi.com/search", params=params, timeout=timeout)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # The exception text can contain the full request URL, which includes
        # the api_key query parameter; mask it before printing.
        print(f"SerpAPI lookup failed: {str(e).replace(SERP_API_KEY, '***')}")
        return None

    organic_results = data.get("organic_results") if isinstance(data, dict) else None
    if not isinstance(organic_results, list):
        return None
    # Only dict entries can be read by the extractor; drop anything else.
    hits = [entry for entry in organic_results if isinstance(entry, dict)]
    if not hits:
        return None

    # Delegate extraction to LLM
    return extract_role_with_llm(name, company, hits)
if __name__ == "__main__":
    # Manual smoke test: look up each sample (person, company) pair and
    # print the result as "<person>: <role>".
    samples = (
        ('Markus Drees', 'Ärztehaus Rünthe'),
        ('Georg Stahl', 'Klemm Bohrtechnik GmbH'),
    )
    for person, firm in samples:
        print(f"{person}: {lookup_person_role(person, firm)}")