bugfix
This commit is contained in:
@@ -246,12 +246,12 @@ def serp_website_lookup(company_name):
|
|||||||
"""
|
"""
|
||||||
Ermittelt über SERPAPI (Google-Suche) die Website zum Unternehmen.
|
Ermittelt über SERPAPI (Google-Suche) die Website zum Unternehmen.
|
||||||
- Verwendet als Query den Firmennamen.
|
- Verwendet als Query den Firmennamen.
|
||||||
- Filtert Ergebnisse anhand einer Blacklist (z.B. bloomberg.com, northdata.de).
|
- Filtert Ergebnisse anhand einer Blacklist (z.B. bloomberg.com, northdata.de, finanzen.net, handelsblatt.com).
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Die gefundene Website-URL oder "k.A.", falls kein passendes Ergebnis gefunden wurde.
|
Die gefundene Website-URL oder "k.A.", falls kein passendes Ergebnis gefunden wurde.
|
||||||
"""
|
"""
|
||||||
# Blacklist von Domains, die wir nicht verwenden wollen
|
# Blacklist unerwünschter Domains
|
||||||
blacklist = ["bloomberg.com", "northdata.de", "finanzen.net", "handelsblatt.com"]
|
blacklist = ["bloomberg.com", "northdata.de", "finanzen.net", "handelsblatt.com"]
|
||||||
try:
|
try:
|
||||||
with open("serpApiKey.txt", "r") as f:
|
with open("serpApiKey.txt", "r") as f:
|
||||||
@@ -267,14 +267,15 @@ def serp_website_lookup(company_name):
|
|||||||
"api_key": serp_key,
|
"api_key": serp_key,
|
||||||
"hl": "de"
|
"hl": "de"
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = requests.get("https://serpapi.com/search", params=params, timeout=10)
|
response = requests.get("https://serpapi.com/search", params=params, timeout=10)
|
||||||
data = response.json()
|
data = response.json()
|
||||||
if "organic_results" in data:
|
if "organic_results" in data:
|
||||||
for result in data["organic_results"]:
|
for result in data["organic_results"]:
|
||||||
# Extrahiere URL und prüfe, ob sie in der Blacklist enthalten ist
|
|
||||||
url = result.get("link", "")
|
url = result.get("link", "")
|
||||||
if url and not any(black_item in url for black_item in blacklist):
|
# Überprüfe, ob die URL nicht in der Blacklist enthalten ist
|
||||||
|
if url and not any(bad in url for bad in blacklist):
|
||||||
debug_print(f"SERP-Website Lookup: Gefundene Website '{url}' für {company_name}")
|
debug_print(f"SERP-Website Lookup: Gefundene Website '{url}' für {company_name}")
|
||||||
return url
|
return url
|
||||||
return "k.A."
|
return "k.A."
|
||||||
@@ -283,6 +284,7 @@ def serp_website_lookup(company_name):
|
|||||||
return "k.A."
|
return "k.A."
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# ==================== NEUE FUNKTION: process_verification_only ====================
|
# ==================== NEUE FUNKTION: process_verification_only ====================
|
||||||
def process_verification_only():
|
def process_verification_only():
|
||||||
debug_print("Starte Verifizierungsmodus (Modus 51) im Batch-Prozess...")
|
debug_print("Starte Verifizierungsmodus (Modus 51) im Batch-Prozess...")
|
||||||
|
|||||||
Reference in New Issue
Block a user