def serp_website_lookup(company_name):
    """
    Look up a company's website via SerpAPI (Google search).

    Uses the company name as the search query and filters the organic
    results against a blacklist of aggregator/news domains that are never
    the company's own site (bloomberg.com, northdata.de, finanzen.net,
    handelsblatt.com).

    Args:
        company_name: Name of the company to search for.

    Returns:
        The first non-blacklisted result URL, or "k.A." if the API key
        cannot be read, the request fails, or no suitable result exists.
    """
    # Aggregator/news domains — results from these are never the company's own site.
    blacklist = ["bloomberg.com", "northdata.de", "finanzen.net", "handelsblatt.com"]

    # NOTE(review): key file is re-read on every call; acceptable for low volume.
    try:
        with open("serpApiKey.txt", "r") as f:
            serp_key = f.read().strip()
    except OSError as e:  # narrowed from broad Exception: only file I/O can fail here
        debug_print(f"Fehler beim Lesen des SerpAPI-Schlüssels: {e}")
        return "k.A."

    query = f"{company_name} Website"
    params = {
        "engine": "google",
        "q": query,
        "api_key": serp_key,
        "hl": "de",
    }
    try:
        response = requests.get("https://serpapi.com/search", params=params, timeout=10)
        # Bugfix: surface HTTP-level errors (4xx/5xx) instead of silently
        # parsing an error body as if it were a successful search response.
        response.raise_for_status()
        data = response.json()
        # .get() with a default handles a missing "organic_results" key, and
        # the trailing return guarantees "k.A." on every no-result path
        # (the original could fall through and implicitly return None).
        for result in data.get("organic_results", []):
            url = result.get("link", "")
            # Skip results whose URL contains a blacklisted domain.
            if url and not any(bad in url for bad in blacklist):
                debug_print(f"SERP-Website Lookup: Gefundene Website '{url}' für {company_name}")
                return url
        return "k.A."
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection/timeout/HTTP errors;
        # ValueError covers a non-JSON response body from .json().
        debug_print(f"Fehler beim SERP-API Website Lookup für {company_name}: {e}")
        return "k.A."
+ def create_log_filename(mode): now = datetime.now().strftime("%d-%m-%Y_%H-%M") ver_short = Config.VERSION.replace(".", "")