diff --git a/brancheneinstufung.py b/brancheneinstufung.py index 78dadb89..8a20ec17 100644 --- a/brancheneinstufung.py +++ b/brancheneinstufung.py @@ -103,6 +103,7 @@ def get_wikipedia_data(name, website_hint=""): parts = website_hint.replace("https://", "").replace("http://", "").split(".") if len(parts) > 1: begriffe.append(parts[0]) + for suchbegriff in begriffe: results = wikipedia.search(suchbegriff, results=3) for title in results: @@ -114,6 +115,12 @@ def get_wikipedia_data(name, website_hint=""): html = requests.get(url).text soup = BeautifulSoup(html, 'html.parser') infobox = soup.find("table", class_=["infobox", "infobox vcard"]) + if not infobox: + tables = soup.find_all("table") + for table in tables: + if any("Branche" in (th.text if th else '') for th in table.find_all("th")): + infobox = table + break branche = umsatz = "" if infobox: for row in infobox.find_all("tr"): @@ -183,7 +190,7 @@ for i in range(start, min(start + DURCHLÄUFE, len(sheet_values))): techniker_reason ] - sheet.update(range_name=f"G{i+2}:P{i+2}", values=[values]) + sheet.update(range_name=f"G{i+1}:P{i+1}", values=[values]) time.sleep(5) print("✅ Durchläufe abgeschlossen")