diff --git a/brancheneinstufung.py b/brancheneinstufung.py
index ac1f31f7..6e9bf2b3 100644
--- a/brancheneinstufung.py
+++ b/brancheneinstufung.py
@@ -15,7 +15,7 @@ from difflib import SequenceMatcher
 from lxml import html as lh
 
 # === KONFIGURATION ===
-VERSION = "1.0.5-xpath"
+VERSION = "1.0.8-wiki-api"
 LANG = "de"
 CREDENTIALS = "service_account.json"
 SHEET_URL = "https://docs.google.com/spreadsheets/d/1u_gHr9JUfmV1-iviRzbSe3575QEp7KLhK5jFV_gJcgo"
@@ -55,8 +55,8 @@ def parse_infobox_xpath(html_text):
     branche = "k.A."
     umsatz = "k.A."
     try:
-        branche_xpath = doc.xpath("//table[contains(@class, 'infobox')]//tr[th[contains(text(), 'Branche')]]/td/text()")
-        umsatz_xpath = doc.xpath("//table[contains(@class, 'infobox')]//tr[th[contains(translate(text(),'UMSATZ','umsatz'), 'umsatz')]]/td/text()")
+        branche_xpath = doc.xpath("//table[contains(@class, 'infobox')]//tr[th[contains(normalize-space(), 'Branche') or contains(normalize-space(), 'Tätigkeitsfeld')]]/td/text()")
+        umsatz_xpath = doc.xpath("//table[contains(@class, 'infobox')]//tr[th[contains(translate(normalize-space(), 'UMSATZ', 'umsatz'), 'umsatz')]]/td/text()")
         if branche_xpath:
             branche = branche_xpath[0].strip()
         if umsatz_xpath: