bugfix
This commit is contained in:
@@ -3367,15 +3367,33 @@ class DataProcessor:
|
||||
debug_print(f" -> Nutze vorhandene URL aus Spalte M: {url_to_parse}")
|
||||
new_wiki_data_extracted = self.wiki_scraper.extract_company_data(url_to_parse)
|
||||
else:
|
||||
# --- Nur wenn M leer/ungültig ist, starte die Suche ---
|
||||
debug_print(f" -> Spalte M ('{url_to_parse}') ungültig/leer. Starte Wiki-Suche...")
|
||||
# ... (Suchlogik wie zuvor, nutzt crm_wiki_url als Fallback) ...
|
||||
valid_crm_wiki_url = crm_wiki_url if crm_wiki_url and crm_wiki_url.strip() not in ["", "k.A."] else None
|
||||
article_page = None; current_website = website_url if website_url and website_url != 'k.A.' else original_website
|
||||
if valid_crm_wiki_url: # ... (Prüfe CRM Vorschlag) ...
|
||||
else: article_page = self.wiki_scraper.search_company_article(company_name, current_website)
|
||||
if article_page: new_wiki_data_extracted = self.wiki_scraper.extract_company_data(article_page.url)
|
||||
else: new_wiki_data_extracted = {'url': 'Kein Artikel gefunden', 'first_paragraph': 'k.A.', 'branche': 'k.A.', 'umsatz': 'k.A.', 'mitarbeiter': 'k.A.', 'categories': 'k.A.'}
|
||||
article_page = None # Initialisiere article_page
|
||||
current_website_for_validation = website_url if website_url and website_url != 'k.A.' else original_website
|
||||
|
||||
# --- KORREKTE EINRÜCKUNG HIER ---
|
||||
if valid_crm_wiki_url:
|
||||
debug_print(f" -> Prüfe CRM Vorschlag L: {valid_crm_wiki_url}")
|
||||
page = self.wiki_scraper._fetch_page_content(valid_crm_wiki_url.split('/')[-1])
|
||||
if page and self.wiki_scraper._validate_article(page, company_name, current_website_for_validation):
|
||||
article_page = page
|
||||
else:
|
||||
debug_print(f" -> CRM Vorschlag L nicht validiert. Starte Suche...")
|
||||
# Wenn CRM-Vorschlag nicht validiert, Suche trotzdem starten
|
||||
article_page = self.wiki_scraper.search_company_article(company_name, current_website_for_validation)
|
||||
else:
|
||||
# --- DIESE ZEILE IST JETZT KORREKT EINGERÜCKT UNTER DEM ELSE ---
|
||||
debug_print(f" -> Kein CRM Vorschlag L. Starte Suche...")
|
||||
article_page = self.wiki_scraper.search_company_article(company_name, current_website_for_validation)
|
||||
# --- ENDE KORREKTE EINRÜCKUNG ---
|
||||
|
||||
if article_page:
|
||||
debug_print(f" -> Artikel gefunden durch Suche: {article_page.url}")
|
||||
new_wiki_data_extracted = self.wiki_scraper.extract_company_data(article_page.url)
|
||||
else:
|
||||
debug_print(f" -> Kein passender Wikipedia Artikel durch Suche gefunden.")
|
||||
new_wiki_data_extracted = {'url': 'Kein Artikel gefunden', 'first_paragraph': 'k.A.', 'branche': 'k.A.', 'umsatz': 'k.A.', 'mitarbeiter': 'k.A.', 'categories': 'k.A.'}
|
||||
|
||||
# --- WICHTIG: Überschreibe wiki_data mit den NEUEN Ergebnissen ---
|
||||
if new_wiki_data_extracted:
|
||||
|
||||
Reference in New Issue
Block a user