bugfix
This commit is contained in:
@@ -3367,15 +3367,33 @@ class DataProcessor:
|
|||||||
debug_print(f" -> Nutze vorhandene URL aus Spalte M: {url_to_parse}")
|
debug_print(f" -> Nutze vorhandene URL aus Spalte M: {url_to_parse}")
|
||||||
new_wiki_data_extracted = self.wiki_scraper.extract_company_data(url_to_parse)
|
new_wiki_data_extracted = self.wiki_scraper.extract_company_data(url_to_parse)
|
||||||
else:
|
else:
|
||||||
# --- Nur wenn M leer/ungültig ist, starte die Suche ---
|
|
||||||
debug_print(f" -> Spalte M ('{url_to_parse}') ungültig/leer. Starte Wiki-Suche...")
|
debug_print(f" -> Spalte M ('{url_to_parse}') ungültig/leer. Starte Wiki-Suche...")
|
||||||
# ... (Suchlogik wie zuvor, nutzt crm_wiki_url als Fallback) ...
|
|
||||||
valid_crm_wiki_url = crm_wiki_url if crm_wiki_url and crm_wiki_url.strip() not in ["", "k.A."] else None
|
valid_crm_wiki_url = crm_wiki_url if crm_wiki_url and crm_wiki_url.strip() not in ["", "k.A."] else None
|
||||||
article_page = None; current_website = website_url if website_url and website_url != 'k.A.' else original_website
|
article_page = None # Initialisiere article_page
|
||||||
if valid_crm_wiki_url: # ... (Prüfe CRM Vorschlag) ...
|
current_website_for_validation = website_url if website_url and website_url != 'k.A.' else original_website
|
||||||
else: article_page = self.wiki_scraper.search_company_article(company_name, current_website)
|
|
||||||
if article_page: new_wiki_data_extracted = self.wiki_scraper.extract_company_data(article_page.url)
|
# --- KORREKTE EINRÜCKUNG HIER ---
|
||||||
else: new_wiki_data_extracted = {'url': 'Kein Artikel gefunden', 'first_paragraph': 'k.A.', 'branche': 'k.A.', 'umsatz': 'k.A.', 'mitarbeiter': 'k.A.', 'categories': 'k.A.'}
|
if valid_crm_wiki_url:
|
||||||
|
debug_print(f" -> Prüfe CRM Vorschlag L: {valid_crm_wiki_url}")
|
||||||
|
page = self.wiki_scraper._fetch_page_content(valid_crm_wiki_url.split('/')[-1])
|
||||||
|
if page and self.wiki_scraper._validate_article(page, company_name, current_website_for_validation):
|
||||||
|
article_page = page
|
||||||
|
else:
|
||||||
|
debug_print(f" -> CRM Vorschlag L nicht validiert. Starte Suche...")
|
||||||
|
# Wenn CRM-Vorschlag nicht validiert, Suche trotzdem starten
|
||||||
|
article_page = self.wiki_scraper.search_company_article(company_name, current_website_for_validation)
|
||||||
|
else:
|
||||||
|
# --- DIESE ZEILE IST JETZT KORREKT EINGERÜCKT UNTER DEM ELSE ---
|
||||||
|
debug_print(f" -> Kein CRM Vorschlag L. Starte Suche...")
|
||||||
|
article_page = self.wiki_scraper.search_company_article(company_name, current_website_for_validation)
|
||||||
|
# --- ENDE KORREKTE EINRÜCKUNG ---
|
||||||
|
|
||||||
|
if article_page:
|
||||||
|
debug_print(f" -> Artikel gefunden durch Suche: {article_page.url}")
|
||||||
|
new_wiki_data_extracted = self.wiki_scraper.extract_company_data(article_page.url)
|
||||||
|
else:
|
||||||
|
debug_print(f" -> Kein passender Wikipedia Artikel durch Suche gefunden.")
|
||||||
|
new_wiki_data_extracted = {'url': 'Kein Artikel gefunden', 'first_paragraph': 'k.A.', 'branche': 'k.A.', 'umsatz': 'k.A.', 'mitarbeiter': 'k.A.', 'categories': 'k.A.'}
|
||||||
|
|
||||||
# --- WICHTIG: Überschreibe wiki_data mit den NEUEN Ergebnissen ---
|
# --- WICHTIG: Überschreibe wiki_data mit den NEUEN Ergebnissen ---
|
||||||
if new_wiki_data_extracted:
|
if new_wiki_data_extracted:
|
||||||
|
|||||||
Reference in New Issue
Block a user