bugfix
This commit is contained in:
@@ -1272,12 +1272,77 @@ def count_linkedin_contacts(company_name, website, position_query):
|
|||||||
debug_print(f"Fehler bei der SerpAPI-Suche (Count): {e}")
|
debug_print(f"Fehler bei der SerpAPI-Suche (Count): {e}")
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
# ==================== MAIN-FUNKTION ====================
|
# ----------------- DataProcessor-Klasse inklusive neuer SERP-API Website Lookup-Methode -----------------
|
||||||
|
class DataProcessor:
    """Run the sheet-processing routine that belongs to the selected MODE.

    The instance owns a ``GoogleSheetHandler`` (source of ``sheet_values`` and
    the worksheet that is written to) and a ``WikipediaScraper``.

    NOTE(review): ``_process_single_row`` and ``process_rows_complete`` are
    called by ``process_rows`` but are not defined in this part of the file --
    confirm that both exist on the class.
    """

    # Keyword flags handed to ``_process_single_row`` for the modes that walk
    # over every data row with a fixed Wikipedia/ChatGPT configuration:
    #   "21" website-scraping test mode, "31" ChatGPT only, "41" Wikipedia only.
    _ALL_ROWS_MODE_FLAGS = {
        "21": {"process_wiki": False, "process_chatgpt": False},
        "31": {"process_wiki": False, "process_chatgpt": True},
        "41": {"process_wiki": True, "process_chatgpt": False},
    }

    def __init__(self):
        """Create the sheet handler and the Wikipedia scraper."""
        self.sheet_handler = GoogleSheetHandler()
        self.wiki_scraper = WikipediaScraper()

    def _data_rows(self):
        """Return ``(sheet_row_number, row)`` pairs for all rows after the header.

        Numbering starts at 2 because the first entry of ``sheet_values`` is
        skipped as the header row.
        """
        return enumerate(self.sheet_handler.sheet_values[1:], start=2)

    def process_serp_website_lookup(self):
        """Fill empty cells of column D with the result of a SERP website lookup.

        For every data row whose column D (index 3) is empty or missing, the
        company name from column B (index 1) is passed to
        ``serp_website_lookup``. Any result other than ``"k.A."`` is written to
        ``D<row>``. Rows that already have a website, or that have no company
        name to search for, are skipped without calling the API.
        """
        debug_print("Starte SERP-API Website Lookup für alle Zeilen ohne CRM-Website (Spalte D).")
        for i, row in self._data_rows():
            # Short rows simply count as "no website yet".
            current_website = row[3] if len(row) > 3 else ""
            if current_website.strip() != "":
                debug_print(f"Zeile {i}: CRM-Website bereits vorhanden, überspringe.")
                continue

            company_name = row[1] if len(row) > 1 else ""
            if not company_name.strip():
                # Without a name the query would be meaningless: do not spend
                # an API request (and the delay) on it.
                debug_print(f"Zeile {i}: Kein Firmenname vorhanden, überspringe SERP-Lookup.")
                continue

            new_website = serp_website_lookup(company_name)
            if new_website != "k.A.":
                self.sheet_handler.sheet.update(values=[[new_website]], range_name=f"D{i}")
                debug_print(f"Zeile {i}: Neue Website gefunden und in Spalte D eingetragen: {new_website}")
            else:
                debug_print(f"Zeile {i}: Keine Website gefunden für {company_name}.")
            # Pause after each lookup before issuing the next request.
            time.sleep(Config.RETRY_DELAY)

    def process_rows(self, num_rows=None):
        """Dispatch to the processing routine selected by the global ``MODE``.

        Args:
            num_rows: Only used when ``MODE`` matches none of the explicit
                modes: maximum number of rows to process starting at the
                sheet's start index. ``None`` means no limit.

        Modes handled explicitly: "1" (complete run), "11" (rows marked with
        "x" in column A), "21"/"31"/"41" (all rows with fixed flags, see
        ``_ALL_ROWS_MODE_FLAGS``), "22" (SERP website lookup), "51"
        (verification only), "6" (contact research), "8" (batch token count).
        """
        # MODE is only read here, so no ``global`` declaration is required.
        if MODE == "1":
            self.process_rows_complete()  # complete processing (if implemented)
        elif MODE == "11":
            # Re-evaluate only the rows flagged with "x" in column A. The
            # ``row and`` guard keeps empty rows from raising IndexError.
            for i, row in self._data_rows():
                if row and row[0].strip().lower() == "x":
                    self._process_single_row(i, row)
        elif MODE in self._ALL_ROWS_MODE_FLAGS:
            flags = self._ALL_ROWS_MODE_FLAGS[MODE]
            for i, row in self._data_rows():
                self._process_single_row(i, row, **flags)
        elif MODE == "22":
            # SERP-API website lookup: fills column D where it is empty.
            self.process_serp_website_lookup()
        elif MODE == "51":
            process_verification_only()
        elif MODE == "6":
            process_contact_research()
        elif MODE == "8":
            process_batch_token_count()
        else:
            start_index = self.sheet_handler.get_start_index()
            # NOTE(review): the message reports start_index + 1 while the loop
            # compares the sheet row number against start_index itself --
            # confirm which numbering get_start_index() uses.
            print(f"Starte bei Zeile {start_index+1}")
            rows_processed = 0
            for i, row in self._data_rows():
                if i < start_index:
                    continue
                if num_rows is not None and rows_processed >= num_rows:
                    break
                self._process_single_row(i, row)
                rows_processed += 1
|
||||||
|
|
||||||
|
# ----------------- Main-Funktion -----------------
|
||||||
def main():
|
def main():
|
||||||
global MODE, LOG_FILE
|
global MODE, LOG_FILE
|
||||||
print("Bitte wählen Sie den Betriebsmodus:")
|
print("Bitte wählen Sie den Betriebsmodus:")
|
||||||
print("1: Vollständige Verarbeitung (alle Funktionen)")
|
print("1: Vollständige Verarbeitung (alle Funktionen)")
|
||||||
print("11: Re-Evaluation markierter Zeilen (nur 'x' in Spalte A)")
|
print("11: Re-Evaluation markierter Zeilen (nur 'x' in Spalte A)")
|
||||||
print("21: Website-Scraping Testmodus (nur Website-Rohtext & Zusammenfassung)")
|
print("21: Website-Scraping Testmodus (nur Website-Rohtext & Zusammenfassung)")
|
||||||
print("22: SERP-API Website Lookup (nur Website-Daten ermitteln)")
|
print("22: SERP-API Website Lookup (nur Website-Daten ermitteln)")
|
||||||
print("31: Nur ChatGPT-Auswertung (alle ChatGPT-Routinen)")
|
print("31: Nur ChatGPT-Auswertung (alle ChatGPT-Routinen)")
|
||||||
@@ -1301,30 +1366,23 @@ def main():
|
|||||||
if MODE == "1":
|
if MODE == "1":
|
||||||
dp.process_rows() # Vollständige Verarbeitung
|
dp.process_rows() # Vollständige Verarbeitung
|
||||||
elif MODE == "11":
|
elif MODE == "11":
|
||||||
# Re-Evaluation markierter Zeilen (nur "x" in Spalte A)
|
|
||||||
for i, row in enumerate(dp.sheet_handler.sheet_values[1:], start=2):
|
for i, row in enumerate(dp.sheet_handler.sheet_values[1:], start=2):
|
||||||
if row[0].strip().lower() == "x":
|
if row[0].strip().lower() == "x":
|
||||||
dp._process_single_row(i, row)
|
dp._process_single_row(i, row)
|
||||||
elif MODE == "21":
|
elif MODE == "21":
|
||||||
# Website-Scraping Testmodus: Nur Website-Rohtext & Zusammenfassung extrahieren
|
|
||||||
for i, row in enumerate(dp.sheet_handler.sheet_values[1:], start=2):
|
for i, row in enumerate(dp.sheet_handler.sheet_values[1:], start=2):
|
||||||
dp._process_single_row(i, row, process_wiki=False, process_chatgpt=False)
|
dp._process_single_row(i, row, process_wiki=False, process_chatgpt=False)
|
||||||
elif MODE == "22":
|
elif MODE == "22":
|
||||||
# SERP-API Website Lookup: Überprüft jede Zeile, ob in Spalte D keine Website vorhanden ist,
|
|
||||||
# und sucht dann via SERP-API nach einer Website, die in Spalte D eingetragen wird.
|
|
||||||
dp.process_serp_website_lookup()
|
dp.process_serp_website_lookup()
|
||||||
elif MODE == "31":
|
elif MODE == "31":
|
||||||
# Nur ChatGPT-Auswertung: Alle ChatGPT-Routinen (ohne Wikipedia und Website) werden ausgeführt.
|
|
||||||
for i, row in enumerate(dp.sheet_handler.sheet_values[1:], start=2):
|
for i, row in enumerate(dp.sheet_handler.sheet_values[1:], start=2):
|
||||||
dp._process_single_row(i, row, process_wiki=False, process_chatgpt=True)
|
dp._process_single_row(i, row, process_wiki=False, process_chatgpt=True)
|
||||||
elif MODE == "41":
|
elif MODE == "41":
|
||||||
# Nur Wikipedia-Scraping
|
|
||||||
for i, row in enumerate(dp.sheet_handler.sheet_values[1:], start=2):
|
for i, row in enumerate(dp.sheet_handler.sheet_values[1:], start=2):
|
||||||
dp._process_single_row(i, row, process_wiki=True, process_chatgpt=False)
|
dp._process_single_row(i, row, process_wiki=True, process_chatgpt=False)
|
||||||
elif MODE == "51":
|
elif MODE == "51":
|
||||||
process_verification_only()
|
process_verification_only()
|
||||||
elif MODE == "6":
|
elif MODE == "6":
|
||||||
# Contact Research (LinkedIn)
|
|
||||||
process_contact_research()
|
process_contact_research()
|
||||||
elif MODE == "8":
|
elif MODE == "8":
|
||||||
process_batch_token_count()
|
process_batch_token_count()
|
||||||
@@ -1335,7 +1393,7 @@ def main():
|
|||||||
for i, row in enumerate(dp.sheet_handler.sheet_values[1:], start=2):
|
for i, row in enumerate(dp.sheet_handler.sheet_values[1:], start=2):
|
||||||
if i < start_index:
|
if i < start_index:
|
||||||
continue
|
continue
|
||||||
if rows_processed >= 1: # Hier kann die Anzahl angepasst werden.
|
if rows_processed >= 1:
|
||||||
break
|
break
|
||||||
dp._process_single_row(i, row)
|
dp._process_single_row(i, row)
|
||||||
rows_processed += 1
|
rows_processed += 1
|
||||||
|
|||||||
Reference in New Issue
Block a user