dealfront_enrichment.py aktualisiert

This commit is contained in:
2025-07-14 07:56:33 +00:00
parent 908563a2a5
commit b29bbdfd09

View File

@@ -2,6 +2,8 @@ import os
import json import json
import time import time
import logging import logging
import tempfile
import shutil
import pandas as pd import pandas as pd
from selenium import webdriver from selenium import webdriver
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
@@ -39,10 +41,14 @@ class DealfrontScraper:
chrome_options.add_argument("--no-sandbox") chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage") chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument("--window-size=1920,1200") chrome_options.add_argument("--window-size=1920,1200")
# TEMP DIR für User Data (verhindert Konflikte)
self._tmpdir = tempfile.mkdtemp()
chrome_options.add_argument(f"--user-data-dir={self._tmpdir}")
try: try:
self.driver = webdriver.Chrome(options=chrome_options) self.driver = webdriver.Chrome(options=chrome_options)
except Exception as e: except Exception as e:
logger.critical("WebDriver konnte nicht initialisiert werden.", exc_info=True) logger.critical("WebDriver konnte nicht initialisiert werden.", exc_info=True)
shutil.rmtree(self._tmpdir, ignore_errors=True)
raise raise
self.wait = WebDriverWait(self.driver, 30) self.wait = WebDriverWait(self.driver, 30)
self.username, self.password = self._load_credentials() self.username, self.password = self._load_credentials()
@@ -241,8 +247,10 @@ class DealfrontScraper:
return all_companies return all_companies
def close(self): def close(self):
if self.driver: if hasattr(self, "driver") and self.driver:
self.driver.quit() self.driver.quit()
if hasattr(self, "_tmpdir"):
shutil.rmtree(self._tmpdir, ignore_errors=True)
if __name__ == "__main__": if __name__ == "__main__":
scraper = None scraper = None