fix(company-explorer): enhance impressum scraping debug logging
- Increased logging verbosity to track the raw input sent to the LLM and the raw LLM response. - This helps diagnose why Impressum data extraction might be failing for specific company websites.
This commit is contained in:
42
company-explorer/backend/scripts/debug_frauenrath.py
Normal file
42
company-explorer/backend/scripts/debug_frauenrath.py
Normal file
@@ -0,0 +1,42 @@
|
||||
"""Standalone debug script: scrape frauenrath.de and print extracted Impressum data."""
import logging
import sys
import os

# Setup paths
# sys.path.append("/app") # No longer needed, running from correct dir

# Mock settings for standalone run.
# IMPORTANT: these env vars are set BEFORE the `..config` import below, so the
# settings module sees them at import time.
# NOTE(review): config.py presumably loads the real GEMINI_API_KEY from a file,
# with these dummies only satisfying required-env checks — confirm.
os.environ["GEMINI_API_KEY"] = "dummy" # The real one is loaded from file in config.py, hope it works
os.environ["SERP_API_KEY"] = "dummy"

# Correct relative imports
from ..services.scraping import ScraperService
from ..config import settings

# Enable logging so ScraperService's INFO-level output (raw LLM input/response)
# is visible on the console.
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
def debug_scrape():
    """Scrape https://frauenrath.de/ and print any extracted Impressum data.

    Prints the page title and either the Impressum dict or a "no data" marker.
    Exits early with an error message when no GEMINI_API_KEY is configured,
    since the LLM-based Impressum extraction cannot work without it.
    """
    # Fail fast BEFORE constructing the scraper: the original built
    # ScraperService() first and only then checked the key, doing needless
    # setup work on the error path.
    if not settings.GEMINI_API_KEY:
        print("ERROR: GEMINI_API_KEY not found in settings! Ensure it's in .env or a file.")
        return

    url = "https://frauenrath.de/"
    print(f"Scraping {url}...")

    scraper = ScraperService()
    result = scraper.scrape_url(url)

    print("\n--- RESULT ---")
    print(f"Title: {result.get('title')}")

    # `result` behaves like a dict; 'impressum' is absent/falsy when
    # extraction failed for this site.
    imp = result.get('impressum')
    if imp:
        print("\n--- IMPRESSUM DATA ---")
        print(imp)
    else:
        print("\n--- NO IMPRESSUM DATA ---")
|
||||
|
||||
if __name__ == "__main__":
    # NOTE(review): the relative imports above mean this must be run as a
    # module (`python -m ...scripts.debug_frauenrath`), not as a plain
    # script path — confirm the intended invocation.
    debug_scrape()
|
||||
Reference in New Issue
Block a user