Files
Brancheneinstufung2/company-explorer/backend/scripts/debug_frauenrath.py
Floke 44b78bd6b6 fix(company-explorer): enhance impressum scraping debug logging
- Increased logging verbosity in the scraping code to track the raw input sent to the LLM and the raw LLM response.
- This helps diagnose why Impressum data extraction might be failing for specific company websites.
2026-01-08 16:14:01 +01:00

42 lines
1.1 KiB
Python

import logging
import sys
import os
# Setup paths
# sys.path.append("/app") is no longer needed: the script is run from the
# correct directory.

# Placeholder credentials for a standalone run. setdefault() only fills in
# "dummy" when the variable is missing, so a real key that is already exported
# in the environment is not silently replaced by the placeholder.
# NOTE(review): the original comment says the real Gemini key is loaded from a
# file in config.py -- confirm that this file value wins over the placeholder.
os.environ.setdefault("GEMINI_API_KEY", "dummy")
os.environ.setdefault("SERP_API_KEY", "dummy")

# Project imports are kept below the environment setup so the placeholders are
# already in place when the config module is imported. Being relative imports,
# they only resolve when this file is executed as a module of its package
# (python -m <package>.scripts.debug_frauenrath), not as a plain script path.
from ..services.scraping import ScraperService  # noqa: E402
from ..config import settings  # noqa: E402

# Enable logging
logging.basicConfig(level=logging.INFO)
def debug_scrape(url: str = "https://frauenrath.de/") -> None:
    """Scrape one website and print its title and Impressum data.

    Args:
        url: Address of the page to scrape. Defaults to the Frauenrath
            homepage, which is the site this script was written to debug.

    Returns:
        None. Everything is reported through ``print``.
    """
    print(f"Scraping {url}...")
    scraper = ScraperService()

    # We need the real API key for LLM extraction to work.
    # NOTE(review): this only catches an empty/missing key; a truthy
    # placeholder value would pass the check -- verify how settings resolves
    # GEMINI_API_KEY.
    if not settings.GEMINI_API_KEY:
        print("ERROR: GEMINI_API_KEY not found in settings! Ensure it's in .env or a file.")
        return

    # presumably a dict-like object (it is only accessed through .get());
    # confirm against ScraperService.scrape_url.
    result = scraper.scrape_url(url)

    print("\n--- RESULT ---")
    print(f"Title: {result.get('title')}")

    imp = result.get('impressum')
    if imp:
        print("\n--- IMPRESSUM DATA ---")
        print(imp)
    else:
        print("\n--- NO IMPRESSUM DATA ---")
# Script entry point: run the debug scrape only when this file is executed
# directly, not when it is imported by another module.
if __name__ == "__main__":
    debug_scrape()