# Debug script for Impressum scraping.
#
# Increased logging verbosity to track the raw input to the LLM and the raw
# LLM response. This helps diagnose why Impressum data extraction might be
# failing for specific company websites.
import logging
import os
import sys

# Setup paths
# sys.path.append("/app")  # No longer needed, running from correct dir

# Mock settings for a standalone run. These placeholders only satisfy
# environment checks at import time; the real GEMINI key is expected to be
# loaded from a file inside config.py.
os.environ["GEMINI_API_KEY"] = "dummy"
os.environ["SERP_API_KEY"] = "dummy"

# Relative imports — must come after the env vars above so config.py sees them.
from ..services.scraping import ScraperService
from ..config import settings

# Enable logging so the scraper's diagnostic output is visible.
logging.basicConfig(level=logging.INFO)
def debug_scrape():
    """Scrape one hard-coded company site and dump the result to stdout.

    Prints the page title and, when present, the extracted Impressum data so
    the LLM extraction step can be inspected by hand. Bails out early when no
    real GEMINI API key is available in settings, since LLM-based extraction
    cannot run without it.
    """
    target = "https://frauenrath.de/"
    print(f"Scraping {target}...")

    service = ScraperService()

    # LLM extraction needs the real API key; the "dummy" env placeholder set
    # at import time is not sufficient.
    if not settings.GEMINI_API_KEY:
        print("ERROR: GEMINI_API_KEY not found in settings! Ensure it's in .env or a file.")
        return

    scraped = service.scrape_url(target)

    print("\n--- RESULT ---")
    print(f"Title: {scraped.get('title')}")

    impressum_data = scraped.get('impressum')
    if not impressum_data:
        print("\n--- NO IMPRESSUM DATA ---")
    else:
        print("\n--- IMPRESSUM DATA ---")
        print(impressum_data)
# Allow direct execution of this debug script.
if __name__ == "__main__":
    debug_scrape()