fix(company-explorer): enhance impressum scraping debug logging
- Increased logging verbosity to track the raw input sent to the LLM and the raw LLM response. - This helps diagnose why Impressum data extraction might be failing for specific company websites.
This commit is contained in:
42
company-explorer/backend/scripts/debug_frauenrath.py
Normal file
42
company-explorer/backend/scripts/debug_frauenrath.py
Normal file
@@ -0,0 +1,42 @@
|
||||
"""Standalone debug script: scrape frauenrath.de and print extracted Impressum data."""
import logging
import sys
import os

# Setup paths
# sys.path.append("/app") # No longer needed, running from correct dir

# Mock settings for standalone run.
# IMPORTANT: these env vars are set BEFORE the `..config` import below, so the
# settings module sees them at import time.
# NOTE(review): config.py presumably loads the real GEMINI_API_KEY from a file,
# with these dummies only satisfying required-env checks — confirm.
os.environ["GEMINI_API_KEY"] = "dummy" # The real one is loaded from file in config.py, hope it works
os.environ["SERP_API_KEY"] = "dummy"

# Correct relative imports
from ..services.scraping import ScraperService
from ..config import settings

# Enable logging so ScraperService's INFO-level output (raw LLM input/response)
# is visible on the console.
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
def debug_scrape():
    """Scrape https://frauenrath.de/ and print any extracted Impressum data.

    Prints the page title and either the Impressum dict or a "no data" marker.
    Exits early with an error message when no GEMINI_API_KEY is configured,
    since the LLM-based Impressum extraction cannot work without it.
    """
    # Fail fast BEFORE constructing the scraper: the original built
    # ScraperService() first and only then checked the key, doing needless
    # setup work on the error path.
    if not settings.GEMINI_API_KEY:
        print("ERROR: GEMINI_API_KEY not found in settings! Ensure it's in .env or a file.")
        return

    url = "https://frauenrath.de/"
    print(f"Scraping {url}...")

    scraper = ScraperService()
    result = scraper.scrape_url(url)

    print("\n--- RESULT ---")
    print(f"Title: {result.get('title')}")

    # `result` behaves like a dict; 'impressum' is absent/falsy when
    # extraction failed for this site.
    imp = result.get('impressum')
    if imp:
        print("\n--- IMPRESSUM DATA ---")
        print(imp)
    else:
        print("\n--- NO IMPRESSUM DATA ---")
|
||||
|
||||
if __name__ == "__main__":
    # NOTE(review): the relative imports above mean this must be run as a
    # module (`python -m ...scripts.debug_frauenrath`), not as a plain
    # script path — confirm the intended invocation.
    debug_scrape()
|
||||
Reference in New Issue
Block a user