fix(company-explorer): enhance impressum scraping debug logging
- Increased logging verbosity in the Impressum scraper to track the raw input sent to the LLM and the raw LLM response.
- This helps diagnose why Impressum data extraction might be failing for specific company websites.
This commit is contained in:
@@ -155,6 +155,8 @@ class ScraperService:
|
||||
|
||||
raw_text = soup.get_text(separator=' ', strip=True)[:10000] # Limit context
|
||||
|
||||
logger.debug(f"Impressum raw text sent to LLM ({len(raw_text)} chars): {raw_text[:500]}...")
|
||||
|
||||
# LLM Extraction
|
||||
prompt = f"""
|
||||
Extract the official company details from this German 'Impressum' text.
|
||||
@@ -166,10 +168,11 @@ class ScraperService:
|
||||
"""
|
||||
|
||||
response_text = call_gemini(prompt, json_mode=True, temperature=0.1)
|
||||
logger.debug(f"Impressum LLM raw response ({len(response_text)} chars): {response_text[:500]}...")
|
||||
return json.loads(clean_json_response(response_text))
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Impressum scrape failed for {url}: {e}")
|
||||
logger.error(f"Impressum scrape failed for {url}: {e}", exc_info=True) # Log full traceback
|
||||
return None
|
||||
|
||||
def _parse_html(self, html_content: bytes) -> Dict[str, str]:
|
||||
|
||||
Reference in New Issue
Block a user