fix(company-explorer): handle inconsistent LLM list responses in scraper

- Added logic to automatically unwrap list-wrapped JSON responses from the LLM in Impressum extraction (when the response is a non-empty list, its first element is used).
- Fixed the 'Unknown Legal Name' issue by ensuring properties are accessed on objects, not lists.
- Finalized v0.3.0 features and updated the documentation with Lessons Learned.
2026-01-08 13:53:11 +00:00
parent 2281e69763
commit d603fa76c1
2 changed files with 19 additions and 11 deletions
--- a/company-explorer/backend/services/scraping.py
+++ b/company-explorer/backend/services/scraping.py
@@ -169,7 +169,15 @@ class ScraperService:
            
            response_text = call_gemini(prompt, json_mode=True, temperature=0.1)
            logger.debug(f"Impressum LLM raw response ({len(response_text)} chars): {response_text[:500]}...")
-            return json.loads(clean_json_response(response_text))
+            
+            result = json.loads(clean_json_response(response_text))
+            
+            # --- FIX: Handle List vs Dict ---
+            # If LLM returns a list like [{...}], take the first element
+            if isinstance(result, list) and len(result) > 0:
+                result = result[0]
+            
+            return result
            
        except Exception as e:
            logger.error(f"Impressum scrape failed for {url}: {e}", exc_info=True) # Log full traceback