feat(company-explorer): add impressum scraping, robust json parsing, and enhanced ui polling
- Implemented Impressum scraping with Root-URL fallback and enhanced keyword detection.
- Added 'clean_json_response' helper to strip Markdown from LLM outputs, preventing JSONDecodeErrors.
- Improved numeric extraction for German formatting (thousands separators vs. decimals).
- Updated Inspector UI with polling logic for auto-refresh and display of AI Dossier and Legal Data.
- Added Manual Override for Website URL.
This commit is contained in:
@@ -383,6 +383,18 @@ def run_analysis_task(company_id: int, url: str):
|
|||||||
)
|
)
|
||||||
db.add(new_signal)
|
db.add(new_signal)
|
||||||
|
|
||||||
|
# Save Full Analysis Blob (Business Model + Evidence)
|
||||||
|
existing_analysis = db.query(EnrichmentData).filter(
|
||||||
|
EnrichmentData.company_id == company.id,
|
||||||
|
EnrichmentData.source_type == "ai_analysis"
|
||||||
|
).first()
|
||||||
|
|
||||||
|
if not existing_analysis:
|
||||||
|
db.add(EnrichmentData(company_id=company.id, source_type="ai_analysis", content=analysis))
|
||||||
|
else:
|
||||||
|
existing_analysis.content = analysis
|
||||||
|
existing_analysis.updated_at = datetime.utcnow()
|
||||||
|
|
||||||
company.status = "ENRICHED"
|
company.status = "ENRICHED"
|
||||||
company.last_classification_at = datetime.utcnow()
|
company.last_classification_at = datetime.utcnow()
|
||||||
logger.info(f"Robotics analysis complete for {company.name}.")
|
logger.info(f"Robotics analysis complete for {company.name}.")
|
||||||
|
|||||||
@@ -124,6 +124,7 @@ def extract_numeric_value(raw_value: str, is_umsatz: bool = False) -> str:
|
|||||||
"""
|
"""
|
||||||
Extracts a numeric value from a string, handling 'Mio', 'Mrd', etc.
|
Extracts a numeric value from a string, handling 'Mio', 'Mrd', etc.
|
||||||
Returns string representation of the number or 'k.A.'.
|
Returns string representation of the number or 'k.A.'.
|
||||||
|
Handles German number formatting (1.000 = 1000, 1,5 = 1.5).
|
||||||
"""
|
"""
|
||||||
if not raw_value:
|
if not raw_value:
|
||||||
return "k.A."
|
return "k.A."
|
||||||
@@ -134,25 +135,50 @@ def extract_numeric_value(raw_value: str, is_umsatz: bool = False) -> str:
|
|||||||
|
|
||||||
# Simple multiplier handling
|
# Simple multiplier handling
|
||||||
multiplier = 1.0
|
multiplier = 1.0
|
||||||
if 'mrd' in raw_value or 'billion' in raw_value:
|
if 'mrd' in raw_value or 'billion' in raw_value or 'bn' in raw_value:
|
||||||
multiplier = 1000.0 if is_umsatz else 1000000000.0
|
multiplier = 1000.0 if is_umsatz else 1000000000.0
|
||||||
elif 'mio' in raw_value or 'million' in raw_value:
|
elif 'mio' in raw_value or 'million' in raw_value or 'mn' in raw_value:
|
||||||
multiplier = 1.0 if is_umsatz else 1000000.0
|
multiplier = 1.0 if is_umsatz else 1000000.0
|
||||||
elif 'tsd' in raw_value or 'thousand' in raw_value:
|
elif 'tsd' in raw_value or 'thousand' in raw_value:
|
||||||
multiplier = 0.001 if is_umsatz else 1000.0
|
multiplier = 0.001 if is_umsatz else 1000.0
|
||||||
|
|
||||||
# Extract number
|
# Extract number candidates
|
||||||
# Matches 123,45 or 123.45
|
# Regex for "1.000,50" or "1,000.50" or "1000"
|
||||||
matches = re.findall(r'(\d+[.,]?\d*)', raw_value)
|
matches = re.findall(r'(\d+[\.,]?\d*[\.,]?\d*)', raw_value)
|
||||||
if not matches:
|
if not matches:
|
||||||
return "k.A."
|
return "k.A."
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Take the first number found
|
num_str = matches[0]
|
||||||
num_str = matches[0].replace(',', '.')
|
|
||||||
# Fix for thousands separator if like 1.000.000 -> 1000000
|
# Heuristic for German formatting (1.000,00) vs English (1,000.00)
|
||||||
|
# If it contains both, the last separator is likely the decimal
|
||||||
|
if '.' in num_str and ',' in num_str:
|
||||||
|
if num_str.rfind(',') > num_str.rfind('.'):
|
||||||
|
# German: 1.000,00 -> remove dots, replace comma with dot
|
||||||
|
num_str = num_str.replace('.', '').replace(',', '.')
|
||||||
|
else:
|
||||||
|
# English: 1,000.00 -> remove commas
|
||||||
|
num_str = num_str.replace(',', '')
|
||||||
|
elif '.' in num_str:
|
||||||
|
# Ambiguous: 1.005 could be 1005 or 1.005
|
||||||
|
# Assumption: If it's employees (integer), and looks like "1.xxx", it's likely thousands
|
||||||
|
parts = num_str.split('.')
|
||||||
|
if len(parts) > 1 and len(parts[-1]) == 3 and not is_umsatz:
|
||||||
|
# Likely thousands separator for employees (e.g. 1.005)
|
||||||
|
num_str = num_str.replace('.', '')
|
||||||
|
elif is_umsatz and len(parts) > 1 and len(parts[-1]) == 3:
|
||||||
|
# For revenue, 375.6 vs 1.000 is tricky.
|
||||||
|
# But usually revenue in millions is small numbers with decimals (250.5).
|
||||||
|
# Large integers usually mean thousands.
|
||||||
|
# Let's assume dot is decimal for revenue unless context implies otherwise,
|
||||||
|
# but for "375.6" it works. For "1.000" it becomes 1.0.
|
||||||
|
# Let's keep dot as decimal for revenue by default unless we detect multiple dots
|
||||||
if num_str.count('.') > 1:
|
if num_str.count('.') > 1:
|
||||||
num_str = num_str.replace('.', '')
|
num_str = num_str.replace('.', '')
|
||||||
|
elif ',' in num_str:
|
||||||
|
# German decimal: 1,5 -> 1.5
|
||||||
|
num_str = num_str.replace(',', '.')
|
||||||
|
|
||||||
val = float(num_str) * multiplier
|
val = float(num_str) * multiplier
|
||||||
|
|
||||||
@@ -173,6 +199,20 @@ def fuzzy_similarity(str1: str, str2: str) -> float:
|
|||||||
return 0.0
|
return 0.0
|
||||||
return fuzz.ratio(str1, str2) / 100.0
|
return fuzz.ratio(str1, str2) / 100.0
|
||||||
|
|
||||||
|
def clean_json_response(response_text: str) -> str:
    """
    Clean an LLM response so that it can be handed to ``json.loads``.

    Steps:
      1. Strip Markdown code fences (```json ... ``` or bare ``` ... ```) that
         start or end a line. The ``json`` tag is matched case-insensitively.
      2. If the remainder is not valid JSON on its own (e.g. the model wrapped
         the payload in explanatory prose), return the first substring that
         decodes as a JSON object or array.

    Args:
        response_text: Raw text returned by the LLM. May be empty or None.

    Returns:
        The cleaned JSON text. Returns "{}" when there is nothing left after
        cleaning. If no decodable JSON is found, the fence-stripped text is
        returned unchanged so that the caller's ``json.loads`` still raises.
    """
    # Local import: only part of the module is visible from this block, so do
    # not rely on a module-level `import json` being present.
    import json

    if not response_text or not response_text.strip():
        return "{}"

    # Opening fences at the start of a line, with optional (case-insensitive) json tag.
    cleaned = re.sub(
        r'^[ \t]*```(?:json)?\s*', '', response_text,
        flags=re.MULTILINE | re.IGNORECASE,
    )
    # Closing fences at the end of a line.
    cleaned = re.sub(r'\s*```[ \t]*$', '', cleaned, flags=re.MULTILINE)
    cleaned = cleaned.strip()

    if not cleaned:
        # Response consisted of fences only; treat it like an empty response.
        return "{}"

    decoder = json.JSONDecoder()
    try:
        decoder.decode(cleaned)
        return cleaned
    except ValueError:
        # Not valid as a whole; fall through and look for an embedded payload.
        pass

    # Try every '{' / '[' as a potential start of the payload and return the
    # first one that decodes. raw_decode ignores trailing text after the value.
    for opener in re.finditer(r'[{\[]', cleaned):
        try:
            _, end = decoder.raw_decode(cleaned, opener.start())
        except ValueError:
            continue
        return cleaned[opener.start():end]

    return cleaned
|
||||||
|
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# 3. LLM WRAPPER (GEMINI)
|
# 3. LLM WRAPPER (GEMINI)
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ import json
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
from typing import Dict, Any, List
|
from typing import Dict, Any, List
|
||||||
from ..lib.core_utils import call_gemini
|
from ..lib.core_utils import call_gemini, clean_json_response
|
||||||
from ..config import settings
|
from ..config import settings
|
||||||
from ..database import SessionLocal, RoboticsCategory
|
from ..database import SessionLocal, RoboticsCategory
|
||||||
|
|
||||||
@@ -55,7 +55,7 @@ class ClassificationService:
|
|||||||
|
|
||||||
prompt = f"""
|
prompt = f"""
|
||||||
You are a Senior B2B Market Analyst for 'Roboplanet', a specialized robotics distributor.
|
You are a Senior B2B Market Analyst for 'Roboplanet', a specialized robotics distributor.
|
||||||
Your task is to analyze a target company based on their website text to determine their **operational need** for service robotics.
|
Your task is to analyze the target company based on their website text and create a concise **Dossier**.
|
||||||
|
|
||||||
--- TARGET COMPANY ---
|
--- TARGET COMPANY ---
|
||||||
Name: {company_name}
|
Name: {company_name}
|
||||||
@@ -66,36 +66,33 @@ class ClassificationService:
|
|||||||
You MUST assign the company to exactly ONE of these industries. If unsure, choose the closest match or "Sonstige".
|
You MUST assign the company to exactly ONE of these industries. If unsure, choose the closest match or "Sonstige".
|
||||||
{json.dumps(self.allowed_industries, ensure_ascii=False)}
|
{json.dumps(self.allowed_industries, ensure_ascii=False)}
|
||||||
|
|
||||||
--- ANALYSIS GUIDELINES (CHAIN OF THOUGHT) ---
|
--- ANALYSIS PART 1: BUSINESS MODEL ---
|
||||||
1. **Infrastructure Analysis:** What physical assets does this company likely operate based on their business model?
|
1. Identify the core products/services.
|
||||||
- Factories / Production Plants? (-> Needs Cleaning, Security, Intralogistics)
|
2. Summarize in 2-3 German sentences: What do they do and for whom? (Target: "business_model")
|
||||||
- Large Warehouses? (-> Needs Intralogistics, Security, Floor Washing)
|
|
||||||
- Offices / Headquarters? (-> Needs Vacuuming, Window Cleaning)
|
|
||||||
- Critical Infrastructure (Solar Parks, Wind Farms)? (-> Needs Perimeter Security, Inspection)
|
|
||||||
- Hotels / Hospitals? (-> Needs Service, Cleaning, Transport)
|
|
||||||
|
|
||||||
2. **Provider vs. User Distinction (CRITICAL):**
|
--- ANALYSIS PART 2: INFRASTRUCTURE & POTENTIAL (Chain of Thought) ---
|
||||||
- If a company SELLS cleaning products (e.g., 3M, Henkel), they do NOT necessarily have a higher need for cleaning robots than any other manufacturer. Do not score them high just because the word "cleaning" appears. Score them based on their *factories*.
|
1. **Infrastructure Scan:** Look for evidence of physical assets like *Factories, Large Warehouses, Production Lines, Campuses, Hospitals*.
|
||||||
- If a company SELLS security services, they might be a potential PARTNER, but check if they *manage* sites.
|
2. **Provider vs. User Check:**
|
||||||
|
- Does the company USE this infrastructure (Potential Customer)?
|
||||||
|
- Or do they SELL products for it (Competitor/Partner)?
|
||||||
|
- *Example:* "Cleaning" -> Do they sell soap (Provider) or do they have a 50,000sqm factory (User)?
|
||||||
|
3. **Evidence Extraction:** Extract 1-2 key sentences from the text proving this infrastructure. (Target: "infrastructure_evidence")
|
||||||
|
|
||||||
3. **Scale Assessment:**
|
--- ANALYSIS PART 3: SCORING (0-100) ---
|
||||||
- 5 locations implies more need than 1.
|
Based on the identified infrastructure, score the potential for these categories:
|
||||||
- "Global player" implies large facilities.
|
|
||||||
|
|
||||||
--- SCORING CATEGORIES (0-100) ---
|
|
||||||
Based on the current strategic focus of Roboplanet:
|
|
||||||
|
|
||||||
{category_guidance}
|
{category_guidance}
|
||||||
|
|
||||||
--- OUTPUT FORMAT (JSON ONLY) ---
|
--- OUTPUT FORMAT (JSON ONLY) ---
|
||||||
{{
|
{{
|
||||||
"industry": "String (from list)",
|
"industry": "String (from list)",
|
||||||
"summary": "Concise analysis of their infrastructure and business model (German)",
|
"business_model": "2-3 sentences summary (German)",
|
||||||
|
"infrastructure_evidence": "1-2 key sentences proving physical assets (German)",
|
||||||
"potentials": {{
|
"potentials": {{
|
||||||
"cleaning": {{ "score": 0-100, "reason": "Specific reasoning based on infrastructure (e.g. 'Operates 5 production plants in DE')." }},
|
"cleaning": {{ "score": 0-100, "reason": "Reasoning based on infrastructure." }},
|
||||||
"transport": {{ "score": 0-100, "reason": "..." }},
|
"transport": {{ "score": 0-100, "reason": "Reasoning based on logistics volume." }},
|
||||||
"security": {{ "score": 0-100, "reason": "..." }},
|
"security": {{ "score": 0-100, "reason": "Reasoning based on perimeter/assets." }},
|
||||||
"service": {{ "score": 0-100, "reason": "..." }}
|
"service": {{ "score": 0-100, "reason": "Reasoning based on guest interaction." }}
|
||||||
}}
|
}}
|
||||||
}}
|
}}
|
||||||
"""
|
"""
|
||||||
@@ -106,7 +103,7 @@ class ClassificationService:
|
|||||||
json_mode=True,
|
json_mode=True,
|
||||||
temperature=0.1 # Very low temp for analytical reasoning
|
temperature=0.1 # Very low temp for analytical reasoning
|
||||||
)
|
)
|
||||||
return json.loads(response_text)
|
return json.loads(clean_json_response(response_text))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Classification failed: {e}")
|
logger.error(f"Classification failed: {e}")
|
||||||
return {"error": str(e)}
|
return {"error": str(e)}
|
||||||
|
|||||||
@@ -2,9 +2,11 @@ import logging
|
|||||||
import requests
|
import requests
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
|
import json
|
||||||
|
from urllib.parse import urljoin, urlparse
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from typing import Optional, Dict
|
from typing import Optional, Dict
|
||||||
from ..lib.core_utils import clean_text, retry_on_failure
|
from ..lib.core_utils import clean_text, retry_on_failure, call_gemini, clean_json_response
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -22,6 +24,7 @@ class ScraperService:
|
|||||||
def scrape_url(self, url: str) -> Dict[str, str]:
|
def scrape_url(self, url: str) -> Dict[str, str]:
|
||||||
"""
|
"""
|
||||||
Fetches a URL and returns cleaned text content + meta info.
|
Fetches a URL and returns cleaned text content + meta info.
|
||||||
|
Also attempts to find and scrape the Impressum (Imprint).
|
||||||
"""
|
"""
|
||||||
if not url.startswith("http"):
|
if not url.startswith("http"):
|
||||||
url = "https://" + url
|
url = "https://" + url
|
||||||
@@ -38,7 +41,36 @@ class ScraperService:
|
|||||||
logger.warning(f"Skipping non-HTML content for {url}: {content_type}")
|
logger.warning(f"Skipping non-HTML content for {url}: {content_type}")
|
||||||
return {"error": "Not HTML"}
|
return {"error": "Not HTML"}
|
||||||
|
|
||||||
return self._parse_html(response.content)
|
# Parse Main Page
|
||||||
|
result = self._parse_html(response.content)
|
||||||
|
|
||||||
|
# --- IMPRESSUM LOGIC ---
|
||||||
|
soup = BeautifulSoup(response.content, 'html.parser')
|
||||||
|
impressum_url = self._find_impressum_link(soup, url)
|
||||||
|
|
||||||
|
# FALLBACK: If deep URL (e.g. /ueber-uns/) yielded no Impressum, try Root URL
|
||||||
|
if not impressum_url and url.count('/') > 3:
|
||||||
|
try:
|
||||||
|
parsed = urlparse(url)
|
||||||
|
root_url = f"{parsed.scheme}://{parsed.netloc}/"
|
||||||
|
logger.info(f"No Impressum on deep URL. Checking Root: {root_url}")
|
||||||
|
|
||||||
|
root_resp = requests.get(root_url, headers=headers, timeout=10, verify=False)
|
||||||
|
if root_resp.status_code == 200:
|
||||||
|
root_soup = BeautifulSoup(root_resp.content, 'html.parser')
|
||||||
|
impressum_url = self._find_impressum_link(root_soup, root_url)
|
||||||
|
except Exception as ex:
|
||||||
|
logger.warning(f"Root URL fallback failed: {ex}")
|
||||||
|
|
||||||
|
if impressum_url:
|
||||||
|
logger.info(f"Found Impressum URL: {impressum_url}")
|
||||||
|
impressum_data = self._scrape_impressum_data(impressum_url)
|
||||||
|
result["impressum"] = impressum_data
|
||||||
|
else:
|
||||||
|
logger.info(f"No Impressum link found for {url}")
|
||||||
|
result["impressum"] = None
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
except requests.exceptions.SSLError:
|
except requests.exceptions.SSLError:
|
||||||
# Retry with HTTP if HTTPS fails
|
# Retry with HTTP if HTTPS fails
|
||||||
@@ -50,13 +82,96 @@ class ScraperService:
|
|||||||
logger.error(f"Scraping failed for {url}: {e}")
|
logger.error(f"Scraping failed for {url}: {e}")
|
||||||
return {"error": str(e)}
|
return {"error": str(e)}
|
||||||
|
|
||||||
|
def _find_impressum_link(self, soup: BeautifulSoup, base_url: str) -> Optional[str]:
    """
    Scan all links for imprint-related keywords and return the best match.

    A link is a candidate when its (cleaned, lower-cased) text or its
    lower-cased href contains one of the keywords. Links whose href contains
    'mailto:', 'tel:' or 'javascript:' are skipped.

    Ranking:
      * 'impressum' in the link text: +10
      * 'impressum' in the href: +5
      * a specific imprint term ('imprint', 'anbieterkennzeichnung',
        'legal notice' / 'legal-notice') in the text: +3, in the href: +1
      * generic terms ('legal', 'rechtliches', 'disclaimer') only qualify the
        link as a candidate and add no score.
    The bonuses for specific terms are kept below the smallest 'impressum'
    score, so any link mentioning 'impressum' still outranks every link that
    does not. Ties are resolved in favour of the link that appears first.

    Args:
        soup: Parsed HTML document to scan.
        base_url: URL the document was fetched from; used to resolve relative hrefs.

    Returns:
        The absolute URL of the best candidate, or None if no link matches.
    """
    keywords = ["impressum", "imprint", "legal notice", "anbieterkennzeichnung", "rechtliches", "legal", "disclaimer"]
    # Terms that specifically name an imprint page (as opposed to generic legal pages).
    specific_keywords = ("imprint", "anbieterkennzeichnung", "legal notice", "legal-notice")
    skipped_schemes = ("mailto:", "tel:", "javascript:")

    candidates = []

    for a in soup.find_all('a', href=True):
        text = clean_text(a.get_text()).lower()
        href = a['href'].lower()

        # Check text content or href keywords
        if not (any(kw in text for kw in keywords) or any(kw in href for kw in keywords)):
            continue

        # Links that cannot be fetched as a page
        if any(scheme in href for scheme in skipped_schemes):
            continue

        score = 0
        if "impressum" in text:
            score += 10
        if "impressum" in href:
            score += 5
        if any(kw in text for kw in specific_keywords):
            score += 3
        if any(kw in href for kw in specific_keywords):
            score += 1

        # Resolve against the original (not lower-cased) href to keep path casing intact.
        candidates.append((score, urljoin(base_url, a['href'])))

    if not candidates:
        return None

    # max() returns the first maximal element, i.e. the earliest link among equal scores.
    best_match = max(candidates, key=lambda c: c[0])[1]
    logger.info(f"Impressum Link Selection: Found {len(candidates)} candidates. Winner: {best_match}")
    return best_match
|
||||||
|
|
||||||
|
def _scrape_impressum_data(self, url: str) -> Optional[Dict[str, str]]:
    """
    Fetch the Impressum page and use the LLM to extract structured data.

    The page is fetched, stripped of script/style/navigation elements,
    truncated to 10,000 characters and sent to the LLM with a prompt asking
    for the keys 'legal_name', 'street', 'zip', 'city', 'email', 'phone' and
    'ceo_name'.

    Args:
        url: Absolute URL of the Impressum page.

    Returns:
        The parsed JSON object returned by the LLM, or None when the page
        could not be fetched, contained no text, or the LLM answer was not a
        JSON object. Errors are logged, never raised.
    """
    try:
        headers = {'User-Agent': random.choice(USER_AGENTS)}
        # NOTE(review): verify=False disables TLS certificate validation.
        # Kept because the main-page fetch in this service does the same;
        # confirm that this is intentional.
        response = requests.get(url, headers=headers, timeout=self.timeout, verify=False)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')
        # Aggressive cleaning for Impressum too
        for element in soup(['script', 'style', 'noscript', 'iframe', 'svg', 'header', 'footer', 'nav']):
            element.decompose()

        raw_text = soup.get_text(separator=' ', strip=True)[:10000]  # Limit context
        if not raw_text:
            # Nothing to extract from; skip the LLM call instead of asking it
            # to fill in fields from an empty text.
            logger.warning(f"Impressum page has no extractable text: {url}")
            return None

        # LLM Extraction
        prompt = f"""
        Extract the official company details from this German 'Impressum' text.
        Return JSON ONLY. Keys: 'legal_name', 'street', 'zip', 'city', 'email', 'phone', 'ceo_name'.
        If a field is missing, use null.

        Text:
        {raw_text}
        """

        response_text = call_gemini(prompt, json_mode=True, temperature=0.1)
        data = json.loads(clean_json_response(response_text))

        if not isinstance(data, dict):
            # Callers read named fields from the result; a list or scalar is unusable.
            logger.warning(f"Impressum extraction returned non-object JSON for {url}")
            return None

        return data

    except Exception as e:
        # Best-effort enrichment: a failure here must not abort the main scrape.
        logger.error(f"Impressum scrape failed for {url}: {e}")
        return None
|
||||||
|
|
||||||
def _parse_html(self, html_content: bytes) -> Dict[str, str]:
|
def _parse_html(self, html_content: bytes) -> Dict[str, str]:
|
||||||
soup = BeautifulSoup(html_content, 'html.parser')
|
soup = BeautifulSoup(html_content, 'html.parser')
|
||||||
|
|
||||||
# 1. Cleanup Junk
|
# 1. Cleanup Junk (Aggressive, matching legacy logic)
|
||||||
for element in soup(['script', 'style', 'noscript', 'iframe', 'svg', 'header', 'footer', 'nav', 'aside', 'form', 'button']):
|
# Removed 'a' tags to prevent menu links from polluting the text analysis
|
||||||
|
for element in soup(['script', 'style', 'noscript', 'iframe', 'svg', 'header', 'footer', 'nav', 'aside', 'form', 'button', 'a']):
|
||||||
element.decompose()
|
element.decompose()
|
||||||
|
|
||||||
|
# 1b. Remove common Cookie Banners / Popups by class/id heuristics
|
||||||
|
for div in soup.find_all("div"):
|
||||||
|
classes = str(div.get("class", "")).lower()
|
||||||
|
ids = str(div.get("id", "")).lower()
|
||||||
|
if any(x in classes or x in ids for x in ["cookie", "consent", "banner", "popup", "modal", "disclaimer"]):
|
||||||
|
div.decompose()
|
||||||
|
|
||||||
# 2. Extract Title & Meta Description
|
# 2. Extract Title & Meta Description
|
||||||
title = soup.title.string if soup.title else ""
|
title = soup.title.string if soup.title else ""
|
||||||
meta_desc = ""
|
meta_desc = ""
|
||||||
|
|||||||
@@ -38,25 +38,52 @@ export function Inspector({ companyId, onClose, apiBase }: InspectorProps) {
|
|||||||
const [loading, setLoading] = useState(false)
|
const [loading, setLoading] = useState(false)
|
||||||
const [isProcessing, setIsProcessing] = useState(false)
|
const [isProcessing, setIsProcessing] = useState(false)
|
||||||
|
|
||||||
|
// Polling Logic
|
||||||
|
useEffect(() => {
|
||||||
|
let interval: NodeJS.Timeout;
|
||||||
|
if (isProcessing) {
|
||||||
|
interval = setInterval(() => {
|
||||||
|
fetchData(true) // Silent fetch
|
||||||
|
}, 2000)
|
||||||
|
}
|
||||||
|
return () => clearInterval(interval)
|
||||||
|
}, [isProcessing, companyId]) // Dependencies
|
||||||
|
|
||||||
// Manual Override State
|
// Manual Override State
|
||||||
const [isEditingWiki, setIsEditingWiki] = useState(false)
|
const [isEditingWiki, setIsEditingWiki] = useState(false)
|
||||||
const [wikiUrlInput, setWikiUrlInput] = useState("")
|
const [wikiUrlInput, setWikiUrlInput] = useState("")
|
||||||
const [isEditingWebsite, setIsEditingWebsite] = useState(false)
|
const [isEditingWebsite, setIsEditingWebsite] = useState(false)
|
||||||
const [websiteInput, setWebsiteInput] = useState("")
|
const [websiteInput, setWebsiteInput] = useState("")
|
||||||
|
|
||||||
const fetchData = () => {
|
const fetchData = (silent = false) => {
|
||||||
if (!companyId) return
|
if (!companyId) return
|
||||||
setLoading(true)
|
if (!silent) setLoading(true)
|
||||||
|
|
||||||
axios.get(`${apiBase}/companies/${companyId}`)
|
axios.get(`${apiBase}/companies/${companyId}`)
|
||||||
.then(res => setData(res.data))
|
.then(res => {
|
||||||
|
const newData = res.data
|
||||||
|
setData(newData)
|
||||||
|
|
||||||
|
// Auto-stop processing if status changes to ENRICHED or we see data
|
||||||
|
if (isProcessing) {
|
||||||
|
const hasWiki = newData.enrichment_data?.some((e:any) => e.source_type === 'wikipedia')
|
||||||
|
const hasAnalysis = newData.enrichment_data?.some((e:any) => e.source_type === 'ai_analysis')
|
||||||
|
|
||||||
|
// If we were waiting for Discover (Wiki) or Analyze (AI)
|
||||||
|
if ((hasWiki && newData.status === 'DISCOVERED') || (hasAnalysis && newData.status === 'ENRICHED')) {
|
||||||
|
setIsProcessing(false)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
.catch(console.error)
|
.catch(console.error)
|
||||||
.finally(() => setLoading(false))
|
.finally(() => { if (!silent) setLoading(false) })
|
||||||
}
|
}
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
fetchData()
|
fetchData()
|
||||||
setIsEditingWiki(false)
|
setIsEditingWiki(false)
|
||||||
setIsEditingWebsite(false)
|
setIsEditingWebsite(false)
|
||||||
|
setIsProcessing(false) // Reset on ID change
|
||||||
}, [companyId])
|
}, [companyId])
|
||||||
|
|
||||||
const handleDiscover = async () => {
|
const handleDiscover = async () => {
|
||||||
@@ -64,10 +91,9 @@ export function Inspector({ companyId, onClose, apiBase }: InspectorProps) {
|
|||||||
setIsProcessing(true)
|
setIsProcessing(true)
|
||||||
try {
|
try {
|
||||||
await axios.post(`${apiBase}/enrich/discover`, { company_id: companyId })
|
await axios.post(`${apiBase}/enrich/discover`, { company_id: companyId })
|
||||||
setTimeout(fetchData, 3000)
|
// Polling effect will handle the rest
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error(e)
|
console.error(e)
|
||||||
} finally {
|
|
||||||
setIsProcessing(false)
|
setIsProcessing(false)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -77,10 +103,9 @@ export function Inspector({ companyId, onClose, apiBase }: InspectorProps) {
|
|||||||
setIsProcessing(true)
|
setIsProcessing(true)
|
||||||
try {
|
try {
|
||||||
await axios.post(`${apiBase}/enrich/analyze`, { company_id: companyId })
|
await axios.post(`${apiBase}/enrich/analyze`, { company_id: companyId })
|
||||||
setTimeout(fetchData, 5000)
|
// Polling effect will handle the rest
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error(e)
|
console.error(e)
|
||||||
} finally {
|
|
||||||
setIsProcessing(false)
|
setIsProcessing(false)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -121,6 +146,11 @@ export function Inspector({ companyId, onClose, apiBase }: InspectorProps) {
|
|||||||
const wiki = wikiEntry?.content
|
const wiki = wikiEntry?.content
|
||||||
const isLocked = wikiEntry?.is_locked
|
const isLocked = wikiEntry?.is_locked
|
||||||
|
|
||||||
|
const aiAnalysis = data?.enrichment_data?.find(e => e.source_type === 'ai_analysis')?.content
|
||||||
|
|
||||||
|
const scrapeData = data?.enrichment_data?.find(e => e.source_type === 'website_scrape')?.content
|
||||||
|
const impressum = scrapeData?.impressum
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="fixed inset-y-0 right-0 w-[550px] bg-slate-900 border-l border-slate-800 shadow-2xl transform transition-transform duration-300 ease-in-out z-40 overflow-y-auto">
|
<div className="fixed inset-y-0 right-0 w-[550px] bg-slate-900 border-l border-slate-800 shadow-2xl transform transition-transform duration-300 ease-in-out z-40 overflow-y-auto">
|
||||||
{loading ? (
|
{loading ? (
|
||||||
@@ -135,7 +165,7 @@ export function Inspector({ companyId, onClose, apiBase }: InspectorProps) {
|
|||||||
<h2 className="text-xl font-bold text-white leading-tight">{data.name}</h2>
|
<h2 className="text-xl font-bold text-white leading-tight">{data.name}</h2>
|
||||||
<div className="flex items-center gap-2">
|
<div className="flex items-center gap-2">
|
||||||
<button
|
<button
|
||||||
onClick={fetchData}
|
onClick={() => fetchData(true)}
|
||||||
className="p-1.5 text-slate-500 hover:text-white transition-colors"
|
className="p-1.5 text-slate-500 hover:text-white transition-colors"
|
||||||
title="Refresh"
|
title="Refresh"
|
||||||
>
|
>
|
||||||
@@ -227,6 +257,59 @@ export function Inspector({ companyId, onClose, apiBase }: InspectorProps) {
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div className="p-6 space-y-8">
|
<div className="p-6 space-y-8">
|
||||||
|
|
||||||
|
{/* Impressum / Legal Data (NEW) */}
|
||||||
|
{impressum && (
|
||||||
|
<div className="bg-slate-950 rounded-lg p-4 border border-slate-800 flex flex-col gap-2">
|
||||||
|
<div className="flex items-center gap-2 mb-1">
|
||||||
|
<div className="p-1 bg-slate-800 rounded text-slate-400">
|
||||||
|
<Briefcase className="h-3 w-3" />
|
||||||
|
</div>
|
||||||
|
<span className="text-[10px] uppercase font-bold text-slate-500 tracking-wider">Official Legal Data</span>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div className="text-sm font-medium text-white">
|
||||||
|
{impressum.legal_name || "Unknown Legal Name"}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div className="flex items-start gap-2 text-xs text-slate-400">
|
||||||
|
<MapPin className="h-3 w-3 mt-0.5 shrink-0" />
|
||||||
|
<div>
|
||||||
|
<div>{impressum.street}</div>
|
||||||
|
<div>{impressum.zip} {impressum.city}</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{(impressum.email || impressum.phone) && (
|
||||||
|
<div className="mt-2 pt-2 border-t border-slate-900 flex gap-4 text-[10px] text-slate-500 font-mono">
|
||||||
|
{impressum.email && <span>{impressum.email}</span>}
|
||||||
|
{impressum.phone && <span>{impressum.phone}</span>}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* AI Analysis Dossier (NEW) */}
|
||||||
|
{aiAnalysis && (
|
||||||
|
<div className="space-y-4">
|
||||||
|
<h3 className="text-sm font-semibold text-slate-400 uppercase tracking-wider flex items-center gap-2">
|
||||||
|
<Bot className="h-4 w-4" /> AI Strategic Dossier
|
||||||
|
</h3>
|
||||||
|
<div className="bg-slate-800/30 rounded-xl p-5 border border-slate-800/50 space-y-4">
|
||||||
|
<div>
|
||||||
|
<div className="text-[10px] text-blue-400 uppercase font-bold tracking-tight mb-1">Business Model</div>
|
||||||
|
<p className="text-sm text-slate-200 leading-relaxed">{aiAnalysis.business_model || "No summary available."}</p>
|
||||||
|
</div>
|
||||||
|
{aiAnalysis.infrastructure_evidence && (
|
||||||
|
<div className="pt-4 border-t border-slate-800/50">
|
||||||
|
<div className="text-[10px] text-orange-400 uppercase font-bold tracking-tight mb-1">Infrastructure Evidence</div>
|
||||||
|
<p className="text-sm text-slate-300 italic leading-relaxed">"{aiAnalysis.infrastructure_evidence}"</p>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
{/* Wikipedia Section */}
|
{/* Wikipedia Section */}
|
||||||
<div className="space-y-4">
|
<div className="space-y-4">
|
||||||
<div className="flex items-center justify-between">
|
<div className="flex items-center justify-between">
|
||||||
@@ -309,7 +392,7 @@ export function Inspector({ companyId, onClose, apiBase }: InspectorProps) {
|
|||||||
</div>
|
</div>
|
||||||
<div>
|
<div>
|
||||||
<div className="text-[10px] text-slate-500 uppercase font-bold tracking-tight">Revenue</div>
|
<div className="text-[10px] text-slate-500 uppercase font-bold tracking-tight">Revenue</div>
|
||||||
<div className="text-sm text-slate-200 font-medium">{wiki.umsatz ? `${wiki.umsatz} Mio. €` : 'k.A.'}</div>
|
<div className="text-sm text-slate-200 font-medium">{wiki.umsatz && wiki.umsatz !== 'k.A.' ? `${wiki.umsatz} Mio. €` : 'k.A.'}</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user