feat(company-explorer): add impressum scraping, robust json parsing, and enhanced ui polling

- Implemented Impressum scraping with Root-URL fallback and enhanced keyword detection.
- Added 'clean_json_response' helper to strip Markdown from LLM outputs, preventing JSONDecodeErrors.
- Improved numeric extraction for German number formatting (period as thousands separator, comma as decimal separator).
- Updated Inspector UI with Polling logic for auto-refresh and display of AI Dossier and Legal Data.
- Added Manual Override for Website URL.
This commit is contained in:
2026-01-08 11:59:11 +00:00
parent e4b59b1571
commit 88c9d487be
5 changed files with 296 additions and 49 deletions

View File

@@ -38,25 +38,52 @@ export function Inspector({ companyId, onClose, apiBase }: InspectorProps) {
const [loading, setLoading] = useState(false)
const [isProcessing, setIsProcessing] = useState(false)
// Polling logic: while isProcessing is true (set by the discover/analyze
// handlers), re-fetch the company silently every 2 seconds so the panel
// refreshes without toggling the loading state. fetchData itself clears
// isProcessing once the expected enrichment data has arrived.
useEffect(() => {
  // Nothing to poll for: skip creating a timer (and a cleanup) entirely.
  if (!isProcessing) return undefined

  // Let the handle type be inferred so this compiles against both DOM and
  // Node timer typings instead of hard-coding NodeJS.Timeout.
  const pollTimer = setInterval(() => {
    fetchData(true) // Silent fetch
  }, 2000)

  // Stop polling when processing ends, the company changes, or on unmount.
  return () => clearInterval(pollTimer)
}, [isProcessing, companyId]) // Dependencies
// Manual Override State
const [isEditingWiki, setIsEditingWiki] = useState(false)
const [wikiUrlInput, setWikiUrlInput] = useState("")
const [isEditingWebsite, setIsEditingWebsite] = useState(false)
const [websiteInput, setWebsiteInput] = useState("")
const fetchData = () => {
const fetchData = (silent = false) => {
if (!companyId) return
setLoading(true)
if (!silent) setLoading(true)
axios.get(`${apiBase}/companies/${companyId}`)
.then(res => setData(res.data))
.then(res => {
const newData = res.data
setData(newData)
// Auto-stop processing if status changes to ENRICHED or we see data
if (isProcessing) {
const hasWiki = newData.enrichment_data?.some((e:any) => e.source_type === 'wikipedia')
const hasAnalysis = newData.enrichment_data?.some((e:any) => e.source_type === 'ai_analysis')
// If we were waiting for Discover (Wiki) or Analyze (AI)
if ((hasWiki && newData.status === 'DISCOVERED') || (hasAnalysis && newData.status === 'ENRICHED')) {
setIsProcessing(false)
}
}
})
.catch(console.error)
.finally(() => setLoading(false))
.finally(() => { if (!silent) setLoading(false) })
}
// Runs on mount and whenever companyId changes: reloads the company via
// fetchData (non-silent, so the loading state is shown) and resets the
// per-company UI state — both manual-override edit modes and the
// isProcessing flag that drives polling.
useEffect(() => {
fetchData()
setIsEditingWiki(false)
setIsEditingWebsite(false)
setIsProcessing(false) // Reset on ID change
}, [companyId])
const handleDiscover = async () => {
@@ -64,10 +91,9 @@ export function Inspector({ companyId, onClose, apiBase }: InspectorProps) {
setIsProcessing(true)
try {
await axios.post(`${apiBase}/enrich/discover`, { company_id: companyId })
setTimeout(fetchData, 3000)
// Polling effect will handle the rest
} catch (e) {
console.error(e)
} finally {
setIsProcessing(false)
}
}
@@ -77,10 +103,9 @@ export function Inspector({ companyId, onClose, apiBase }: InspectorProps) {
setIsProcessing(true)
try {
await axios.post(`${apiBase}/enrich/analyze`, { company_id: companyId })
setTimeout(fetchData, 5000)
// Polling effect will handle the rest
} catch (e) {
console.error(e)
} finally {
setIsProcessing(false)
}
}
@@ -120,6 +145,11 @@ export function Inspector({ companyId, onClose, apiBase }: InspectorProps) {
// Pick the individual enrichment records out of the loaded company data by
// their source_type; each value is undefined until that record exists.
const wikiEntry = data?.enrichment_data?.find(e => e.source_type === 'wikipedia')
const wiki = wikiEntry?.content
const isLocked = wikiEntry?.is_locked
// Content of the 'ai_analysis' record; rendered in the AI dossier section below.
const aiAnalysis = data?.enrichment_data?.find(e => e.source_type === 'ai_analysis')?.content
// Content of the 'website_scrape' record; its impressum field feeds the legal-data card.
const scrapeData = data?.enrichment_data?.find(e => e.source_type === 'website_scrape')?.content
const impressum = scrapeData?.impressum
return (
<div className="fixed inset-y-0 right-0 w-[550px] bg-slate-900 border-l border-slate-800 shadow-2xl transform transition-transform duration-300 ease-in-out z-40 overflow-y-auto">
@@ -135,7 +165,7 @@ export function Inspector({ companyId, onClose, apiBase }: InspectorProps) {
<h2 className="text-xl font-bold text-white leading-tight">{data.name}</h2>
<div className="flex items-center gap-2">
<button
onClick={fetchData}
onClick={() => fetchData(true)}
className="p-1.5 text-slate-500 hover:text-white transition-colors"
title="Refresh"
>
@@ -227,6 +257,59 @@ export function Inspector({ companyId, onClose, apiBase }: InspectorProps) {
</div>
<div className="p-6 space-y-8">
{/* Impressum / Legal Data (NEW) */}
{impressum && (
<div className="bg-slate-950 rounded-lg p-4 border border-slate-800 flex flex-col gap-2">
<div className="flex items-center gap-2 mb-1">
<div className="p-1 bg-slate-800 rounded text-slate-400">
<Briefcase className="h-3 w-3" />
</div>
<span className="text-[10px] uppercase font-bold text-slate-500 tracking-wider">Official Legal Data</span>
</div>
<div className="text-sm font-medium text-white">
{impressum.legal_name || "Unknown Legal Name"}
</div>
<div className="flex items-start gap-2 text-xs text-slate-400">
<MapPin className="h-3 w-3 mt-0.5 shrink-0" />
<div>
<div>{impressum.street}</div>
<div>{impressum.zip} {impressum.city}</div>
</div>
</div>
{(impressum.email || impressum.phone) && (
<div className="mt-2 pt-2 border-t border-slate-900 flex gap-4 text-[10px] text-slate-500 font-mono">
{impressum.email && <span>{impressum.email}</span>}
{impressum.phone && <span>{impressum.phone}</span>}
</div>
)}
</div>
)}
{/* AI Analysis Dossier (NEW) */}
{aiAnalysis && (
<div className="space-y-4">
<h3 className="text-sm font-semibold text-slate-400 uppercase tracking-wider flex items-center gap-2">
<Bot className="h-4 w-4" /> AI Strategic Dossier
</h3>
<div className="bg-slate-800/30 rounded-xl p-5 border border-slate-800/50 space-y-4">
<div>
<div className="text-[10px] text-blue-400 uppercase font-bold tracking-tight mb-1">Business Model</div>
<p className="text-sm text-slate-200 leading-relaxed">{aiAnalysis.business_model || "No summary available."}</p>
</div>
{aiAnalysis.infrastructure_evidence && (
<div className="pt-4 border-t border-slate-800/50">
<div className="text-[10px] text-orange-400 uppercase font-bold tracking-tight mb-1">Infrastructure Evidence</div>
<p className="text-sm text-slate-300 italic leading-relaxed">"{aiAnalysis.infrastructure_evidence}"</p>
</div>
)}
</div>
</div>
)}
{/* Wikipedia Section */}
<div className="space-y-4">
<div className="flex items-center justify-between">
@@ -309,7 +392,7 @@ export function Inspector({ companyId, onClose, apiBase }: InspectorProps) {
</div>
<div>
<div className="text-[10px] text-slate-500 uppercase font-bold tracking-tight">Revenue</div>
<div className="text-sm text-slate-200 font-medium">{wiki.umsatz ? `${wiki.umsatz} Mio. €` : 'k.A.'}</div>
<div className="text-sm text-slate-200 font-medium">{wiki.umsatz && wiki.umsatz !== 'k.A.' ? `${wiki.umsatz} Mio. €` : 'k.A.'}</div>
</div>
</div>