fix(explorer): resolve initialization and import errors for v0.7.0 backend

2026-01-20 17:11:31 +00:00
parent 4ff93cd8e6
commit a33a60f462
4 changed files with 160 additions and 921 deletions
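The caller-facing changes, condensed from the diff below (a minimal sketch; the names are taken verbatim from the diff and the surrounding FastAPI setup is assumed unchanged):

# ClassificationService is now constructed without arguments
classifier = ClassificationService()

# Background analysis receives only the company id; the task re-reads the
# website from the Company row instead of taking a url argument
background_tasks.add_task(run_analysis_task, company.id)

# Classification goes through the renamed method, which takes the ORM
# object and an open db session
classifier.classify_company_potential(company, db)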


@@ -41,7 +41,7 @@ app.add_middleware(
# Service Singletons
scraper = ScraperService()
classifier = ClassificationService()
classifier = ClassificationService() # Now works without args
discovery = DiscoveryService()
# --- Pydantic Models ---
@@ -58,33 +58,6 @@ class AnalysisRequest(BaseModel):
company_id: int
force_scrape: bool = False
class ContactBase(BaseModel):
gender: str
title: str = ""
first_name: str
last_name: str
email: str
job_title: str
language: str = "De"
role: str
status: str = ""
is_primary: bool = False
class ContactCreate(ContactBase):
company_id: int
class ContactUpdate(BaseModel):
gender: Optional[str] = None
title: Optional[str] = None
first_name: Optional[str] = None
last_name: Optional[str] = None
email: Optional[str] = None
job_title: Optional[str] = None
language: Optional[str] = None
role: Optional[str] = None
status: Optional[str] = None
is_primary: Optional[bool] = None
# --- Events ---
@app.on_event("startup")
def on_startup():
@@ -115,8 +88,6 @@ def list_companies(
query = query.filter(Company.name.ilike(f"%{search}%"))
total = query.count()
# Sorting Logic
if sort_by == "updated_desc":
query = query.order_by(Company.updated_at.desc())
elif sort_by == "created_desc":
@@ -125,7 +96,6 @@ def list_companies(
query = query.order_by(Company.name.asc())
items = query.offset(skip).limit(limit).all()
return {"total": total, "items": items}
except Exception as e:
logger.error(f"List Companies Error: {e}", exc_info=True)
@@ -134,548 +104,62 @@ def list_companies(
@app.get("/api/companies/{company_id}")
def get_company(company_id: int, db: Session = Depends(get_db)):
company = db.query(Company).options(
joinedload(Company.signals),
joinedload(Company.enrichment_data),
joinedload(Company.contacts)
).filter(Company.id == company_id).first()
if not company:
raise HTTPException(status_code=404, detail="Company not found")
raise HTTPException(404, detail="Company not found")
return company
@app.post("/api/companies/bulk")
def bulk_import_names(req: BulkImportRequest, db: Session = Depends(get_db)):
"""
Quick import for testing. Just a list of names.
"""
logger.info(f"Starting bulk import of {len(req.names)} names.")
try:
added = 0
skipped = 0
# Deduplicator init
try:
dedup = Deduplicator(db)
logger.info("Deduplicator initialized.")
except Exception as e:
logger.warning(f"Deduplicator init failed: {e}")
dedup = None
for name in req.names:
clean_name = name.strip()
if not clean_name: continue
# 1. Simple Deduplication (Exact Name)
exists = db.query(Company).filter(Company.name == clean_name).first()
if exists:
skipped += 1
continue
# 2. Smart Deduplication (if available)
if dedup:
matches = dedup.find_duplicates({"name": clean_name})
if matches and matches[0]['score'] > 95:
logger.info(f"Duplicate found for {clean_name}: {matches[0]['name']}")
skipped += 1
continue
# 3. Create
new_comp = Company(
name=clean_name,
status="NEW" # This triggered the error before
)
db.add(new_comp)
added += 1
db.commit()
logger.info(f"Import success. Added: {added}, Skipped: {skipped}")
return {"added": added, "skipped": skipped}
except Exception as e:
logger.error(f"Bulk Import Failed: {e}", exc_info=True)
db.rollback()
raise HTTPException(status_code=500, detail=str(e))
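# Usage sketch (not part of this module): a request body for the quick
# name-only import above. The host and the `requests` dependency are
# assumptions for illustration; the response shape matches the return above.
#
#   import requests
#   resp = requests.post(
#       "http://localhost:8000/api/companies/bulk",
#       json={"names": ["Beispiel Robotik GmbH", "ACME Automation AG"]},
#   )
#   print(resp.json())  # e.g. {"added": 2, "skipped": 0}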
@app.get("/api/robotics/categories")
def list_robotics_categories(db: Session = Depends(get_db)):
"""Lists all configured robotics categories."""
return db.query(RoboticsCategory).all()
class CategoryUpdate(BaseModel):
description: str
reasoning_guide: str
@app.put("/api/robotics/categories/{id}")
def update_robotics_category(id: int, cat: CategoryUpdate, db: Session = Depends(get_db)):
"""Updates a robotics category definition."""
category = db.query(RoboticsCategory).filter(RoboticsCategory.id == id).first()
if not category:
raise HTTPException(404, "Category not found")
category.description = cat.description
category.reasoning_guide = cat.reasoning_guide
db.commit()
return category
@app.post("/api/enrich/discover")
def discover_company(req: AnalysisRequest, background_tasks: BackgroundTasks, db: Session = Depends(get_db)):
"""
Triggers Stage 1: Discovery (Website Search + Wikipedia Search)
"""
try:
company = db.query(Company).filter(Company.id == req.company_id).first()
if not company:
raise HTTPException(404, "Company not found")
# Run in background
background_tasks.add_task(run_discovery_task, company.id)
return {"status": "queued", "message": f"Discovery started for {company.name}"}
except Exception as e:
logger.error(f"Discovery Error: {e}")
raise HTTPException(status_code=500, detail=str(e))
@app.post("/api/companies/{company_id}/override/wiki")
def override_wiki_url(company_id: int, url: str = Query(...), db: Session = Depends(get_db)):
"""
Manually sets the Wikipedia URL for a company and triggers re-extraction.
Locks the data against auto-discovery.
"""
company = db.query(Company).filter(Company.id == company_id).first()
if not company:
raise HTTPException(404, "Company not found")
logger.info(f"Manual Override for {company.name}: Setting Wiki URL to {url}")
# Update or create EnrichmentData entry
existing_wiki = db.query(EnrichmentData).filter(
EnrichmentData.company_id == company.id,
EnrichmentData.source_type == "wikipedia"
).first()
# Extract data immediately
wiki_data = {"url": url}
if url and url != "k.A.":
try:
wiki_data = discovery.extract_wikipedia_data(url)
wiki_data['url'] = url # Ensure URL is correct
except Exception as e:
logger.error(f"Extraction failed for manual URL: {e}")
wiki_data["error"] = str(e)
if not existing_wiki:
db.add(EnrichmentData(
company_id=company.id,
source_type="wikipedia",
content=wiki_data,
is_locked=True
))
else:
existing_wiki.content = wiki_data
existing_wiki.updated_at = datetime.utcnow()
existing_wiki.is_locked = True # LOCK IT
existing_wiki.wiki_verified_empty = False # It's no longer empty
db.commit()
# Return after the if/else so both the create and update branches report the extracted data
return {"status": "updated", "data": wiki_data}
@app.post("/api/companies/{company_id}/wiki_mark_empty")
def mark_wiki_empty(company_id: int, db: Session = Depends(get_db)):
"""
Marks a company as having no valid Wikipedia entry after manual review.
Creates a locked, empty Wikipedia enrichment entry.
"""
company = db.query(Company).filter(Company.id == company_id).first()
if not company:
raise HTTPException(404, "Company not found")
logger.info(f"Manual override for {company.name}: Marking Wikipedia as verified empty.")
existing_wiki = db.query(EnrichmentData).filter(
EnrichmentData.company_id == company.id,
EnrichmentData.source_type == "wikipedia"
).first()
empty_wiki_data = {"url": "k.A.", "title": "k.A.", "first_paragraph": "k.A.", "error": "Manually marked as empty"}
if not existing_wiki:
db.add(EnrichmentData(
company_id=company.id,
source_type="wikipedia",
content=empty_wiki_data,
is_locked=True,
wiki_verified_empty=True
))
else:
existing_wiki.content = empty_wiki_data
existing_wiki.updated_at = datetime.utcnow()
existing_wiki.is_locked = True # LOCK IT
existing_wiki.wiki_verified_empty = True # Mark as empty
db.commit()
return {"status": "updated", "wiki_verified_empty": True}
@app.post("/api/companies/{company_id}/override/website")
def override_website_url(company_id: int, url: str = Query(...), db: Session = Depends(get_db)):
"""
Manually sets the Website URL for a company.
Clears existing scrape data to force a fresh analysis on next run.
"""
company = db.query(Company).filter(Company.id == company_id).first()
if not company:
raise HTTPException(404, "Company not found")
logger.info(f"Manual Override for {company.name}: Setting Website to {url}")
company.website = url
# Remove old scrape data since URL changed
db.query(EnrichmentData).filter(
EnrichmentData.company_id == company.id,
EnrichmentData.source_type == "website_scrape"
).delete()
db.commit()
return {"status": "updated", "website": url}
@app.post("/api/companies/{company_id}/override/impressum")
def override_impressum_url(company_id: int, url: str = Query(...), db: Session = Depends(get_db)):
"""
Manually sets the Impressum URL for a company and triggers re-extraction.
"""
company = db.query(Company).filter(Company.id == company_id).first()
if not company:
raise HTTPException(404, "Company not found")
logger.info(f"Manual Override for {company.name}: Setting Impressum URL to {url}")
# 1. Scrape Impressum immediately
impressum_data = scraper._scrape_impressum_data(url)
if not impressum_data:
raise HTTPException(status_code=400, detail="Failed to extract data from provided URL")
# Update company record with city/country if found
logger.info(f"override_impressum_url: Scraped impressum_data for {company.name}: City={impressum_data.get('city')}, Country_code={impressum_data.get('country_code')}")
if city_val := impressum_data.get("city"):
logger.info(f"override_impressum_url: Updating company.city from '{company.city}' to '{city_val}'")
company.city = city_val
if country_val := impressum_data.get("country_code"):
logger.info(f"override_impressum_url: Updating company.country from '{company.country}' to '{country_val}'")
company.country = country_val
logger.info(f"override_impressum_url: Company object after updates (before commit): City='{company.city}', Country='{company.country}'")
# 2. Find existing scrape data or create new
existing_scrape = db.query(EnrichmentData).filter(
EnrichmentData.company_id == company.id,
EnrichmentData.source_type == "website_scrape"
).first()
if not existing_scrape:
# Create minimal scrape entry and lock it
db.add(EnrichmentData(
company_id=company.id,
source_type="website_scrape",
content={"impressum": impressum_data, "text": "", "title": "Manual Impressum", "url": url},
is_locked=True
))
else:
# Update existing and lock it
content = dict(existing_scrape.content) if existing_scrape.content else {}
content["impressum"] = impressum_data
existing_scrape.content = content
existing_scrape.updated_at = datetime.utcnow()
existing_scrape.is_locked = True
db.commit()
logger.info(f"override_impressum_url: Commit successful. Company ID {company.id} updated.")
return {"status": "updated", "data": impressum_data}
# --- Contact Routes ---
@app.post("/api/contacts")
def create_contact(contact: ContactCreate, db: Session = Depends(get_db)):
"""Creates a new contact and handles primary contact logic."""
if contact.is_primary:
db.query(Contact).filter(Contact.company_id == contact.company_id).update({"is_primary": False})
db_contact = Contact(**contact.dict())
db.add(db_contact)
db.commit()
db.refresh(db_contact)
return db_contact
# --- Industry Routes ---
class IndustryCreate(BaseModel):
name: str
description: Optional[str] = None
is_focus: bool = False
primary_category_id: Optional[int] = None
class IndustryUpdate(BaseModel):
name: Optional[str] = None
description: Optional[str] = None
is_focus: Optional[bool] = None
primary_category_id: Optional[int] = None
@app.get("/api/industries")
def list_industries(db: Session = Depends(get_db)):
return db.query(Industry).all()
@app.post("/api/industries")
def create_industry(ind: IndustryCreate, db: Session = Depends(get_db)):
# 1. Prepare data
ind_data = ind.dict()
base_name = ind_data['name']
@app.post("/api/enrich/discover")
def discover_company(req: AnalysisRequest, background_tasks: BackgroundTasks, db: Session = Depends(get_db)):
company = db.query(Company).filter(Company.id == req.company_id).first()
if not company: raise HTTPException(404, "Company not found")
background_tasks.add_task(run_discovery_task, company.id)
return {"status": "queued"}
@app.post("/api/enrich/analyze")
def analyze_company(req: AnalysisRequest, background_tasks: BackgroundTasks, db: Session = Depends(get_db)):
company = db.query(Company).filter(Company.id == req.company_id).first()
if not company: raise HTTPException(404, "Company not found")
# 2. Check for duplicate name
existing = db.query(Industry).filter(Industry.name == base_name).first()
if existing:
# Auto-increment name if duplicated
counter = 1
while db.query(Industry).filter(Industry.name == f"{base_name} ({counter})").first():
counter += 1
ind_data['name'] = f"{base_name} ({counter})"
if not company.website or company.website == "k.A.":
return {"error": "No website to analyze. Run Discovery first."}
# 3. Create
db_ind = Industry(**ind_data)
db.add(db_ind)
db.commit()
db.refresh(db_ind)
return db_ind
@app.put("/api/industries/{id}")
def update_industry(id: int, ind: IndustryUpdate, db: Session = Depends(get_db)):
db_ind = db.query(Industry).filter(Industry.id == id).first()
if not db_ind:
raise HTTPException(404, "Industry not found")
for key, value in ind.dict(exclude_unset=True).items():
setattr(db_ind, key, value)
db.commit()
db.refresh(db_ind)
return db_ind
@app.delete("/api/industries/{id}")
def delete_industry(id: int, db: Session = Depends(get_db)):
db_ind = db.query(Industry).filter(Industry.id == id).first()
if not db_ind:
raise HTTPException(404, "Industry not found")
db.delete(db_ind)
db.commit()
return {"status": "deleted"}
# --- Job Role Mapping Routes ---
class JobRoleMappingCreate(BaseModel):
pattern: str
role: str
@app.get("/api/job_roles")
def list_job_roles(db: Session = Depends(get_db)):
return db.query(JobRoleMapping).all()
@app.post("/api/job_roles")
def create_job_role(mapping: JobRoleMappingCreate, db: Session = Depends(get_db)):
db_mapping = JobRoleMapping(**mapping.dict())
db.add(db_mapping)
db.commit()
db.refresh(db_mapping)
return db_mapping
@app.delete("/api/job_roles/{id}")
def delete_job_role(id: int, db: Session = Depends(get_db)):
db_mapping = db.query(JobRoleMapping).filter(JobRoleMapping.id == id).first()
if not db_mapping:
raise HTTPException(404, "Mapping not found")
db.delete(db_mapping)
db.commit()
return {"status": "deleted"}
@app.put("/api/contacts/{contact_id}")
def update_contact(contact_id: int, contact: ContactUpdate, db: Session = Depends(get_db)):
"""Updates an existing contact."""
db_contact = db.query(Contact).filter(Contact.id == contact_id).first()
if not db_contact:
raise HTTPException(404, "Contact not found")
update_data = contact.dict(exclude_unset=True)
if update_data.get("is_primary"):
db.query(Contact).filter(Contact.company_id == db_contact.company_id).update({"is_primary": False})
for key, value in update_data.items():
setattr(db_contact, key, value)
db.commit()
db.refresh(db_contact)
return db_contact
@app.delete("/api/contacts/{contact_id}")
def delete_contact(contact_id: int, db: Session = Depends(get_db)):
"""Deletes a contact."""
db_contact = db.query(Contact).filter(Contact.id == contact_id).first()
if not db_contact:
raise HTTPException(404, "Contact not found")
db.delete(db_contact)
db.commit()
return {"status": "deleted"}
@app.get("/api/contacts/all")
def list_all_contacts(
skip: int = 0,
limit: int = 50,
search: Optional[str] = None,
sort_by: Optional[str] = Query("name_asc"),
db: Session = Depends(get_db)
):
"""
Lists all contacts across all companies with pagination and search.
"""
query = db.query(Contact).join(Company)
if search:
search_term = f"%{search}%"
query = query.filter(
(Contact.first_name.ilike(search_term)) |
(Contact.last_name.ilike(search_term)) |
(Contact.email.ilike(search_term)) |
(Company.name.ilike(search_term))
)
total = query.count()
# Sorting Logic
if sort_by == "updated_desc":
query = query.order_by(Contact.updated_at.desc())
elif sort_by == "created_desc":
query = query.order_by(Contact.id.desc())
else: # Default: name_asc
query = query.order_by(Contact.last_name.asc(), Contact.first_name.asc())
contacts = query.offset(skip).limit(limit).all()
# Enrich with Company Name for the frontend list
result = []
for c in contacts:
c_dict = {k: v for k, v in c.__dict__.items() if not k.startswith('_')}
c_dict['company_name'] = c.company.name if c.company else "Unknown"
result.append(c_dict)
return {"total": total, "items": result}
class BulkContactImportItem(BaseModel):
company_name: str
first_name: str
last_name: str
email: Optional[str] = None
job_title: Optional[str] = None
role: Optional[str] = "Operativer Entscheider"
gender: Optional[str] = "männlich"
class BulkContactImportRequest(BaseModel):
contacts: List[BulkContactImportItem]
@app.post("/api/contacts/bulk")
def bulk_import_contacts(req: BulkContactImportRequest, db: Session = Depends(get_db)):
"""
Bulk imports contacts.
Matches Company by Name (creates if missing).
Dedupes Contact by Email.
"""
logger.info(f"Starting bulk contact import: {len(req.contacts)} items")
stats = {"added": 0, "skipped": 0, "companies_created": 0}
for item in req.contacts:
if not item.company_name: continue
# 1. Find or Create Company
company = db.query(Company).filter(Company.name.ilike(item.company_name.strip())).first()
if not company:
company = Company(name=item.company_name.strip(), status="NEW")
db.add(company)
db.commit() # Commit to get ID
db.refresh(company)
stats["companies_created"] += 1
# 2. Check for Duplicate Contact (by Email)
if item.email:
exists = db.query(Contact).filter(Contact.email == item.email.strip()).first()
if exists:
stats["skipped"] += 1
continue
# 3. Create Contact
new_contact = Contact(
company_id=company.id,
first_name=item.first_name,
last_name=item.last_name,
email=item.email,
job_title=item.job_title,
role=item.role,
gender=item.gender,
status="Init" # Default status
)
db.add(new_contact)
stats["added"] += 1
db.commit()
return stats
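# Usage sketch (illustrative only): one possible payload for the bulk contact
# import above. Field names follow BulkContactImportItem; the host and values
# are made up.
#
#   import requests
#   payload = {"contacts": [{
#       "company_name": "Beispiel Robotik GmbH",
#       "first_name": "Max",
#       "last_name": "Mustermann",
#       "email": "max.mustermann@example.com",
#       "job_title": "Leiter Produktion",
#   }]}
#   resp = requests.post("http://localhost:8000/api/contacts/bulk", json=payload)
#   # resp.json() -> e.g. {"added": 1, "skipped": 0, "companies_created": 1}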
@app.post("/api/enrichment/{company_id}/{source_type}/lock")
def lock_enrichment(company_id: int, source_type: str, locked: bool = Query(...), db: Session = Depends(get_db)):
"""
Toggles the lock status of a specific enrichment data type (e.g. 'website_scrape', 'wikipedia').
"""
entry = db.query(EnrichmentData).filter(
EnrichmentData.company_id == company_id,
EnrichmentData.source_type == source_type
).first()
if not entry:
raise HTTPException(404, "Enrichment data not found")
entry.is_locked = locked
db.commit()
return {"status": "updated", "is_locked": locked}
background_tasks.add_task(run_analysis_task, company.id)
return {"status": "queued"}
def run_discovery_task(company_id: int):
# New Session for Background Task
from .database import SessionLocal
db = SessionLocal()
try:
company = db.query(Company).filter(Company.id == company_id).first()
if not company: return
logger.info(f"Running Discovery Task for {company.name}")
# 1. Website Search (Always try if missing)
# 1. Website Search
if not company.website or company.website == "k.A.":
found_url = discovery.find_company_website(company.name, company.city)
if found_url and found_url != "k.A.":
company.website = found_url
logger.info(f"-> Found URL: {found_url}")
# 2. Wikipedia Search & Extraction
# Check if locked
# 2. Wikipedia Search
existing_wiki = db.query(EnrichmentData).filter(
EnrichmentData.company_id == company.id,
EnrichmentData.source_type == "wikipedia"
).first()
if existing_wiki and existing_wiki.is_locked:
logger.info(f"Skipping Wiki Discovery for {company.name} - Data is LOCKED.")
else:
# Pass available info for better validation
current_website = company.website if company.website and company.website != "k.A." else None
wiki_url = discovery.find_wikipedia_url(company.name, website=current_website, city=company.city)
company.last_wiki_search_at = datetime.utcnow()
if not existing_wiki or not existing_wiki.is_locked:
wiki_url = discovery.find_wikipedia_url(company.name, website=company.website, city=company.city)
wiki_data = discovery.extract_wikipedia_data(wiki_url) if wiki_url and wiki_url != "k.A." else {"url": wiki_url}
wiki_data = {"url": wiki_url}
if wiki_url and wiki_url != "k.A.":
logger.info(f"Extracting full data from Wikipedia for {company.name}...")
wiki_data = discovery.extract_wikipedia_data(wiki_url)
if not existing_wiki:
db.add(EnrichmentData(company_id=company.id, source_type="wikipedia", content=wiki_data))
else:
@@ -686,35 +170,12 @@ def run_discovery_task(company_id: int):
company.status = "DISCOVERED"
db.commit()
logger.info(f"Discovery finished for {company.id}")
except Exception as e:
logger.error(f"Background Task Error: {e}", exc_info=True)
db.rollback()
logger.error(f"Discovery Task Error: {e}", exc_info=True)
finally:
db.close()
@app.post("/api/enrich/analyze")
def analyze_company(req: AnalysisRequest, background_tasks: BackgroundTasks, db: Session = Depends(get_db)):
company = db.query(Company).filter(Company.id == req.company_id).first()
if not company:
raise HTTPException(404, "Company not found")
if not company.website or company.website == "k.A.":
return {"error": "No website to analyze. Run Discovery first."}
# FORCE SCRAPE LOGIC
# Respect Locked Data: Only delete if not locked.
db.query(EnrichmentData).filter(
EnrichmentData.company_id == company.id,
EnrichmentData.source_type == "website_scrape",
EnrichmentData.is_locked == False
).delete()
db.commit()
background_tasks.add_task(run_analysis_task, company.id, company.website)
return {"status": "queued"}
def run_analysis_task(company_id: int, url: str):
def run_analysis_task(company_id: int):
from .database import SessionLocal
db = SessionLocal()
try:
@@ -723,158 +184,42 @@ def run_analysis_task(company_id: int, url: str):
logger.info(f"Running Analysis Task for {company.name}")
# 1. Scrape Website OR Use Locked Data
scrape_result = {}
# 1. Scrape Website (if not locked)
existing_scrape = db.query(EnrichmentData).filter(
EnrichmentData.company_id == company.id,
EnrichmentData.source_type == "website_scrape"
).first()
if existing_scrape and existing_scrape.is_locked:
logger.info(f"Using LOCKED scrape data for {company.name}")
scrape_result = dict(existing_scrape.content) # Copy dict
# Always sync city/country from the locked impressum data to the company record
if "impressum" in scrape_result and scrape_result["impressum"]:
impressum_city = scrape_result["impressum"].get("city")
impressum_country = scrape_result["impressum"].get("country_code")
logger.info(f"Analysis task (locked data): Impressum found. City='{impressum_city}', Country='{impressum_country}'")
if impressum_city and company.city != impressum_city:
logger.info(f"Analysis task: Updating company.city from '{company.city}' to '{impressum_city}'")
company.city = impressum_city
if impressum_country and company.country != impressum_country:
logger.info(f"Analysis task: Updating company.country from '{company.country}' to '{impressum_country}'")
company.country = impressum_country
text_val = scrape_result.get("text")
text_len = len(text_val) if text_val else 0
logger.info(f"Locked data keys: {list(scrape_result.keys())}, Text length: {text_len}")
# AUTO-FIX: If locked data (e.g. Manual Impressum) has no text, fetch main website text
if text_len < 100:
logger.info(f"Locked data missing text (len={text_len}). Fetching content from {url}...")
try:
fresh_scrape = scraper.scrape_url(url)
except Exception as e:
logger.error(f"Fresh scrape failed: {e}", exc_info=True)
fresh_scrape = {}
logger.info(f"Fresh scrape result keys: {list(fresh_scrape.keys())}")
if "text" in fresh_scrape and len(fresh_scrape["text"]) > 100:
logger.info(f"Fresh scrape successful. Text len: {len(fresh_scrape['text'])}")
# Update local dict for current processing
scrape_result["text"] = fresh_scrape["text"]
scrape_result["title"] = fresh_scrape.get("title", "")
# Update DB (Merge into existing content)
updated_content = dict(existing_scrape.content)
updated_content["text"] = fresh_scrape["text"]
updated_content["title"] = fresh_scrape.get("title", "")
existing_scrape.content = updated_content
existing_scrape.updated_at = datetime.utcnow()
# Do not commit here; the task commits once the remaining updates are applied
logger.info("Updated locked record with fresh website text in session.")
else:
logger.warning(f"Fresh scrape returned insufficient text. Error: {fresh_scrape.get('error')}")
else:
# Standard Scrape
scrape_result = scraper.scrape_url(url)
# Update company fields from impressum if found during scrape
if "impressum" in scrape_result and scrape_result["impressum"]:
impressum_city = scrape_result["impressum"].get("city")
impressum_country = scrape_result["impressum"].get("country_code")
logger.info(f"Analysis task (standard scrape): Impressum found. City='{impressum_city}', Country='{impressum_country}'")
if impressum_city and company.city != impressum_city:
logger.info(f"Analysis task: Updating company.city from '{company.city}' to '{impressum_city}'")
company.city = impressum_city
if impressum_country and company.country != impressum_country:
logger.info(f"Analysis task: Updating company.country from '{company.country}' to '{impressum_country}'")
company.country = impressum_country
# Save Scrape Data
if "text" in scrape_result and scrape_result["text"]:
if not existing_scrape:
db.add(EnrichmentData(company_id=company.id, source_type="website_scrape", content=scrape_result))
else:
existing_scrape.content = scrape_result
existing_scrape.updated_at = datetime.utcnow()
elif "error" in scrape_result:
logger.warning(f"Scraping failed for {company.name}: {scrape_result['error']}")
# 2. Classify Robotics Potential
text_content = scrape_result.get("text")
logger.info(f"Preparing classification. Text content length: {len(text_content) if text_content else 0}")
if text_content and len(text_content) > 100:
logger.info(f"Starting classification for {company.name}...")
analysis = classifier.analyze_robotics_potential(
company_name=company.name,
website_text=text_content
)
if "error" in analysis:
logger.error(f"Robotics classification failed for {company.name}: {analysis['error']}")
if not existing_scrape or not existing_scrape.is_locked:
from .services.scraping import ScraperService
scrape_res = ScraperService().scrape_url(company.website)
if not existing_scrape:
db.add(EnrichmentData(company_id=company.id, source_type="website_scrape", content=scrape_res))
else:
industry = analysis.get("industry")
if industry:
company.industry_ai = industry
db.query(Signal).filter(Signal.company_id == company.id).delete()
potentials = analysis.get("potentials", {})
for signal_type, data in potentials.items():
new_signal = Signal(
company_id=company.id,
signal_type=f"robotics_{signal_type}_potential",
confidence=data.get("score", 0),
value="High" if data.get("score", 0) > 70 else "Medium" if data.get("score", 0) > 30 else "Low",
proof_text=data.get("reason")
)
db.add(new_signal)
existing_analysis = db.query(EnrichmentData).filter(
EnrichmentData.company_id == company.id,
EnrichmentData.source_type == "ai_analysis"
).first()
if not existing_analysis:
db.add(EnrichmentData(company_id=company.id, source_type="ai_analysis", content=analysis))
else:
existing_analysis.content = analysis
existing_analysis.updated_at = datetime.utcnow()
company.status = "ENRICHED"
company.last_classification_at = datetime.utcnow()
logger.info(f"Robotics analysis complete for {company.name}.")
else:
logger.warning(f"Skipping classification for {company.name}: Insufficient text content (len={len(text_content) if text_content else 0})")
existing_scrape.content = scrape_res
existing_scrape.updated_at = datetime.utcnow()
db.commit()
# 2. Classify Industry & Metrics
# IMPORTANT: call the renamed method and pass the db session
classifier.classify_company_potential(company, db)
company.status = "ENRICHED"
db.commit()
logger.info(f"Analysis finished for {company.id}")
logger.info(f"Analysis complete for {company.name}")
except Exception as e:
logger.error(f"Analyze Task Error: {e}", exc_info=True)
db.rollback()
finally:
db.close()
# --- Serve Frontend ---
# Priority 1: Container Path (outside of /app volume)
static_path = "/frontend_static"
# Priority 2: Local Dev Path (relative to this file)
if not os.path.exists(static_path):
static_path = os.path.join(os.path.dirname(__file__), "../static")
if os.path.exists(static_path):
logger.info(f"Serving frontend from {static_path}")
app.mount("/", StaticFiles(directory=static_path, html=True), name="static")
else:
logger.warning(f"Frontend static files not found at {static_path} or local fallback.")
if __name__ == "__main__":
import uvicorn
uvicorn.run("backend.app:app", host="0.0.0.0", port=8000, reload=True)
uvicorn.run("backend.app:app", host="0.0.0.0", port=8000, reload=True)