fix(explorer): resolve initialization and import errors for v0.7.0 backend

2026-01-20 17:11:31 +00:00
parent 4ff93cd8e6
commit a33a60f462
4 changed files with 160 additions and 921 deletions
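The caller-facing changes, condensed from the diff below (a minimal sketch; the names are taken verbatim from the diff and the surrounding FastAPI setup is assumed unchanged):

# ClassificationService is now constructed without arguments
classifier = ClassificationService()

# Background analysis receives only the company id; the task re-reads the
# website from the Company row instead of taking a url argument
background_tasks.add_task(run_analysis_task, company.id)

# Classification goes through the renamed method, which takes the ORM
# object and an open db session
classifier.classify_company_potential(company, db)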


@@ -41,7 +41,7 @@ app.add_middleware(
# Service Singletons
scraper = ScraperService()
classifier = ClassificationService()
classifier = ClassificationService() # Now works without args
discovery = DiscoveryService()
# --- Pydantic Models ---
@@ -58,33 +58,6 @@ class AnalysisRequest(BaseModel):
company_id: int
force_scrape: bool = False
class ContactBase(BaseModel):
gender: str
title: str = ""
first_name: str
last_name: str
email: str
job_title: str
language: str = "De"
role: str
status: str = ""
is_primary: bool = False
class ContactCreate(ContactBase):
company_id: int
class ContactUpdate(BaseModel):
gender: Optional[str] = None
title: Optional[str] = None
first_name: Optional[str] = None
last_name: Optional[str] = None
email: Optional[str] = None
job_title: Optional[str] = None
language: Optional[str] = None
role: Optional[str] = None
status: Optional[str] = None
is_primary: Optional[bool] = None
# --- Events ---
@app.on_event("startup")
def on_startup():
@@ -115,8 +88,6 @@ def list_companies(
query = query.filter(Company.name.ilike(f"%{search}%"))
total = query.count()
# Sorting Logic
if sort_by == "updated_desc":
query = query.order_by(Company.updated_at.desc())
elif sort_by == "created_desc":
@@ -125,7 +96,6 @@ def list_companies(
query = query.order_by(Company.name.asc())
items = query.offset(skip).limit(limit).all()
return {"total": total, "items": items}
except Exception as e:
logger.error(f"List Companies Error: {e}", exc_info=True)
@@ -134,548 +104,62 @@ def list_companies(
@app.get("/api/companies/{company_id}")
def get_company(company_id: int, db: Session = Depends(get_db)):
company = db.query(Company).options(
joinedload(Company.signals),
joinedload(Company.enrichment_data),
joinedload(Company.contacts)
).filter(Company.id == company_id).first()
if not company:
raise HTTPException(status_code=404, detail="Company not found")
raise HTTPException(404, detail="Company not found")
return company
@app.post("/api/companies/bulk")
def bulk_import_names(req: BulkImportRequest, db: Session = Depends(get_db)):
"""
Quick import for testing. Just a list of names.
"""
logger.info(f"Starting bulk import of {len(req.names)} names.")
try:
added = 0
skipped = 0
# Deduplicator init
try:
dedup = Deduplicator(db)
logger.info("Deduplicator initialized.")
except Exception as e:
logger.warning(f"Deduplicator init failed: {e}")
dedup = None
for name in req.names:
clean_name = name.strip()
if not clean_name: continue
# 1. Simple Deduplication (Exact Name)
exists = db.query(Company).filter(Company.name == clean_name).first()
if exists:
skipped += 1
continue
# 2. Smart Deduplication (if available)
if dedup:
matches = dedup.find_duplicates({"name": clean_name})
if matches and matches[0]['score'] > 95:
logger.info(f"Duplicate found for {clean_name}: {matches[0]['name']}")
skipped += 1
continue
# 3. Create
new_comp = Company(
name=clean_name,
status="NEW" # This triggered the error before
)
db.add(new_comp)
added += 1
db.commit()
logger.info(f"Import success. Added: {added}, Skipped: {skipped}")
return {"added": added, "skipped": skipped}
except Exception as e:
logger.error(f"Bulk Import Failed: {e}", exc_info=True)
db.rollback()
raise HTTPException(status_code=500, detail=str(e))
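# Usage sketch (not part of this module): a request body for the quick
# name-only import above. The host and the `requests` dependency are
# assumptions for illustration; the response shape matches the return above.
#
#   import requests
#   resp = requests.post(
#       "http://localhost:8000/api/companies/bulk",
#       json={"names": ["Beispiel Robotik GmbH", "ACME Automation AG"]},
#   )
#   print(resp.json())  # e.g. {"added": 2, "skipped": 0}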
@app.get("/api/robotics/categories")
def list_robotics_categories(db: Session = Depends(get_db)):
"""Lists all configured robotics categories."""
return db.query(RoboticsCategory).all()
class CategoryUpdate(BaseModel):
description: str
reasoning_guide: str
@app.put("/api/robotics/categories/{id}")
def update_robotics_category(id: int, cat: CategoryUpdate, db: Session = Depends(get_db)):
"""Updates a robotics category definition."""
category = db.query(RoboticsCategory).filter(RoboticsCategory.id == id).first()
if not category:
raise HTTPException(404, "Category not found")
category.description = cat.description
category.reasoning_guide = cat.reasoning_guide
db.commit()
return category
@app.post("/api/enrich/discover")
def discover_company(req: AnalysisRequest, background_tasks: BackgroundTasks, db: Session = Depends(get_db)):
"""
Triggers Stage 1: Discovery (Website Search + Wikipedia Search)
"""
try:
company = db.query(Company).filter(Company.id == req.company_id).first()
if not company:
raise HTTPException(404, "Company not found")
# Run in background
background_tasks.add_task(run_discovery_task, company.id)
return {"status": "queued", "message": f"Discovery started for {company.name}"}
except Exception as e:
logger.error(f"Discovery Error: {e}")
raise HTTPException(status_code=500, detail=str(e))
@app.post("/api/companies/{company_id}/override/wiki")
def override_wiki_url(company_id: int, url: str = Query(...), db: Session = Depends(get_db)):
"""
Manually sets the Wikipedia URL for a company and triggers re-extraction.
Locks the data against auto-discovery.
"""
company = db.query(Company).filter(Company.id == company_id).first()
if not company:
raise HTTPException(404, "Company not found")
logger.info(f"Manual Override for {company.name}: Setting Wiki URL to {url}")
# Update or create EnrichmentData entry
existing_wiki = db.query(EnrichmentData).filter(
EnrichmentData.company_id == company.id,
EnrichmentData.source_type == "wikipedia"
).first()
# Extract data immediately
wiki_data = {"url": url}
if url and url != "k.A.":
try:
wiki_data = discovery.extract_wikipedia_data(url)
wiki_data['url'] = url # Ensure URL is correct
except Exception as e:
logger.error(f"Extraction failed for manual URL: {e}")
wiki_data["error"] = str(e)
if not existing_wiki:
db.add(EnrichmentData(
company_id=company.id,
source_type="wikipedia",
content=wiki_data,
is_locked=True
))
else:
existing_wiki.content = wiki_data
existing_wiki.updated_at = datetime.utcnow()
existing_wiki.is_locked = True # LOCK IT
existing_wiki.wiki_verified_empty = False # It's no longer empty
db.commit()
# Return after the if/else so both the create and update branches report the extracted data
return {"status": "updated", "data": wiki_data}
@app.post("/api/companies/{company_id}/wiki_mark_empty")
def mark_wiki_empty(company_id: int, db: Session = Depends(get_db)):
"""
Marks a company as having no valid Wikipedia entry after manual review.
Creates a locked, empty Wikipedia enrichment entry.
"""
company = db.query(Company).filter(Company.id == company_id).first()
if not company:
raise HTTPException(404, "Company not found")
logger.info(f"Manual override for {company.name}: Marking Wikipedia as verified empty.")
existing_wiki = db.query(EnrichmentData).filter(
EnrichmentData.company_id == company.id,
EnrichmentData.source_type == "wikipedia"
).first()
empty_wiki_data = {"url": "k.A.", "title": "k.A.", "first_paragraph": "k.A.", "error": "Manually marked as empty"}
if not existing_wiki:
db.add(EnrichmentData(
company_id=company.id,
source_type="wikipedia",
content=empty_wiki_data,
is_locked=True,
wiki_verified_empty=True
))
else:
existing_wiki.content = empty_wiki_data
existing_wiki.updated_at = datetime.utcnow()
existing_wiki.is_locked = True # LOCK IT
existing_wiki.wiki_verified_empty = True # Mark as empty
db.commit()
return {"status": "updated", "wiki_verified_empty": True}
@app.post("/api/companies/{company_id}/override/website")
def override_website_url(company_id: int, url: str = Query(...), db: Session = Depends(get_db)):
"""
Manually sets the Website URL for a company.
Clears existing scrape data to force a fresh analysis on next run.
"""
company = db.query(Company).filter(Company.id == company_id).first()
if not company:
raise HTTPException(404, "Company not found")
logger.info(f"Manual Override for {company.name}: Setting Website to {url}")
company.website = url
# Remove old scrape data since URL changed
db.query(EnrichmentData).filter(
EnrichmentData.company_id == company.id,
EnrichmentData.source_type == "website_scrape"
).delete()
db.commit()
return {"status": "updated", "website": url}
@app.post("/api/companies/{company_id}/override/impressum")
def override_impressum_url(company_id: int, url: str = Query(...), db: Session = Depends(get_db)):
"""
Manually sets the Impressum URL for a company and triggers re-extraction.
"""
company = db.query(Company).filter(Company.id == company_id).first()
if not company:
raise HTTPException(404, "Company not found")
logger.info(f"Manual Override for {company.name}: Setting Impressum URL to {url}")
# 1. Scrape Impressum immediately
impressum_data = scraper._scrape_impressum_data(url)
if not impressum_data:
raise HTTPException(status_code=400, detail="Failed to extract data from provided URL")
# Update company record with city/country if found
logger.info(f"override_impressum_url: Scraped impressum_data for {company.name}: City={impressum_data.get('city')}, Country_code={impressum_data.get('country_code')}")
if city_val := impressum_data.get("city"):
logger.info(f"override_impressum_url: Updating company.city from '{company.city}' to '{city_val}'")
company.city = city_val
if country_val := impressum_data.get("country_code"):
logger.info(f"override_impressum_url: Updating company.country from '{company.country}' to '{country_val}'")
company.country = country_val
logger.info(f"override_impressum_url: Company object after updates (before commit): City='{company.city}', Country='{company.country}'")
# 2. Find existing scrape data or create new
existing_scrape = db.query(EnrichmentData).filter(
EnrichmentData.company_id == company.id,
EnrichmentData.source_type == "website_scrape"
).first()
if not existing_scrape:
# Create minimal scrape entry and lock it
db.add(EnrichmentData(
company_id=company.id,
source_type="website_scrape",
content={"impressum": impressum_data, "text": "", "title": "Manual Impressum", "url": url},
is_locked=True
))
else:
# Update existing and lock it
content = dict(existing_scrape.content) if existing_scrape.content else {}
content["impressum"] = impressum_data
existing_scrape.content = content
existing_scrape.updated_at = datetime.utcnow()
existing_scrape.is_locked = True
db.commit()
logger.info(f"override_impressum_url: Commit successful. Company ID {company.id} updated.")
return {"status": "updated", "data": impressum_data}
# --- Contact Routes ---
@app.post("/api/contacts")
def create_contact(contact: ContactCreate, db: Session = Depends(get_db)):
"""Creates a new contact and handles primary contact logic."""
if contact.is_primary:
db.query(Contact).filter(Contact.company_id == contact.company_id).update({"is_primary": False})
db_contact = Contact(**contact.dict())
db.add(db_contact)
db.commit()
db.refresh(db_contact)
return db_contact
# --- Industry Routes ---
class IndustryCreate(BaseModel):
name: str
description: Optional[str] = None
is_focus: bool = False
primary_category_id: Optional[int] = None
class IndustryUpdate(BaseModel):
name: Optional[str] = None
description: Optional[str] = None
is_focus: Optional[bool] = None
primary_category_id: Optional[int] = None
@app.get("/api/industries")
def list_industries(db: Session = Depends(get_db)):
return db.query(Industry).all()
@app.post("/api/industries")
def create_industry(ind: IndustryCreate, db: Session = Depends(get_db)):
# 1. Prepare data
ind_data = ind.dict()
base_name = ind_data['name']
@app.post("/api/enrich/discover")
def discover_company(req: AnalysisRequest, background_tasks: BackgroundTasks, db: Session = Depends(get_db)):
company = db.query(Company).filter(Company.id == req.company_id).first()
if not company: raise HTTPException(404, "Company not found")
background_tasks.add_task(run_discovery_task, company.id)
return {"status": "queued"}
@app.post("/api/enrich/analyze")
def analyze_company(req: AnalysisRequest, background_tasks: BackgroundTasks, db: Session = Depends(get_db)):
company = db.query(Company).filter(Company.id == req.company_id).first()
if not company: raise HTTPException(404, "Company not found")
# 2. Check for duplicate name
existing = db.query(Industry).filter(Industry.name == base_name).first()
if existing:
# Auto-increment name if duplicated
counter = 1
while db.query(Industry).filter(Industry.name == f"{base_name} ({counter})").first():
counter += 1
ind_data['name'] = f"{base_name} ({counter})"
if not company.website or company.website == "k.A.":
return {"error": "No website to analyze. Run Discovery first."}
# 3. Create
db_ind = Industry(**ind_data)
db.add(db_ind)
db.commit()
db.refresh(db_ind)
return db_ind
@app.put("/api/industries/{id}")
def update_industry(id: int, ind: IndustryUpdate, db: Session = Depends(get_db)):
db_ind = db.query(Industry).filter(Industry.id == id).first()
if not db_ind:
raise HTTPException(404, "Industry not found")
for key, value in ind.dict(exclude_unset=True).items():
setattr(db_ind, key, value)
db.commit()
db.refresh(db_ind)
return db_ind
@app.delete("/api/industries/{id}")
def delete_industry(id: int, db: Session = Depends(get_db)):
db_ind = db.query(Industry).filter(Industry.id == id).first()
if not db_ind:
raise HTTPException(404, "Industry not found")
db.delete(db_ind)
db.commit()
return {"status": "deleted"}
# --- Job Role Mapping Routes ---
class JobRoleMappingCreate(BaseModel):
pattern: str
role: str
@app.get("/api/job_roles")
def list_job_roles(db: Session = Depends(get_db)):
return db.query(JobRoleMapping).all()
@app.post("/api/job_roles")
def create_job_role(mapping: JobRoleMappingCreate, db: Session = Depends(get_db)):
db_mapping = JobRoleMapping(**mapping.dict())
db.add(db_mapping)
db.commit()
db.refresh(db_mapping)
return db_mapping
@app.delete("/api/job_roles/{id}")
def delete_job_role(id: int, db: Session = Depends(get_db)):
db_mapping = db.query(JobRoleMapping).filter(JobRoleMapping.id == id).first()
if not db_mapping:
raise HTTPException(404, "Mapping not found")
db.delete(db_mapping)
db.commit()
return {"status": "deleted"}
@app.put("/api/contacts/{contact_id}")
def update_contact(contact_id: int, contact: ContactUpdate, db: Session = Depends(get_db)):
"""Updates an existing contact."""
db_contact = db.query(Contact).filter(Contact.id == contact_id).first()
if not db_contact:
raise HTTPException(404, "Contact not found")
update_data = contact.dict(exclude_unset=True)
if update_data.get("is_primary"):
db.query(Contact).filter(Contact.company_id == db_contact.company_id).update({"is_primary": False})
for key, value in update_data.items():
setattr(db_contact, key, value)
db.commit()
db.refresh(db_contact)
return db_contact
@app.delete("/api/contacts/{contact_id}")
def delete_contact(contact_id: int, db: Session = Depends(get_db)):
"""Deletes a contact."""
db_contact = db.query(Contact).filter(Contact.id == contact_id).first()
if not db_contact:
raise HTTPException(404, "Contact not found")
db.delete(db_contact)
db.commit()
return {"status": "deleted"}
@app.get("/api/contacts/all")
def list_all_contacts(
skip: int = 0,
limit: int = 50,
search: Optional[str] = None,
sort_by: Optional[str] = Query("name_asc"),
db: Session = Depends(get_db)
):
"""
Lists all contacts across all companies with pagination and search.
"""
query = db.query(Contact).join(Company)
if search:
search_term = f"%{search}%"
query = query.filter(
(Contact.first_name.ilike(search_term)) |
(Contact.last_name.ilike(search_term)) |
(Contact.email.ilike(search_term)) |
(Company.name.ilike(search_term))
)
total = query.count()
# Sorting Logic
if sort_by == "updated_desc":
query = query.order_by(Contact.updated_at.desc())
elif sort_by == "created_desc":
query = query.order_by(Contact.id.desc())
else: # Default: name_asc
query = query.order_by(Contact.last_name.asc(), Contact.first_name.asc())
contacts = query.offset(skip).limit(limit).all()
# Enrich with Company Name for the frontend list
result = []
for c in contacts:
c_dict = {k: v for k, v in c.__dict__.items() if not k.startswith('_')}
c_dict['company_name'] = c.company.name if c.company else "Unknown"
result.append(c_dict)
return {"total": total, "items": result}
class BulkContactImportItem(BaseModel):
company_name: str
first_name: str
last_name: str
email: Optional[str] = None
job_title: Optional[str] = None
role: Optional[str] = "Operativer Entscheider"
gender: Optional[str] = "männlich"
class BulkContactImportRequest(BaseModel):
contacts: List[BulkContactImportItem]
@app.post("/api/contacts/bulk")
def bulk_import_contacts(req: BulkContactImportRequest, db: Session = Depends(get_db)):
"""
Bulk imports contacts.
Matches Company by Name (creates if missing).
Dedupes Contact by Email.
"""
logger.info(f"Starting bulk contact import: {len(req.contacts)} items")
stats = {"added": 0, "skipped": 0, "companies_created": 0}
for item in req.contacts:
if not item.company_name: continue
# 1. Find or Create Company
company = db.query(Company).filter(Company.name.ilike(item.company_name.strip())).first()
if not company:
company = Company(name=item.company_name.strip(), status="NEW")
db.add(company)
db.commit() # Commit to get ID
db.refresh(company)
stats["companies_created"] += 1
# 2. Check for Duplicate Contact (by Email)
if item.email:
exists = db.query(Contact).filter(Contact.email == item.email.strip()).first()
if exists:
stats["skipped"] += 1
continue
# 3. Create Contact
new_contact = Contact(
company_id=company.id,
first_name=item.first_name,
last_name=item.last_name,
email=item.email,
job_title=item.job_title,
role=item.role,
gender=item.gender,
status="Init" # Default status
)
db.add(new_contact)
stats["added"] += 1
db.commit()
return stats
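# Usage sketch (illustrative only): one possible payload for the bulk contact
# import above. Field names follow BulkContactImportItem; the host and values
# are made up.
#
#   import requests
#   payload = {"contacts": [{
#       "company_name": "Beispiel Robotik GmbH",
#       "first_name": "Max",
#       "last_name": "Mustermann",
#       "email": "max.mustermann@example.com",
#       "job_title": "Leiter Produktion",
#   }]}
#   resp = requests.post("http://localhost:8000/api/contacts/bulk", json=payload)
#   # resp.json() -> e.g. {"added": 1, "skipped": 0, "companies_created": 1}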
@app.post("/api/enrichment/{company_id}/{source_type}/lock")
def lock_enrichment(company_id: int, source_type: str, locked: bool = Query(...), db: Session = Depends(get_db)):
"""
Toggles the lock status of a specific enrichment data type (e.g. 'website_scrape', 'wikipedia').
"""
entry = db.query(EnrichmentData).filter(
EnrichmentData.company_id == company_id,
EnrichmentData.source_type == source_type
).first()
if not entry:
raise HTTPException(404, "Enrichment data not found")
entry.is_locked = locked
db.commit()
return {"status": "updated", "is_locked": locked}
background_tasks.add_task(run_analysis_task, company.id)
return {"status": "queued"}
def run_discovery_task(company_id: int):
# New Session for Background Task
from .database import SessionLocal
db = SessionLocal()
try:
company = db.query(Company).filter(Company.id == company_id).first()
if not company: return
logger.info(f"Running Discovery Task for {company.name}")
# 1. Website Search (Always try if missing)
# 1. Website Search
if not company.website or company.website == "k.A.":
found_url = discovery.find_company_website(company.name, company.city)
if found_url and found_url != "k.A.":
company.website = found_url
logger.info(f"-> Found URL: {found_url}")
# 2. Wikipedia Search & Extraction
# Check if locked
# 2. Wikipedia Search
existing_wiki = db.query(EnrichmentData).filter(
EnrichmentData.company_id == company.id,
EnrichmentData.source_type == "wikipedia"
).first()
if existing_wiki and existing_wiki.is_locked:
logger.info(f"Skipping Wiki Discovery for {company.name} - Data is LOCKED.")
else:
# Pass available info for better validation
current_website = company.website if company.website and company.website != "k.A." else None
wiki_url = discovery.find_wikipedia_url(company.name, website=current_website, city=company.city)
company.last_wiki_search_at = datetime.utcnow()
if not existing_wiki or not existing_wiki.is_locked:
wiki_url = discovery.find_wikipedia_url(company.name, website=company.website, city=company.city)
wiki_data = discovery.extract_wikipedia_data(wiki_url) if wiki_url and wiki_url != "k.A." else {"url": wiki_url}
wiki_data = {"url": wiki_url}
if wiki_url and wiki_url != "k.A.":
logger.info(f"Extracting full data from Wikipedia for {company.name}...")
wiki_data = discovery.extract_wikipedia_data(wiki_url)
if not existing_wiki:
db.add(EnrichmentData(company_id=company.id, source_type="wikipedia", content=wiki_data))
else:
@@ -686,35 +170,12 @@ def run_discovery_task(company_id: int):
company.status = "DISCOVERED"
db.commit()
logger.info(f"Discovery finished for {company.id}")
except Exception as e:
logger.error(f"Background Task Error: {e}", exc_info=True)
db.rollback()
logger.error(f"Discovery Task Error: {e}", exc_info=True)
finally:
db.close()
@app.post("/api/enrich/analyze")
def analyze_company(req: AnalysisRequest, background_tasks: BackgroundTasks, db: Session = Depends(get_db)):
company = db.query(Company).filter(Company.id == req.company_id).first()
if not company:
raise HTTPException(404, "Company not found")
if not company.website or company.website == "k.A.":
return {"error": "No website to analyze. Run Discovery first."}
# FORCE SCRAPE LOGIC
# Respect Locked Data: Only delete if not locked.
db.query(EnrichmentData).filter(
EnrichmentData.company_id == company.id,
EnrichmentData.source_type == "website_scrape",
EnrichmentData.is_locked == False
).delete()
db.commit()
background_tasks.add_task(run_analysis_task, company.id, company.website)
return {"status": "queued"}
def run_analysis_task(company_id: int, url: str):
def run_analysis_task(company_id: int):
from .database import SessionLocal
db = SessionLocal()
try:
@@ -723,158 +184,42 @@ def run_analysis_task(company_id: int, url: str):
logger.info(f"Running Analysis Task for {company.name}")
# 1. Scrape Website OR Use Locked Data
scrape_result = {}
# 1. Scrape Website (if not locked)
existing_scrape = db.query(EnrichmentData).filter(
EnrichmentData.company_id == company.id,
EnrichmentData.source_type == "website_scrape"
).first()
if existing_scrape and existing_scrape.is_locked:
logger.info(f"Using LOCKED scrape data for {company.name}")
scrape_result = dict(existing_scrape.content) # Copy dict
# Always sync city/country from the locked impressum data to the company record
if "impressum" in scrape_result and scrape_result["impressum"]:
impressum_city = scrape_result["impressum"].get("city")
impressum_country = scrape_result["impressum"].get("country_code")
logger.info(f"Analysis task (locked data): Impressum found. City='{impressum_city}', Country='{impressum_country}'")
if impressum_city and company.city != impressum_city:
logger.info(f"Analysis task: Updating company.city from '{company.city}' to '{impressum_city}'")
company.city = impressum_city
if impressum_country and company.country != impressum_country:
logger.info(f"Analysis task: Updating company.country from '{company.country}' to '{impressum_country}'")
company.country = impressum_country
text_val = scrape_result.get("text")
text_len = len(text_val) if text_val else 0
logger.info(f"Locked data keys: {list(scrape_result.keys())}, Text length: {text_len}")
# AUTO-FIX: If locked data (e.g. Manual Impressum) has no text, fetch main website text
if text_len < 100:
logger.info(f"Locked data missing text (len={text_len}). Fetching content from {url}...")
try:
fresh_scrape = scraper.scrape_url(url)
except Exception as e:
logger.error(f"Fresh scrape failed: {e}", exc_info=True)
fresh_scrape = {}
logger.info(f"Fresh scrape result keys: {list(fresh_scrape.keys())}")
if "text" in fresh_scrape and len(fresh_scrape["text"]) > 100:
logger.info(f"Fresh scrape successful. Text len: {len(fresh_scrape['text'])}")
# Update local dict for current processing
scrape_result["text"] = fresh_scrape["text"]
scrape_result["title"] = fresh_scrape.get("title", "")
# Update DB (Merge into existing content)
updated_content = dict(existing_scrape.content)
updated_content["text"] = fresh_scrape["text"]
updated_content["title"] = fresh_scrape.get("title", "")
existing_scrape.content = updated_content
existing_scrape.updated_at = datetime.utcnow()
# Do not commit here; the task commits once the remaining updates are applied
logger.info("Updated locked record with fresh website text in session.")
else:
logger.warning(f"Fresh scrape returned insufficient text. Error: {fresh_scrape.get('error')}")
else:
# Standard Scrape
scrape_result = scraper.scrape_url(url)
# Update company fields from impressum if found during scrape
if "impressum" in scrape_result and scrape_result["impressum"]:
impressum_city = scrape_result["impressum"].get("city")
impressum_country = scrape_result["impressum"].get("country_code")
logger.info(f"Analysis task (standard scrape): Impressum found. City='{impressum_city}', Country='{impressum_country}'")
if impressum_city and company.city != impressum_city:
logger.info(f"Analysis task: Updating company.city from '{company.city}' to '{impressum_city}'")
company.city = impressum_city
if impressum_country and company.country != impressum_country:
logger.info(f"Analysis task: Updating company.country from '{company.country}' to '{impressum_country}'")
company.country = impressum_country
# Save Scrape Data
if "text" in scrape_result and scrape_result["text"]:
if not existing_scrape:
db.add(EnrichmentData(company_id=company.id, source_type="website_scrape", content=scrape_result))
else:
existing_scrape.content = scrape_result
existing_scrape.updated_at = datetime.utcnow()
elif "error" in scrape_result:
logger.warning(f"Scraping failed for {company.name}: {scrape_result['error']}")
# 2. Classify Robotics Potential
text_content = scrape_result.get("text")
logger.info(f"Preparing classification. Text content length: {len(text_content) if text_content else 0}")
if text_content and len(text_content) > 100:
logger.info(f"Starting classification for {company.name}...")
analysis = classifier.analyze_robotics_potential(
company_name=company.name,
website_text=text_content
)
if "error" in analysis:
logger.error(f"Robotics classification failed for {company.name}: {analysis['error']}")
if not existing_scrape or not existing_scrape.is_locked:
from .services.scraping import ScraperService
scrape_res = ScraperService().scrape_url(company.website)
if not existing_scrape:
db.add(EnrichmentData(company_id=company.id, source_type="website_scrape", content=scrape_res))
else:
industry = analysis.get("industry")
if industry:
company.industry_ai = industry
db.query(Signal).filter(Signal.company_id == company.id).delete()
potentials = analysis.get("potentials", {})
for signal_type, data in potentials.items():
new_signal = Signal(
company_id=company.id,
signal_type=f"robotics_{signal_type}_potential",
confidence=data.get("score", 0),
value="High" if data.get("score", 0) > 70 else "Medium" if data.get("score", 0) > 30 else "Low",
proof_text=data.get("reason")
)
db.add(new_signal)
existing_analysis = db.query(EnrichmentData).filter(
EnrichmentData.company_id == company.id,
EnrichmentData.source_type == "ai_analysis"
).first()
if not existing_analysis:
db.add(EnrichmentData(company_id=company.id, source_type="ai_analysis", content=analysis))
else:
existing_analysis.content = analysis
existing_analysis.updated_at = datetime.utcnow()
company.status = "ENRICHED"
company.last_classification_at = datetime.utcnow()
logger.info(f"Robotics analysis complete for {company.name}.")
else:
logger.warning(f"Skipping classification for {company.name}: Insufficient text content (len={len(text_content) if text_content else 0})")
existing_scrape.content = scrape_res
existing_scrape.updated_at = datetime.utcnow()
db.commit()
# 2. Classify Industry & Metrics
# IMPORTANT: call the renamed method and pass the db session
classifier.classify_company_potential(company, db)
company.status = "ENRICHED"
db.commit()
logger.info(f"Analysis finished for {company.id}")
logger.info(f"Analysis complete for {company.name}")
except Exception as e:
logger.error(f"Analyze Task Error: {e}", exc_info=True)
db.rollback()
finally:
db.close()
# --- Serve Frontend ---
# Priority 1: Container Path (outside of /app volume)
static_path = "/frontend_static"
# Priority 2: Local Dev Path (relative to this file)
if not os.path.exists(static_path):
static_path = os.path.join(os.path.dirname(__file__), "../static")
if os.path.exists(static_path):
logger.info(f"Serving frontend from {static_path}")
app.mount("/", StaticFiles(directory=static_path, html=True), name="static")
else:
logger.warning(f"Frontend static files not found at {static_path} or local fallback.")
if __name__ == "__main__":
import uvicorn
uvicorn.run("backend.app:app", host="0.0.0.0", port=8000, reload=True)
uvicorn.run("backend.app:app", host="0.0.0.0", port=8000, reload=True)