[2f988f42] fix(company-explorer): Implement robust quantitative potential and atomic opener generation

- Refactored ClassificationService for two-stage metric extraction (direct area and proxy).
- Enhanced MetricParser for targeted value matching and robust number parsing.
- Implemented persona-specific 'Atomic Opener' generation using segmented pains.
- Fixed logging configuration and Pydantic response models.
- Added dedicated debugging script and updated documentation (GEMINI.md, MIGRATION_PLAN.md).
This commit is contained in:
@@ -32,7 +32,7 @@ setup_logging()
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from .database import init_db, get_db, Company, Signal, EnrichmentData, RoboticsCategory, Contact, Industry, JobRoleMapping, ReportedMistake, MarketingMatrix, Persona
|
||||
from .database import init_db, get_db, Company, Signal, EnrichmentData, RoboticsCategory, Contact, Industry, JobRoleMapping, ReportedMistake, MarketingMatrix, Persona, RawJobTitle
|
||||
from .services.deduplication import Deduplicator
|
||||
from .services.discovery import DiscoveryService
|
||||
from .services.scraping import ScraperService
|
||||
@@ -101,6 +101,71 @@ class ProvisioningResponse(BaseModel):
|
||||
opener_secondary: Optional[str] = None # Secondary opener (Service/Logistics)
|
||||
texts: Dict[str, Optional[str]] = {}
|
||||
|
||||
class IndustryDetails(BaseModel):
    """Industry-level context returned as part of a company detail response.

    Every text field is optional and defaults to ``None``;
    ``ops_focus_secondary`` defaults to ``False``.
    """

    # ``from_attributes`` lets ``model_validate`` read field values from plain
    # object attributes (e.g. an ORM row) instead of requiring a dict.
    # Declared through ``model_config`` because the nested ``class Config``
    # form is deprecated in Pydantic v2.
    model_config = {"from_attributes": True}

    pains: Optional[str] = None
    gains: Optional[str] = None
    priority: Optional[str] = None
    notes: Optional[str] = None
    ops_focus_secondary: bool = False
|
||||
|
||||
class ContactResponse(BaseModel):
    """Serialized view of a single contact inside a company detail response.

    ``id`` and ``is_primary`` are required; all other fields are optional and
    default to ``None``.
    """

    # ``from_attributes`` lets ``model_validate`` read field values from plain
    # object attributes (e.g. an ORM row) instead of requiring a dict.
    # Declared through ``model_config`` because the nested ``class Config``
    # form is deprecated in Pydantic v2.
    model_config = {"from_attributes": True}

    id: int
    first_name: Optional[str] = None
    last_name: Optional[str] = None
    job_title: Optional[str] = None
    role: Optional[str] = None
    email: Optional[str] = None
    is_primary: bool
|
||||
|
||||
class EnrichmentDataResponse(BaseModel):
    """Serialized view of one enrichment record inside a company detail response.

    All fields are required (none declares a default).
    """

    # ``from_attributes`` lets ``model_validate`` read field values from plain
    # object attributes (e.g. an ORM row) instead of requiring a dict.
    # Declared through ``model_config`` because the nested ``class Config``
    # form is deprecated in Pydantic v2.
    model_config = {"from_attributes": True}

    id: int
    source_type: str
    content: Dict[str, Any]
    is_locked: bool
    wiki_verified_empty: bool
    updated_at: datetime
|
||||
|
||||
class CompanyDetailsResponse(BaseModel):
    """Response schema for the ``GET /api/companies/{company_id}`` endpoint.

    ``id``, ``name`` and ``status`` are required. Every other scalar field is
    optional and defaults to ``None``; the list relations default to empty.
    """

    # ``from_attributes`` lets ``model_validate`` read field values from plain
    # object attributes (e.g. an ORM row) instead of requiring a dict.
    # Declared through ``model_config`` because the nested ``class Config``
    # form is deprecated in Pydantic v2.
    model_config = {"from_attributes": True}

    id: int
    name: str
    website: Optional[str] = None
    city: Optional[str] = None
    country: Optional[str] = None
    industry_ai: Optional[str] = None
    status: str

    # Metrics
    calculated_metric_name: Optional[str] = None
    calculated_metric_value: Optional[float] = None
    calculated_metric_unit: Optional[str] = None
    standardized_metric_value: Optional[float] = None
    standardized_metric_unit: Optional[str] = None
    metric_source: Optional[str] = None
    metric_proof_text: Optional[str] = None
    metric_source_url: Optional[str] = None
    metric_confidence: Optional[float] = None

    # Openers
    ai_opener: Optional[str] = None
    ai_opener_secondary: Optional[str] = None

    # Relations
    # ``industry_details`` is assigned explicitly by ``get_company`` after the
    # model has been validated from the company object.
    industry_details: Optional[IndustryDetails] = None
    # Pydantic copies mutable defaults per instance, so the literal ``[]``
    # defaults below are not shared between response objects.
    contacts: List[ContactResponse] = []
    enrichment_data: List[EnrichmentDataResponse] = []
|
||||
|
||||
# --- Events ---
|
||||
@app.on_event("startup")
|
||||
def on_startup():
|
||||
@@ -336,7 +401,7 @@ def export_companies_csv(db: Session = Depends(get_db), username: str = Depends(
|
||||
headers={"Content-Disposition": f"attachment; filename=company_export_{datetime.utcnow().strftime('%Y-%m-%d')}.csv"}
|
||||
)
|
||||
|
||||
@app.get("/api/companies/{company_id}")
|
||||
@app.get("/api/companies/{company_id}", response_model=CompanyDetailsResponse)
|
||||
def get_company(company_id: int, db: Session = Depends(get_db), username: str = Depends(authenticate_user)):
|
||||
company = db.query(Company).options(
|
||||
joinedload(Company.enrichment_data),
|
||||
@@ -350,28 +415,14 @@ def get_company(company_id: int, db: Session = Depends(get_db), username: str =
|
||||
if company.industry_ai:
|
||||
ind = db.query(Industry).filter(Industry.name == company.industry_ai).first()
|
||||
if ind:
|
||||
industry_details = {
|
||||
"pains": ind.pains,
|
||||
"gains": ind.gains,
|
||||
"priority": ind.priority,
|
||||
"notes": ind.notes,
|
||||
"ops_focus_secondary": ind.ops_focus_secondary
|
||||
}
|
||||
industry_details = IndustryDetails.model_validate(ind)
|
||||
|
||||
# HACK: Attach to response object (Pydantic would be cleaner, but this works for fast prototyping)
|
||||
# We convert to dict and append
|
||||
resp = company.__dict__.copy()
|
||||
resp["industry_details"] = industry_details
|
||||
# Handle SQLAlchemy internal state
|
||||
if "_sa_instance_state" in resp: del resp["_sa_instance_state"]
|
||||
# Handle relationships manually if needed, or let FastAPI encode the SQLAlchemy model + extra dict
|
||||
# Better: return a custom dict merging both
|
||||
# FastAPI will automatically serialize the 'company' ORM object into the
|
||||
# CompanyDetailsResponse schema. We just need to attach the extra 'industry_details'.
|
||||
response_data = CompanyDetailsResponse.model_validate(company)
|
||||
response_data.industry_details = industry_details
|
||||
|
||||
# Since we use joinedload, relationships are loaded.
|
||||
# Let's rely on FastAPI's ability to serialize the object, but we need to inject the extra field.
|
||||
# The safest way without changing Pydantic schemas everywhere is to return a dict.
|
||||
|
||||
return {**resp, "enrichment_data": company.enrichment_data, "contacts": company.contacts, "signals": company.signals}
|
||||
return response_data
|
||||
|
||||
@app.post("/api/companies")
|
||||
def create_company(company: CompanyCreate, db: Session = Depends(get_db), username: str = Depends(authenticate_user)):
|
||||
@@ -797,23 +848,21 @@ def run_analysis_task(company_id: int):
|
||||
db = SessionLocal()
|
||||
try:
|
||||
company = db.query(Company).filter(Company.id == company_id).first()
|
||||
if not company: return
|
||||
if not company:
|
||||
logger.error(f"Analysis Task: Company with ID {company_id} not found.")
|
||||
return
|
||||
|
||||
logger.info(f"Running Analysis Task for {company.name}")
|
||||
logger.info(f"--- [BACKGROUND TASK] Starting for {company.name} ---")
|
||||
|
||||
# --- 1. Scrape Website (if not locked) ---
|
||||
# Check for existing scrape data first
|
||||
existing_scrape = db.query(EnrichmentData).filter(
|
||||
EnrichmentData.company_id == company.id,
|
||||
EnrichmentData.source_type == "website_scrape"
|
||||
).first()
|
||||
|
||||
# If it doesn't exist or is not locked, we perform a scrape
|
||||
if not existing_scrape or not existing_scrape.is_locked:
|
||||
logger.info(f"Scraping website for {company.name}...")
|
||||
scrape_res = scraper.scrape_url(company.website) # Use singleton
|
||||
|
||||
# Now, either create new or update existing
|
||||
scrape_res = scraper.scrape_url(company.website)
|
||||
if not existing_scrape:
|
||||
db.add(EnrichmentData(company_id=company.id, source_type="website_scrape", content=scrape_res))
|
||||
logger.info("Created new website_scrape entry.")
|
||||
@@ -825,15 +874,16 @@ def run_analysis_task(company_id: int):
|
||||
else:
|
||||
logger.info("Website scrape is locked. Skipping.")
|
||||
|
||||
# 2. Classify Industry & Metrics
|
||||
# IMPORTANT: Using the new method name and passing db session
|
||||
# --- 2. Classify Industry & Metrics ---
|
||||
logger.info(f"Handing over to ClassificationService for {company.name}...")
|
||||
classifier.classify_company_potential(company, db)
|
||||
|
||||
company.status = "ENRICHED"
|
||||
db.commit()
|
||||
logger.info(f"Analysis complete for {company.name}")
|
||||
logger.info(f"--- [BACKGROUND TASK] Successfully finished for {company.name} ---")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Analyze Task Error: {e}", exc_info=True)
|
||||
logger.critical(f"--- [BACKGROUND TASK] CRITICAL ERROR for Company ID {company_id} ---", exc_info=True)
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user