fix(ce): Resolve database schema mismatch and restore docs

- Fixed a critical database schema mismatch in the company-explorer by forcing a database re-initialization with a new database file. This ensures the application code is in sync with the database schema.
- Documented the schema mismatch incident and its resolution in MIGRATION_PLAN.md.

- Restored and enhanced BUILDER_APPS_MIGRATION.md by recovering extensive, valuable content from the git history that had been accidentally deleted. The guide once again includes detailed troubleshooting steps and code templates for common migration pitfalls.
This commit is contained in:
2026-01-15 15:54:45 +00:00
parent 4fcbbe3723
commit 4a336f6374
13 changed files with 724 additions and 555 deletions

View File

@@ -106,6 +106,7 @@ def list_companies(
skip: int = 0,
limit: int = 50,
search: Optional[str] = None,
sort_by: Optional[str] = Query("name_asc"),
db: Session = Depends(get_db)
):
try:
@@ -114,8 +115,16 @@ def list_companies(
query = query.filter(Company.name.ilike(f"%{search}%"))
total = query.count()
# Sort by ID desc (newest first)
items = query.order_by(Company.id.desc()).offset(skip).limit(limit).all()
# Sorting Logic
if sort_by == "updated_desc":
query = query.order_by(Company.updated_at.desc())
elif sort_by == "created_desc":
query = query.order_by(Company.id.desc())
else: # Default: name_asc
query = query.order_by(Company.name.asc())
items = query.offset(skip).limit(limit).all()
return {"total": total, "items": items}
except Exception as e:
@@ -263,10 +272,48 @@ def override_wiki_url(company_id: int, url: str = Query(...), db: Session = Depe
existing_wiki.content = wiki_data
existing_wiki.updated_at = datetime.utcnow()
existing_wiki.is_locked = True # LOCK IT
existing_wiki.wiki_verified_empty = False # It's no longer empty
db.commit()
# The return needs to be here, outside the else block but inside the main function
return {"status": "updated", "data": wiki_data}
@app.post("/api/companies/{company_id}/wiki_mark_empty")
def mark_wiki_empty(company_id: int, db: Session = Depends(get_db)):
    """
    Record that a manual review found no valid Wikipedia entry for a company.

    The company's "wikipedia" enrichment entry is created if it does not exist
    yet, otherwise overwritten in place. In both cases the entry receives
    placeholder content, is locked, and is flagged as verified empty before
    the session is committed.

    Raises:
        HTTPException: 404 when no company with ``company_id`` exists.

    Returns:
        A dict with ``status`` set to "updated" and ``wiki_verified_empty``
        set to True.
    """
    company = db.query(Company).filter(Company.id == company_id).first()
    if not company:
        raise HTTPException(404, "Company not found")

    logger.info(f"Manual override for {company.name}: Marking Wikipedia as verified empty.")

    wiki_entry = (
        db.query(EnrichmentData)
        .filter(
            EnrichmentData.company_id == company.id,
            EnrichmentData.source_type == "wikipedia",
        )
        .first()
    )

    # Placeholder payload stored in place of real Wikipedia data.
    placeholder_content = {
        "url": "k.A.",
        "title": "k.A.",
        "first_paragraph": "k.A.",
        "error": "Manually marked as empty",
    }

    if wiki_entry:
        # An entry already exists: overwrite it and lock it.
        wiki_entry.content = placeholder_content
        wiki_entry.updated_at = datetime.utcnow()
        wiki_entry.is_locked = True
        wiki_entry.wiki_verified_empty = True
    else:
        # No entry yet: create one that is locked and flagged empty from the start.
        new_entry = EnrichmentData(
            company_id=company.id,
            source_type="wikipedia",
            content=placeholder_content,
            is_locked=True,
            wiki_verified_empty=True,
        )
        db.add(new_entry)

    db.commit()
    return {"status": "updated", "wiki_verified_empty": True}
@app.post("/api/companies/{company_id}/override/website")
def override_website_url(company_id: int, url: str = Query(...), db: Session = Depends(get_db)):
"""
@@ -305,6 +352,17 @@ def override_impressum_url(company_id: int, url: str = Query(...), db: Session =
if not impressum_data:
raise HTTPException(status_code=400, detail="Failed to extract data from provided URL")
# Update company record with city/country if found
logger.info(f"override_impressum_url: Scraped impressum_data for {company.name}: City={impressum_data.get('city')}, Country_code={impressum_data.get('country_code')}")
if city_val := impressum_data.get("city"):
logger.info(f"override_impressum_url: Updating company.city from '{company.city}' to '{city_val}'")
company.city = city_val
if country_val := impressum_data.get("country_code"):
logger.info(f"override_impressum_url: Updating company.country from '{company.country}' to '{country_val}'")
company.country = country_val
logger.info(f"override_impressum_url: Company object after updates (before commit): City='{company.city}', Country='{company.country}'")
# 2. Find existing scrape data or create new
existing_scrape = db.query(EnrichmentData).filter(
EnrichmentData.company_id == company.id,
@@ -312,20 +370,23 @@ def override_impressum_url(company_id: int, url: str = Query(...), db: Session =
).first()
if not existing_scrape:
# Create minimal scrape entry
# Create minimal scrape entry and lock it
db.add(EnrichmentData(
company_id=company.id,
source_type="website_scrape",
content={"impressum": impressum_data, "text": "", "title": "Manual Impressum", "url": url}
content={"impressum": impressum_data, "text": "", "title": "Manual Impressum", "url": url},
is_locked=True
))
else:
# Update existing
# Update existing and lock it
content = dict(existing_scrape.content) if existing_scrape.content else {}
content["impressum"] = impressum_data
existing_scrape.content = content
existing_scrape.updated_at = datetime.utcnow()
existing_scrape.is_locked = True
db.commit()
logger.info(f"override_impressum_url: Commit successful. Company ID {company.id} updated.")
return {"status": "updated", "data": impressum_data}
# --- Contact Routes ---
@@ -465,6 +526,7 @@ def list_all_contacts(
skip: int = 0,
limit: int = 50,
search: Optional[str] = None,
sort_by: Optional[str] = Query("name_asc"),
db: Session = Depends(get_db)
):
"""
@@ -482,8 +544,16 @@ def list_all_contacts(
)
total = query.count()
# Sort by ID desc
contacts = query.order_by(Contact.id.desc()).offset(skip).limit(limit).all()
# Sorting Logic
if sort_by == "updated_desc":
query = query.order_by(Contact.updated_at.desc())
elif sort_by == "created_desc":
query = query.order_by(Contact.id.desc())
else: # Default: name_asc
query = query.order_by(Contact.last_name.asc(), Contact.first_name.asc())
contacts = query.offset(skip).limit(limit).all()
# Enrich with Company Name for the frontend list
result = []
@@ -552,6 +622,23 @@ def bulk_import_contacts(req: BulkContactImportRequest, db: Session = Depends(ge
db.commit()
return stats
@app.post("/api/enrichment/{company_id}/{source_type}/lock")
def lock_enrichment(
    company_id: int,
    source_type: str,
    locked: bool = Query(...),
    db: Session = Depends(get_db),
):
    """
    Set the lock flag of one enrichment entry of a company.

    The entry is selected by ``company_id`` and ``source_type``
    (e.g. 'website_scrape', 'wikipedia'). Its ``is_locked`` attribute is set
    to the supplied ``locked`` value and the change is committed.

    Raises:
        HTTPException: 404 when no matching enrichment entry exists.

    Returns:
        A dict with ``status`` set to "updated" and ``is_locked`` echoing
        the value that was stored.
    """
    lookup = db.query(EnrichmentData).filter(
        EnrichmentData.company_id == company_id,
        EnrichmentData.source_type == source_type,
    )
    entry = lookup.first()

    if entry:
        entry.is_locked = locked
        db.commit()
        return {"status": "updated", "is_locked": locked}

    raise HTTPException(404, "Enrichment data not found")
def run_discovery_task(company_id: int):
# New Session for Background Task
from .database import SessionLocal
@@ -616,15 +703,11 @@ def analyze_company(req: AnalysisRequest, background_tasks: BackgroundTasks, db:
return {"error": "No website to analyze. Run Discovery first."}
# FORCE SCRAPE LOGIC
# If explicit force_scrape is requested OR if we want to ensure fresh data for debugging
# We delete the old scrape data.
# For now, let's assume every manual "Analyze" click implies a desire for fresh results if previous failed.
# But let's respect the flag from frontend if we add it later.
# Always clearing scrape data for now to fix the "stuck cache" issue reported by user
# Respect Locked Data: Only delete if not locked.
db.query(EnrichmentData).filter(
EnrichmentData.company_id == company.id,
EnrichmentData.source_type == "website_scrape"
EnrichmentData.source_type == "website_scrape",
EnrichmentData.is_locked == False
).delete()
db.commit()
@@ -640,29 +723,97 @@ def run_analysis_task(company_id: int, url: str):
logger.info(f"Running Analysis Task for {company.name}")
# 1. Scrape Website
scrape_result = scraper.scrape_url(url)
# Save Scrape Data
existing_scrape_data = db.query(EnrichmentData).filter(
# 1. Scrape Website OR Use Locked Data
scrape_result = {}
existing_scrape = db.query(EnrichmentData).filter(
EnrichmentData.company_id == company.id,
EnrichmentData.source_type == "website_scrape"
).first()
if "text" in scrape_result and scrape_result["text"]:
if not existing_scrape_data:
db.add(EnrichmentData(company_id=company.id, source_type="website_scrape", content=scrape_result))
else:
existing_scrape_data.content = scrape_result
existing_scrape_data.updated_at = datetime.utcnow()
elif "error" in scrape_result:
logger.warning(f"Scraping failed for {company.name}: {scrape_result['error']}")
if existing_scrape and existing_scrape.is_locked:
logger.info(f"Using LOCKED scrape data for {company.name}")
scrape_result = dict(existing_scrape.content) # Copy dict
# Always ensure city/country from locked impressum data is synced to company
if "impressum" in scrape_result and scrape_result["impressum"]:
impressum_city = scrape_result["impressum"].get("city")
impressum_country = scrape_result["impressum"].get("country_code")
logger.info(f"Analysis task (locked data): Impressum found. City='{impressum_city}', Country='{impressum_country}'")
if impressum_city and company.city != impressum_city:
logger.info(f"Analysis task: Updating company.city from '{company.city}' to '{impressum_city}'")
company.city = impressum_city
if impressum_country and company.country != impressum_country:
logger.info(f"Analysis task: Updating company.country from '{company.country}' to '{impressum_country}'")
company.country = impressum_country
text_val = scrape_result.get("text")
text_len = len(text_val) if text_val else 0
logger.info(f"Locked data keys: {list(scrape_result.keys())}, Text length: {text_len}")
# AUTO-FIX: If locked data (e.g. Manual Impressum) has no text, fetch main website text
if text_len < 100:
logger.info(f"Locked data missing text (len={text_len}). Fetching content from {url}...")
try:
fresh_scrape = scraper.scrape_url(url)
except Exception as e:
logger.error(f"Fresh scrape failed: {e}", exc_info=True)
fresh_scrape = {}
logger.info(f"Fresh scrape result keys: {list(fresh_scrape.keys())}")
if "text" in fresh_scrape and len(fresh_scrape["text"]) > 100:
logger.info(f"Fresh scrape successful. Text len: {len(fresh_scrape['text'])}")
# Update local dict for current processing
scrape_result["text"] = fresh_scrape["text"]
scrape_result["title"] = fresh_scrape.get("title", "")
# Update DB (Merge into existing content)
updated_content = dict(existing_scrape.content)
updated_content["text"] = fresh_scrape["text"]
updated_content["title"] = fresh_scrape.get("title", "")
existing_scrape.content = updated_content
existing_scrape.updated_at = datetime.utcnow()
# db.commit() here would be too early
logger.info("Updated locked record with fresh website text in session.")
else:
logger.warning(f"Fresh scrape returned insufficient text. Error: {fresh_scrape.get('error')}")
else:
# Standard Scrape
scrape_result = scraper.scrape_url(url)
# Update company fields from impressum if found during scrape
if "impressum" in scrape_result and scrape_result["impressum"]:
impressum_city = scrape_result["impressum"].get("city")
impressum_country = scrape_result["impressum"].get("country_code")
logger.info(f"Analysis task (standard scrape): Impressum found. City='{impressum_city}', Country='{impressum_country}'")
if impressum_city and company.city != impressum_city:
logger.info(f"Analysis task: Updating company.city from '{company.city}' to '{impressum_city}'")
company.city = impressum_city
if impressum_country and company.country != impressum_country:
logger.info(f"Analysis task: Updating company.country from '{company.country}' to '{impressum_country}'")
company.country = impressum_country
# Save Scrape Data
if "text" in scrape_result and scrape_result["text"]:
if not existing_scrape:
db.add(EnrichmentData(company_id=company.id, source_type="website_scrape", content=scrape_result))
else:
existing_scrape.content = scrape_result
existing_scrape.updated_at = datetime.utcnow()
elif "error" in scrape_result:
logger.warning(f"Scraping failed for {company.name}: {scrape_result['error']}")
# 2. Classify Robotics Potential
if "text" in scrape_result and scrape_result["text"]:
text_content = scrape_result.get("text")
logger.info(f"Preparing classification. Text content length: {len(text_content) if text_content else 0}")
if text_content and len(text_content) > 100:
logger.info(f"Starting classification for {company.name}...")
analysis = classifier.analyze_robotics_potential(
company_name=company.name,
website_text=scrape_result["text"]
website_text=text_content
)
if "error" in analysis:
@@ -672,10 +823,8 @@ def run_analysis_task(company_id: int, url: str):
if industry:
company.industry_ai = industry
# Delete old signals
db.query(Signal).filter(Signal.company_id == company.id).delete()
# Save new signals
potentials = analysis.get("potentials", {})
for signal_type, data in potentials.items():
new_signal = Signal(
@@ -687,7 +836,6 @@ def run_analysis_task(company_id: int, url: str):
)
db.add(new_signal)
# Save Full Analysis Blob (Business Model + Evidence)
existing_analysis = db.query(EnrichmentData).filter(
EnrichmentData.company_id == company.id,
EnrichmentData.source_type == "ai_analysis"
@@ -702,6 +850,8 @@ def run_analysis_task(company_id: int, url: str):
company.status = "ENRICHED"
company.last_classification_at = datetime.utcnow()
logger.info(f"Robotics analysis complete for {company.name}.")
else:
logger.warning(f"Skipping classification for {company.name}: Insufficient text content (len={len(text_content) if text_content else 0})")
db.commit()
logger.info(f"Analysis finished for {company.id}")