This commit introduces the foundational elements for the new "Company Explorer" web application, marking a significant step away from the legacy Google Sheets / CLI system.

Key changes include:
- Project Structure: A new directory with separate (FastAPI) and (React/Vite) components.
- Data Persistence: Migration from Google Sheets to a local SQLite database () using SQLAlchemy.
- Core Utilities: Extraction and cleanup of essential helper functions (LLM wrappers, text utilities) into .
- Backend Services: , , for AI-powered analysis, and logic.
- Frontend UI: Basic React application with company table, import wizard, and dynamic inspector sidebar.
- Docker Integration: Updated and for multi-stage builds and sideloading.
- Deployment & Access: Integrated into central Nginx proxy and dashboard, accessible via .

Lessons Learned & Fixed during development:
- Frontend Asset Loading: Addressed issues with Vite's path and FastAPI's .
- TypeScript Configuration: Added and .
- Database Schema Evolution: Solved errors by forcing a new database file and correcting override.
- Logging: Implemented robust file-based logging ().

This new foundation provides a powerful and maintainable platform for future B2B robotics lead generation.
92 lines
2.8 KiB
Python
92 lines
2.8 KiB
Python
import sys
|
|
import os
|
|
import logging
|
|
from sqlalchemy.orm import Session
|
|
|
|
# Make both the repository root (home of the legacy package) and the
# Company Explorer root (home of ``backend``) importable. Order is preserved:
# the legacy root is appended first, then the Company Explorer root.
sys.path.extend(
    os.path.abspath(os.path.join(os.path.dirname(__file__), relative_root))
    for relative_root in (
        "../../../",  # Root for legacy
        "../../",  # Company Explorer Root
    )
)

# Legacy modules: without them there is nothing to migrate, so report the
# failure and stop with a non-zero exit status.
try:
    from _legacy_gsheets_system.google_sheet_handler import GoogleSheetHandler
    from _legacy_gsheets_system.config import Config as LegacyConfig
except ImportError as import_error:
    print(f"Failed to import legacy modules: {import_error}")
    sys.exit(1)

# New database layer (session factory, Company model, schema bootstrap).
from backend.database import SessionLocal, Company, init_db

# Root logging at INFO so progress messages of the migration are visible.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("LegacyImporter")
|
|
|
|
# Cell contents that mean "no data" once stringified and lower-cased.
# 'nan' and 'none' are what str() yields for a float NaN and for None.
_BLANK_CELL_VALUES = frozenset({"", "nan", "none"})


def _clean_cell(value):
    """Return *value* as a stripped string, or None when the cell is blank."""
    if value is None:
        return None
    text = str(value).strip()
    return None if text.lower() in _BLANK_CELL_VALUES else text


def migrate():
    """Import companies from the legacy "CRM_Accounts" sheet into the new DB.

    Steps:
      1. Load the legacy API keys and fetch the sheet via GoogleSheetHandler.
      2. Ensure the new tables exist and open a session.
      3. For every row with a usable 'CRM Name', insert a Company with status
         "IMPORTED" unless a company of that name already exists (either in
         the database or earlier in this same run).

    Rows are committed in batches of 100, so an error part-way through only
    rolls back the current, uncommitted batch. Failures are logged (with
    traceback) rather than raised; the function always returns None.

    Blank cells, including values that stringify to 'nan' / 'none', are
    stored as None instead of as literal text, and a blank country falls back
    to 'DE'.
    """
    logger.info("Starting migration from Google Sheets...")

    # 1. Connect to GSheets
    LegacyConfig.load_api_keys()  # Ensure keys are loaded
    try:
        handler = GoogleSheetHandler()
        df = handler.get_sheet_as_dataframe("CRM_Accounts")  # Assuming standard sheet name
    except Exception as e:
        # Top-level boundary of a one-off script: log with traceback and stop.
        logger.exception(f"GSheet Connection failed: {e}")
        return

    if df is None or df.empty:
        logger.warning("No data found in sheet.")
        return

    logger.info(f"Found {len(df)} rows. Transforming...")

    # 2. Connect to New DB
    init_db()  # Ensure tables exist
    db = SessionLocal()

    count = 0
    skipped = 0
    # Names handled in this run. Keeps the duplicate check correct for rows
    # that are still pending in the session, independent of whether the
    # session autoflushes before queries.
    seen_names = set()

    try:
        for _, row in df.iterrows():
            name = _clean_cell(row.get('CRM Name'))
            if name is None:
                continue

            # Check duplicate (simple check by name for migration)
            if name in seen_names:
                skipped += 1
                continue
            seen_names.add(name)

            if db.query(Company).filter(Company.name == name).first():
                skipped += 1
                continue

            # Create Company
            comp = Company(
                name=name,
                website=_clean_cell(row.get('CRM Website')),
                crm_id=_clean_cell(row.get('CRM ID')),
                city=_clean_cell(row.get('CRM Ort')),
                # Default to 'DE' when the column is missing OR the cell is blank.
                country=_clean_cell(row.get('CRM Land')) or 'DE',
                status="IMPORTED",  # Mark as imported so we know to enrich them
            )

            # Map old industry if useful, otherwise leave blank for re-classification
            # comp.industry_ai = str(row.get('Chat Vorschlag Branche', ''))

            db.add(comp)
            count += 1

            if count % 100 == 0:
                # Commit first so the progress message is only logged once
                # the batch has actually been persisted.
                db.commit()
                logger.info(f"Committed {count}...")

        db.commit()
        logger.info(f"Migration finished. Imported: {count}, Skipped: {skipped}")

    except Exception as e:
        logger.exception(f"Migration error: {e}")
        db.rollback()
    finally:
        db.close()
|
|
|
|
# Script entry point: run the migration only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    migrate()
|