"""Manual smoke test that runs sample company queries through the Deduplicator.

Run directly as a script; it prints up to three matches per sample query.
"""
import sys
import os
import logging
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

# Directory containing this script; used to anchor both the import path and
# the database path so the script behaves the same from any working directory.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))

# Add backend to path
sys.path.append(os.path.dirname(_SCRIPT_DIR))

from database import Company  # noqa: E402  (must follow the sys.path tweak)
from services.deduplication import Deduplicator  # noqa: E402

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Mock DB or use live DB (safely)
# The config uses /data/companies_v3_fixed_2.db in Docker, but locally it's in the root.
# Resolved relative to this file (two levels up) rather than the current
# working directory: a CWD-relative SQLite path silently opens/creates a
# different, empty database when the script is launched from elsewhere.
DB_PATH = os.path.normpath(
    os.path.join(_SCRIPT_DIR, "..", "..", "companies_v3_fixed_2.db")
)

engine = create_engine(f"sqlite:///{DB_PATH}")
Session = sessionmaker(bind=engine)
db = Session()


def test_matching():
    """Run a fixed set of sample queries through ``Deduplicator.find_duplicates``.

    For every query the top three results are printed; each result is read
    via the keys ``name``, ``score`` and ``crm_id``. The module-level session
    ``db`` is closed when the run finishes, whether or not a query raised.
    """
    dedup = Deduplicator(db)

    test_cases = [
        {"name": "Wolfra", "website": "wolfra.de", "city": "Erding"},
        {"name": "Wolfra Kelterei", "website": "wolfra.de", "city": "Erding"},
        {"name": "Wolfra Fruchtsaft GmbH", "website": "https://www.wolfra.de/", "city": "Erding"},
        {"name": "Müller GmbH", "city": "München"},  # Broad search
        {"name": "NonExistentCompany", "city": "Berlin"},
    ]

    try:
        for case in test_cases:
            print(f"\n--- Matching Query: {case['name']} ({case.get('website', 'no-url')}) ---")
            results = dedup.find_duplicates(case)

            if not results:
                print(" No matches found.")
                continue

            # Only the first three results are shown.
            for rank, res in enumerate(results[:3], start=1):
                print(f" [{rank}] Match: {res['name']} (Score: {res['score']}) | CRM ID: {res['crm_id']}")
    finally:
        # Release the connection held by the session; the original never
        # closed it.
        db.close()


if __name__ == "__main__":
    test_matching()