45 lines
1.5 KiB
Python
45 lines
1.5 KiB
Python
import sys
|
|
import os
|
|
import logging
|
|
from sqlalchemy import create_engine
|
|
from sqlalchemy.orm import sessionmaker
|
|
|
|
# Add backend to path: the directory one level above this script's own
# directory is appended to sys.path so the project imports below resolve
# when the script is run directly.
_BACKEND_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(_BACKEND_DIR)

from database import Company
from services.deduplication import Deduplicator
|
|
|
|
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Mock DB or use live DB (safely).
# The config uses /data/companies_v3_fixed_2.db in Docker, but locally the
# file sits two directories above this script.
#
# The path is anchored to this file's location instead of being left as the
# bare relative string "../../companies_v3_fixed_2.db": a relative SQLite path
# is resolved against the current working directory, so the script would
# otherwise only find the database when launched from its own directory.
DB_PATH = os.path.normpath(
    os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        "..",
        "..",
        "companies_v3_fixed_2.db",
    )
)

# "sqlite:///" followed by an absolute path is the SQLAlchemy URL form for a
# file-based SQLite database.
engine = create_engine(f"sqlite:///{DB_PATH}")
Session = sessionmaker(bind=engine)

# Module-level session shared with test_matching() below.
db = Session()
|
|
|
|
def test_matching():
    """Run sample company queries through the Deduplicator and print matches.

    For each hard-coded query the function calls
    ``Deduplicator.find_duplicates`` and prints up to the first three
    results. Each result is read as a mapping with the keys ``name``,
    ``score`` and ``crm_id``.

    The module-level session ``db`` is closed when the run finishes, whether
    it completes normally or raises, so the database connection is released.

    Returns:
        None. All output goes to stdout via ``print``.
    """
    # Only the first few results of each query are displayed.
    # NOTE(review): presumably results are ordered best match first - confirm
    # against Deduplicator.find_duplicates.
    max_shown = 3

    test_cases = [
        {"name": "Wolfra", "website": "wolfra.de", "city": "Erding"},
        {"name": "Wolfra Kelterei", "website": "wolfra.de", "city": "Erding"},
        {"name": "Wolfra Fruchtsaft GmbH", "website": "https://www.wolfra.de/", "city": "Erding"},
        {"name": "Müller GmbH", "city": "München"},  # Broad search
        {"name": "NonExistentCompany", "city": "Berlin"},
    ]

    try:
        dedup = Deduplicator(db)

        for case in test_cases:
            # Queries without a "website" key are labelled "no-url" in the header.
            print(f"\n--- Matching Query: {case['name']} ({case.get('website', 'no-url')}) ---")
            results = dedup.find_duplicates(case)

            if not results:
                print(" No matches found.")
                continue

            for rank, res in enumerate(results[:max_shown], start=1):
                print(f" [{rank}] Match: {res['name']} (Score: {res['score']}) | CRM ID: {res['crm_id']}")
    finally:
        # Release the session's connection even if a query raised.
        db.close()
|
|
|
|
if __name__ == "__main__":
    # Executed as a script (not imported): run the manual matching check.
    test_matching()
|