Files
Brancheneinstufung2/company-explorer/backend/tests/test_matching_logic.py
Floke 3fd3c5acfa [31f88f42] Keine neuen Commits in dieser Session.
Keine neuen Commits in dieser Session.
2026-03-10 13:54:07 +00:00

45 lines
1.5 KiB
Python

import sys
import os
import logging
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
# Add backend to path
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from database import Company
from services.deduplication import Deduplicator
# Configure root logging at INFO and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# This script runs against the real SQLite file rather than a mock, so treat
# it as read-only tooling.
# In Docker the config points at /data/companies_v3_fixed_2.db; for local runs
# the file sits in the repository root, reached here through a relative path
# (SQLite resolves it against the current working directory).
DB_PATH = "../../companies_v3_fixed_2.db"
engine = create_engine("sqlite:///" + DB_PATH)
Session = sessionmaker(bind=engine)
db = Session()
def test_matching():
    """Run a fixed list of sample queries through the deduplicator and print the hits.

    Each query is a dict with a ``name`` and ``city`` and, for some entries, a
    ``website``. For every query the result of ``Deduplicator.find_duplicates``
    is printed: at most the first three entries, each shown with its
    ``name``, ``score`` and ``crm_id``; a placeholder line is printed when the
    result is empty. Nothing is asserted or returned -- the output is meant to
    be inspected by hand.
    """
    matcher = Deduplicator(db)

    queries = (
        {"name": "Wolfra", "website": "wolfra.de", "city": "Erding"},
        {"name": "Wolfra Kelterei", "website": "wolfra.de", "city": "Erding"},
        {"name": "Wolfra Fruchtsaft GmbH", "website": "https://www.wolfra.de/", "city": "Erding"},
        {"name": "Müller GmbH", "city": "München"},  # Broad search
        {"name": "NonExistentCompany", "city": "Berlin"},
    )

    for query in queries:
        label = query["name"]
        url = query.get("website", "no-url")
        print(f"\n--- Matching Query: {label} ({url}) ---")

        hits = matcher.find_duplicates(query)
        if not hits:
            print(" No matches found.")
            continue

        # Only the first three results are shown, numbered from 1.
        for rank, hit in enumerate(hits[:3], start=1):
            print(f" [{rank}] Match: {hit['name']} (Score: {hit['score']}) | CRM ID: {hit['crm_id']}")
if __name__ == "__main__":
    # The module-level session `db` is opened at import time; close it when the
    # script run ends so its database connection is released even if a query
    # raises part-way through.
    try:
        test_matching()
    finally:
        db.close()