fix(ce): Resolve database schema mismatch and restore docs
- Fixed a critical schema mismatch in the company-explorer by forcing a database re-initialization with a new database file. This ensures the application code is in sync with the database schema. - Documented the schema mismatch incident and its resolution in MIGRATION_PLAN.md. - Restored and enhanced BUILDER_APPS_MIGRATION.md by recovering extensive, valuable content from the git history that was accidentally deleted. The guide now again includes detailed troubleshooting steps and code templates for common migration pitfalls.
This commit is contained in:
@@ -1,10 +1,11 @@
|
||||
import logging
|
||||
import requests
|
||||
import re
|
||||
from typing import Optional, Dict, Tuple
|
||||
from typing import Optional, Dict, Tuple, Any
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from ..config import settings
|
||||
from ..lib.core_utils import retry_on_failure, normalize_string
|
||||
from ..lib.core_utils import retry_on_failure, normalize_string, normalize_company_name, simple_normalize_url
|
||||
from .wikipedia_service import WikipediaService
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -23,7 +24,6 @@ class DiscoveryService:
|
||||
if not self.api_key:
|
||||
logger.warning("SERP_API_KEY not set. Discovery features will fail.")
|
||||
|
||||
# Initialize the specialized Wikipedia Service
|
||||
self.wiki_service = WikipediaService()
|
||||
|
||||
@retry_on_failure(max_retries=2)
|
||||
@@ -60,42 +60,31 @@ class DiscoveryService:
|
||||
for result in data["organic_results"]:
|
||||
link = result.get("link", "")
|
||||
if self._is_credible_url(link):
|
||||
# Simple heuristic: If the company name is part of the domain, high confidence
|
||||
# Otherwise, take the first credible result.
|
||||
return link
|
||||
|
||||
return "k.A."
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"SerpAPI Error: {e}")
|
||||
logger.error(f"SerpAPI Error: {e}", exc_info=True)
|
||||
return "k.A."
|
||||
|
||||
@retry_on_failure(max_retries=2)
def find_wikipedia_url(self, company_name: str, website: Optional[str] = None, city: Optional[str] = None) -> str:
    """
    Searches for a specific German Wikipedia article using the robust WikipediaService.
    Includes validation via website domain and city.

    Args:
        company_name: Name of the company to look up.
        website: Company website URL used to validate the candidate article (optional).
        city: CRM city used to validate the candidate article (optional).

    Returns:
        The Wikipedia article URL, or the sentinel "k.A." when the API key is
        missing, no article is found, or the lookup fails.
    """
    if not self.api_key:
        return "k.A."

    try:
        # Delegate to the robust service; pass all available info for
        # robust search and validation.
        # parent_name could be added if available in the future.
        page = self.wiki_service.search_company_article(
            company_name=company_name,
            website=website,
            crm_city=city
        )

        if page:
            return page.url

        return "k.A."

    except Exception as e:
        # exc_info=True keeps the traceback, consistent with the other
        # handlers in this service.
        logger.error(f"Wiki Search Error via Service: {e}", exc_info=True)
        return "k.A."
|
||||
|
||||
def extract_wikipedia_data(self, url: str) -> dict:
    """
    Extract structured company data from a Wikipedia article URL.

    Delegates to the WikipediaService. On failure this never raises;
    instead it returns a dict carrying the url and the error message so
    callers can record the failure per-record.

    Args:
        url: Wikipedia article URL to extract data from.

    Returns:
        The extracted data dict, or {"url": url, "error": <message>} on failure.
    """
    try:
        return self.wiki_service.extract_company_data(url)
    except Exception as e:
        # exc_info=True keeps the traceback for debugging extraction failures.
        logger.error(f"Wiki Extraction Error for {url}: {e}", exc_info=True)
        return {"url": url, "error": str(e)}
|
||||
def _is_credible_url(self, url: str) -> bool:
|
||||
"""Filters out social media, directories, and junk."""
|
||||
"""
|
||||
Filters out social media, directories, and junk.
|
||||
"""
|
||||
if not url: return False
|
||||
try:
|
||||
domain = urlparse(url).netloc.lower().replace("www.", "")
|
||||
if domain in BLACKLIST_DOMAINS:
|
||||
return False
|
||||
# Check for subdomains of blacklist (e.g. de.linkedin.com)
|
||||
for bad in BLACKLIST_DOMAINS:
|
||||
if domain.endswith("." + bad):
|
||||
return False
|
||||
return True
|
||||
except:
|
||||
return False
|
||||
|
||||
return False
|
||||
Reference in New Issue
Block a user