[31388f42] Fix NameError in reply generator and implement free-mail quality detection
This commit is contained in:
@@ -70,6 +70,18 @@ def fetch_tradingtwins_emails(token, limit=200):
|
||||
filtered = [m for m in all_msgs if "Neue Anfrage zum Thema Roboter" in (m.get('subject') or '')]
|
||||
return filtered
|
||||
|
||||
# Domains of consumer free-mail providers; built once at import time so the
# lookup set is not re-created on every call.
_FREE_MAIL_DOMAINS = frozenset({
    'gmail.com', 'googlemail.com', 'outlook.com', 'hotmail.com', 'live.com',
    'msn.com', 'icloud.com', 'me.com', 'mac.com', 'yahoo.com', 'ymail.com',
    'rocketmail.com', 'gmx.de', 'gmx.net', 'web.de', 't-online.de',
    'freenet.de', 'mail.com', 'protonmail.com', 'proton.me', 'online.de',
})


def is_free_mail(email_addr):
    """Check whether an email address belongs to a known free-mail provider.

    The comparison is case-insensitive and ignores leading/trailing
    whitespace around the address.

    Args:
        email_addr: The email address to inspect. May be empty or None.

    Returns:
        True if the part after the last '@' is one of the known free-mail
        domains; False otherwise, including for empty input or a string
        that contains no '@' at all.
    """
    if not email_addr:
        return False

    # Strip surrounding whitespace so that values like " user@gmx.de\n"
    # are still matched against the domain list.
    _local, at_sign, domain = email_addr.strip().rpartition('@')

    # Without an '@' there is no domain part; do not treat the whole
    # string (e.g. "gmail.com") as if it were an address at that domain.
    if not at_sign:
        return False

    return domain.lower() in _FREE_MAIL_DOMAINS
|
||||
|
||||
def parse_tradingtwins_html(html_body):
|
||||
"""
|
||||
Extracts data from the Tradingtwins HTML table structure.
|
||||
@@ -80,15 +92,15 @@ def parse_tradingtwins_html(html_body):
|
||||
# Map label names in HTML to our keys
|
||||
field_map = {
|
||||
'Firma': 'company',
|
||||
'Vorname': 'contact_first', # Key fixed to match ingest.py logic
|
||||
'Nachname': 'contact_last', # Key fixed to match ingest.py logic
|
||||
'Vorname': 'contact_first',
|
||||
'Nachname': 'contact_last',
|
||||
'E-Mail': 'email',
|
||||
'Rufnummer': 'phone',
|
||||
'Einsatzzweck': 'purpose', # Specific field
|
||||
'Reinigungs-Fläche': 'area', # Specific field
|
||||
'Einsatzzweck': 'purpose',
|
||||
'Reinigungs-Fläche': 'area',
|
||||
'PLZ': 'zip',
|
||||
'Stadt': 'city',
|
||||
'Lead-ID': 'source_id' # Mapped to DB column source_id
|
||||
'Lead-ID': 'source_id'
|
||||
}
|
||||
|
||||
for label, key in field_map.items():
|
||||
@@ -103,6 +115,13 @@ def parse_tradingtwins_html(html_body):
|
||||
if data.get('contact_first') and data.get('contact_last'):
|
||||
data['contact'] = f"{data['contact_first']} {data['contact_last']}"
|
||||
|
||||
# Quality Check: Free mail or missing company
|
||||
email = data.get('email', '')
|
||||
company = data.get('company', '-')
|
||||
|
||||
data['is_free_mail'] = is_free_mail(email)
|
||||
data['is_low_quality'] = data['is_free_mail'] or company == '-' or not company
|
||||
|
||||
# Ensure source_id is present and map to 'id' for db.py compatibility
|
||||
if not data.get('source_id'):
|
||||
data['source_id'] = f"tt_unknown_{int(datetime.now().timestamp())}"
|
||||
@@ -111,7 +130,7 @@ def parse_tradingtwins_html(html_body):
|
||||
|
||||
return data
|
||||
|
||||
def process_leads(auto_sync=True):
|
||||
def process_leads(auto_sync=False):
|
||||
init_db()
|
||||
new_count = 0
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user