diff --git a/.dev_session/SESSION_INFO b/.dev_session/SESSION_INFO index b16559b8..321a1b9a 100644 --- a/.dev_session/SESSION_INFO +++ b/.dev_session/SESSION_INFO @@ -1 +1 @@ -{"task_id": "30988f42-8544-817a-a250-fddb7d72b4c6", "token": "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", "session_start_time": "2026-02-16T11:58:09.261049"} \ No newline at end of file +{"task_id": "30a88f42-8544-819a-b5fc-c85cd80f43b7", "token": "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", "session_start_time": "2026-02-17T07:16:11.126812"} \ No newline at end of file diff --git a/KONVER_STRATEGY.md b/KONVER_STRATEGY.md new file mode 100644 index 00000000..1670c101 --- /dev/null +++ b/KONVER_STRATEGY.md @@ -0,0 +1,81 @@ +# Konver.ai Integration: Strategie & Architektur + +**Status:** Vertrag unterzeichnet (Fokus: Telefon-Enrichment). +**Risiko:** Wegfall von Dealfront (Lead Gen) ohne adäquaten, automatisierten Ersatz. +**Ziel:** Nutzung von Konver.ai nicht nur als manuelles "Telefonbuch", sondern als **skalierbare Quelle** für die Lead-Fabrik (Company Explorer). + +## 1. Das Zielszenario (The "Golden Flow") + +Wir integrieren Konver.ai via API direkt in den Company Explorer. Der CE fungiert als Gatekeeper, um Credits zu sparen und Dubletten zu verhindern. + +```mermaid +flowchart TD + subgraph "RoboPlanet Ecosystem" + Notion[("Notion Strategy\n(Verticals/Pains)")] + SO[("SuperOffice CRM\n(Bestand)")] + CE["Company Explorer\n(The Brain)"] + end + + subgraph "External Sources" + Konver["Konver.ai API"] + Web["Web / Google / Wiki"] + end + + %% Data Flow + Notion -->|1. Sync Strategy| CE + SO -->|2. Import Existing (Blocklist)| CE + + CE -->|3. Search Query + Exclusion List| Konver + Note right of Konver: "Suche: Altenheime > 10 Mio\nExclude: Domain-Liste aus SO" + + Konver -->|4. Net New Candidates| CE + + CE -->|5. Deep Dive (Robotik-Check)| Web + + CE -->|6. Enrich Contact (Phone/Mail)| Konver + Note right of CE: "Nur für Firmen mit\nhohem Robotik-Score!" + + CE -->|7. Export Qualified Lead| SO +``` + +## 2. Die kritische Lücke: "Exclusion List" + +Da Dealfront (unser bisheriges "Fischnetz") abgeschaltet wird, müssen wir Konver zur **Neukunden-Generierung** nutzen. +Ohne eine **Ausschluss-Liste (Exclusion List)** bei der Suche verbrennen wir Geld und Zeit: + +1. **Kosten:** Wir zahlen Credits für Firmen/Kontakte, die wir schon haben. +2. **Daten-Hygiene:** Wir importieren Dubletten, die wir mühsam bereinigen müssen. +3. **Blindflug:** Wir wissen vor dem Kauf nicht, ob der Datensatz "netto neu" ist. + +### Forderung an Konver (Technisches Onboarding) + +*"Um Konver.ai als strategischen Nachfolger für Dealfront in unserer Marketing-Automation nutzen zu können, benötigen wir zwingend API-Funktionen zur **Deduplizierung VOR dem Datenkauf**."* + +**Konkrete Features:** +* **Domain-Exclusion:** Upload einer Liste (z.B. 5.000 Domains), die in der API-Suche *nicht* zurückgegeben werden. +* **Contact-Check:** Prüfung (z.B. Hash-Abgleich), ob eine E-Mail-Adresse bereits "bekannt" ist, bevor Kontaktdaten enthüllt (und berechnet) werden. + +## 3. Workflow-Varianten + +### A. Der "Smart Enricher" (Wirtschaftlich) +Wir nutzen Konver nur für Firmen, die **wirklich** relevant sind. + +1. **Scraping:** Company Explorer findet 100 Altenheime (Web-Suche). +2. **Filterung:** KI prüft Websites -> 40 davon sind relevant (haben große Flächen). +3. **Enrichment:** Nur für diese 40 fragen wir Konver via API: *"Gib mir den Facility Manager + Handy"*. +4. **Ergebnis:** Wir zahlen 40 Credits statt 100. Hohe Effizienz. + +### B. Der "Mass Loader" (Teuer & Dumm - zu vermeiden) +1. Wir laden "Alle Altenheime" aus Konver direkt nach SuperOffice. +2. Wir zahlen 100 Credits. +3. Der Vertrieb ruft an -> 60 davon sind ungeeignet (zu klein, kein Bedarf). +4. **Ergebnis:** 60 Credits verbrannt, Vertrieb frustriert. + +## 4. Fazit & Next Steps + +Wir müssen im Onboarding-Gespräch klären: +1. **API-Doku:** Wo ist die Dokumentation für `Search` und `Enrich` Endpoints? +2. **Exclusion:** Wie filtern wir Bestandskunden im API-Call? +3. **Bulk-Enrichment:** Können wir Listen (Domains) zum Anreichern hochladen? + +Ohne diese Features ist Konver ein Rückschritt in die manuelle Einzelbearbeitung. diff --git a/b2b-marketing-assistant/App.tsx b/b2b-marketing-assistant/App.tsx index 1b2b7551..c37f0e89 100644 --- a/b2b-marketing-assistant/App.tsx +++ b/b2b-marketing-assistant/App.tsx @@ -57,6 +57,8 @@ const App: React.FC = () => { const [generationStep, setGenerationStep] = useState(0); // 0: idle, 1-6: step X is complete const [selectedIndustry, setSelectedIndustry] = useState(''); const [batchStatus, setBatchStatus] = useState<{ current: number; total: number; industry: string } | null>(null); + const [isEnriching, setIsEnriching] = useState(false); + // Project Persistence const [projectId, setProjectId] = useState(null); @@ -69,6 +71,43 @@ const App: React.FC = () => { const STEP_TITLES = t.stepTitles; const STEP_KEYS: (keyof AnalysisData)[] = ['offer', 'targetGroups', 'personas', 'painPoints', 'gains', 'messages', 'customerJourney']; + const handleEnrichRow = async (productName: string, productUrl?: string) => { + setIsEnriching(true); + setError(null); + try { + const response = await fetch(`${API_BASE_URL}/enrich-product`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + productName, + productUrl, + language: inputData.language + }), + }); + if (!response.ok) { + const errorData = await response.json(); + throw new Error(errorData.details || `HTTP error! status: ${response.status}`); + } + const newRow = await response.json(); + setAnalysisData(prev => { + const currentOffer = prev.offer || { headers: [], rows: [], summary: [] }; + return { + ...prev, + offer: { + ...currentOffer, + rows: [...currentOffer.rows, newRow] + } + }; + }); + } catch (e) { + console.error(e); + setError(e instanceof Error ? `Fehler beim Anreichern: ${e.message}` : 'Unbekannter Fehler beim Anreichern.'); + } finally { + setIsEnriching(false); + } + }; + + // --- AUTO-SAVE EFFECT --- useEffect(() => { if (generationStep === 0 || !inputData.companyUrl) return; @@ -507,9 +546,10 @@ const App: React.FC = () => { const canAdd = ['offer', 'targetGroups'].includes(stepKey); const canDelete = ['offer', 'targetGroups', 'personas'].includes(stepKey); - const handleManualAdd = (newRow: string[]) => { + const handleManualAdd = () => { + const newEmptyRow = Array(step.headers.length).fill(''); const currentRows = step.rows || []; - handleDataChange(stepKey, { ...step, rows: [...currentRows, newRow] }); + handleDataChange(stepKey, { ...step, rows: [...currentRows, newEmptyRow] }); }; return ( @@ -521,8 +561,8 @@ const App: React.FC = () => { rows={step.rows} onDataChange={(newRows) => handleDataChange(stepKey, { ...step, rows: newRows })} canAddRows={canAdd} - onEnrichRow={canAdd ? handleManualAdd : undefined} - isEnriching={false} + onEnrichRow={stepKey === 'offer' ? handleEnrichRow : handleManualAdd} + isEnriching={isEnriching} canDeleteRows={canDelete} onRestart={() => handleStepRestart(stepKey)} t={t} diff --git a/b2b-marketing-assistant/README.md b/b2b-marketing-assistant/README.md index 4cc79dc2..2ba133e2 100644 --- a/b2b-marketing-assistant/README.md +++ b/b2b-marketing-assistant/README.md @@ -15,6 +15,6 @@ View your app in AI Studio: https://ai.studio/apps/drive/1ZPnGbhaEnyhIyqs2rYhcPX 1. Install dependencies: `npm install` -2. Set the `GEMINI_API_KEY` in [.env.local](.env.local) to your Gemini API key +2. Set the `GEMINI_API_KEY` in the central `.env` file in the project's root directory. 3. Run the app: `npm run dev` diff --git a/b2b-marketing-assistant/components/StepDisplay.tsx b/b2b-marketing-assistant/components/StepDisplay.tsx index ce86baa6..df16e006 100644 --- a/b2b-marketing-assistant/components/StepDisplay.tsx +++ b/b2b-marketing-assistant/components/StepDisplay.tsx @@ -12,7 +12,7 @@ interface StepDisplayProps { onDataChange: (newRows: string[][]) => void; canAddRows?: boolean; canDeleteRows?: boolean; - onEnrichRow?: (productName: string, productUrl?: string) => Promise; + onEnrichRow?: (productName: string, productUrl?: string) => void; isEnriching?: boolean; onRestart?: () => void; t: typeof translations.de; @@ -106,12 +106,7 @@ export const StepDisplay: React.FC = ({ title, summary, header }; const handleAddRowClick = () => { - if (onEnrichRow) { - setIsAddingRow(true); - } else { - const newEmptyRow = Array(headers.length).fill(''); - onDataChange([...rows, newEmptyRow]); - } + setIsAddingRow(true); }; const handleConfirmAddRow = () => { diff --git a/b2b-marketing-assistant/server.cjs b/b2b-marketing-assistant/server.cjs index 5e246932..aefdf866 100644 --- a/b2b-marketing-assistant/server.cjs +++ b/b2b-marketing-assistant/server.cjs @@ -89,6 +89,15 @@ router.post('/next-step', (req, res) => { } catch (e) { res.status(500).json({ error: e.message }); } }); +router.post('/enrich-product', (req, res) => { + const { productName, productUrl, language } = req.body; + const args = [SCRIPT_PATH, '--mode', 'enrich_product', '--product_name', productName, '--language', language]; + if (productUrl) { + args.push('--product_url', productUrl); + } + runPythonScript(args, res); +}); + router.get('/projects', (req, res) => runPythonScript([dbScript, 'list'], res)); router.get('/projects/:id', (req, res) => runPythonScript([dbScript, 'load', req.params.id], res)); router.delete('/projects/:id', (req, res) => runPythonScript([dbScript, 'delete', req.params.id], res)); diff --git a/b2b-marketing-assistant/vite.config.ts b/b2b-marketing-assistant/vite.config.ts index 508c3760..1631dc5c 100644 --- a/b2b-marketing-assistant/vite.config.ts +++ b/b2b-marketing-assistant/vite.config.ts @@ -3,7 +3,7 @@ import { defineConfig, loadEnv } from 'vite'; import react from '@vitejs/plugin-react'; export default defineConfig(({ mode }) => { - const env = loadEnv(mode, '.', ''); + const env = loadEnv(mode, '../', ''); return { base: '/b2b/', server: { diff --git a/b2b_marketing_orchestrator.py b/b2b_marketing_orchestrator.py index 8641574b..450532b1 100644 --- a/b2b_marketing_orchestrator.py +++ b/b2b_marketing_orchestrator.py @@ -622,6 +622,40 @@ def next_step(language, context_file, generation_step, channels, focus_industry= summary = [re.sub(r'^\*\s*|^-\s*|^\d+\.\s*', '', s.strip()) for s in summary_match[1].split('\n') if s.strip()] if summary_match else [] return {step_key: {"summary": summary, "headers": table_data['headers'], "rows": table_data['rows']}} +def enrich_product(product_name, product_url, language): + logging.info(f"Enriching product: {product_name} ({product_url})") + api_key = load_api_key() + if not api_key: raise ValueError("Gemini API key is missing.") + + grounding_text = "" + if product_url: + grounding_text = get_text_from_url(product_url) + + prompt_text = f""" +# ANWEISUNG +Du bist ein B2B-Marketing-Analyst. Deine Aufgabe ist es, die Daten für EIN Produkt zu generieren. +Basierend auf dem Produktnamen und (optional) dem Inhalt der Produkt-URL, fülle die Spalten einer Markdown-Tabelle aus. +Die Ausgabe MUSS eine einzelne, kommaseparierte Zeile sein, die in eine Tabelle passt. KEINE Header, KEIN Markdown, nur die Werte. + +# PRODUKT +- Name: "{product_name}" +- URL-Inhalt: "{grounding_text[:3000]}..." + +# SPALTEN +Produkt/Lösung | Beschreibung (1-2 Sätze) | Kernfunktionen | Differenzierung | Primäre Quelle (URL) + +# BEISPIEL-OUTPUT +Saugroboter NR1500,Ein professioneller Saugroboter für große Büroflächen.,Autonome Navigation;Intelligente Kartierung;Lange Akkulaufzeit,Fokus auf B2B-Markt;Datenschutzkonform,https://nexaro.com/products/nr1500 + +# DEINE AUFGABE +Erstelle jetzt die kommaseparierte Zeile für das Produkt "{product_name}". +""" + + response_text = call_gemini_api(prompt_text, api_key) + + # Return as a simple list of strings + return [cell.strip() for cell in response_text.split(',')] + def main(): parser = argparse.ArgumentParser() parser.add_argument('--mode', required=True) @@ -633,10 +667,13 @@ def main(): parser.add_argument('--channels') parser.add_argument('--language', required=True) parser.add_argument('--focus_industry') # New argument + parser.add_argument('--product_name') + parser.add_argument('--product_url') args = parser.parse_args() try: if args.mode == 'start_generation': result = start_generation(args.url, args.language, args.regions, args.focus) elif args.mode == 'next_step': result = next_step(args.language, args.context_file, args.generation_step, args.channels, args.focus_industry) + elif args.mode == 'enrich_product': result = enrich_product(args.product_name, args.product_url, args.language) sys.stdout.write(json.dumps(result, ensure_ascii=False)) except Exception as e: logging.error(f"Error: {e}", exc_info=True) diff --git a/company-explorer/backend/database.py b/company-explorer/backend/database.py index c91d8685..2940a0ce 100644 --- a/company-explorer/backend/database.py +++ b/company-explorer/backend/database.py @@ -18,35 +18,21 @@ class Company(Base): id = Column(Integer, primary_key=True, index=True) - # Core Identity (Golden Record - from Research) + # Core Identity name = Column(String, index=True) website = Column(String, index=True) # Normalized Domain preferred crm_id = Column(String, unique=True, index=True, nullable=True) # Link to D365 - # CRM Original Data (Source of Truth for Import) - crm_name = Column(String, nullable=True) - crm_website = Column(String, nullable=True) - crm_address = Column(String, nullable=True) # Full address string or JSON - crm_vat = Column(String, nullable=True) - # Classification industry_crm = Column(String, nullable=True) # The "allowed" industry industry_ai = Column(String, nullable=True) # The AI suggested industry - # Location (Golden Record) + # Location city = Column(String, nullable=True) country = Column(String, default="DE") # Workflow Status - status = Column(String, default="NEW", index=True) # NEW, TO_ENRICH, ENRICHED, QUALIFIED, DISQUALIFIED - - # Quality & Confidence - confidence_score = Column(Float, default=0.0) # Overall confidence - data_mismatch_score = Column(Float, default=0.0) # 0.0=Match, 1.0=Mismatch - - # Scraping Status Flags - website_scrape_status = Column(String, default="PENDING") # PENDING, SUCCESS, FAILED, BLOCKED - wiki_search_status = Column(String, default="PENDING") # PENDING, FOUND, NOT_FOUND + status = Column(String, default="NEW", index=True) # Granular Process Tracking (Timestamps) created_at = Column(DateTime, default=datetime.utcnow) @@ -120,13 +106,6 @@ class Industry(Base): status_notion = Column(String, nullable=True) # e.g. "P1 Focus Industry" is_focus = Column(Boolean, default=False) # Derived from status_notion - # Enhanced Fields (v3.1 - Pains/Gains/Priority) - pains = Column(Text, nullable=True) - gains = Column(Text, nullable=True) - notes = Column(Text, nullable=True) - priority = Column(String, nullable=True) # Replaces old status concept ("Freigegeben") - ops_focus_secondary = Column(Boolean, default=False) - # NEW SCHEMA FIELDS (from MIGRATION_PLAN) metric_type = Column(String, nullable=True) # Unit_Count, Area_in, Area_out min_requirement = Column(Float, nullable=True) @@ -138,7 +117,6 @@ class Industry(Base): # Optional link to a Robotics Category (the "product" relevant for this industry) primary_category_id = Column(Integer, ForeignKey("robotics_categories.id"), nullable=True) - secondary_category_id = Column(Integer, ForeignKey("robotics_categories.id"), nullable=True) created_at = Column(DateTime, default=datetime.utcnow) diff --git a/company-explorer/backend/scripts/migrate_ce_db.py b/company-explorer/backend/scripts/migrate_ce_db.py deleted file mode 100644 index 93c192c1..00000000 --- a/company-explorer/backend/scripts/migrate_ce_db.py +++ /dev/null @@ -1,53 +0,0 @@ -import sqlite3 -import os - -# Adjust path to your actual DB location -DB_PATH = "/home/node/clawd/repos/brancheneinstufung2/company_explorer.db" - -def migrate(): - if not os.path.exists(DB_PATH): - print(f"Database not found at {DB_PATH}. Maybe it hasn't been created yet?") - return - - print(f"Migrating database at {DB_PATH}...") - conn = sqlite3.connect(DB_PATH) - cursor = conn.cursor() - - columns_to_add = [ - # Industries (Existing List) - ("industries", "pains", "TEXT"), - ("industries", "gains", "TEXT"), - ("industries", "notes", "TEXT"), - ("industries", "priority", "TEXT"), - ("industries", "ops_focus_secondary", "BOOLEAN DEFAULT 0"), - ("industries", "secondary_category_id", "INTEGER"), - - # Companies (New List for CRM Data) - ("companies", "crm_name", "TEXT"), - ("companies", "crm_website", "TEXT"), - ("companies", "crm_address", "TEXT"), - ("companies", "crm_vat", "TEXT"), - - # Companies (Status & Quality) - ("companies", "confidence_score", "FLOAT DEFAULT 0.0"), - ("companies", "data_mismatch_score", "FLOAT DEFAULT 0.0"), - ("companies", "website_scrape_status", "TEXT DEFAULT 'PENDING'"), - ("companies", "wiki_search_status", "TEXT DEFAULT 'PENDING'"), - ] - - for table, col_name, col_type in columns_to_add: - try: - print(f"Adding column '{col_name}' to '{table}'...") - cursor.execute(f"ALTER TABLE {table} ADD COLUMN {col_name} {col_type}") - except sqlite3.OperationalError as e: - if "duplicate column name" in str(e): - print(f"Column '{col_name}' already exists. Skipping.") - else: - print(f"Error adding '{col_name}' to '{table}': {e}") - - conn.commit() - conn.close() - print("Migration complete.") - -if __name__ == "__main__": - migrate() diff --git a/company-explorer/backend/scripts/sync_notion_to_ce_enhanced.py b/company-explorer/backend/scripts/sync_notion_to_ce_enhanced.py deleted file mode 100644 index 37ad0d90..00000000 --- a/company-explorer/backend/scripts/sync_notion_to_ce_enhanced.py +++ /dev/null @@ -1,170 +0,0 @@ -import sys -import os -import requests -import logging - -# Setup Paths -sys.path.append(os.path.abspath("/home/node/clawd/repos/brancheneinstufung2/company-explorer")) -sys.path.append(os.path.abspath("/home/node/clawd/repos/brancheneinstufung2")) - -from backend.database import SessionLocal, Industry, RoboticsCategory, init_db -from dotenv import load_dotenv - -load_dotenv(dotenv_path="/home/node/clawd/.env") - -# Logging -logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') -logger = logging.getLogger(__name__) - -NOTION_TOKEN = os.getenv("NOTION_API_KEY") -if not NOTION_TOKEN: - logger.error("NOTION_API_KEY missing!") - sys.exit(1) - -HEADERS = { - "Authorization": f"Bearer {NOTION_TOKEN}", - "Notion-Version": "2022-06-28", - "Content-Type": "application/json" -} - -def find_db_id(query_name): - url = "https://api.notion.com/v1/search" - payload = {"query": query_name, "filter": {"value": "database", "property": "object"}} - resp = requests.post(url, headers=HEADERS, json=payload) - if resp.status_code == 200: - results = resp.json().get("results", []) - if results: - return results[0]['id'] - return None - -def query_all(db_id): - url = f"https://api.notion.com/v1/databases/{db_id}/query" - results = [] - has_more = True - next_cursor = None - - while has_more: - payload = {} - if next_cursor: payload["start_cursor"] = next_cursor - - resp = requests.post(url, headers=HEADERS, json=payload) - data = resp.json() - results.extend(data.get("results", [])) - has_more = data.get("has_more", False) - next_cursor = data.get("next_cursor") - return results - -def extract_rich_text(prop): - if not prop or "rich_text" not in prop: return "" - return "".join([t.get("plain_text", "") for t in prop.get("rich_text", [])]) - -def extract_title(prop): - if not prop or "title" not in prop: return "" - return "".join([t.get("plain_text", "") for t in prop.get("title", [])]) - -def extract_select(prop): - if not prop or "select" not in prop or not prop["select"]: return "" - return prop["select"]["name"] - -def sync(): - logger.info("--- Starting Enhanced Sync ---") - - # 1. Init DB - init_db() - session = SessionLocal() - - # 2. Sync Categories (Products) - cat_db_id = find_db_id("Product Categories") or find_db_id("Products") - if cat_db_id: - logger.info(f"Syncing Products from {cat_db_id}...") - pages = query_all(cat_db_id) - for page in pages: - props = page["properties"] - name = extract_title(props.get("Name") or props.get("Product Name")) - if not name: continue - - notion_id = page["id"] - key = name.lower().replace(" ", "_") - - # Upsert - cat = session.query(RoboticsCategory).filter(RoboticsCategory.notion_id == notion_id).first() - if not cat: - cat = RoboticsCategory(notion_id=notion_id, key=key) - session.add(cat) - - cat.name = name - cat.description = extract_rich_text(props.get("Description")) - # Add reasoning guide map if available - session.commit() - else: - logger.warning("Product DB not found!") - - # 3. Sync Industries - ind_db_id = find_db_id("Industries") - if ind_db_id: - logger.info(f"Syncing Industries from {ind_db_id}...") - - # Clear existing? Or Upsert? - # For clean sync, DELETE is safer as long as we don't have FK constraints blocking it. - # But wait! Companies link to Industry STRING, not FK usually? - # Check Company model: industry_ai = Column(String). So no FK constraint. Safe to delete. - session.query(Industry).delete() - session.commit() - - pages = query_all(ind_db_id) - count = 0 - - for page in pages: - props = page["properties"] - name = extract_title(props.get("Vertical")) - if not name: continue - - ind = Industry(notion_id=page["id"], name=name) - session.add(ind) - - # Map Fields - ind.description = extract_rich_text(props.get("Definition")) - ind.notes = extract_rich_text(props.get("Notes")) - ind.pains = extract_rich_text(props.get("Pains")) - ind.gains = extract_rich_text(props.get("Gains")) - - # Status / Priority (Renamed field check) - # Try "Priorität" first, then "Freigegeben", then "Status" - prio = extract_select(props.get("Priorität")) - if not prio: prio = extract_select(props.get("Freigegeben")) - if not prio: prio = extract_select(props.get("Status")) - - ind.priority = prio - ind.status_notion = prio # Legacy field - ind.is_focus = (prio == "Freigegeben" or prio == "P1 Focus Industry") - - # Ops Focus - if "Ops Focus: Secondary" in props: - ind.ops_focus_secondary = props["Ops Focus: Secondary"].get("checkbox", False) - - # Relations - # Primary - rels_prim = props.get("Primary Product Category", {}).get("relation", []) - if rels_prim: - pid = rels_prim[0]["id"] - cat = session.query(RoboticsCategory).filter(RoboticsCategory.notion_id == pid).first() - if cat: ind.primary_category_id = cat.id - - # Secondary - rels_sec = props.get("Secondary Product", {}).get("relation", []) - if rels_sec: - pid = rels_sec[0]["id"] - cat = session.query(RoboticsCategory).filter(RoboticsCategory.notion_id == pid).first() - if cat: ind.secondary_category_id = cat.id - - count += 1 - - session.commit() - logger.info(f"✅ Synced {count} industries.") - else: - logger.error("Industries DB not found!") - - session.close() - -if __name__ == "__main__": - sync() diff --git a/debug_transcription_raw.py b/debug_transcription_raw.py new file mode 100644 index 00000000..40fbe1e7 --- /dev/null +++ b/debug_transcription_raw.py @@ -0,0 +1,70 @@ +import sqlite3 +import json +import os + +DB_PATH = "transcripts.db" + +def inspect_latest_meeting(): + if not os.path.exists(DB_PATH): + print(f"Error: Database file '{DB_PATH}' not found.") + return + + conn = sqlite3.connect(DB_PATH) + cursor = conn.cursor() + + # Get latest meeting + cursor.execute("SELECT id, title, created_at FROM meetings ORDER BY created_at DESC LIMIT 1") + meeting = cursor.fetchone() + + if not meeting: + print("No meetings found in DB.") + conn.close() + return + + meeting_id, title, created_at = meeting + print(f"--- Inspecting Latest Meeting: ID {meeting_id} ('{title}') created at {created_at} ---") + + # Get chunks for this meeting + cursor.execute("SELECT id, chunk_index, raw_text, json_content FROM transcript_chunks WHERE meeting_id = ? ORDER BY chunk_index", (meeting_id,)) + chunks = cursor.fetchall() + + if not chunks: + print("No chunks found for this meeting.") + + for chunk in chunks: + chunk_id, idx, raw_text, json_content = chunk + print(f"\n[Chunk {idx} (ID: {chunk_id})]") + + print(f"Stored JSON Content (Length): {len(json.loads(json_content)) if json_content else 'None/Empty'}") + + print("-" * 20 + " RAW TEXT START " + "-" * 20) + print(raw_text[:500]) # Print first 500 chars + print("..." if len(raw_text) > 500 else "") + print("-" * 20 + " RAW TEXT END " + "-" * 20) + + # Try to parse manually to see error + try: + # Simulate cleaning logic from orchestrator + cleaned = raw_text.strip() + if cleaned.startswith("```json"): + cleaned = cleaned[7:] + elif cleaned.startswith("```"): + cleaned = cleaned[3:] + if cleaned.endswith("```"): + cleaned = cleaned[:-3] + cleaned = cleaned.strip() + + parsed = json.loads(cleaned) + print("✅ Manual Parsing Successful!") + except json.JSONDecodeError as e: + print(f"❌ Manual Parsing Failed: {e}") + # Show context around error + if hasattr(e, 'pos'): + start = max(0, e.pos - 20) + end = min(len(cleaned), e.pos + 20) + print(f" Context at error: ...{cleaned[start:end]}...") + + conn.close() + +if __name__ == "__main__": + inspect_latest_meeting() diff --git a/docker-compose.yml b/docker-compose.yml index a34e7061..d9136bee 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -98,13 +98,13 @@ services: restart: unless-stopped volumes: - ./transcription-tool/backend:/app/backend + - ./transcription-tool/frontend/dist:/app/frontend/dist # Mount Frontend Build for Live Updates - ./transcripts.db:/app/transcripts.db - ./uploads_audio:/app/uploads_audio - ./gemini_api_key.txt:/app/gemini_api_key.txt environment: PYTHONUNBUFFERED: "1" DATABASE_URL: "sqlite:////app/transcripts.db" - GEMINI_API_KEY: "AIzaSyCFRmr1rOrkFKiEuh9GOCJNB2zfJsYmR68" # Placeholder, actual key is in file ports: - "8001:8001" diff --git a/run_lead_engine.py b/run_lead_engine.py deleted file mode 100644 index d486cc26..00000000 --- a/run_lead_engine.py +++ /dev/null @@ -1,102 +0,0 @@ -import argparse -import subprocess -import os -import sys -from datetime import datetime - -# --- Setup Paths --- -SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) -SCRIPTS_SUBDIR = os.path.join(SCRIPT_DIR, "scripts") -LOG_DIR = os.path.join(SCRIPT_DIR, "logs") -THROUGHPUT_LOG = os.path.join(LOG_DIR, "throughput.log") - -# Add scripts subdir to path to allow imports -sys.path.append(SCRIPTS_SUBDIR) - -# TODO: Import other modules once they are ready -# from company_explorer_connector import handle_company_workflow -# from generate_sniper_copy import generate_copy - -def setup_environment(): - """Ensures necessary directories exist.""" - os.makedirs(LOG_DIR, exist_ok=True) - -def log_throughput(identifier): - """Logs a successful processing event for the dashboard.""" - with open(THROUGHPUT_LOG, "a") as f: - f.write(f"{datetime.utcnow().isoformat()},{identifier}\n") - print(f"📈 Logged successful processing for '{identifier}' for dashboard.") - -def run_sync(): - """Runs the database sync script to ensure local data is fresh.""" - print("\n--- [Step 1: Syncing local Company Explorer database] ---") - sync_script_path = os.path.join(SCRIPTS_SUBDIR, "sync_ce_to_sqlite.py") - - if not os.path.exists(sync_script_path): - print(f"❌ ERROR: Sync script not found at {sync_script_path}") - return False - - result = subprocess.run(["python3", sync_script_path], capture_output=True, text=True, check=False) - - if result.returncode != 0: - print("❌ ERROR: Database sync failed.") - print(result.stderr) - return False - - print("✅ Sync successful.") - return True - -def process_lead(identifier): - """ - Orchestrates the full enrichment and copy generation for a single lead. - """ - print(f"\n======= PROCESSING LEAD: {identifier} =======") - - # --- Step 2: Enrich Company (if necessary) --- - print("\n--- [Step 2: Check/Enrich Company Data] ---") - # ce_data = handle_company_workflow(identifier) # Example of direct import - # if not ce_data or 'error' in ce_data: - # print(f"❌ Failed to enrich '{identifier}'. Aborting.") - # return - print("... (Placeholder for Enrichment Logic)") - print("✅ Enrichment complete.") - - - # --- Step 3: Generate Sniper Copy --- - print("\n--- [Step 3: Generate Sniper Copy] ---") - # sniper_copy = generate_copy(ce_data['data']['id']) - # print("\nGENERATED COPY:\n", sniper_copy) - print("... (Placeholder for Sniper Copy Generation)") - print("✅ Copy generation complete.") - - # --- Step 4: Finalize & Log --- - print("\n--- [Step 4: Finalizing] ---") - log_throughput(identifier) - print(f"✅ Successfully processed lead '{identifier}'.") - print("============================================") - - -def main(): - parser = argparse.ArgumentParser(description="GTM Lead Engine Orchestrator.") - parser.add_argument("leads", nargs='+', help="One or more company names or SuperOffice IDs to process.") - parser.add_argument("--skip-sync", action="store_true", help="Skip the initial database sync for faster iteration.") - args = parser.parse_args() - - print("🚀 GTM Lead Engine Orchestrator started.") - - setup_environment() - - if not args.skip_sync: - if not run_sync(): - sys.exit(1) # Exit if sync fails - else: - print("\n--- [Skipping Step 1: Database Sync] ---") - - for lead_identifier in args.leads: - process_lead(lead_identifier) - - print("\n🎉 Orchestration complete for all leads. 🎉") - - -if __name__ == "__main__": - main() diff --git a/scripts/add_manual_report.py b/scripts/add_manual_report.py deleted file mode 100644 index 96531d9b..00000000 --- a/scripts/add_manual_report.py +++ /dev/null @@ -1,75 +0,0 @@ -import os -import requests -import json -from datetime import datetime -from zoneinfo import ZoneInfo - -# Configuration -NOTION_TOKEN = "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" -PAGE_ID = "2ff88f42854480008314c9013414d1d0" -BERLIN_TZ = ZoneInfo("Europe/Berlin") - -def add_status_to_notion(): - headers = { - "Authorization": f"Bearer {NOTION_TOKEN}", - "Content-Type": "application/json", - "Notion-Version": "2022-06-28" - } - - # 1. Update the 'Total Duration (h)' field - # First, get current value - resp = requests.get(f"https://api.notion.com/v1/pages/{PAGE_ID}", headers=headers) - page_data = resp.json() - current_hours = page_data.get("properties", {}).get("Total Duration (h)", {}).get("number") or 0.0 - new_hours = current_hours + 3.2 - - # Update property - update_payload = { - "properties": { - "Total Duration (h)": {"number": new_hours}, - "Status": {"status": {"name": "Doing"}} - } - } - requests.patch(f"https://api.notion.com/v1/pages/{PAGE_ID}", headers=headers, json=update_payload) - - # 2. Append the Status Update Block - timestamp = datetime.now(BERLIN_TZ).strftime('%Y-%m-%d %H:%M') - report_content = ( - "Investierte Zeit in dieser Session: 03:12\n" - "Neuer Status: Doing\n\n" - "Arbeitszusammenfassung:\n" - "Wir haben heute den entscheidenden technischen Durchbruch bei der bidirektionalen Datensynchronisation zwischen dem Company Explorer (CE) und SuperOffice CRM (SO) erzielt.\n\n" - "1. Infrastruktur-Stabilisierung: Das Git-Repository wurde über eine interne Docker-Netzwerk-Verbindung (gitea-internal) stabil angebunden.\n" - "2. Pipeline-Durchstich (SO -> CE): Firmenstammdaten aus SuperOffice (Contact ID 2) werden sauber in den Company Explorer übertragen.\n" - "3. Round-Trip & Write-Back (CE -> SO): Das Protokoll für den Rückschreibeprozess wurde geknackt. Erkenntnis: SuperOffice ignoriert UrlAddress beim PUT, wir nutzen jetzt das Urls-Array. Pflichtfelder wie Number2 werden nun explizit mitgegeben." - ) - - block_payload = { - "children": [ - { - "object": "block", - "type": "heading_2", - "heading_2": { - "rich_text": [{"type": "text", "text": {"content": f"🤖 Status-Update ({timestamp} Berlin Time)"}}] - } - }, - { - "object": "block", - "type": "code", - "code": { - "rich_text": [{"type": "text", "text": {"content": report_content}}], - "language": "yaml" - } - } - ] - } - - final_resp = requests.patch(f"https://api.notion.com/v1/blocks/{PAGE_ID}/children", headers=headers, json=block_payload) - - if final_resp.status_code == 200: - print(f"✅ SUCCESS: Notion Task updated. Total hours now: {new_hours}") - else: - print(f"❌ ERROR: {final_resp.text}") - -if __name__ == "__main__": - add_status_to_notion() diff --git a/scripts/append_task_updates.py b/scripts/append_task_updates.py new file mode 100644 index 00000000..4737c01a --- /dev/null +++ b/scripts/append_task_updates.py @@ -0,0 +1,66 @@ +import os +import requests +import json +from dotenv import load_dotenv + +load_dotenv(dotenv_path="/home/node/clawd/.env") + +NOTION_TOKEN = os.getenv("NOTION_API_KEY") +HEADERS = { + "Authorization": f"Bearer {NOTION_TOKEN}", + "Content-Type": "application/json", + "Notion-Version": "2022-06-28" +} + +# IDs from yesterday +TASKS = { + "Pains Gains Vertical": "2ff88f42-8544-8050-8245-c3bb852058f4", + "Segmentierung Bestand": "2ff88f42-8544-808f-862b-c30ab2f29783", + "Matrixmultiplikation": "2ff88f42-8544-8079-a23e-c248e35b09a0" +} + +UPDATES = { + "Pains Gains Vertical": "Update 17.02.: ✅ Entwurf in Notion finalisiert und detailliert (inkl. Hygiene-Fokus). Bereit für Review am Freitag.", + "Segmentierung Bestand": "Update 17.02.: ✅ Company Explorer Schema erweitert (V2). Bereit für Excel-Import.", + "Matrixmultiplikation": "Update 17.02.: ✅ Logik '3+1' (Prio Produkt + Sekundär bei Ops-Rolle) in Datenstruktur abgebildet." +} + +def append_block(page_id, text): + url = f"https://api.notion.com/v1/blocks/{page_id}/children" + payload = { + "children": [ + { + "object": "block", + "type": "paragraph", + "paragraph": { + "rich_text": [ + { + "type": "text", + "text": { + "content": text, + "link": None + }, + "annotations": { + "bold": True, # Make it stand out + "italic": False, + "strikethrough": False, + "underline": False, + "code": False, + "color": "default" + } + } + ] + } + } + ] + } + resp = requests.patch(url, headers=HEADERS, json=payload) + if resp.status_code == 200: + print(f"✅ Appended to {page_id}") + else: + print(f"❌ Error {page_id}: {resp.text}") + +if __name__ == "__main__": + for name, page_id in TASKS.items(): + if name in UPDATES: + append_block(page_id, UPDATES[name]) diff --git a/scripts/audit_notion_consistency.py b/scripts/audit_notion_consistency.py deleted file mode 100644 index d5c60fbd..00000000 --- a/scripts/audit_notion_consistency.py +++ /dev/null @@ -1,115 +0,0 @@ -import os -import requests -import json -from dotenv import load_dotenv - -load_dotenv(dotenv_path="/home/node/clawd/.env") - -NOTION_TOKEN = os.getenv("NOTION_API_KEY") -HEADERS = { - "Authorization": f"Bearer {NOTION_TOKEN}", - "Content-Type": "application/json", - "Notion-Version": "2022-06-28" -} - -def find_db_id(query_name): - url = "https://api.notion.com/v1/search" - payload = {"query": query_name, "filter": {"value": "database", "property": "object"}} - resp = requests.post(url, headers=HEADERS, json=payload) - if resp.status_code == 200: - results = resp.json().get("results", []) - if results: - return results[0]['id'] - return None - -# Cache for product names to avoid API spam -product_cache = {} - -def resolve_product_name(relation_ids): - if not relation_ids: - return "None" - - names = [] - for rel in relation_ids: - page_id = rel['id'] - if page_id in product_cache: - names.append(product_cache[page_id]) - continue - - url = f"https://api.notion.com/v1/pages/{page_id}" - resp = requests.get(url, headers=HEADERS) - if resp.status_code == 200: - props = resp.json().get("properties", {}) - # Assume Product DB has a Title field called "Name" or "Product Name" - # We iterate to find the title - title = "Unknown" - for key, val in props.items(): - if val['id'] == 'title': # The title property always has id 'title' - if val['title']: - title = val['title'][0]['plain_text'] - break - product_cache[page_id] = title - names.append(title) - else: - names.append("Error fetching Product") - - return ", ".join(names) - -def audit_industries(): - db_id = find_db_id("Industries") - if not db_id: - print("❌ Industries DB not found.") - return - - print(f"--- Auditing Industries DB ({db_id}) ---") - - url = f"https://api.notion.com/v1/databases/{db_id}/query" - resp = requests.post(url, headers=HEADERS, json={}) - - if resp.status_code != 200: - print(f"Error: {resp.text}") - return - - pages = resp.json().get("results", []) - - # We want to see: Vertical Name | Status | Primary Product (Resolved) | Notes Snippet - print(f"{'Vertical':<35} | {'Status':<15} | {'Primary Product':<30} | {'Notes (Snippet)'}") - print("-" * 120) - - for page in pages: - props = page['properties'] - - # Name - name = "N/A" - if "Vertical" in props and props["Vertical"]["title"]: - name = props["Vertical"]["title"][0]["plain_text"] - elif "Name" in props and props["Name"]["title"]: # Fallback - name = props["Name"]["title"][0]["plain_text"] - - # Filter for the ones we touched or are interested in - # (Optional: remove filter to see all) - - # Status - status = "" - if "Freigabe" in props: - if props["Freigabe"]["type"] == "status" and props["Freigabe"]["status"]: - status = props["Freigabe"]["status"]["name"] - elif props["Freigabe"]["type"] == "select" and props["Freigabe"]["select"]: - status = props["Freigabe"]["select"]["name"] - - # Primary Product (Relation) - product_name = "None" - if "Primary Product Category" in props and props["Primary Product Category"]["relation"]: - product_name = resolve_product_name(props["Primary Product Category"]["relation"]) - - # Notes - notes = "" - if "Notes" in props and props["Notes"]["rich_text"]: - full_note = props["Notes"]["rich_text"][0]["plain_text"] - notes = (full_note[:40] + '...') if len(full_note) > 40 else full_note - - if name != "N/A": - print(f"{name:<35} | {status:<15} | {product_name:<30} | {notes}") - -if __name__ == "__main__": - audit_industries() diff --git a/scripts/check_notion_tasks.py b/scripts/check_notion_tasks.py new file mode 100644 index 00000000..19f3e07c --- /dev/null +++ b/scripts/check_notion_tasks.py @@ -0,0 +1,69 @@ +import os +import requests +import json +from dotenv import load_dotenv + +load_dotenv(dotenv_path="/home/node/clawd/.env") + +NOTION_TOKEN = os.getenv("NOTION_API_KEY") +HEADERS = { + "Authorization": f"Bearer {NOTION_TOKEN}", + "Content-Type": "application/json", + "Notion-Version": "2022-06-28" +} + +PROJECT_ID = "2ea88f42-8544-8074-9ad8-c24d283bc1c9" + +def find_tasks_db(): + url = "https://api.notion.com/v1/search" + payload = {"query": "Tasks", "filter": {"value": "database", "property": "object"}} + resp = requests.post(url, headers=HEADERS, json=payload) + if resp.status_code == 200: + results = resp.json().get("results", []) + if results: + return results[0]['id'] + return None + +def get_project_tasks(db_id): + url = f"https://api.notion.com/v1/databases/{db_id}/query" + # We look for tasks linked to the project ID via a relation property (usually "Project") + payload = { + "filter": { + "property": "Project", + "relation": { + "contains": PROJECT_ID + } + } + } + resp = requests.post(url, headers=HEADERS, json=payload) + if resp.status_code != 200: + print(f"Error querying tasks: {resp.text}") + return [] + + return resp.json().get("results", []) + +def print_tasks(): + db_id = find_tasks_db() + if not db_id: + print("❌ Tasks DB not found.") + return + + print(f"--- Tasks for Project {PROJECT_ID} ---") + tasks = get_project_tasks(db_id) + + for task in tasks: + props = task['properties'] + name = "Unknown" + if "Name" in props and props["Name"]["title"]: + name = props["Name"]["title"][0]["plain_text"] + elif "Task" in props and props["Task"]["title"]: + name = props["Task"]["title"][0]["plain_text"] + + status = "Unknown" + if "Status" in props and props["Status"]["status"]: + status = props["Status"]["status"]["name"] + + print(f"- [{status}] {name} ({task['id']})") + +if __name__ == "__main__": + print_tasks() diff --git a/scripts/discover_notion_dbs.py b/scripts/discover_notion_dbs.py deleted file mode 100644 index f9787a7a..00000000 --- a/scripts/discover_notion_dbs.py +++ /dev/null @@ -1,27 +0,0 @@ -import os -import requests -import json -from dotenv import load_dotenv - -load_dotenv(dotenv_path="/home/node/clawd/.env") - -def discover_dbs(): - token = os.getenv("NOTION_API_KEY") - headers = { - "Authorization": f"Bearer {token}", - "Content-Type": "application/json", - "Notion-Version": "2022-06-28" - } - url = "https://api.notion.com/v1/search" - payload = {"filter": {"value": "database", "property": "object"}} - - resp = requests.post(url, headers=headers, json=payload) - results = resp.json().get("results", []) - - print("--- Gefundene Datenbanken ---") - for db in results: - title = db.get("title", [{}])[0].get("plain_text", "Unbekannt") - print(f"Name: {title} | ID: {db['id']}") - -if __name__ == "__main__": - discover_dbs() diff --git a/scripts/enrich_notion_pains.py b/scripts/enrich_notion_pains.py deleted file mode 100644 index cf97dc00..00000000 --- a/scripts/enrich_notion_pains.py +++ /dev/null @@ -1,265 +0,0 @@ -import os -import requests -import json -from dotenv import load_dotenv - -load_dotenv(dotenv_path="/home/node/clawd/.env") - -NOTION_TOKEN = os.getenv("NOTION_API_KEY") -HEADERS = { - "Authorization": f"Bearer {NOTION_TOKEN}", - "Content-Type": "application/json", - "Notion-Version": "2022-06-28" -} - -# --- Load Product Mapping --- -try: - with open("data/product_mapping.json", "r") as f: - PRODUCT_MAP = json.load(f) -except FileNotFoundError: - print("❌ Product mapping not found. Run fetch_product_mapping.py first.") - exit(1) - -# Helper to find DB ID -def find_db_id(query_name): - url = "https://api.notion.com/v1/search" - payload = {"query": query_name, "filter": {"value": "database", "property": "object"}} - resp = requests.post(url, headers=HEADERS, json=payload) - if resp.status_code == 200: - results = resp.json().get("results", []) - if results: - return results[0]['id'] - return None - -def get_page_by_vertical(db_id, vertical_name): - url = f"https://api.notion.com/v1/databases/{db_id}/query" - # Using 'Vertical' as the title property name based on previous audit - payload = { - "filter": { - "property": "Vertical", - "title": {"equals": vertical_name} - } - } - resp = requests.post(url, headers=HEADERS, json=payload) - if resp.status_code == 200: - results = resp.json().get("results", []) - if results: - return results[0] - return None - -def update_page(page_id, properties): - url = f"https://api.notion.com/v1/pages/{page_id}" - payload = {"properties": properties} - resp = requests.patch(url, headers=HEADERS, json=payload) - if resp.status_code == 200: - print(f"✅ Updated '{page_id}'") - else: - print(f"❌ Error updating '{page_id}': {resp.text}") - -def create_page(db_id, properties): - url = "https://api.notion.com/v1/pages" - payload = {"parent": {"database_id": db_id}, "properties": properties} - resp = requests.post(url, headers=HEADERS, json=payload) - if resp.status_code == 200: - print(f"✅ Created new page") - else: - print(f"❌ Error creating page: {resp.text}") - -# --- CONTENT DEFINITION --- - -# Format: Vertical -> { props... } -UPDATES = { - "Healthcare - Care Home": { - "product": "Cleaning Indoor Roboter (Wet Surface)", - "pains": """[Primary Product: Cleaning] -- Infektionsrisiko: Mangelnde Bodenhygiene und Keimverschleppung in sensiblen Bereichen gefährden Bewohner. -- Dokumentationspflicht: Lückenlose Nachweise für Hygiene-Audits binden wertvolle Pflegezeit. -- Personalmangel: Reinigungskräfte fehlen, Standards können manuell kaum gehalten werden. - -[Secondary Product: Service] -- Pflegeressourcen: Fachkräfte binden bis zu 30% ihrer Zeit mit nicht-pflegerischen Transportwegen (Essen/Wäsche). -- Körperliche Belastung: Schweres Heben und weite Wege führen zu krankheitsbedingten Ausfällen im Pflegeteam.""", - "gains": """[Primary Product: Cleaning] -- Audit-Sicherheit: Automatisierte, protokollierte Reinigung sichert Compliance ohne Mehraufwand. -- Entlastung Housekeeping: Personal konzentriert sich auf Sichtreinigung und Desinfektion statt Bodenfläche. - -[Secondary Product: Service] -- Mehr Zeit am Patienten: Reduktion der Laufwege gibt Pflegekräften 2-3 Std./Schicht zurück. -- Mitarbeiterzufriedenheit: Reduktion körperlicher Belastung senkt Krankenstand.""", - "ops_focus": True, - "status": "Freigegeben", - "notes": "Prio 1: Reinigung. Prio 2: Service (Essen). Fokus auf Fachkräftemangel & Hygiene." - }, - "Healthcare - Hospital": { - "product": "Cleaning Indoor Roboter (Wet Surface)", - "pains": """[Primary Product: Cleaning] -- Infektionsschutz: Hohe Frequenz an Patientenbewegungen erfordert permanente Desinfektion der Böden. -- Audit-Druck: Behördliche Auflagen verlangen lückenlose Dokumentation, die manuell kaum leistbar ist. -- Kostendruck: Steigende Personalkosten bei fixen Fallpauschalen zwingen zur Effizienzsteigerung. - -[Secondary Product: Service] -- Logistik-Aufwand: Transport von Proben, Wäsche und Essen bindet Pflegepersonal in unproduktiven Wegezeiten.""", - "gains": """[Primary Product: Cleaning] -- Hygiene-Standard: 24/7 gleichbleibende Reinigungsqualität reduziert Keimbelastung messbar. -- Compliance: Automatische Protokollierung aller Reinigungsfahrten für Audits. - -[Secondary Product: Service] -- Prozess-Effizienz: Automatisierter Warentransport entlastet Fachpersonal für medizinische Aufgaben.""", - "ops_focus": True, - "status": "Freigegeben", - "notes": "Prio 1: Reinigung (Alex Veto). Service ist 'nice to have'. KPI: Hygiene-Sicherheit." - }, - "Leisure - Entertainment": { - "product": "Service Roboter", # FIX: Changed from Cleaning to Service - "pains": """[Primary Product: Service] -- Service-Engpass: Umsatzverlust zu Stoßzeiten, da Personal nicht schnell genug Getränke/Snacks nachliefert. -- Personalmangel: Schwierige Besetzung von Spätschichten führt zu geschlossenen Stationen/Bahnen. -- Wartezeiten: Gäste sind unzufrieden, wenn Bestellungen zu lange dauern. - -[Secondary Product: Cleaning] -- Bodenverschmutzung: Klebrige Böden (Getränke/Popcorn) im Foyer stören das Gästeerlebnis.""", - "gains": """[Primary Product: Service] -- Umsatzsteigerung: Permanente Verfügbarkeit von Snacks/Getränken direkt am Platz (Cross-Selling). -- Erlebnis-Faktor: Innovative Roboter begeistern Gäste und fördern Social-Media-Sichtbarkeit. -- Entlastung: Servicepersonal hat mehr Zeit für Gästebetreuung statt Laufwege.""", - "ops_focus": True, # Keep secondary focus plausible - "status": "Freigegeben", - "notes": "Prio 1: Service Robotik (BellaBot). Cleaning nur Prio 2 (Foyer/Gänge)." - }, - "Logistics - Warehouse": { - "product": "Cleaning Outdoor Roboter (Sweeper)", - "pains": """[Primary Product: Sweeper] -- Prozesssicherheit: Staub auf Sensoren und Lichtschranken führt zu Anlagenstörungen und Produktionsstopps. -- Arbeitssicherheit: Verschmutzte Fahrwege durch Palettenreste/Staub erhöhen das Unfallrisiko. -- Manuelle Bindung: Fachkräfte müssen kehren statt kommissionieren. - -[Secondary Product: Cleaning Wet] -- Hartnäckige Verschmutzungen: Öl/Reifenabrieb erfordern Nassreinigung, die manuell zeitintensiv ist.""", - "gains": """[Primary Product: Sweeper] -- Staubfreie Umgebung: Werterhalt des Hallenbodens und Schutz empfindlicher Ware/Anlagen. -- Produktivität: Reinigung erfolgt parallel zum Betrieb oder nachts, ohne Störung. -- Sicherheit: Saubere Fahrwege reduzieren Unfallrisiko für Flurförderzeuge.""", - "ops_focus": True, - "status": "Freigegeben", - "notes": "Prio 1: Sweeper (Staub). Prio 2: Wet. Transport schwierig wegen Paletten." - }, - "Tech - Data Center": { - "product": "Security Roboter", - "pains": """[Primary Product: Security] -- Sicherheitsrisiko: Unbefugter Zutritt in Sicherheitsbereiche muss lückenlos detektiert werden (24/7). -- Personalbindung: Wachpersonal ist teuer und kann nicht überall gleichzeitig sein. - -[Secondary Product: Cleaning] -- Feinstaub: Staubpartikel in Serverräumen gefährden Hardware und Kühlung.""", - "gains": """[Primary Product: Security] -- Lückenlose Überwachung: Permanente Patrouille und sofortige Alarmierung ohne Personalbindung. -- Dokumentation: Video- und Sensorprotokolle aller Ereignisse. - -[Secondary Product: Cleaning] -- Ausfallsicherheit: Staubfreie Umgebung verlängert Hardware-Lebensdauer.""", - "ops_focus": True, - "status": "Klärrungsbedarf", # New, needs review - "notes": "Neu angelegt. Prio 1 Security (lt. Transkript). Prio 2 Cleaning (Staub)." - }, - "Reinigungsdienstleister": { - "product": "Cleaning Indoor Roboter (Wet Surface)", - "pains": """[Primary Product: Cleaning] -- Personalmangel: Schwierigkeit, zuverlässiges Personal für alle Objekte zu finden. -- Kostendruck: Geringe Margen bei Ausschreibungen erfordern hohe Effizienz. -- Qualitätsschwankungen: Manuelle Reinigung variiert stark, Kunden beschweren sich. -- Fluktuation: Hoher Aufwand für ständige Neueinarbeitung.""", - "gains": """[Primary Product: Cleaning] -- Skalierbarkeit: Roboter übernehmen Flächenleistung, Personal macht Detailreinigung. -- Innovation: Wettbewerbsvorteil bei Ausschreibungen durch Technologie-Einsatz. -- Kalkulationssicherheit: Fixe Kosten statt variabler Personalkosten/Krankheitstage.""", - "ops_focus": False, - "status": "Klärrungsbedarf", - "notes": "Neu angelegt. Zielgruppe: Wisag, Dussmann etc. (Alex: Größter Markt)." - }, - "Infrastructure - Communities": { - "product": "Cleaning Indoor Roboter (Wet Surface)", - "pains": """[Primary Product: Cleaning] -- Großflächen-Reinigung: Sporthallen, Aulen und Flure binden enorm viele Personalstunden. -- Budget-Druck: Kommunen müssen sparen, Reinigungskosten sind großer Posten. -- Nutzungs-Konflikte: Reinigung muss in engen Zeitfenstern zwischen Schul/Vereinsnutzung erfolgen.""", - "gains": """[Primary Product: Cleaning] -- Kosteneffizienz: Reduktion der Reinigungskosten pro Quadratmeter. -- Flexibilität: Reinigung kann nachts oder in Randzeiten erfolgen. -- Werterhalt: Schonende, regelmäßige Reinigung verlängert Lebensdauer von Sportböden.""", - "ops_focus": False, - "status": "Klärrungsbedarf", - "notes": "Neu angelegt (Schulen, Gemeinden)." - }, - "Infrastructure Parking": { - "product": "Cleaning Outdoor Roboter (Sweeper)", - "pains": """[Primary Product: Sweeper] -- Außenwirkung: Verschmutzte Parkflächen/Müll schaden dem Image (erster Eindruck). -- Manuelle Arbeit: Fegen von großen Außenflächen ist personalintensiv und unbeliebt. -- Umwelt: Müll gelangt in die Umgebung/Kanalisation.""", - "gains": """[Primary Product: Sweeper] -- Gepflegtes Erscheinungsbild: Täglich saubere Außenanlagen. -- Autonomie: Roboter reinigt selbstständig, auch bei schlechtem Wetter. -- Entlastung: Hausmeister kann sich um Wartung kümmern statt Fegen.""", - "ops_focus": False, - "status": "Klärrungsbedarf", - "notes": "Neu angelegt (Parkplätze, Außenanlagen)." - } -} - -def run_enrichment(): - db_id = find_db_id("Industries") - if not db_id: - print("❌ Industries DB not found.") - return - - print(f"--- Enriching Verticals in DB {db_id} ---") - - for vertical, data in UPDATES.items(): - # Resolve Product ID - prod_id = PRODUCT_MAP.get(data["product"]) - if not prod_id: - print(f"❌ Product '{data['product']}' not found in map. Skipping {vertical}.") - continue - - # Prepare Properties - props = { - "Pains": {"rich_text": [{"text": {"content": data["pains"]}}]}, - "Gains": {"rich_text": [{"text": {"content": data["gains"]}}]}, - "Primary Product Category": {"relation": [{"id": prod_id}]}, - "Notes": {"rich_text": [{"text": {"content": data["notes"]}}]}, - # Handle Status (Try Select first, then Status) - # We assume "Freigabe" exists - } - - # Add checkbox if present in logic - if "ops_focus" in data: - props["Ops Focus: Secondary"] = {"checkbox": data["ops_focus"]} - - # Check if page exists - page = get_page_by_vertical(db_id, vertical) - - if page: - # Update existing - # Add Status Update - # (Note: Logic to detect Select vs Status type is needed, but we assume Select/Status name is consistent) - # For robustness, we check the property type in the page object - status_type = page['properties'].get("Freigabe", {}).get("type") - if status_type == "status": - props["Freigabe"] = {"status": {"name": data["status"]}} - elif status_type == "select": - props["Freigabe"] = {"select": {"name": data["status"]}} - - print(f"Updating '{vertical}'...") - update_page(page['id'], props) - else: - # Create new - print(f"Creating new vertical '{vertical}'...") - props["Vertical"] = {"title": [{"text": {"content": vertical}}]} - # Guess status type (usually Select or Status) - Try Status first as default in new Notion DBs - # Or omit status if unsure, but we want to set it. - # We'll try Status format. - props["Freigabe"] = {"status": {"name": data["status"]}} - create_page(db_id, props) - -if __name__ == "__main__": - run_enrichment() diff --git a/scripts/fetch_product_mapping.py b/scripts/fetch_product_mapping.py deleted file mode 100644 index ec28f9df..00000000 --- a/scripts/fetch_product_mapping.py +++ /dev/null @@ -1,65 +0,0 @@ -import os -import requests -import json -from dotenv import load_dotenv - -load_dotenv(dotenv_path="/home/node/clawd/.env") - -NOTION_TOKEN = os.getenv("NOTION_API_KEY") -HEADERS = { - "Authorization": f"Bearer {NOTION_TOKEN}", - "Content-Type": "application/json", - "Notion-Version": "2022-06-28" -} - -def find_db_id(query_name): - url = "https://api.notion.com/v1/search" - payload = {"query": query_name, "filter": {"value": "database", "property": "object"}} - resp = requests.post(url, headers=HEADERS, json=payload) - if resp.status_code == 200: - results = resp.json().get("results", []) - if results: - return results[0]['id'] - return None - -def fetch_products(): - # Find Product DB (it's likely named "Product Categories" or similar based on schema) - # Or search for "Products" - db_id = find_db_id("Product Categories") - if not db_id: - db_id = find_db_id("Products") # Fallback - - if not db_id: - print("❌ Could not find Product Database.") - return - - print(f"--- Fetching Products from DB {db_id} ---") - - url = f"https://api.notion.com/v1/databases/{db_id}/query" - resp = requests.post(url, headers=HEADERS, json={}) - - products = {} - if resp.status_code == 200: - results = resp.json().get("results", []) - for page in results: - props = page['properties'] - # Find Title - name = "Unknown" - if "Name" in props and props["Name"]["title"]: - name = props["Name"]["title"][0]["plain_text"] - elif "Product Name" in props and props["Product Name"]["title"]: - name = props["Product Name"]["title"][0]["plain_text"] - - products[name] = page['id'] - print(f"Found: {name} ({page['id']})") - - # Save to file - os.makedirs("data", exist_ok=True) - with open("data/product_mapping.json", "w") as f: - json.dump(products, f, indent=4) - print("✅ Saved to data/product_mapping.json") - else: - print(f"Error: {resp.text}") - -if __name__ == "__main__": - fetch_products() diff --git a/scripts/generate_ce_hooks.py b/scripts/generate_ce_hooks.py deleted file mode 100644 index 2092f1cd..00000000 --- a/scripts/generate_ce_hooks.py +++ /dev/null @@ -1,93 +0,0 @@ -import sqlite3 -import os -import json -import requests -from dotenv import load_dotenv - -# Load ENV for Gemini API -load_dotenv(dotenv_path="/home/node/clawd/.env", override=True) - -class LeadHookService: - def __init__(self, db_path): - self.db_path = db_path - self.api_key = os.getenv("GEMINI_API_KEY") - - def _get_company_data(self, company_id): - conn = sqlite3.connect(self.db_path) - conn.row_factory = sqlite3.Row - cursor = conn.cursor() - - # Get company and metrics - cursor.execute("SELECT * FROM companies WHERE id = ?", (company_id,)) - company = cursor.fetchone() - - if not company: - return None - - data = dict(company) - conn.close() - return data - - def build_combined_context(self, company_data): - # Build the 'combined' string from CE facts - parts = [] - parts.append(f"Name: {company_data.get('name')}") - parts.append(f"Branche: {company_data.get('industry_ai')}") - - if company_data.get('calculated_metric_value'): - parts.append(f"Metrik: {company_data.get('calculated_metric_value')} {company_data.get('calculated_metric_unit')}") - - # Add a hint about the core business from status/city - parts.append(f"Standort: {company_data.get('city')}") - - return " | ".join(parts) - - def generate_hook(self, company_id): - company_data = self._get_company_data(company_id) - if not company_data: - return "Company not found." - - combined = self.build_combined_context(company_data) - display_name = company_data.get('name').split(' ')[0] # Simple Kurzname logic - - prompt = f""" -Du bist ein exzellenter B2B-Stratege und Texter. -Deine Aufgabe ist es, einen hochpersonalisierten, scharfsinnigen und wertschätzenden Einleitungssatz für eine E-Mail zu formulieren. - ---- Unternehmenskontext --- -Kurzname: {display_name} -Beschreibung: {combined} - ---- Stilvorgaben --- -1. Analysiere das Kerngeschäft: Was ist die zentrale physische Herausforderung (z.B. Sauberkeit in Nassbereichen, Logistik-Effizienz, Objektschutz)? -2. KEINE ZAHLEN: Erwähne niemals konkrete Zahlen (Besucherzahlen, m², Anzahl Pools). Nutze stattdessen qualitative Begriffe wie "weitläufig", "hochfrequent", "komplex" oder "marktführend". -3. Identifiziere den Hebel: Was ist der Erfolgsfaktor (z.B. Gäste-Zufriedenheit, Prozessstabilität, Sicherheit)? -4. Formuliere den Satz (20-35 Wörter): Elegant, aktiv, KEINE Floskeln. -5. WICHTIG: Formuliere als positive Beobachtung über eine Kernkompetenz. - -Deine Ausgabe: NUR der finale Satz. -""" - - # Call Gemini (Simplified for POC) - headers = {"Content-Type": "application/json"} - payload = { - "contents": [{"parts": [{"text": prompt}]}] - } - url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key={self.api_key}" - - resp = requests.post(url, headers=headers, json=payload) - result = resp.json() - - try: - hook_text = result['candidates'][0]['content']['parts'][0]['text'].strip() - return hook_text - except: - return f"Error generating hook: {result}" - -if __name__ == "__main__": - # Test with CE-ID 1 (Therme Erding) - db = "/home/node/clawd/repos/brancheneinstufung2/company_explorer_local.db" - service = LeadHookService(db) - print(f"--- Testing LeadHookService for ID 1 ---") - hook = service.generate_hook(1) - print(f"GENERATED HOOK:\n{hook}") diff --git a/scripts/generate_sniper_copy.py b/scripts/generate_sniper_copy.py deleted file mode 100644 index 92213e80..00000000 --- a/scripts/generate_sniper_copy.py +++ /dev/null @@ -1,123 +0,0 @@ -import sqlite3 -import os -import json -import requests -import argparse -from dotenv import load_dotenv - -# --- Configuration & Setup --- -load_dotenv(dotenv_path="/home/node/clawd/.env", override=True) -DB_PATH = "/home/node/clawd/repos/brancheneinstufung2/company_explorer_local.db" -GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") - - -class SniperGenerator: - def __init__(self, db_path=DB_PATH): - self.db_path = db_path - # TODO: Initialize Notion client to get Vertical/Persona data - # TODO: Load Marketing Report KPIs into memory - - def get_lead_data(self, company_id): - """Gathers all necessary data for a lead from the local DB.""" - conn = sqlite3.connect(self.db_path) - conn.row_factory = sqlite3.Row - cursor = conn.cursor() - - cursor.execute("SELECT * FROM companies WHERE id = ?", (company_id,)) - company_data = cursor.fetchone() - conn.close() - - if not company_data: - return None - - return dict(company_data) - - def select_product_and_persona(self, company_data, target_role): - """ - Implements the '3+1' rule to decide which product to pitch. - Placeholder logic - will be replaced with Notion data. - """ - print(f"🎯 Selecting product for role '{target_role}' in industry '{company_data.get('industry_ai')}'...") - # Placeholder for the 3+1 logic - # if target_role in ["Wirtschaftl. Entscheider", "Infrastruktur-Verantw."]: - # return "Primary" - # if target_role == "Innovations-Treiber": - # return "Secondary" - # if target_role == "Operativer Entscheider": - # # Here we would check the "Ops Focus: Secondary?" checkbox from Notion - # return "Primary" # Default - - # For now, we default to the primary product (Cleaning) - print("-> Defaulting to 'Primary Product' (Cleaning).") - return "Cleaning" - - - def generate_copy(self, company_id, target_role="Wirtschaftl. Entscheider"): - """ - Generates the 3-sentence sniper copy for a given company and role. - """ - # 1. Gather Data - lead_data = self.get_lead_data(company_id) - if not lead_data: - return "Error: Company data not found." - - # 2. Decide on Product (using 3+1 rule) - product_to_pitch = self.select_product_and_persona(lead_data, target_role) - - # 3. Get Social Proof KPIs (from Marketing Report) - # Placeholder - using hardcoded values from the report - kpis = { - "cost_reduction": "10-25%", - "time_saving": "20-40%" - } - - # 4. Construct Master Prompt - # This is a simplified version for now - prompt = f""" - Du bist ein Weltklasse B2B-Stratege. Deine Aufgabe ist es, eine 3-Satz-E-Mail-Einleitung im '1,5°-Stil' zu erstellen. - - **Regeln:** - - Satz 1 (Firma): Zeige, dass du das Geschäftsmodell und die zentrale Herausforderung verstanden hast. KEINE ZAHLEN, nur qualitative Größe. - - Satz 2 (Persona): Sprich den spezifischen Schmerz der Zielrolle an und verbinde ihn mit dem Produkt '{product_to_pitch}'. - - Satz 3 (Social Proof): Untermauere die Lösung mit einem konkreten KPI von Marktbegleitern. - - **Daten:** - - Firma: {lead_data.get('name')} - - Branche (KI): {lead_data.get('industry_ai')} - - Standort: {lead_data.get('city')} - - Rolle: {target_role} - - KPI 1 (Kosten): {kpis['cost_reduction']} - - KPI 2 (Zeit): {kpis['time_saving']} - - **Output:** Nur die 3 Sätze. Sonst nichts. - """ - - # 5. Call Gemini API - print("📞 Calling Gemini to generate copy...") - headers = {"Content-Type": "application/json"} - payload = {"contents": [{"parts": [{"text": prompt}]}]} - url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key={GEMINI_API_KEY}" - - try: - resp = requests.post(url, headers=headers, json=payload, timeout=20) - resp.raise_for_status() - result = resp.json() - copy_text = result['candidates'][0]['content']['parts'][0]['text'].strip() - return copy_text - except Exception as e: - return f"Error during Gemini call: {e}" - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Generate sniper copy for a lead.") - parser.add_argument("company_id", type=int, help="The Company Explorer ID of the lead.") - parser.add_argument("--role", type=str, default="Wirtschaftl. Entscheider", help="The target persona/role.") - args = parser.parse_args() - - sniper = SniperGenerator() - final_copy = sniper.generate_copy(args.company_id, args.role) - - print("\n--- GENERATED SNIPER COPY ---") - print(final_copy) - print("-----------------------------\n") - diff --git a/scripts/post_daily_log_to_notion.py b/scripts/post_daily_log_to_notion.py new file mode 100644 index 00000000..160ffea8 --- /dev/null +++ b/scripts/post_daily_log_to_notion.py @@ -0,0 +1,114 @@ +import os +import requests +import json +from dotenv import load_dotenv + +load_dotenv(dotenv_path="/home/node/clawd/.env") + +NOTION_TOKEN = os.getenv("NOTION_API_KEY") +HEADERS = { + "Authorization": f"Bearer {NOTION_TOKEN}", + "Content-Type": "application/json", + "Notion-Version": "2022-06-28" +} + +PROJECT_ID = "2ea88f42-8544-8074-9ad8-c24d283bc1c9" + +def find_tasks_db(): + url = "https://api.notion.com/v1/search" + payload = {"query": "Tasks", "filter": {"value": "database", "property": "object"}} + resp = requests.post(url, headers=HEADERS, json=payload) + if resp.status_code == 200: + results = resp.json().get("results", []) + if results: + return results[0]['id'] + return None + +def read_memory(): + try: + with open("memory/2026-02-17.md", "r") as f: + return f.readlines() + except FileNotFoundError: + return [] + +def parse_markdown_to_blocks(lines): + blocks = [] + for line in lines: + line = line.strip() + if not line: + continue + + if line.startswith("# "): + blocks.append({ + "object": "block", + "type": "heading_1", + "heading_1": {"rich_text": [{"type": "text", "text": {"content": line[2:]}}]} + }) + elif line.startswith("## "): + blocks.append({ + "object": "block", + "type": "heading_2", + "heading_2": {"rich_text": [{"type": "text", "text": {"content": line[3:]}}]} + }) + elif line.startswith("### "): + blocks.append({ + "object": "block", + "type": "heading_3", + "heading_3": {"rich_text": [{"type": "text", "text": {"content": line[4:]}}]} + }) + elif line.startswith("- "): + blocks.append({ + "object": "block", + "type": "bulleted_list_item", + "bulleted_list_item": {"rich_text": [{"type": "text", "text": {"content": line[2:]}}]} + }) + else: + blocks.append({ + "object": "block", + "type": "paragraph", + "paragraph": {"rich_text": [{"type": "text", "text": {"content": line}}]} + }) + return blocks + +def create_log_entry(): + db_id = find_tasks_db() + if not db_id: + print("❌ Tasks DB not found via search.") + return + + lines = read_memory() + children_blocks = parse_markdown_to_blocks(lines) + + url = "https://api.notion.com/v1/pages" + + # Try creating with "Name", if fails we might need to check schema, but usually it's Name or Task. + # We'll stick to "Name" as it's most standard, but based on error before, maybe the DB was wrong. + + payload = { + "parent": {"database_id": db_id}, + "properties": { + "Name": {"title": [{"text": {"content": "Tages-Log 17.02.2026"}}]}, + "Status": {"status": {"name": "Done"}}, + "Project": {"relation": [{"id": PROJECT_ID}]} + }, + "children": children_blocks[:100] + } + + resp = requests.post(url, headers=HEADERS, json=payload) + if resp.status_code == 200: + print("✅ Tages-Log in Notion erstellt.") + else: + # If Name fails, try Task + if "Name is not a property" in resp.text: + payload["properties"].pop("Name") + payload["properties"]["Task"] = {"title": [{"text": {"content": "Tages-Log 17.02.2026"}}]} + resp2 = requests.post(url, headers=HEADERS, json=payload) + if resp2.status_code == 200: + print("✅ Tages-Log in Notion erstellt (Property 'Task').") + else: + print(f"❌ Fehler (Retry): {resp2.text}") + else: + print(f"❌ Fehler: {resp.text}") + +if __name__ == "__main__": + create_log_entry() diff --git a/scripts/post_retro_log.py b/scripts/post_retro_log.py new file mode 100644 index 00000000..1b104c6c --- /dev/null +++ b/scripts/post_retro_log.py @@ -0,0 +1,91 @@ +import os +import requests +import json +from dotenv import load_dotenv + +load_dotenv(dotenv_path="/home/node/clawd/.env") + +NOTION_TOKEN = os.getenv("NOTION_API_KEY") +HEADERS = { + "Authorization": f"Bearer {NOTION_TOKEN}", + "Content-Type": "application/json", + "Notion-Version": "2022-06-28" +} + +PROJECT_ID = "2ea88f42-8544-8074-9ad8-c24d283bc1c9" + +def find_tasks_db(): + url = "https://api.notion.com/v1/search" + payload = {"query": "Tasks", "filter": {"value": "database", "property": "object"}} + resp = requests.post(url, headers=HEADERS, json=payload) + if resp.status_code == 200: + results = resp.json().get("results", []) + if results: + return results[0]['id'] + return None + +def create_log_entry(): + db_id = find_tasks_db() + if not db_id: + print("❌ Tasks DB not found.") + return + + # Content for 16.02. + content = """# Tages-Log: 16.02.2026 (Nachtrag) + +## Zusammenfassung +Durchbruch bei der technischen Integration zwischen SuperOffice CRM und Company Explorer. Der bidirektionale Datenaustausch steht. + +## Erreichte Meilensteine + +### 1. SuperOffice Integration (Deep Dive) +- **Status:** ✅ **POC Erfolgreich.** +- **Token-Management:** Automatische Refresh-Logik implementiert (kein manuelles Login mehr nötig). +- **Write-Back:** Erfolgreiches Update von Firmen-Daten (Adresse, VAT, URL) in SuperOffice. +- **Hürden genommen:** + - **Pflichtfelder:** Fehler mit `Number2` (unbekanntes Pflichtfeld) identifiziert und umgangen. + - **Listen-Objekte:** Korrekte Syntax für das Update von Dropdowns (Branche) gefunden (`Select` vs `Id`). + +### 2. Company Explorer Connector +- **Status:** ✅ **Client fertig.** +- **Workflow:** Skript `company_explorer_connector.py` steuert jetzt den Upload von Firmen und das Abholen der Ergebnisse. + +### 3. Regeln der Zusammenarbeit +- **Core Directive V2.0:** Fokus auf "Ehrlicher Partner" und präzise technische Umsetzung ohne Floskeln definiert. + +## Fazit +Die "Rohre" zwischen den Systemen sind verlegt. Daten können fließen. +""" + + blocks = [] + for line in content.split('\n'): + blocks.append({ + "object": "block", + "type": "paragraph", + "paragraph": {"rich_text": [{"type": "text", "text": {"content": line}}]} + }) + + url = "https://api.notion.com/v1/pages" + payload = { + "parent": {"database_id": db_id}, + "properties": { + "Name": {"title": [{"text": {"content": "Tages-Log 16.02.2026 (Nachtrag)"}}]}, + "Status": {"status": {"name": "Done"}}, + "Project": {"relation": [{"id": PROJECT_ID}]} + }, + "children": blocks[:90] + } + + resp = requests.post(url, headers=HEADERS, json=payload) + if resp.status_code == 200: + print("✅ Nachtrag 16.02. erstellt.") + else: + # Fallback Name/Task check + if "Name is not a property" in resp.text: + payload["properties"].pop("Name") + payload["properties"]["Task"] = {"title": [{"text": {"content": "Tages-Log 16.02.2026 (Nachtrag)"}}]} + requests.post(url, headers=HEADERS, json=payload) + print("✅ Nachtrag 16.02. erstellt (Fallback).") + +if __name__ == "__main__": + create_log_entry() diff --git a/scripts/query_notion_db.py b/scripts/query_notion_db.py deleted file mode 100644 index 6ff1b60f..00000000 --- a/scripts/query_notion_db.py +++ /dev/null @@ -1,106 +0,0 @@ -import os -import requests -import json -from dotenv import load_dotenv - -load_dotenv(dotenv_path="/home/node/clawd/.env") - -def find_db_by_name(query_name): - token = os.getenv("NOTION_API_KEY") - headers = { - "Authorization": f"Bearer {token}", - "Content-Type": "application/json", - "Notion-Version": "2022-06-28" - } - - url = "https://api.notion.com/v1/search" - payload = { - "query": query_name, - "filter": {"value": "database", "property": "object"} - } - - # print(f"Searching for '{query_name}' database...") - resp = requests.post(url, headers=headers, json=payload) - - if resp.status_code != 200: - print(f"Error searching DB: {resp.text}") - return None - - results = resp.json().get("results", []) - if not results: - # print(f"No database named '{query_name}' found via search.") - return None - - db = results[0] - return db['id'] - -def dump_db_content(db_id, db_name="DB"): - token = os.getenv("NOTION_API_KEY") - headers = { - "Authorization": f"Bearer {token}", - "Content-Type": "application/json", - "Notion-Version": "2022-06-28" - } - - # Get all pages - url = f"https://api.notion.com/v1/databases/{db_id}/query" - resp = requests.post(url, headers=headers, json={}) - - if resp.status_code != 200: - print(f"Error querying DB: {resp.text}") - return - - pages = resp.json().get("results", []) - print(f"\n--- Content of '{db_name}' ({len(pages)} rows) ---") - - rows = [] - for page in pages: - props = page['properties'] - - # Extract Name (Title) - Robust Logic - name = "N/A" - if "Vertical" in props and props["Vertical"]["title"]: - name = props["Vertical"]["title"][0]["plain_text"] - elif "Name" in props and props["Name"]["title"]: - name = props["Name"]["title"][0]["plain_text"] - elif "Role" in props and props["Role"]["title"]: - name = props["Role"]["title"][0]["plain_text"] - - # Extract Status/Freigabe - freigabe = "" - if "Freigabe" in props: - if props["Freigabe"]["type"] == "status": - freigabe = props["Freigabe"]["status"]["name"] if props["Freigabe"]["status"] else "" - elif props["Freigabe"]["type"] == "select": - freigabe = props["Freigabe"]["select"]["name"] if props["Freigabe"]["select"] else "" - - # Extract Notes - notes = "" - if "Notes" in props and props["Notes"]["rich_text"]: - notes = props["Notes"]["rich_text"][0]["plain_text"] - - # Extract KPIs - kpis = "" - for kpi_key in ["KPIs", "KPI", "Quantitative Value"]: - if kpi_key in props and props[kpi_key]["rich_text"]: - kpis = props[kpi_key]["rich_text"][0]["plain_text"] - break - - rows.append({"name": name, "freigabe": freigabe, "notes": notes, "kpis": kpis}) - - # Print clean table - print(f"{'Name':<40} | {'Freigabe':<15} | {'KPIs':<20} | {'Notes'}") - print("-" * 120) - for r in rows: - # Nur Zeilen mit Inhalt anzeigen (Filter empty names) - if r['name'] != "N/A": - print(f"{r['name']:<40} | {r['freigabe']:<15} | {r['kpis']:<20} | {r['notes']}") - -if __name__ == "__main__": - db_id_ind = find_db_by_name("Industries") - if db_id_ind: - dump_db_content(db_id_ind, "Industries") - - db_id_roles = find_db_by_name("Personas") - if db_id_roles: - dump_db_content(db_id_roles, "Personas") diff --git a/scripts/sync_ce_to_sqlite.py b/scripts/sync_ce_to_sqlite.py deleted file mode 100644 index c58a94da..00000000 --- a/scripts/sync_ce_to_sqlite.py +++ /dev/null @@ -1,130 +0,0 @@ -import requests -import os -import sqlite3 -import json - -# --- Configuration --- -BASE_URL = "http://192.168.178.6:8090/ce/api" -API_USER = os.getenv("COMPANY_EXPLORER_API_USER", "admin") -API_PASSWORD = os.getenv("COMPANY_EXPLORER_API_PASSWORD", "gemini") -DB_PATH = "/home/node/clawd/repos/brancheneinstufung2/company_explorer_local.db" - -def fetch_all_companies_from_api(): - """Fetches all companies from the Company Explorer API.""" - print("Fetching all companies from Company Explorer API...") - url = f"{BASE_URL}/companies" - all_companies = [] - page = 1 - - while True: - try: - params = {'page': page, 'per_page': 50} - response = requests.get( - url, - auth=(API_USER, API_PASSWORD), - params=params, - timeout=20 - ) - response.raise_for_status() - data = response.json() - - companies_on_page = data.get("items", []) - if not companies_on_page: - break - - all_companies.extend(companies_on_page) - print(f"Fetched page {page} with {len(companies_on_page)} companies.") - - if len(all_companies) >= data.get("total", 0): - break - - page += 1 - - except requests.exceptions.RequestException as e: - print(f"Error fetching companies from API: {e}") - return None - - print(f"Total companies fetched: {len(all_companies)}") - return all_companies - -def setup_database(): - """Creates the SQLite database and the companies table.""" - print(f"Setting up database at: {DB_PATH}") - if os.path.exists(DB_PATH): - os.remove(DB_PATH) - print("Removed existing database file.") - - conn = sqlite3.connect(DB_PATH) - cursor = conn.cursor() - - # Define a flexible schema to hold the key fields - cursor.execute(""" - CREATE TABLE companies ( - id INTEGER PRIMARY KEY, - name TEXT, - industry_ai TEXT, - status TEXT, - city TEXT, - country TEXT, - website TEXT, - calculated_metric_name TEXT, - calculated_metric_value TEXT, - calculated_metric_unit TEXT, - full_json TEXT - ) - """) - - conn.commit() - conn.close() - print("Database and table 'companies' created successfully.") - -def populate_database(companies): - """Populates the database with company data.""" - if not companies: - print("No companies to populate.") - return - - print("Populating database...") - conn = sqlite3.connect(DB_PATH) - cursor = conn.cursor() - - for company in companies: - # Extract metrics safely - metrics = company.get('calculated_metrics', []) - metric_name = metrics[0].get('name') if metrics else None - metric_value = metrics[0].get('value') if metrics else None - metric_unit = metrics[0].get('unit') if metrics else None - - cursor.execute(""" - INSERT INTO companies ( - id, name, industry_ai, status, city, country, website, - calculated_metric_name, calculated_metric_value, calculated_metric_unit, - full_json - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) - """, ( - company.get('id'), - company.get('name'), - company.get('industry_ai'), - company.get('status'), - company.get('city'), - company.get('country'), - company.get('website'), - metric_name, - metric_value, - metric_unit, - json.dumps(company) # Store the full object for future flexibility - )) - - conn.commit() - conn.close() - print(f"Successfully inserted {len(companies)} records into the database.") - -if __name__ == "__main__": - all_companies = fetch_all_companies_from_api() - if all_companies is not None: - setup_database() - populate_database(all_companies) - print("\nSync process finished successfully.") - print(f"Database is ready at: {DB_PATH}") - else: - print("\nSync process failed due to API errors.") diff --git a/scripts/test_prod_key.py b/scripts/test_prod_key.py deleted file mode 100644 index a8437320..00000000 --- a/scripts/test_prod_key.py +++ /dev/null @@ -1,25 +0,0 @@ -import os -import requests -from dotenv import load_dotenv - -load_dotenv(dotenv_path="/home/node/clawd/.env") - -def test_prod_key(): - key = os.getenv("GEMINI_API_KEY_PROD") - if not key: - print("❌ GEMINI_API_KEY_PROD not found in .env") - return - - print(f"🔑 Testing Key: {key[:5]}...{key[-3:]}") - - url = f"https://generativelanguage.googleapis.com/v1beta/models?key={key}" - resp = requests.get(url) - - if resp.status_code == 200: - print("✅ API Call Successful! Key is active.") - # print(f"Available Models: {[m['name'] for m in resp.json().get('models', [])][:3]}") - else: - print(f"❌ API Error: {resp.status_code} - {resp.text}") - -if __name__ == "__main__": - test_prod_key() diff --git a/scripts/update_notion_batch.py b/scripts/update_notion_batch.py deleted file mode 100644 index 0ff08d6a..00000000 --- a/scripts/update_notion_batch.py +++ /dev/null @@ -1,200 +0,0 @@ -import os -import requests -import json -from dotenv import load_dotenv - -load_dotenv(dotenv_path="/home/node/clawd/.env") - -NOTION_TOKEN = os.getenv("NOTION_API_KEY") -HEADERS = { - "Authorization": f"Bearer {NOTION_TOKEN}", - "Content-Type": "application/json", - "Notion-Version": "2022-06-28" -} - -def find_db_id(query_name): - url = "https://api.notion.com/v1/search" - payload = {"query": query_name, "filter": {"value": "database", "property": "object"}} - resp = requests.post(url, headers=HEADERS, json=payload) - if resp.status_code == 200: - results = resp.json().get("results", []) - if results: - return results[0]['id'] - return None - -def get_page_id(db_id, title_col, title_val): - url = f"https://api.notion.com/v1/databases/{db_id}/query" - payload = { - "filter": { - "property": title_col, - "title": {"equals": title_val} - } - } - resp = requests.post(url, headers=HEADERS, json=payload) - if resp.status_code == 200: - results = resp.json().get("results", []) - if results: - return results[0] # Return full page obj to access props - return None - -def update_page(page_id, properties): - url = f"https://api.notion.com/v1/pages/{page_id}" - payload = {"properties": properties} - resp = requests.patch(url, headers=HEADERS, json=payload) - if resp.status_code == 200: - print(f"✅ Updated page {page_id}") - else: - print(f"❌ Error updating {page_id}: {resp.text}") - -def append_text(current_text, new_text): - if not current_text: - return new_text - if new_text in current_text: - return current_text # Avoid duplicates - return f"{current_text}\n\n[Auto-Update]: {new_text}" - -# --- DATA TO UPDATE --- - -# 1. Personas (KPIs) -PERSONA_UPDATES = { - "Wirtschaftlicher Entscheider": "10-25% Reduktion Personalkosten\n15-30% höhere Gästezufriedenheit (Hypothese)", - "Operativer Entscheider": "20-40% Entlastung bei Routineaufgaben\n100% Abdeckung Reinigungszyklen", - "Infrastruktur-Verantwortlicher": "20-30% schnellere Integration\n80-90% weniger Ausfallzeiten", - "Innovations-Treiber": "10-20% höhere Servicekapazität\nSteigerung Upselling 5-10%" -} - -# 2. Industries (Pains/Gains/Status/Notes) -INDUSTRY_UPDATES = { - "Healthcare - Hospital": { - "pains_add": "Mangelnde Hygiene-Standards durch Personalengpässe (Infektionsrisiko). Hoher Dokumentationsaufwand für Audits.", - "gains_add": "Konstante, audit-sichere Sauberkeit (24/7). Entlastung des Reinigungspersonals.", - "status": "Freigegeben", - "note_add": "Prio 1: Reinigung (Alex Veto). Service ist 'nice to have'. KPI: Hygiene-Sicherheit.", - "ops_focus": True # Checkbox - }, - "Healthcare - Care Home": { - "pains_add": "Mangelnde Hygiene-Standards. Steigende Personalkosten bei begrenzten Pflegesätzen.", - "gains_add": "Sichtbare Hygiene schafft Vertrauen. Entlastung Housekeeping.", - "status": "Freigegeben", - "note_add": "Prio 1: Reinigung. Prio 2: Service (Essen). Fokus auf Fachkräftemangel.", - "ops_focus": True - }, - "Hospitality - Gastronomy": { - "pains_add": "Lobby-Optik leidet bei Personalmangel.", - "gains_add": "Makellose Optik für den ersten Eindruck.", - "status": "Freigegeben", - "note_add": "Prio 1: Reinigung (Nachts). Service nur in Entertainment-Gastro.", - "ops_focus": False - }, - "Leisure - Entertainment": { - "pains_add": "Service-Personal fehlt für Umsatz (Getränke).", - "gains_add": "Mehr Umsatz durch konstante Verfügbarkeit.", - "status": "Freigegeben", - "note_add": "Prio 1: Service Robotik (BellaBot).", - "ops_focus": False - }, - "Industry - Manufacturing": { - "pains_add": "Staubbelastung gefährdet Sensoren/Qualität. Sicherheitsrisiko auf Fahrwegen.", - "gains_add": "Staubfreie Umgebung ohne Produktionsstopp.", - "status": "Freigegeben", - "note_add": "Prio 1: Reinigung (Sweeper). Kein Stapler-Kampf!", - "ops_focus": True - }, - "Logistics - Warehouse": { - "pains_add": "Staub auf Waren. Manuelles Kehren bindet Fachkräfte.", - "gains_add": "Werterhalt Hallenboden. Sauberkeit ohne Störung.", - "status": "Freigegeben", - "note_add": "Prio 1: Sweeper (Staub). Prio 2: Wet.", - "ops_focus": True - } -} - - -def run_updates(): - print("--- Starting Notion Updates ---") - - # 1. Update Personas - db_personas = find_db_id("Personas") - if db_personas: - print(f"Found Personas DB: {db_personas}") - for role, kpi_text in PERSONA_UPDATES.items(): - page = get_page_id(db_personas, "Role", role) # Title col is "Role" here? Or "Name"? Script 1 said Role fallback. - # Actually, let's try "Name" first, then "Role". - # In previous dump, 'Name' was 'Infrastruktur-Verantwortlicher' etc. - # Let's assume the main column is "Name" (title). - if not page: - # Try finding by property "Role" (select) if title is different? - # Based on dump, the Title column content was "Infrastruktur-Verantwortlicher". - # Let's assume title property is named "Name" or "Role". - # Inspecting schema from previous run: `['Rollenbeschreibung', '...Product Categories', ... 'Role']` - # The Title property is likely "Role" or "Name". - # Let's try searching for "Role" property as title. - page = get_page_id(db_personas, "Role", role) - - if page: - # Update KPIs - # Column name in schema: "KPIs" - update_page(page['id'], { - "KPIs": {"rich_text": [{"text": {"content": kpi_text}}]} - }) - else: - print(f"⚠️ Persona '{role}' not found.") - else: - print("❌ Personas DB not found.") - - # 2. Update Industries - db_ind = find_db_id("Industries") - if db_ind: - print(f"Found Industries DB: {db_ind}") - for vertical, data in INDUSTRY_UPDATES.items(): - page = get_page_id(db_ind, "Vertical", vertical) - if page: - props = page['properties'] - - # Prepare updates - new_props = {} - - # Status - # Check if Status is select or status - if "Freigabe" in props: - # Assuming Select or Status. Let's try Select first, if fails try Status - if props["Freigabe"]["type"] == "select": - new_props["Freigabe"] = {"select": {"name": data["status"]}} - elif props["Freigabe"]["type"] == "status": - new_props["Freigabe"] = {"status": {"name": data["status"]}} - - # Ops Focus (Checkbox) - if "Ops Focus: Secondary" in props: - new_props["Ops Focus: Secondary"] = {"checkbox": data["ops_focus"]} - - # Pains (Append) - current_pains = "" - if "Pains" in props and props["Pains"]["rich_text"]: - current_pains = props["Pains"]["rich_text"][0]["plain_text"] - new_pains = append_text(current_pains, data["pains_add"]) - new_props["Pains"] = {"rich_text": [{"text": {"content": new_pains}}]} - - # Gains (Append) - current_gains = "" - if "Gains" in props and props["Gains"]["rich_text"]: - current_gains = props["Gains"]["rich_text"][0]["plain_text"] - new_gains = append_text(current_gains, data["gains_add"]) - new_props["Gains"] = {"rich_text": [{"text": {"content": new_gains}}]} - - # Notes (Append) - current_notes = "" - if "Notes" in props and props["Notes"]["rich_text"]: - current_notes = props["Notes"]["rich_text"][0]["plain_text"] - new_notes = append_text(current_notes, data["note_add"]) - new_props["Notes"] = {"rich_text": [{"text": {"content": new_notes}}]} - - # Execute Update - update_page(page['id'], new_props) - - else: - print(f"⚠️ Industry '{vertical}' not found.") - else: - print("❌ Industries DB not found.") - -if __name__ == "__main__": - run_updates() diff --git a/scripts/update_notion_tasks_status.py b/scripts/update_notion_tasks_status.py new file mode 100644 index 00000000..e0566cf3 --- /dev/null +++ b/scripts/update_notion_tasks_status.py @@ -0,0 +1,70 @@ +import os +import requests +import json +from dotenv import load_dotenv + +load_dotenv(dotenv_path="/home/node/clawd/.env") + +NOTION_TOKEN = os.getenv("NOTION_API_KEY") +HEADERS = { + "Authorization": f"Bearer {NOTION_TOKEN}", + "Content-Type": "application/json", + "Notion-Version": "2022-06-28" +} + +# IDs from previous fetch +TASKS = { + "Setup GCP": "2ea88f42-8544-8073-b287-eb83ce581c0b", + "SO API POC": "2ff88f42-8544-8093-a301-fc27b3886aa1", + "Pains Gains Vertical": "2ff88f42-8544-8050-8245-c3bb852058f4", + "Segmentierung Bestand": "2ff88f42-8544-808f-862b-c30ab2f29783", + "Matrixmultiplikation": "2ff88f42-8544-8079-a23e-c248e35b09a0" +} + +TASKS_DB_ID = "30588f42-8544-80c3-8919-e22d74d945ea" # From discovery +PROJECT_ID = "2ea88f42-8544-8074-9ad8-c24d283bc1c9" + +def update_status(page_id, status): + url = f"https://api.notion.com/v1/pages/{page_id}" + payload = {"properties": {"Status": {"status": {"name": status}}}} + requests.patch(url, headers=HEADERS, json=payload) + print(f"Updated Status {page_id} -> {status}") + +def add_comment(page_id, text): + url = "https://api.notion.com/v1/comments" + payload = { + "parent": {"page_id": page_id}, + "rich_text": [{"text": {"content": text}}] + } + requests.post(url, headers=HEADERS, json=payload) + print(f"Added comment to {page_id}") + +def create_task(title): + url = "https://api.notion.com/v1/pages" + payload = { + "parent": {"database_id": TASKS_DB_ID}, + "properties": { + "Name": {"title": [{"text": {"content": title}}]}, + "Status": {"status": {"name": "To Do"}}, + "Project": {"relation": [{"id": PROJECT_ID}]} + } + } + requests.post(url, headers=HEADERS, json=payload) + print(f"Created Task: {title}") + +def run(): + # 1. Done + update_status(TASKS["Setup GCP"], "Done") + update_status(TASKS["SO API POC"], "Done") + + # 2. Progress Comments + add_comment(TASKS["Pains Gains Vertical"], "✅ Entwurf in Notion finalisiert und detailliert (inkl. Hygiene-Fokus). Bereit für Review am Freitag.") + add_comment(TASKS["Segmentierung Bestand"], "✅ Company Explorer Schema erweitert (V2). Bereit für Excel-Import.") + add_comment(TASKS["Matrixmultiplikation"], "✅ Logik '3+1' (Prio Produkt + Sekundär bei Ops-Rolle) in Datenstruktur abgebildet.") + + # 3. New Tasks + create_task("Company Explorer: Daten-Sync & CRM-Import") + create_task("SuperOffice: Definition & Anlage UDF-Felder (Intro-Text)") + +if __name__ == "__main__": + run() diff --git a/show_status.py b/show_status.py deleted file mode 100644 index 351df596..00000000 --- a/show_status.py +++ /dev/null @@ -1,157 +0,0 @@ -import sqlite3 -import os -import requests -import json -from datetime import datetime, timedelta - -# --- Configuration --- -DB_PATH = "/home/node/clawd/repos/brancheneinstufung2/company_explorer_local.db" -CE_API_URL = "http://192.168.178.6:8090/ce/api" -# SO_API_URL = "..." # To be added - -import sys -sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), "connector-superoffice")) - -from superoffice_client import SuperOfficeClient - -class GtmHealthCheck: - def __init__(self): - self.db_path = DB_PATH - self.ce_api_url = CE_API_URL - self.api_user = os.getenv("COMPANY_EXPLORER_API_USER", "admin") - self.api_password = os.getenv("COMPANY_EXPLORER_API_PASSWORD", "gemini") - - def get_ce_stats(self): - """Holt Statistiken aus der lokalen Company Explorer DB.""" - if not os.path.exists(self.db_path): - return {"total": 0, "in_progress": 0, "error": "DB file not found."} - - try: - conn = sqlite3.connect(self.db_path) - cursor = conn.cursor() - - cursor.execute("SELECT COUNT(*) FROM companies") - total = cursor.fetchone()[0] - - cursor.execute("SELECT COUNT(*) FROM companies WHERE status != 'ENRICHED'") - in_progress = cursor.fetchone()[0] - - conn.close() - return {"total": total, "in_progress": in_progress} - except Exception as e: - return {"total": 0, "in_progress": 0, "error": str(e)} - - def check_ce_api_health(self): - """Prüft die Erreichbarkeit der Company Explorer API.""" - try: - response = requests.get( - f"{self.ce_api_url}/health", - auth=(self.api_user, self.api_password), - timeout=5 - ) - if response.status_code == 200 and response.json().get("status") == "ok": - return "[HEALTHY]" - return f"[ERROR - Status {response.status_code}]" - except requests.exceptions.Timeout: - return "[BUSY/TIMEOUT]" - except requests.exceptions.RequestException: - return "[UNREACHABLE]" - - def get_so_stats(self): - """Holt die ungefähre Gesamtanzahl der Firmen aus der SuperOffice API.""" - try: - client = SuperOfficeClient() - # We query the first page with a page size of 200 (a common default) - query_string = "Contact?$top=200" - - # Directly use the requests logic from the client's search method for a single page - url = f"{client.base_url}/{query_string}" - response = requests.get(url, headers=client.headers, timeout=15) - response.raise_for_status() - data = response.json() - - count_on_page = len(data.get('value', [])) - - # Check for a next link to determine if there are more pages - if 'odata.nextLink' in data or 'next_page_url' in data: - return {"total": f"> {count_on_page}"} # More than one page - else: - return {"total": str(count_on_page)} # Exact number if only one page - - except requests.exceptions.RequestException as e: - return {"total": f"API Error"} - except Exception as e: - return {"total": f"Error"} - - def check_so_api_health(self): - """Prüft die Erreichbarkeit der SuperOffice API.""" - try: - client = SuperOfficeClient() - # A simple request to the base URL should suffice as a health check - response = requests.get(client.base_url, headers=client.headers, timeout=10) - if response.status_code == 200: - return "[HEALTHY]" - return f"[ERROR - Status {response.status_code}]" - except requests.exceptions.Timeout: - return "[BUSY/TIMEOUT]" - except requests.exceptions.RequestException: - return "[UNREACHABLE]" - - def get_throughput(self): - """Zählt die verarbeiteten Accounts der letzten Stunde aus dem Log.""" - log_file = "/home/node/clawd/repos/brancheneinstufung2/logs/throughput.log" - if not os.path.exists(log_file): - return 0 - - count = 0 - one_hour_ago = datetime.utcnow() - timedelta(hours=1) - - try: - with open(log_file, "r") as f: - for line in f: - parts = line.strip().split(',') - if len(parts) >= 1: - try: - timestamp = datetime.fromisoformat(parts[0]) - if timestamp >= one_hour_ago: - count += 1 - except ValueError: - continue # Ignore malformed lines - return count - except Exception: - return "Log Error" - - def render_dashboard(self): - """Stellt das Dashboard auf der Konsole dar.""" - ce_stats = self.get_ce_stats() - ce_health = self.check_ce_api_health() - so_stats = self.get_so_stats() - so_health = self.check_so_api_health() - throughput = self.get_throughput() - - timestamp = datetime.now().strftime("%d.%m.%y %H:%M") - - print("=======================================") - print(f"GTM Lead Engine - Status ({timestamp})") - print("=======================================") - print("\n[+] Schnittstellen:") - print(f" - SuperOffice API: {so_health}") - print(f" - Company Explorer: {ce_health}") - - print("\n[+] Account Trichter:") - print(f" - SuperOffice Gesamt: {so_stats.get('total')}") - - if 'error' in ce_stats: - print(f" - Im Company Explorer: Error ({ce_stats['error']})") - else: - print(f" - Im Company Explorer: {ce_stats.get('total')}") - print(f" - In Bearbeitung: {ce_stats.get('in_progress')}") - - print("\n[+] Durchsatz (Letzte Stunde):") - print(f" - Verarbeitet: {throughput} Accounts") - print("\n") - - -if __name__ == "__main__": - checker = GtmHealthCheck() - checker.render_dashboard() diff --git a/transcription-tool/backend/app.py b/transcription-tool/backend/app.py index 162231fa..6890fc25 100644 --- a/transcription-tool/backend/app.py +++ b/transcription-tool/backend/app.py @@ -99,6 +99,31 @@ async def upload_audio( return meeting +@app.post("/api/meetings/{meeting_id}/retry") +def retry_meeting( + meeting_id: int, + background_tasks: BackgroundTasks, + db: Session = Depends(get_db) +): + meeting = db.query(Meeting).filter(Meeting.id == meeting_id).first() + if not meeting: + raise HTTPException(404, detail="Meeting not found") + + # Check if chunks directory exists + chunk_dir = os.path.join(settings.UPLOAD_DIR, "chunks", str(meeting_id)) + if not os.path.exists(chunk_dir) or not os.listdir(chunk_dir): + raise HTTPException(400, detail="Original audio chunks not found. Please re-upload.") + + # Reset status + meeting.status = "QUEUED" + db.commit() + + # Trigger Retry Task + from .services.orchestrator import retry_meeting_task + background_tasks.add_task(retry_meeting_task, meeting.id, SessionLocal) + + return {"status": "started", "message": "Retrying transcription..."} + from pydantic import BaseModel class InsightRequest(BaseModel): @@ -201,9 +226,16 @@ def delete_meeting(meeting_id: int, db: Session = Depends(get_db)): # Serve Frontend # This must be the last route definition to avoid catching API routes -static_path = "/frontend_static" + +# PRIORITY 1: Mounted Volume (Development / Live Update) +static_path = "/app/frontend/dist" + +# PRIORITY 2: Built-in Image Path (Production) +if not os.path.exists(static_path): + static_path = "/frontend_static" + +# PRIORITY 3: Local Development (running python directly) if not os.path.exists(static_path): - # Fallback for local development if not in Docker static_path = os.path.join(os.path.dirname(__file__), "../frontend/dist") if os.path.exists(static_path): diff --git a/transcription-tool/backend/services/orchestrator.py b/transcription-tool/backend/services/orchestrator.py index 4806febe..60ab336c 100644 --- a/transcription-tool/backend/services/orchestrator.py +++ b/transcription-tool/backend/services/orchestrator.py @@ -19,6 +19,16 @@ def parse_time_to_seconds(time_str): return 0 return 0 +def clean_json_string(text): + text = text.strip() + if text.startswith("```json"): + text = text[7:] + elif text.startswith("```"): + text = text[3:] + if text.endswith("```"): + text = text[:-3] + return text.strip() + def process_meeting_task(meeting_id: int, db_session_factory): db = db_session_factory() meeting = db.query(Meeting).filter(Meeting.id == meeting_id).first() @@ -50,7 +60,13 @@ def process_meeting_task(meeting_id: int, db_session_factory): # Parse JSON and Adjust Timestamps json_data = [] try: - raw_json = json.loads(result["raw_text"]) + cleaned_text = clean_json_string(result["raw_text"]) + raw_json = json.loads(cleaned_text) + + # Check for wrapped structure (e.g. {"items": [...]}) if schema enforced it + if isinstance(raw_json, dict) and "items" in raw_json: + raw_json = raw_json["items"] # Extract inner list + if isinstance(raw_json, list): for entry in raw_json: seconds = parse_time_to_seconds(entry.get("time", "00:00")) @@ -63,7 +79,7 @@ def process_meeting_task(meeting_id: int, db_session_factory): entry["display_time"] = f"{h:02}:{m:02}:{s:02}" json_data.append(entry) except Exception as e: - logger.error(f"JSON Parsing failed for chunk {i}: {e}") + logger.error(f"JSON Parsing failed for chunk {i}: {e}. Raw text start: {result['raw_text'][:100]}") # Save chunk result db_chunk = TranscriptChunk( @@ -89,3 +105,94 @@ def process_meeting_task(meeting_id: int, db_session_factory): db.commit() finally: db.close() + +def retry_meeting_task(meeting_id: int, db_session_factory): + """ + Retries transcription using existing chunks on disk. + Avoids re-splitting the original file. + """ + db = db_session_factory() + meeting = db.query(Meeting).filter(Meeting.id == meeting_id).first() + if not meeting: + return + + try: + import os + transcriber = TranscriptionService() + + # 0. Validate Chunk Directory + chunk_dir = os.path.join(settings.UPLOAD_DIR, "chunks", str(meeting_id)) + if not os.path.exists(chunk_dir): + logger.error(f"Chunk directory not found for meeting {meeting_id}") + meeting.status = "ERROR" + db.commit() + return + + chunks = sorted([os.path.join(chunk_dir, f) for f in os.listdir(chunk_dir) if f.endswith(".mp3")]) + if not chunks: + logger.error(f"No chunks found for meeting {meeting_id}") + meeting.status = "ERROR" + db.commit() + return + + # Phase 1: Clear Old Chunks + meeting.status = "RETRYING" + db.query(TranscriptChunk).filter(TranscriptChunk.meeting_id == meeting_id).delete() + db.commit() + + # Phase 2: Transcribe + all_text = [] + for i, chunk_path in enumerate(chunks): + offset = i * settings.CHUNK_DURATION_SEC + logger.info(f"Retrying chunk {i+1}/{len(chunks)} with offset {offset}s") + + result = transcriber.transcribe_chunk(chunk_path, offset) + + # Parse JSON and Adjust Timestamps (Same logic as process_meeting_task) + json_data = [] + try: + # With response_schema, raw_text SHOULD be valid JSON directly + # But let's keep clean_json_string just in case specific models deviate + cleaned_text = clean_json_string(result["raw_text"]) + raw_json = json.loads(cleaned_text) + + # Check for wrapped structure (e.g. {"items": [...]}) if schema enforced it + if isinstance(raw_json, dict) and "items" in raw_json: + raw_json = raw_json["items"] # Extract inner list + + if isinstance(raw_json, list): + for entry in raw_json: + seconds = parse_time_to_seconds(entry.get("time", "00:00")) + absolute_seconds = seconds + offset + entry["absolute_seconds"] = absolute_seconds + + h = int(absolute_seconds // 3600) + m = int((absolute_seconds % 3600) // 60) + s = int(absolute_seconds % 60) + entry["display_time"] = f"{h:02}:{m:02}:{s:02}" + json_data.append(entry) + except Exception as e: + logger.error(f"JSON Parsing failed for chunk {i}: {e}. Raw: {result['raw_text'][:100]}") + + # Save chunk result + db_chunk = TranscriptChunk( + meeting_id=meeting.id, + chunk_index=i, + raw_text=result["raw_text"], + json_content=json_data + ) + db.add(db_chunk) + all_text.append(result["raw_text"]) + db.commit() + + # Phase 3: Finalize + meeting.status = "COMPLETED" + db.commit() + logger.info(f"Meeting {meeting.id} retry completed.") + + except Exception as e: + logger.error(f"Error retrying meeting {meeting_id}: {e}", exc_info=True) + meeting.status = "ERROR" + db.commit() + finally: + db.close() diff --git a/transcription-tool/backend/services/transcription_service.py b/transcription-tool/backend/services/transcription_service.py index f8c6e375..1775c4b7 100644 --- a/transcription-tool/backend/services/transcription_service.py +++ b/transcription-tool/backend/services/transcription_service.py @@ -30,20 +30,17 @@ class TranscriptionService: if media_file.state == "FAILED": raise Exception("File processing failed at Gemini.") - # 3. Transcribe with Diarization and Timestamps + # 3. Transcribe with Diarization and Timestamps (Plain Text Mode for Stability) prompt = """ Transkribiere dieses Audio wortgetreu. Identifiziere die Sprecher (Speaker A, Speaker B, etc.). - Gib das Ergebnis als JSON-Liste zurück. - Format: - [ - { - "time": "MM:SS", - "speaker": "Speaker A", - "text": "..." - } - ] + Gib das Ergebnis EXAKT in diesem Format zurück (pro Zeile ein Sprecherwechsel): + [MM:SS] Speaker Name: Gesprochener Text... + + Beispiel: + [00:00] Speaker A: Hallo zusammen. + [00:05] Speaker B: Guten Morgen. """ logger.info(f"Generating transcription for {file_path}...") @@ -52,14 +49,46 @@ class TranscriptionService: contents=[media_file, prompt], config=types.GenerateContentConfig( temperature=0.1, - response_mime_type="application/json" + max_output_tokens=8192 ) ) # Cleanup: Delete file from Gemini storage self.client.files.delete(name=media_file.name) - + + # Parse Plain Text to JSON + structured_data = self.parse_transcript(response.text) + import json return { - "raw_text": response.text, # This is now a JSON string + "raw_text": json.dumps(structured_data), # Return valid JSON string "offset": offset_seconds } + + def parse_transcript(self, text: str) -> list: + """ + Parses lines like '[00:12] Speaker A: Hello world' into structured JSON. + """ + import re + results = [] + # Regex to match: [MM:SS] Speaker: Text + # Flexible for MM:SS or H:MM:SS + pattern = re.compile(r"^\[(\d{1,2}:\d{2}(?::\d{2})?)\]\s*([^:]+):\s*(.+)$") + + for line in text.strip().split('\n'): + line = line.strip() + if not line: continue + + match = pattern.match(line) + if match: + time_str, speaker, content = match.groups() + results.append({ + "time": time_str, + "speaker": speaker.strip(), + "text": content.strip() + }) + else: + # Fallback: Append to previous if it looks like continuation + if results and not line.startswith("["): + results[-1]["text"] += " " + line + + return results diff --git a/transcription-tool/frontend/src/App.tsx b/transcription-tool/frontend/src/App.tsx index 863de20f..dd2d980a 100644 --- a/transcription-tool/frontend/src/App.tsx +++ b/transcription-tool/frontend/src/App.tsx @@ -394,6 +394,20 @@ export default function App() { > + diff --git a/uploads_audio/76900d3a-9d65-460e-ad9b-6553fbc1a6bc.m4a b/uploads_audio/7cc29087-842f-4b47-b2df-ce34f8395ad4.m4a similarity index 100% rename from uploads_audio/76900d3a-9d65-460e-ad9b-6553fbc1a6bc.m4a rename to uploads_audio/7cc29087-842f-4b47-b2df-ce34f8395ad4.m4a diff --git a/uploads_audio/chunks/6/chunk_000.mp3 b/uploads_audio/chunks/6/chunk_000.mp3 index e69de29b..73a881e2 100644 Binary files a/uploads_audio/chunks/6/chunk_000.mp3 and b/uploads_audio/chunks/6/chunk_000.mp3 differ diff --git a/uploads_audio/chunks/6/chunk_001.mp3 b/uploads_audio/chunks/6/chunk_001.mp3 new file mode 100644 index 00000000..54eeee19 Binary files /dev/null and b/uploads_audio/chunks/6/chunk_001.mp3 differ