Refactor GTM Architect to v2: Python-driven architecture, 9-phase process, new DB and Docker setup
This commit is contained in:
202
k-pop-thumbnail-genie/services/geminiService.ts
Normal file
202
k-pop-thumbnail-genie/services/geminiService.ts
Normal file
@@ -0,0 +1,202 @@
|
||||
import { GoogleGenAI, Modality } from "@google/genai";
|
||||
import { UploadedImage, ImageFile } from '../types';
|
||||
|
||||
// Single shared GenAI client for this module.
// NOTE(review): process.env.API_KEY is presumably inlined by the bundler at
// build time (this runs in the browser) — confirm the build config defines it.
const ai = new GoogleGenAI({ apiKey: process.env.API_KEY });
|
||||
|
||||
const fileToGenerativePart = async (file: File): Promise<ImageFile> => {
|
||||
const base64EncodedDataPromise = new Promise<string>((resolve) => {
|
||||
const reader = new FileReader();
|
||||
reader.onloadend = () => resolve((reader.result as string).split(',')[1]);
|
||||
reader.readAsDataURL(file);
|
||||
});
|
||||
return {
|
||||
inlineData: {
|
||||
data: await base64EncodedDataPromise,
|
||||
mimeType: file.type,
|
||||
},
|
||||
};
|
||||
};
|
||||
|
||||
const dataUrlToGenerativePart = (dataUrl: string): ImageFile => {
|
||||
const [header, data] = dataUrl.split(',');
|
||||
const mimeType = header.match(/:(.*?);/)?.[1] || 'image/png';
|
||||
return {
|
||||
inlineData: { data, mimeType }
|
||||
};
|
||||
};
|
||||
|
||||
const getErrorMessage = (error: unknown): string => {
|
||||
if (error instanceof Error) {
|
||||
return error.message;
|
||||
}
|
||||
return String(error);
|
||||
}
|
||||
|
||||
export const segmentSubject = async (imageFile: File, description: string): Promise<string> => {
|
||||
const imagePart = await fileToGenerativePart(imageFile);
|
||||
// A more descriptive, less technical prompt to guide the AI model more reliably.
|
||||
const prompt = `
|
||||
Analyze the provided image to identify the subject described as: "${description}".
|
||||
Your task is to create a new image based on this analysis.
|
||||
In this new image, the area that corresponds to the identified subject MUST be solid white (#FFFFFF).
|
||||
Every other part of the image, which is the background, MUST be solid black (#000000).
|
||||
The final output must ONLY be the image file. Do not include any text, explanations, or any other content in your response.
|
||||
`;
|
||||
|
||||
try {
|
||||
const response = await ai.models.generateContent({
|
||||
model: 'gemini-2.5-flash-image',
|
||||
contents: { parts: [imagePart, { text: prompt }] },
|
||||
config: {
|
||||
responseModalities: [Modality.IMAGE],
|
||||
},
|
||||
});
|
||||
|
||||
// Improved error detection: Check for blocking first.
|
||||
if (response.promptFeedback?.blockReason) {
|
||||
throw new Error(`Request blocked due to: ${response.promptFeedback.blockReason}.`);
|
||||
}
|
||||
|
||||
const firstPart = response.candidates?.[0]?.content?.parts[0];
|
||||
if (firstPart && firstPart.inlineData) {
|
||||
return firstPart.inlineData.data;
|
||||
}
|
||||
|
||||
// More detailed error reporting if no image is returned.
|
||||
const fullResponseText = JSON.stringify(response, null, 2);
|
||||
const textResponse = response.text?.trim();
|
||||
if (textResponse) {
|
||||
throw new Error(`The AI returned a message instead of a mask: "${textResponse}". Full API response: ${fullResponseText}`);
|
||||
}
|
||||
throw new Error(`No segmentation mask received from the AI. Full API response: ${fullResponseText}`);
|
||||
|
||||
} catch (error) {
|
||||
console.error("Error segmenting subject:", error);
|
||||
throw new Error(`Failed to segment subject: ${getErrorMessage(error)}`);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
export const expandPrompt = async (
|
||||
scenario: string,
|
||||
userInstruction: string,
|
||||
images: UploadedImage[]
|
||||
): Promise<string> => {
|
||||
const subjectDescriptions = images
|
||||
.map((img, i) => `Person from Image ${i + 1}: ${img.subjectDescription}`)
|
||||
.join('\n');
|
||||
|
||||
const systemInstruction = `You are a creative assistant specializing in writing detailed, effective prompts for an AI image generator. Your goal is to create a single, photorealistic, emotionally resonant 16:9 YouTube thumbnail in a K-Pop aesthetic.
|
||||
- Combine the user's chosen scenario, their specific instructions, and the descriptions of the people involved.
|
||||
- The output must be a single, cohesive paragraph. Do not use lists or bullet points.
|
||||
- Translate the user's simple instructions into a rich, detailed description for the AI. Describe the composition, camera angle, lighting, and mood.
|
||||
- Emphasize achieving high facial fidelity to the described people. The final image should look like a real photograph or a high-quality still from a music video.
|
||||
- Mention specific K-Pop aesthetic elements like soft, slightly dramatic lighting, a subtle bokeh effect for the background, and a focus on emotional expression.`;
|
||||
|
||||
const userPrompt = `
|
||||
Scenario: "${scenario}"
|
||||
User Instruction: "${userInstruction}"
|
||||
People to include:
|
||||
${subjectDescriptions}
|
||||
|
||||
Generate the master prompt based on this information.
|
||||
`;
|
||||
|
||||
try {
|
||||
const response = await ai.models.generateContent({
|
||||
model: 'gemini-2.5-flash',
|
||||
contents: [{ parts: [{ text: userPrompt }] }],
|
||||
config: { systemInstruction: systemInstruction }
|
||||
});
|
||||
return response.text;
|
||||
} catch (error) {
|
||||
console.error("Error expanding prompt:", error);
|
||||
throw new Error(`Failed to generate the master prompt: ${getErrorMessage(error)}`);
|
||||
}
|
||||
};
|
||||
|
||||
export const generateImage = async (
|
||||
masterPrompt: string,
|
||||
images: UploadedImage[]
|
||||
): Promise<string> => {
|
||||
|
||||
// Now using pre-segmented images with transparent backgrounds
|
||||
const imageParts = images.map(img => {
|
||||
if (!img.segmentedDataUrl) throw new Error("Segmented image data is missing.");
|
||||
return dataUrlToGenerativePart(img.segmentedDataUrl);
|
||||
});
|
||||
|
||||
const fullPrompt = `Task: Create a new photorealistic 16:9 image by composing the subjects from the provided images into a new scene. The subjects are provided as separate images with transparent backgrounds.
|
||||
Instructions: ${masterPrompt}`;
|
||||
|
||||
try {
|
||||
const response = await ai.models.generateContent({
|
||||
model: 'gemini-2.5-flash-image',
|
||||
contents: {
|
||||
parts: [
|
||||
...imageParts,
|
||||
{ text: fullPrompt },
|
||||
],
|
||||
},
|
||||
config: {
|
||||
responseModalities: [Modality.IMAGE],
|
||||
},
|
||||
});
|
||||
|
||||
const firstPart = response.candidates?.[0]?.content?.parts[0];
|
||||
if (firstPart && firstPart.inlineData) {
|
||||
return firstPart.inlineData.data;
|
||||
} else {
|
||||
const textResponse = response.text?.trim();
|
||||
if (textResponse) {
|
||||
throw new Error(`The AI failed to generate an image and returned a message: "${textResponse}"`);
|
||||
}
|
||||
throw new Error('No image data received from the AI.');
|
||||
}
|
||||
} catch (error) {
|
||||
console.error("Error generating image:", error);
|
||||
throw new Error(`Failed to generate the image: ${getErrorMessage(error)}`);
|
||||
}
|
||||
};
|
||||
|
||||
export const refineImage = async (
|
||||
refinementPrompt: string,
|
||||
base64Image: string
|
||||
): Promise<string> => {
|
||||
|
||||
const imagePart = {
|
||||
inlineData: {
|
||||
data: base64Image,
|
||||
mimeType: 'image/png',
|
||||
},
|
||||
};
|
||||
|
||||
try {
|
||||
const response = await ai.models.generateContent({
|
||||
model: 'gemini-2.5-flash-image',
|
||||
contents: {
|
||||
parts: [
|
||||
imagePart,
|
||||
{ text: refinementPrompt },
|
||||
],
|
||||
},
|
||||
config: {
|
||||
responseModalities: [Modality.IMAGE],
|
||||
},
|
||||
});
|
||||
|
||||
const firstPart = response.candidates?.[0]?.content?.parts[0];
|
||||
if (firstPart && firstPart.inlineData) {
|
||||
return firstPart.inlineData.data;
|
||||
} else {
|
||||
const textResponse = response.text?.trim();
|
||||
if (textResponse) {
|
||||
throw new Error(`The AI failed to refine the image and returned a message: "${textResponse}"`);
|
||||
}
|
||||
throw new Error('No refined image data received from the AI.');
|
||||
}
|
||||
} catch (error) {
|
||||
console.error("Error refining image:", error);
|
||||
throw new Error(`Failed to refine the image: ${getErrorMessage(error)}`);
|
||||
}
|
||||
};
|
||||
Reference in New Issue
Block a user