import { GoogleGenAI, Modality } from "@google/genai";
import { UploadedImage, ImageFile } from '../types';

const ai = new GoogleGenAI({ apiKey: process.env.API_KEY });

/** Resolved result of `ai.models.generateContent`, derived from the client itself. */
type GenerateContentResult = Awaited<ReturnType<typeof ai.models.generateContent>>;

/**
 * Reads a `File` as a data URL and wraps its base64 payload, together with the
 * file's MIME type, in an inline-data part.
 *
 * @param file - The file to encode.
 * @returns The inline-data part for the file.
 * @throws Error if the read fails, is aborted, or yields no base64 payload.
 */
const fileToGenerativePart = async (file: File): Promise<ImageFile> => {
  const base64Data = await new Promise<string>((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => {
      const result = reader.result;
      // readAsDataURL yields "data:<mime>;base64,<payload>"; keep only the payload.
      const payload = typeof result === 'string' ? result.split(',')[1] : undefined;
      if (payload) {
        resolve(payload);
      } else {
        reject(new Error(`Could not read "${file.name}" as a base64 data URL.`));
      }
    };
    // Reject explicitly so a failed or aborted read cannot leave the promise pending.
    reader.onerror = () => reject(reader.error ?? new Error(`Failed to read "${file.name}".`));
    reader.onabort = () => reject(new Error(`Reading "${file.name}" was aborted.`));
    reader.readAsDataURL(file);
  });

  return {
    inlineData: {
      data: base64Data,
      mimeType: file.type,
    },
  };
};

/**
 * Converts a data URL into an inline-data part.
 *
 * The MIME type is taken from the text between the first `:` and the following
 * `;` of the header, falling back to `image/png` when none is found.
 *
 * @param dataUrl - A data URL of the form `data:<mime>;base64,<payload>`.
 * @returns The inline-data part carrying the payload and MIME type.
 * @throws Error if the data URL has no payload after the comma.
 */
const dataUrlToGenerativePart = (dataUrl: string): ImageFile => {
  const [header, data] = dataUrl.split(',');
  if (header === undefined || !data) {
    throw new Error('Invalid data URL: missing payload.');
  }
  // `||` (not `??`) so an empty captured MIME type also falls back to the default.
  const mimeType = header.match(/:(.*?);/)?.[1] || 'image/png';
  return { inlineData: { data, mimeType } };
};

/** Returns the message of an `Error`, or the string form of any other thrown value. */
const getErrorMessage = (error: unknown): string => {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
};

/**
 * Returns the data of the first part of the first candidate that carries a
 * non-empty inline payload, or `undefined` when there is none.
 */
const extractImageData = (response: GenerateContentResult): string | undefined => {
  const parts = response.candidates?.[0]?.content?.parts ?? [];
  for (const part of parts) {
    const data = part.inlineData?.data;
    if (data) {
      return data;
    }
  }
  return undefined;
};

/**
 * Asks the image model for a mask of the described subject: white where the
 * subject is, black everywhere else.
 *
 * @param imageFile - The image to analyse.
 * @param description - Free-text description of the subject to isolate.
 * @returns The inline image data returned by the model.
 * @throws Error prefixed with "Failed to segment subject:" when the request is
 *   blocked, fails, or returns no image. Errors raised while reading
 *   `imageFile` are thrown as-is, because the file is encoded before the
 *   guarded section.
 */
export const segmentSubject = async (imageFile: File, description: string): Promise<string> => {
  const imagePart = await fileToGenerativePart(imageFile);

  // A more descriptive, less technical prompt to guide the AI model more reliably.
  const prompt = ` Analyze the provided image to identify the subject described as: "${description}". Your task is to create a new image based on this analysis. In this new image, the area that corresponds to the identified subject MUST be solid white (#FFFFFF). Every other part of the image, which is the background, MUST be solid black (#000000). The final output must ONLY be the image file. Do not include any text, explanations, or any other content in your response. `;

  try {
    const response = await ai.models.generateContent({
      model: 'gemini-2.5-flash-image',
      contents: { parts: [imagePart, { text: prompt }] },
      config: {
        responseModalities: [Modality.IMAGE],
      },
    });

    // Check for blocking first so the caller sees the block reason rather than a generic failure.
    if (response.promptFeedback?.blockReason) {
      throw new Error(`Request blocked due to: ${response.promptFeedback.blockReason}.`);
    }

    const maskData = extractImageData(response);
    if (maskData) {
      return maskData;
    }

    // More detailed error reporting if no image is returned.
    const fullResponseText = JSON.stringify(response, null, 2);
    const textResponse = response.text?.trim();
    if (textResponse) {
      throw new Error(`The AI returned a message instead of a mask: "${textResponse}". Full API response: ${fullResponseText}`);
    }
    throw new Error(`No segmentation mask received from the AI. Full API response: ${fullResponseText}`);
  } catch (error) {
    console.error("Error segmenting subject:", error);
    throw new Error(`Failed to segment subject: ${getErrorMessage(error)}`);
  }
};

/**
 * Expands a scenario, a user instruction and the subject descriptions of the
 * uploaded images into a single detailed prompt for the image generator.
 *
 * @param scenario - The scenario chosen by the user.
 * @param userInstruction - The user's own instructions.
 * @param images - Uploaded images; only `subjectDescription` is read here.
 * @returns The prompt text produced by the model.
 * @throws Error prefixed with "Failed to generate the master prompt:" when the
 *   request fails or the model returns no text.
 */
export const expandPrompt = async (
  scenario: string,
  userInstruction: string,
  images: UploadedImage[]
): Promise<string> => {
  const subjectDescriptions = images
    .map((img, i) => `Person from Image ${i + 1}: ${img.subjectDescription}`)
    .join('\n');

  const systemInstruction = `You are a creative assistant specializing in writing detailed, effective prompts for an AI image generator. Your goal is to create a single, photorealistic, emotionally resonant 16:9 YouTube thumbnail in a K-Pop aesthetic. - Combine the user's chosen scenario, their specific instructions, and the descriptions of the people involved. - The output must be a single, cohesive paragraph. Do not use lists or bullet points. - Translate the user's simple instructions into a rich, detailed description for the AI. Describe the composition, camera angle, lighting, and mood. - Emphasize achieving high facial fidelity to the described people. The final image should look like a real photograph or a high-quality still from a music video. 
- Mention specific K-Pop aesthetic elements like soft, slightly dramatic lighting, a subtle bokeh effect for the background, and a focus on emotional expression.`;

  const userPrompt = ` Scenario: "${scenario}" User Instruction: "${userInstruction}" People to include: ${subjectDescriptions} Generate the master prompt based on this information. `;

  try {
    const response = await ai.models.generateContent({
      model: 'gemini-2.5-flash',
      contents: [{ parts: [{ text: userPrompt }] }],
      config: {
        systemInstruction,
      },
    });

    // `response.text` may be undefined; fail loudly instead of handing back a non-string.
    const masterPrompt = response.text;
    if (!masterPrompt) {
      throw new Error('No prompt text received from the AI.');
    }
    return masterPrompt;
  } catch (error) {
    console.error("Error expanding prompt:", error);
    throw new Error(`Failed to generate the master prompt: ${getErrorMessage(error)}`);
  }
};

/**
 * Composes the pre-segmented subjects into a new scene described by
 * `masterPrompt`.
 *
 * @param masterPrompt - Scene instructions appended to the fixed task preamble.
 * @param images - Uploaded images; each must have `segmentedDataUrl` set.
 * @returns The inline image data returned by the model.
 * @throws Error (unwrapped) if any image lacks `segmentedDataUrl` or carries a
 *   malformed data URL; Error prefixed with "Failed to generate the image:"
 *   when the request fails or returns no image.
 */
export const generateImage = async (
  masterPrompt: string,
  images: UploadedImage[]
): Promise<string> => {
  // Now using pre-segmented images with transparent backgrounds
  const imageParts = images.map(img => {
    if (!img.segmentedDataUrl) throw new Error("Segmented image data is missing.");
    return dataUrlToGenerativePart(img.segmentedDataUrl);
  });

  const fullPrompt = `Task: Create a new photorealistic 16:9 image by composing the subjects from the provided images into a new scene. The subjects are provided as separate images with transparent backgrounds. 
Instructions: ${masterPrompt}`;

  try {
    const response = await ai.models.generateContent({
      model: 'gemini-2.5-flash-image',
      contents: {
        parts: [
          ...imageParts,
          { text: fullPrompt },
        ],
      },
      config: {
        responseModalities: [Modality.IMAGE],
      },
    });

    const imageData = extractImageData(response);
    if (imageData) {
      return imageData;
    }

    const textResponse = response.text?.trim();
    if (textResponse) {
      throw new Error(`The AI failed to generate an image and returned a message: "${textResponse}"`);
    }
    throw new Error('No image data received from the AI.');
  } catch (error) {
    console.error("Error generating image:", error);
    throw new Error(`Failed to generate the image: ${getErrorMessage(error)}`);
  }
};

/**
 * Sends an existing image back to the image model together with a refinement
 * instruction.
 *
 * @param refinementPrompt - What to change in the image.
 * @param base64Image - Image payload; it is always sent with MIME type `image/png`.
 * @returns The inline image data returned by the model.
 * @throws Error prefixed with "Failed to refine the image:" when the request
 *   fails or returns no image.
 */
export const refineImage = async (
  refinementPrompt: string,
  base64Image: string
): Promise<string> => {
  const imagePart = {
    inlineData: {
      data: base64Image,
      mimeType: 'image/png',
    },
  };

  try {
    const response = await ai.models.generateContent({
      model: 'gemini-2.5-flash-image',
      contents: {
        parts: [
          imagePart,
          { text: refinementPrompt },
        ],
      },
      config: {
        responseModalities: [Modality.IMAGE],
      },
    });

    const imageData = extractImageData(response);
    if (imageData) {
      return imageData;
    }

    const textResponse = response.text?.trim();
    if (textResponse) {
      throw new Error(`The AI failed to refine the image and returned a message: "${textResponse}"`);
    }
    throw new Error('No refined image data received from the AI.');
  } catch (error) {
    console.error("Error refining image:", error);
    throw new Error(`Failed to refine the image: ${getErrorMessage(error)}`);
  }
};