// Gemini image service: subject segmentation, prompt expansion, image generation, and refinement helpers.
import { GoogleGenAI, Modality } from "@google/genai";
|
|
import { UploadedImage, ImageFile } from '../types';
|
|
|
|
// Module-level Gemini client; expects API_KEY to be injected at build time (e.g. via a bundler define for process.env).
const ai = new GoogleGenAI({ apiKey: process.env.API_KEY });
|
|
|
|
const fileToGenerativePart = async (file: File): Promise<ImageFile> => {
|
|
const base64EncodedDataPromise = new Promise<string>((resolve) => {
|
|
const reader = new FileReader();
|
|
reader.onloadend = () => resolve((reader.result as string).split(',')[1]);
|
|
reader.readAsDataURL(file);
|
|
});
|
|
return {
|
|
inlineData: {
|
|
data: await base64EncodedDataPromise,
|
|
mimeType: file.type,
|
|
},
|
|
};
|
|
};
|
|
|
|
const dataUrlToGenerativePart = (dataUrl: string): ImageFile => {
|
|
const [header, data] = dataUrl.split(',');
|
|
const mimeType = header.match(/:(.*?);/)?.[1] || 'image/png';
|
|
return {
|
|
inlineData: { data, mimeType }
|
|
};
|
|
};
|
|
|
|
const getErrorMessage = (error: unknown): string => {
|
|
if (error instanceof Error) {
|
|
return error.message;
|
|
}
|
|
return String(error);
|
|
}
|
|
|
|
export const segmentSubject = async (imageFile: File, description: string): Promise<string> => {
|
|
const imagePart = await fileToGenerativePart(imageFile);
|
|
// A more descriptive, less technical prompt to guide the AI model more reliably.
|
|
const prompt = `
|
|
Analyze the provided image to identify the subject described as: "${description}".
|
|
Your task is to create a new image based on this analysis.
|
|
In this new image, the area that corresponds to the identified subject MUST be solid white (#FFFFFF).
|
|
Every other part of the image, which is the background, MUST be solid black (#000000).
|
|
The final output must ONLY be the image file. Do not include any text, explanations, or any other content in your response.
|
|
`;
|
|
|
|
try {
|
|
const response = await ai.models.generateContent({
|
|
model: 'gemini-2.5-flash-image',
|
|
contents: { parts: [imagePart, { text: prompt }] },
|
|
config: {
|
|
responseModalities: [Modality.IMAGE],
|
|
},
|
|
});
|
|
|
|
// Improved error detection: Check for blocking first.
|
|
if (response.promptFeedback?.blockReason) {
|
|
throw new Error(`Request blocked due to: ${response.promptFeedback.blockReason}.`);
|
|
}
|
|
|
|
const firstPart = response.candidates?.[0]?.content?.parts[0];
|
|
if (firstPart && firstPart.inlineData) {
|
|
return firstPart.inlineData.data;
|
|
}
|
|
|
|
// More detailed error reporting if no image is returned.
|
|
const fullResponseText = JSON.stringify(response, null, 2);
|
|
const textResponse = response.text?.trim();
|
|
if (textResponse) {
|
|
throw new Error(`The AI returned a message instead of a mask: "${textResponse}". Full API response: ${fullResponseText}`);
|
|
}
|
|
throw new Error(`No segmentation mask received from the AI. Full API response: ${fullResponseText}`);
|
|
|
|
} catch (error) {
|
|
console.error("Error segmenting subject:", error);
|
|
throw new Error(`Failed to segment subject: ${getErrorMessage(error)}`);
|
|
}
|
|
};
|
|
|
|
|
|
export const expandPrompt = async (
|
|
scenario: string,
|
|
userInstruction: string,
|
|
images: UploadedImage[]
|
|
): Promise<string> => {
|
|
const subjectDescriptions = images
|
|
.map((img, i) => `Person from Image ${i + 1}: ${img.subjectDescription}`)
|
|
.join('\n');
|
|
|
|
const systemInstruction = `You are a creative assistant specializing in writing detailed, effective prompts for an AI image generator. Your goal is to create a single, photorealistic, emotionally resonant 16:9 YouTube thumbnail in a K-Pop aesthetic.
|
|
- Combine the user's chosen scenario, their specific instructions, and the descriptions of the people involved.
|
|
- The output must be a single, cohesive paragraph. Do not use lists or bullet points.
|
|
- Translate the user's simple instructions into a rich, detailed description for the AI. Describe the composition, camera angle, lighting, and mood.
|
|
- Emphasize achieving high facial fidelity to the described people. The final image should look like a real photograph or a high-quality still from a music video.
|
|
- Mention specific K-Pop aesthetic elements like soft, slightly dramatic lighting, a subtle bokeh effect for the background, and a focus on emotional expression.`;
|
|
|
|
const userPrompt = `
|
|
Scenario: "${scenario}"
|
|
User Instruction: "${userInstruction}"
|
|
People to include:
|
|
${subjectDescriptions}
|
|
|
|
Generate the master prompt based on this information.
|
|
`;
|
|
|
|
try {
|
|
const response = await ai.models.generateContent({
|
|
model: 'gemini-2.5-flash',
|
|
contents: [{ parts: [{ text: userPrompt }] }],
|
|
config: { systemInstruction: systemInstruction }
|
|
});
|
|
return response.text;
|
|
} catch (error) {
|
|
console.error("Error expanding prompt:", error);
|
|
throw new Error(`Failed to generate the master prompt: ${getErrorMessage(error)}`);
|
|
}
|
|
};
|
|
|
|
export const generateImage = async (
|
|
masterPrompt: string,
|
|
images: UploadedImage[]
|
|
): Promise<string> => {
|
|
|
|
// Now using pre-segmented images with transparent backgrounds
|
|
const imageParts = images.map(img => {
|
|
if (!img.segmentedDataUrl) throw new Error("Segmented image data is missing.");
|
|
return dataUrlToGenerativePart(img.segmentedDataUrl);
|
|
});
|
|
|
|
const fullPrompt = `Task: Create a new photorealistic 16:9 image by composing the subjects from the provided images into a new scene. The subjects are provided as separate images with transparent backgrounds.
|
|
Instructions: ${masterPrompt}`;
|
|
|
|
try {
|
|
const response = await ai.models.generateContent({
|
|
model: 'gemini-2.5-flash-image',
|
|
contents: {
|
|
parts: [
|
|
...imageParts,
|
|
{ text: fullPrompt },
|
|
],
|
|
},
|
|
config: {
|
|
responseModalities: [Modality.IMAGE],
|
|
},
|
|
});
|
|
|
|
const firstPart = response.candidates?.[0]?.content?.parts[0];
|
|
if (firstPart && firstPart.inlineData) {
|
|
return firstPart.inlineData.data;
|
|
} else {
|
|
const textResponse = response.text?.trim();
|
|
if (textResponse) {
|
|
throw new Error(`The AI failed to generate an image and returned a message: "${textResponse}"`);
|
|
}
|
|
throw new Error('No image data received from the AI.');
|
|
}
|
|
} catch (error) {
|
|
console.error("Error generating image:", error);
|
|
throw new Error(`Failed to generate the image: ${getErrorMessage(error)}`);
|
|
}
|
|
};
|
|
|
|
export const refineImage = async (
|
|
refinementPrompt: string,
|
|
base64Image: string
|
|
): Promise<string> => {
|
|
|
|
const imagePart = {
|
|
inlineData: {
|
|
data: base64Image,
|
|
mimeType: 'image/png',
|
|
},
|
|
};
|
|
|
|
try {
|
|
const response = await ai.models.generateContent({
|
|
model: 'gemini-2.5-flash-image',
|
|
contents: {
|
|
parts: [
|
|
imagePart,
|
|
{ text: refinementPrompt },
|
|
],
|
|
},
|
|
config: {
|
|
responseModalities: [Modality.IMAGE],
|
|
},
|
|
});
|
|
|
|
const firstPart = response.candidates?.[0]?.content?.parts[0];
|
|
if (firstPart && firstPart.inlineData) {
|
|
return firstPart.inlineData.data;
|
|
} else {
|
|
const textResponse = response.text?.trim();
|
|
if (textResponse) {
|
|
throw new Error(`The AI failed to refine the image and returned a message: "${textResponse}"`);
|
|
}
|
|
throw new Error('No refined image data received from the AI.');
|
|
}
|
|
} catch (error) {
|
|
console.error("Error refining image:", error);
|
|
throw new Error(`Failed to refine the image: ${getErrorMessage(error)}`);
|
|
}
|
|
}; |