Files
Brancheneinstufung2/k-pop-thumbnail-genie/services/geminiService.ts

202 lines
7.4 KiB
TypeScript

import { GoogleGenAI, Modality } from "@google/genai";
import { UploadedImage, ImageFile } from '../types';
// Shared Gemini client used by the service functions in this module.
// NOTE(review): the key is read from process.env.API_KEY when the module is
// loaded and is not validated here — confirm the build/runtime always provides it.
const ai = new GoogleGenAI({ apiKey: process.env.API_KEY });
/**
 * Reads a browser `File` and wraps its base64 payload in the inline-data
 * shape passed to the model as a content part.
 *
 * @param file - The file to read; its `type` is forwarded as the MIME type.
 * @returns The inline-data part holding the base64 payload (without the
 *          `data:...;base64,` prefix) and the file's MIME type.
 * @throws Error when the file cannot be read, the read is aborted, or the
 *         reader does not produce a data URL.
 */
const fileToGenerativePart = async (file: File): Promise<ImageFile> => {
  const base64Data = await new Promise<string>((resolve, reject) => {
    const reader = new FileReader();

    reader.onload = () => {
      const { result } = reader;
      if (typeof result !== 'string') {
        reject(new Error(`Failed to read file "${file.name}": no data URL was produced.`));
        return;
      }
      // A data URL is "<header>,<payload>"; only the payload is sent to the API.
      const separatorIndex = result.indexOf(',');
      if (separatorIndex === -1) {
        reject(new Error(`Failed to read file "${file.name}": malformed data URL.`));
        return;
      }
      resolve(result.slice(separatorIndex + 1));
    };

    // Without these handlers a failed or aborted read would leave the promise
    // pending forever, because `result` is null in both cases.
    reader.onerror = () => {
      reject(new Error(`Failed to read file "${file.name}": ${reader.error?.message ?? 'unknown error'}`));
    };
    reader.onabort = () => {
      reject(new Error(`Failed to read file "${file.name}": the read was aborted.`));
    };

    reader.readAsDataURL(file);
  });

  return {
    inlineData: {
      data: base64Data,
      mimeType: file.type,
    },
  };
};
/**
 * Converts a data URL (`data:<mime>[;params],<payload>`) into the inline-data
 * shape passed to the model as a content part.
 *
 * @param dataUrl - The data URL to split into MIME type and payload.
 * @returns The payload after the first comma plus the declared MIME type,
 *          falling back to `image/png` when the header declares none.
 * @throws Error when the input is not a data URL, so a malformed value is
 *         reported here instead of being sent on with `data: undefined`.
 */
const dataUrlToGenerativePart = (dataUrl: string): ImageFile => {
  const separatorIndex = dataUrl.indexOf(',');
  if (!/^data:/i.test(dataUrl) || separatorIndex === -1) {
    throw new Error('Invalid data URL: expected the form "data:<mime>;base64,<payload>".');
  }

  const header = dataUrl.slice(0, separatorIndex);
  const data = dataUrl.slice(separatorIndex + 1);
  // The MIME type is whatever sits between "data:" and the first ";" (or the
  // end of the header when no parameters such as ";base64" are present).
  const declaredMimeType = (header.slice('data:'.length).split(';')[0] ?? '').trim();

  return {
    inlineData: { data, mimeType: declaredMimeType || 'image/png' },
  };
};
/**
 * Produces a human-readable message from any thrown value.
 *
 * @param error - The caught value; may be an `Error`, a string, a plain
 *                object, or anything else.
 * @returns `error.message` for `Error` instances, the string itself for
 *          strings, a string `message` property or JSON form for other
 *          objects, and `String(error)` as the last resort.
 */
const getErrorMessage = (error: unknown): string => {
  if (error instanceof Error) {
    return error.message;
  }
  if (typeof error === 'string') {
    return error;
  }
  if (typeof error === 'object' && error !== null) {
    // Plain objects would otherwise stringify to the useless "[object Object]".
    const { message } = error as { message?: unknown };
    if (typeof message === 'string' && message !== '') {
      return message;
    }
    try {
      const serialized: unknown = JSON.stringify(error);
      if (typeof serialized === 'string') {
        return serialized;
      }
    } catch {
      // Not serializable (e.g. circular references); fall through to String().
    }
  }
  return String(error);
};
/**
 * Asks the image model for a black-and-white mask of the described subject.
 *
 * @param imageFile - The image to analyse.
 * @param description - Text describing which subject in the image to mask.
 * @returns The base64 image data of the first image part in the response.
 * @throws Error prefixed with "Failed to segment subject:" when the request is
 *         blocked, fails, or the response contains no image data.
 */
export const segmentSubject = async (imageFile: File, description: string): Promise<string> => {
  const imagePart = await fileToGenerativePart(imageFile);
  // A more descriptive, less technical prompt to guide the AI model more reliably.
  const prompt = `
Analyze the provided image to identify the subject described as: "${description}".
Your task is to create a new image based on this analysis.
In this new image, the area that corresponds to the identified subject MUST be solid white (#FFFFFF).
Every other part of the image, which is the background, MUST be solid black (#000000).
The final output must ONLY be the image file. Do not include any text, explanations, or any other content in your response.
`;
  try {
    const response = await ai.models.generateContent({
      model: 'gemini-2.5-flash-image',
      contents: { parts: [imagePart, { text: prompt }] },
      config: {
        responseModalities: [Modality.IMAGE],
      },
    });

    // Check for blocking first so the caller gets the real reason.
    if (response.promptFeedback?.blockReason) {
      throw new Error(`Request blocked due to: ${response.promptFeedback.blockReason}.`);
    }

    // A response can carry several parts (e.g. text alongside the image), and
    // `parts` itself may be absent, so scan every part instead of only the first.
    const responseParts = response.candidates?.[0]?.content?.parts ?? [];
    for (const part of responseParts) {
      const maskData = part.inlineData?.data;
      if (maskData) {
        return maskData;
      }
    }

    // More detailed error reporting if no image is returned.
    const fullResponseText = JSON.stringify(response, null, 2);
    const textResponse = response.text?.trim();
    if (textResponse) {
      throw new Error(`The AI returned a message instead of a mask: "${textResponse}". Full API response: ${fullResponseText}`);
    }
    throw new Error(`No segmentation mask received from the AI. Full API response: ${fullResponseText}`);
  } catch (error) {
    console.error("Error segmenting subject:", error);
    throw new Error(`Failed to segment subject: ${getErrorMessage(error)}`);
  }
};
/**
 * Expands the chosen scenario, the user's instruction and the subject
 * descriptions into a single detailed "master prompt" for image generation.
 *
 * @param scenario - The scenario selected by the user.
 * @param userInstruction - Free-form instruction from the user.
 * @param images - Uploaded images; each `subjectDescription` is listed in the prompt.
 * @returns The text returned by the model.
 * @throws Error prefixed with "Failed to generate the master prompt:" when the
 *         request is blocked, fails, or yields no text.
 */
export const expandPrompt = async (
  scenario: string,
  userInstruction: string,
  images: UploadedImage[]
): Promise<string> => {
  const subjectDescriptions = images
    .map((img, i) => `Person from Image ${i + 1}: ${img.subjectDescription}`)
    .join('\n');
  const systemInstruction = `You are a creative assistant specializing in writing detailed, effective prompts for an AI image generator. Your goal is to create a single, photorealistic, emotionally resonant 16:9 YouTube thumbnail in a K-Pop aesthetic.
- Combine the user's chosen scenario, their specific instructions, and the descriptions of the people involved.
- The output must be a single, cohesive paragraph. Do not use lists or bullet points.
- Translate the user's simple instructions into a rich, detailed description for the AI. Describe the composition, camera angle, lighting, and mood.
- Emphasize achieving high facial fidelity to the described people. The final image should look like a real photograph or a high-quality still from a music video.
- Mention specific K-Pop aesthetic elements like soft, slightly dramatic lighting, a subtle bokeh effect for the background, and a focus on emotional expression.`;
  const userPrompt = `
Scenario: "${scenario}"
User Instruction: "${userInstruction}"
People to include:
${subjectDescriptions}
Generate the master prompt based on this information.
`;
  try {
    const response = await ai.models.generateContent({
      model: 'gemini-2.5-flash',
      contents: [{ parts: [{ text: userPrompt }] }],
      config: { systemInstruction: systemInstruction }
    });

    if (response.promptFeedback?.blockReason) {
      throw new Error(`Request blocked due to: ${response.promptFeedback.blockReason}.`);
    }

    // `response.text` can be undefined or empty; fail loudly rather than hand
    // a non-string back through a Promise<string>.
    const masterPrompt = response.text;
    if (!masterPrompt || masterPrompt.trim() === '') {
      throw new Error('The AI returned an empty response.');
    }
    return masterPrompt;
  } catch (error) {
    console.error("Error expanding prompt:", error);
    throw new Error(`Failed to generate the master prompt: ${getErrorMessage(error)}`);
  }
};
/**
 * Composes the pre-segmented subject images into a new scene described by
 * the master prompt.
 *
 * @param masterPrompt - The detailed scene description appended to the task text.
 * @param images - Uploaded images; each must carry a `segmentedDataUrl`.
 * @returns The base64 image data of the first image part in the response.
 * @throws Error "Segmented image data is missing." when an image has no
 *         `segmentedDataUrl`; otherwise an Error prefixed with
 *         "Failed to generate the image:" when the request is blocked, fails,
 *         or the response contains no image data.
 */
export const generateImage = async (
  masterPrompt: string,
  images: UploadedImage[]
): Promise<string> => {
  // Now using pre-segmented images with transparent backgrounds
  const imageParts = images.map(img => {
    if (!img.segmentedDataUrl) throw new Error("Segmented image data is missing.");
    return dataUrlToGenerativePart(img.segmentedDataUrl);
  });
  const fullPrompt = `Task: Create a new photorealistic 16:9 image by composing the subjects from the provided images into a new scene. The subjects are provided as separate images with transparent backgrounds.
Instructions: ${masterPrompt}`;
  try {
    const response = await ai.models.generateContent({
      model: 'gemini-2.5-flash-image',
      contents: {
        parts: [
          ...imageParts,
          { text: fullPrompt },
        ],
      },
      config: {
        responseModalities: [Modality.IMAGE],
      },
    });

    if (response.promptFeedback?.blockReason) {
      throw new Error(`Request blocked due to: ${response.promptFeedback.blockReason}.`);
    }

    // A response can carry several parts (e.g. text alongside the image), and
    // `parts` itself may be absent, so scan every part instead of only the first.
    const responseParts = response.candidates?.[0]?.content?.parts ?? [];
    for (const part of responseParts) {
      const imageData = part.inlineData?.data;
      if (imageData) {
        return imageData;
      }
    }

    const textResponse = response.text?.trim();
    if (textResponse) {
      throw new Error(`The AI failed to generate an image and returned a message: "${textResponse}"`);
    }
    throw new Error('No image data received from the AI.');
  } catch (error) {
    console.error("Error generating image:", error);
    throw new Error(`Failed to generate the image: ${getErrorMessage(error)}`);
  }
};
/**
 * Sends an existing image back to the model together with a refinement
 * instruction and returns the revised image.
 *
 * @param refinementPrompt - Instruction describing the desired change.
 * @param base64Image - Base64 data of the image to refine; it is always
 *                      declared to the model as `image/png`.
 * @returns The base64 image data of the first image part in the response.
 * @throws Error prefixed with "Failed to refine the image:" when the request
 *         is blocked, fails, or the response contains no image data.
 */
export const refineImage = async (
  refinementPrompt: string,
  base64Image: string
): Promise<string> => {
  const imagePart = {
    inlineData: {
      data: base64Image,
      mimeType: 'image/png',
    },
  };
  try {
    const response = await ai.models.generateContent({
      model: 'gemini-2.5-flash-image',
      contents: {
        parts: [
          imagePart,
          { text: refinementPrompt },
        ],
      },
      config: {
        responseModalities: [Modality.IMAGE],
      },
    });

    if (response.promptFeedback?.blockReason) {
      throw new Error(`Request blocked due to: ${response.promptFeedback.blockReason}.`);
    }

    // A response can carry several parts (e.g. text alongside the image), and
    // `parts` itself may be absent, so scan every part instead of only the first.
    const responseParts = response.candidates?.[0]?.content?.parts ?? [];
    for (const part of responseParts) {
      const imageData = part.inlineData?.data;
      if (imageData) {
        return imageData;
      }
    }

    const textResponse = response.text?.trim();
    if (textResponse) {
      throw new Error(`The AI failed to refine the image and returned a message: "${textResponse}"`);
    }
    throw new Error('No refined image data received from the AI.');
  } catch (error) {
    console.error("Error refining image:", error);
    throw new Error(`Failed to refine the image: ${getErrorMessage(error)}`);
  }
};