// Gemini image service: subject segmentation, prompt expansion, image generation, and refinement helpers.
import { GoogleGenAI, Modality } from "@google/genai";
|
|
import { UploadedImage, ImageFile } from '../types';
|
|
|
|
// Module-level Gemini client; expects API_KEY to be injected at build time (e.g. via a bundler define for process.env).
const ai = new GoogleGenAI({ apiKey: process.env.API_KEY });
|
|
|
|
const fileToGenerativePart = async (file: File): Promise<ImageFile> => {
|
|
const base64EncodedDataPromise = new Promise<string>((resolve) => {
|
|
const reader = new FileReader();
|
|
reader.onloadend = () => resolve((reader.result as string).split(',')[1]);
|
|
reader.readAsDataURL(file);
|
|
});
|
|
return {
|
|
inlineData: {
|
|
data: await base64EncodedDataPromise,
|
|
mimeType: file.type,
|
|
},
|
|
};
|
|
};
|
|
|
|
const dataUrlToGenerativePart = (dataUrl: string): ImageFile => {
|
|
const [header, data] = dataUrl.split(',');
|
|
const mimeType = header.match(/:(.*?);/)?.[1] || 'image/png';
|
|
return {
|
|
inlineData: { data, mimeType }
|
|
};
|
|
};
|
|
|
|
const getErrorMessage = (error: unknown): string => {
|
|
if (error instanceof Error) {
|
|
return error.message;
|
|
}
|
|
return String(error);
|
|
}
|
|
|
|
export const segmentSubject = async (imageFile: File, description: string): Promise<string> => {
|
|
const imagePart = await fileToGenerativePart(imageFile);
|
|
// A more descriptive, less technical prompt to guide the AI model more reliably.
|
|
const prompt = `
|
|
Analyze the provided image to identify the subject described as: "${description}".
|
|
Your task is to create a new image based on this analysis.
|
|
In this new image, the area that corresponds to the identified subject MUST be solid white (#FFFFFF).
|
|
Every other part of the image, which is the background, MUST be solid black (#000000).
|
|
The final output must ONLY be the image file. Do not include any text, explanations, or any other content in your response.
|
|
`;
|
|
|
|
try {
|
|
const response = await ai.models.generateContent({
|
|
model: 'gemini-2.5-flash-image',
|
|
contents: { parts: [imagePart, { text: prompt }] },
|
|
config: {
|
|
responseModalities: [Modality.IMAGE],
|
|
},
|
|
});
|
|
|
|
// Improved error detection: Check for blocking first.
|
|
if (response.promptFeedback?.blockReason) {
|
|
throw new Error(`Request blocked due to: ${response.promptFeedback.blockReason}.`);
|
|
}
|
|
|
|
const firstPart = response.candidates?.[0]?.content?.parts[0];
|
|
if (firstPart && firstPart.inlineData) {
|
|
return firstPart.inlineData.data;
|
|
}
|
|
|
|
// More detailed error reporting if no image is returned.
|
|
const fullResponseText = JSON.stringify(response, null, 2);
|
|
const textResponse = response.text?.trim();
|
|
if (textResponse) {
|
|
throw new Error(`The AI returned a message instead of a mask: "${textResponse}". Full API response: ${fullResponseText}`);
|
|
}
|
|
throw new Error(`No segmentation mask received from the AI. Full API response: ${fullResponseText}`);
|
|
|
|
} catch (error) {
|
|
console.error("Error segmenting subject:", error);
|
|
throw new Error(`Failed to segment subject: ${getErrorMessage(error)}`);
|
|
}
|
|
};
|
|
|
|
|
|
export const expandPrompt = async (
|
|
scenario: string,
|
|
userInstruction: string,
|
|
images: UploadedImage[]
|
|
): Promise<string> => {
|
|
const subjectDescriptions = images
|
|
.map((img, i) => `Person from Image ${i + 1}: ${img.subjectDescription}`)
|
|
.join('\n');
|
|
|
|
const systemInstruction = `You are a creative assistant specializing in writing detailed, effective prompts for an AI image generator. Your goal is to create a single, photorealistic, emotionally resonant 16:9 YouTube thumbnail in a K-Pop aesthetic.
|
|
- Combine the user's chosen scenario, their specific instructions, and the descriptions of the people involved.
|
|
- The output must be a single, cohesive paragraph. Do not use lists or bullet points.
|
|
- Translate the user's simple instructions into a rich, detailed description for the AI. Describe the composition, camera angle, lighting, and mood.
|
|
- Emphasize achieving high facial fidelity to the described people. The final image should look like a real photograph or a high-quality still from a music video.
|
|
- Mention specific K-Pop aesthetic elements like soft, slightly dramatic lighting, a subtle bokeh effect for the background, and a focus on emotional expression.`;
|
|
|
|
const userPrompt = `
|
|
Scenario: "${scenario}"
|
|
User Instruction: "${userInstruction}"
|
|
People to include:
|
|
${subjectDescriptions}
|
|
|
|
Generate the master prompt based on this information.
|
|
`;
|
|
|
|
try {
|
|
const response = await ai.models.generateContent({
|
|
model: 'gemini-2.5-flash',
|
|
contents: [{ parts: [{ text: userPrompt }] }],
|
|
config: { systemInstruction: systemInstruction }
|
|
});
|
|
return response.text;
|
|
} catch (error) {
|
|
console.error("Error expanding prompt:", error);
|
|
throw new Error(`Failed to generate the master prompt: ${getErrorMessage(error)}`);
|
|
}
|
|
};
|
|
|
|
export const generateImage = async (
|
|
masterPrompt: string,
|
|
images: UploadedImage[]
|
|
): Promise<string> => {
|
|
|
|
// Now using pre-segmented images with transparent backgrounds
|
|
const imageParts = images.map(img => {
|
|
if (!img.segmentedDataUrl) throw new Error("Segmented image data is missing.");
|
|
return dataUrlToGenerativePart(img.segmentedDataUrl);
|
|
});
|
|
|
|
const fullPrompt = `Task: Create a new photorealistic 16:9 image by composing the subjects from the provided images into a new scene. The subjects are provided as separate images with transparent backgrounds.
|
|
Instructions: ${masterPrompt}`;
|
|
|
|
try {
|
|
const response = await ai.models.generateContent({
|
|
model: 'gemini-2.5-flash-image',
|
|
contents: {
|
|
parts: [
|
|
...imageParts,
|
|
{ text: fullPrompt },
|
|
],
|
|
},
|
|
config: {
|
|
responseModalities: [Modality.IMAGE],
|
|
},
|
|
});
|
|
|
|
const firstPart = response.candidates?.[0]?.content?.parts[0];
|
|
if (firstPart && firstPart.inlineData) {
|
|
return firstPart.inlineData.data;
|
|
} else {
|
|
const textResponse = response.text?.trim();
|
|
if (textResponse) {
|
|
throw new Error(`The AI failed to generate an image and returned a message: "${textResponse}"`);
|
|
}
|
|
throw new Error('No image data received from the AI.');
|
|
}
|
|
} catch (error) {
|
|
console.error("Error generating image:", error);
|
|
throw new Error(`Failed to generate the image: ${getErrorMessage(error)}`);
|
|
}
|
|
};
|
|
|
|
export const refineImage = async (
|
|
refinementPrompt: string,
|
|
base64Image: string
|
|
): Promise<string> => {
|
|
|
|
const imagePart = {
|
|
inlineData: {
|
|
data: base64Image,
|
|
mimeType: 'image/png',
|
|
},
|
|
};
|
|
|
|
try {
|
|
const response = await ai.models.generateContent({
|
|
model: 'gemini-2.5-flash-image',
|
|
contents: {
|
|
parts: [
|
|
imagePart,
|
|
{ text: refinementPrompt },
|
|
],
|
|
},
|
|
config: {
|
|
responseModalities: [Modality.IMAGE],
|
|
},
|
|
});
|
|
|
|
const firstPart = response.candidates?.[0]?.content?.parts[0];
|
|
if (firstPart && firstPart.inlineData) {
|
|
return firstPart.inlineData.data;
|
|
} else {
|
|
const textResponse = response.text?.trim();
|
|
if (textResponse) {
|
|
throw new Error(`The AI failed to refine the image and returned a message: "${textResponse}"`);
|
|
}
|
|
throw new Error('No refined image data received from the AI.');
|
|
}
|
|
} catch (error) {
|
|
console.error("Error refining image:", error);
|
|
throw new Error(`Failed to refine the image: ${getErrorMessage(error)}`);
|
|
}
|
|
}; |