Refactor GTM Architect to v2: Python-driven architecture, 9-phase process, new DB and Docker setup
This commit is contained in:
202
k-pop-thumbnail-genie/services/geminiService.ts
Normal file
202
k-pop-thumbnail-genie/services/geminiService.ts
Normal file
@@ -0,0 +1,202 @@
|
||||
import { GoogleGenAI, Modality } from "@google/genai";
|
||||
import { UploadedImage, ImageFile } from '../types';
|
||||
|
||||
// Single shared GenAI client for this module.
// NOTE(review): process.env.API_KEY is presumably inlined by the bundler at
// build time (this runs in the browser) — confirm the build config defines it.
const ai = new GoogleGenAI({ apiKey: process.env.API_KEY });
|
||||
|
||||
const fileToGenerativePart = async (file: File): Promise<ImageFile> => {
|
||||
const base64EncodedDataPromise = new Promise<string>((resolve) => {
|
||||
const reader = new FileReader();
|
||||
reader.onloadend = () => resolve((reader.result as string).split(',')[1]);
|
||||
reader.readAsDataURL(file);
|
||||
});
|
||||
return {
|
||||
inlineData: {
|
||||
data: await base64EncodedDataPromise,
|
||||
mimeType: file.type,
|
||||
},
|
||||
};
|
||||
};
|
||||
|
||||
const dataUrlToGenerativePart = (dataUrl: string): ImageFile => {
|
||||
const [header, data] = dataUrl.split(',');
|
||||
const mimeType = header.match(/:(.*?);/)?.[1] || 'image/png';
|
||||
return {
|
||||
inlineData: { data, mimeType }
|
||||
};
|
||||
};
|
||||
|
||||
const getErrorMessage = (error: unknown): string => {
|
||||
if (error instanceof Error) {
|
||||
return error.message;
|
||||
}
|
||||
return String(error);
|
||||
}
|
||||
|
||||
export const segmentSubject = async (imageFile: File, description: string): Promise<string> => {
|
||||
const imagePart = await fileToGenerativePart(imageFile);
|
||||
// A more descriptive, less technical prompt to guide the AI model more reliably.
|
||||
const prompt = `
|
||||
Analyze the provided image to identify the subject described as: "${description}".
|
||||
Your task is to create a new image based on this analysis.
|
||||
In this new image, the area that corresponds to the identified subject MUST be solid white (#FFFFFF).
|
||||
Every other part of the image, which is the background, MUST be solid black (#000000).
|
||||
The final output must ONLY be the image file. Do not include any text, explanations, or any other content in your response.
|
||||
`;
|
||||
|
||||
try {
|
||||
const response = await ai.models.generateContent({
|
||||
model: 'gemini-2.5-flash-image',
|
||||
contents: { parts: [imagePart, { text: prompt }] },
|
||||
config: {
|
||||
responseModalities: [Modality.IMAGE],
|
||||
},
|
||||
});
|
||||
|
||||
// Improved error detection: Check for blocking first.
|
||||
if (response.promptFeedback?.blockReason) {
|
||||
throw new Error(`Request blocked due to: ${response.promptFeedback.blockReason}.`);
|
||||
}
|
||||
|
||||
const firstPart = response.candidates?.[0]?.content?.parts[0];
|
||||
if (firstPart && firstPart.inlineData) {
|
||||
return firstPart.inlineData.data;
|
||||
}
|
||||
|
||||
// More detailed error reporting if no image is returned.
|
||||
const fullResponseText = JSON.stringify(response, null, 2);
|
||||
const textResponse = response.text?.trim();
|
||||
if (textResponse) {
|
||||
throw new Error(`The AI returned a message instead of a mask: "${textResponse}". Full API response: ${fullResponseText}`);
|
||||
}
|
||||
throw new Error(`No segmentation mask received from the AI. Full API response: ${fullResponseText}`);
|
||||
|
||||
} catch (error) {
|
||||
console.error("Error segmenting subject:", error);
|
||||
throw new Error(`Failed to segment subject: ${getErrorMessage(error)}`);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
export const expandPrompt = async (
|
||||
scenario: string,
|
||||
userInstruction: string,
|
||||
images: UploadedImage[]
|
||||
): Promise<string> => {
|
||||
const subjectDescriptions = images
|
||||
.map((img, i) => `Person from Image ${i + 1}: ${img.subjectDescription}`)
|
||||
.join('\n');
|
||||
|
||||
const systemInstruction = `You are a creative assistant specializing in writing detailed, effective prompts for an AI image generator. Your goal is to create a single, photorealistic, emotionally resonant 16:9 YouTube thumbnail in a K-Pop aesthetic.
|
||||
- Combine the user's chosen scenario, their specific instructions, and the descriptions of the people involved.
|
||||
- The output must be a single, cohesive paragraph. Do not use lists or bullet points.
|
||||
- Translate the user's simple instructions into a rich, detailed description for the AI. Describe the composition, camera angle, lighting, and mood.
|
||||
- Emphasize achieving high facial fidelity to the described people. The final image should look like a real photograph or a high-quality still from a music video.
|
||||
- Mention specific K-Pop aesthetic elements like soft, slightly dramatic lighting, a subtle bokeh effect for the background, and a focus on emotional expression.`;
|
||||
|
||||
const userPrompt = `
|
||||
Scenario: "${scenario}"
|
||||
User Instruction: "${userInstruction}"
|
||||
People to include:
|
||||
${subjectDescriptions}
|
||||
|
||||
Generate the master prompt based on this information.
|
||||
`;
|
||||
|
||||
try {
|
||||
const response = await ai.models.generateContent({
|
||||
model: 'gemini-2.5-flash',
|
||||
contents: [{ parts: [{ text: userPrompt }] }],
|
||||
config: { systemInstruction: systemInstruction }
|
||||
});
|
||||
return response.text;
|
||||
} catch (error) {
|
||||
console.error("Error expanding prompt:", error);
|
||||
throw new Error(`Failed to generate the master prompt: ${getErrorMessage(error)}`);
|
||||
}
|
||||
};
|
||||
|
||||
export const generateImage = async (
|
||||
masterPrompt: string,
|
||||
images: UploadedImage[]
|
||||
): Promise<string> => {
|
||||
|
||||
// Now using pre-segmented images with transparent backgrounds
|
||||
const imageParts = images.map(img => {
|
||||
if (!img.segmentedDataUrl) throw new Error("Segmented image data is missing.");
|
||||
return dataUrlToGenerativePart(img.segmentedDataUrl);
|
||||
});
|
||||
|
||||
const fullPrompt = `Task: Create a new photorealistic 16:9 image by composing the subjects from the provided images into a new scene. The subjects are provided as separate images with transparent backgrounds.
|
||||
Instructions: ${masterPrompt}`;
|
||||
|
||||
try {
|
||||
const response = await ai.models.generateContent({
|
||||
model: 'gemini-2.5-flash-image',
|
||||
contents: {
|
||||
parts: [
|
||||
...imageParts,
|
||||
{ text: fullPrompt },
|
||||
],
|
||||
},
|
||||
config: {
|
||||
responseModalities: [Modality.IMAGE],
|
||||
},
|
||||
});
|
||||
|
||||
const firstPart = response.candidates?.[0]?.content?.parts[0];
|
||||
if (firstPart && firstPart.inlineData) {
|
||||
return firstPart.inlineData.data;
|
||||
} else {
|
||||
const textResponse = response.text?.trim();
|
||||
if (textResponse) {
|
||||
throw new Error(`The AI failed to generate an image and returned a message: "${textResponse}"`);
|
||||
}
|
||||
throw new Error('No image data received from the AI.');
|
||||
}
|
||||
} catch (error) {
|
||||
console.error("Error generating image:", error);
|
||||
throw new Error(`Failed to generate the image: ${getErrorMessage(error)}`);
|
||||
}
|
||||
};
|
||||
|
||||
export const refineImage = async (
|
||||
refinementPrompt: string,
|
||||
base64Image: string
|
||||
): Promise<string> => {
|
||||
|
||||
const imagePart = {
|
||||
inlineData: {
|
||||
data: base64Image,
|
||||
mimeType: 'image/png',
|
||||
},
|
||||
};
|
||||
|
||||
try {
|
||||
const response = await ai.models.generateContent({
|
||||
model: 'gemini-2.5-flash-image',
|
||||
contents: {
|
||||
parts: [
|
||||
imagePart,
|
||||
{ text: refinementPrompt },
|
||||
],
|
||||
},
|
||||
config: {
|
||||
responseModalities: [Modality.IMAGE],
|
||||
},
|
||||
});
|
||||
|
||||
const firstPart = response.candidates?.[0]?.content?.parts[0];
|
||||
if (firstPart && firstPart.inlineData) {
|
||||
return firstPart.inlineData.data;
|
||||
} else {
|
||||
const textResponse = response.text?.trim();
|
||||
if (textResponse) {
|
||||
throw new Error(`The AI failed to refine the image and returned a message: "${textResponse}"`);
|
||||
}
|
||||
throw new Error('No refined image data received from the AI.');
|
||||
}
|
||||
} catch (error) {
|
||||
console.error("Error refining image:", error);
|
||||
throw new Error(`Failed to refine the image: ${getErrorMessage(error)}`);
|
||||
}
|
||||
};
|
||||
Reference in New Issue
Block a user