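"""Merge the subjects of two images into a new scene using the Gemini API.

Command-line tool: loads two images, asks for a description of the main
subject in each, segments those subjects with model-generated masks, and then
generates a final composite image from a user-supplied prompt.
"""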
import argparse
import io
import os
import sys
from datetime import datetime

import google.generativeai as genai
from dotenv import load_dotenv
from PIL import Image

# --- Configuration ---
# Pull variables from a local .env file (if one exists) into the process
# environment before the API key is read.
load_dotenv()
API_KEY = os.getenv("GEMINI_API_KEY")

# Fail fast when the key is missing or still set to the placeholder value.
# Diagnostics go to stderr, and sys.exit is used because the bare exit()
# helper is installed by the site module and is not guaranteed to exist.
if not API_KEY or API_KEY == "YOUR_API_KEY":
    print("🛑 Error: GEMINI_API_KEY not found or not set.", file=sys.stderr)
    print(
        " Please create a .env file and add your key: GEMINI_API_KEY='...'",
        file=sys.stderr,
    )
    sys.exit(1)

genai.configure(api_key=API_KEY)


# --- Main Functions ---

def get_image_from_path(path: str) -> Image.Image:
    """Open and fully decode the image at *path*, exiting on failure.

    Args:
        path: Filesystem path of the image to load.

    Returns:
        The decoded PIL image.

    Exits:
        Prints an error to stderr and terminates the process with status 1
        (``SystemExit``) if the file is missing or cannot be opened/decoded.
    """
    try:
        img = Image.open(path)
        # Image.open is lazy and only reads the file header. Force the pixel
        # data to be decoded here so that truncated or corrupt files are
        # reported by this handler instead of failing later in the pipeline.
        img.load()
    except FileNotFoundError:
        print(f"🛑 Error: The file '{path}' was not found.", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(
            f"🛑 Error: Could not open or process the image at '{path}'. Reason: {e}",
            file=sys.stderr,
        )
        sys.exit(1)
    return img


def generate_mask(
    original_image: Image.Image,
    description: str,
    model_name: str = "gemini-1.5-flash-latest",
) -> Image.Image:
    """Ask the Gemini API for a black-and-white mask of the described subject.

    Args:
        original_image: Image containing the subject to isolate.
        description: Natural-language description of that subject.
        model_name: Name of the Gemini model to query. The default is the
            model this function previously had hard-coded.

    Returns:
        The image decoded from the response, converted to grayscale ("L").

    Exits:
        Prints an error to stderr and terminates the process with status 1
        (``SystemExit``) if the request fails or the response does not
        contain a decodable image.
    """
    print(f"🤖 Generating mask for: '{description}'...")
    # NOTE(review): this relies on the selected model returning image data;
    # confirm that the default model actually supports image output.
    model = genai.GenerativeModel(model_name)
    prompt = f"""
    Analyze the provided image to identify the subject described as: "{description}".
    Your task is to create a new image based on this analysis.
    In this new image, the area that corresponds to the identified subject MUST be solid white (#FFFFFF).
    Every other part of the image, which is the background, MUST be solid black (#000000).
    The final output must ONLY be the image file. Do not include any text, explanations, or any other content in your response.
    """
    try:
        response = model.generate_content([prompt, original_image])
        # Binary payloads are carried in a part's `inline_data` blob. Scan
        # every part because a text part may precede the image.
        img_data = None
        for part in response.parts:
            blob = getattr(part, "inline_data", None)
            if blob is not None and blob.data:
                img_data = blob.data
                break
        if img_data is None:
            raise ValueError("the response did not contain any image data")
        # Grayscale so the result can be used directly as an alpha channel.
        mask = Image.open(io.BytesIO(img_data)).convert("L")
    except Exception as e:
        print(f"🛑 Error during mask generation: {e}", file=sys.stderr)
        print(
            " The API might have blocked the request or returned an unexpected format.",
            file=sys.stderr,
        )
        sys.exit(1)
    return mask


def apply_mask(original_image: Image.Image, mask: Image.Image) -> Image.Image:
    """Return an RGBA copy of *original_image* whose alpha channel is *mask*.

    Args:
        original_image: Source image; it is converted to RGBA first.
        mask: Mask image used as the new alpha channel. It is converted to
            grayscale and resized to the source image's size when needed.

    Returns:
        The RGBA image produced by ``convert`` with *mask* installed as its
        alpha channel.
    """
    print(" applying mask...")
    # Ensure the image has an alpha channel to overwrite.
    original_rgba = original_image.convert("RGBA")

    # putalpha only accepts "L" or "1" images; normalise anything else
    # (e.g. an RGB mask) instead of failing with "illegal image mode".
    if mask.mode not in ("1", "L"):
        mask = mask.convert("L")

    # The alpha layer must have exactly the same dimensions as the image.
    if original_rgba.size != mask.size:
        mask = mask.resize(original_rgba.size, Image.LANCZOS)

    original_rgba.putalpha(mask)
    return original_rgba


def generate_final_image(
    image1: Image.Image,
    image2: Image.Image,
    prompt: str,
    model_name: str = "gemini-1.5-flash-latest",
) -> Image.Image:
    """Ask the Gemini API to compose two segmented subjects into a new scene.

    Args:
        image1: First segmented subject image.
        image2: Second segmented subject image.
        prompt: User instructions describing how to combine the subjects.
        model_name: Name of the Gemini model to query. The default is the
            model this function previously had hard-coded.

    Returns:
        The image decoded from the model's response.

    Exits:
        Prints an error to stderr and terminates the process with status 1
        (``SystemExit``) if the request fails or the response does not
        contain a decodable image.
    """
    print("🎨 Generating the final masterpiece...")
    # NOTE(review): this relies on the selected model returning image data;
    # confirm that the default model actually supports image output.
    model = genai.GenerativeModel(model_name)

    full_prompt = f"""
    Task: Create a new photorealistic 16:9 image by composing the subjects from the two provided images into a new scene. The subjects are provided as separate images with transparent backgrounds.
    Instructions: {prompt}
    """

    try:
        response = model.generate_content([full_prompt, image1, image2])
        # Binary payloads are carried in a part's `inline_data` blob. Scan
        # every part because a text part may precede the image.
        img_data = None
        for part in response.parts:
            blob = getattr(part, "inline_data", None)
            if blob is not None and blob.data:
                img_data = blob.data
                break
        if img_data is None:
            raise ValueError("the response did not contain any image data")
        final_image = Image.open(io.BytesIO(img_data))
    except Exception as e:
        print(f"🛑 Error during final image generation: {e}", file=sys.stderr)
        sys.exit(1)
    return final_image


def main():
    """Run the interactive CLI: load, segment, compose, and save.

    Parses the two image paths and the optional output directory from the
    command line, prompts on stdin for a description of each subject and for
    the final composition instructions, and writes the generated image to
    ``<output_dir>/result_<timestamp>.png``.

    Exits:
        Terminates the process with status 1 (``SystemExit``) if the result
        cannot be saved. The helper functions called here also exit on
        their own failures.
    """
    parser = argparse.ArgumentParser(description="Merge two images using AI based on a prompt.")
    parser.add_argument("image1", help="Path to the first image file.")
    parser.add_argument("image2", help="Path to the second image file.")
    parser.add_argument("-o", "--output_dir", default="output", help="Directory to save the final image.")
    args = parser.parse_args()

    # 1. Load images
    print("--- Step 1: Loading Images ---")
    img1 = get_image_from_path(args.image1)
    img2 = get_image_from_path(args.image2)
    print(f"✅ Loaded '{args.image1}' and '{args.image2}'.\n")

    # 2. Get descriptions and create segmented images
    print("--- Step 2: Describing & Segmenting Subjects ---")
    desc1 = input("➡️ Describe the main subject in the first image (e.g., 'the person on the left'): ")
    mask1 = generate_mask(img1, desc1)
    segmented1 = apply_mask(img1, mask1)

    desc2 = input("➡️ Describe the main subject in the second image (e.g., 'the person with the hat'): ")
    mask2 = generate_mask(img2, desc2)
    segmented2 = apply_mask(img2, mask2)
    print("✅ Subjects segmented.\n")

    # 3. Get final prompt
    print("--- Step 3: Final Composition ---")
    final_prompt = input("➡️ Describe how to combine these subjects into a new scene: ")
    print("✅ Prompt received.\n")

    # 4. Generate final image
    print("--- Step 4: Generating Final Image ---")
    final_image = generate_final_image(segmented1, segmented2, final_prompt)

    # 5. Save the result
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_filename = f"result_{timestamp}.png"
    output_path = os.path.join(args.output_dir, output_filename)

    try:
        # The output directory (default "output") may not exist yet; without
        # this the save fails after all the API work has already been done.
        # An empty directory argument means "current directory".
        if args.output_dir:
            os.makedirs(args.output_dir, exist_ok=True)
        final_image.save(output_path)
    except (OSError, ValueError) as e:
        print(f"🛑 Error saving the final image: {e}", file=sys.stderr)
        # Report the failure through the exit status as well.
        sys.exit(1)

    print(f"\n🎉 Success! Your image has been saved to: {output_path}")


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()