"""Merge the subjects of two images into a new scene using the Gemini API.

The script asks the user to describe the main subject of each input image,
requests a black/white mask for each subject from the model, cuts the subjects
out, and finally asks the model to compose them according to a user prompt.
"""

import argparse
import io
import os
import sys
from datetime import datetime

import google.generativeai as genai
from dotenv import load_dotenv
from PIL import Image

# --- Configuration ---
load_dotenv()
API_KEY = os.getenv("GEMINI_API_KEY")

# Model used for both the mask request and the final composition request.
MODEL_NAME = "gemini-1.5-flash-latest"

if not API_KEY or API_KEY == "YOUR_API_KEY":
    print("🛑 Error: GEMINI_API_KEY not found or not set.")
    print(" Please create a .env file and add your key: GEMINI_API_KEY='...'")
    # sys.exit is used instead of the ``exit`` helper, which is injected by
    # the ``site`` module and is not guaranteed to exist (e.g. under -S).
    sys.exit(1)

genai.configure(api_key=API_KEY)


# --- Main Functions ---
def get_image_from_path(path: str) -> Image.Image:
    """Open and fully decode the image stored at ``path``.

    Args:
        path: Filesystem path of the image to load.

    Returns:
        The decoded image.

    Exits the process with status 1 (after printing an error) when the file
    is missing or cannot be decoded.
    """
    try:
        img = Image.open(path)
        # Image.open is lazy; force decoding here so that corrupt or
        # truncated files are reported by this handler instead of failing
        # later in the pipeline.
        img.load()
        return img
    except FileNotFoundError:
        print(f"🛑 Error: The file '{path}' was not found.")
        sys.exit(1)
    except Exception as e:
        print(f"🛑 Error: Could not open or process the image at '{path}'. Reason: {e}")
        sys.exit(1)


def _extract_image_bytes(response) -> bytes:
    """Return the raw bytes of the first image payload found in ``response``.

    Raises:
        ValueError: If none of the response parts carries binary data.
    """
    for part in response.parts:
        # NOTE(review): the SDK's Part type is expected to carry binary
        # payloads in ``inline_data``; ``blob`` (the attribute the original
        # code read) is kept as a fallback. Confirm against the installed SDK.
        for attr in ("inline_data", "blob"):
            payload = getattr(part, attr, None)
            data = getattr(payload, "data", None)
            if data:
                return data
    raise ValueError("the response did not contain any image data")


def generate_mask(original_image: Image.Image, description: str) -> Image.Image:
    """Ask the model for a black and white mask of a subject in an image.

    Args:
        original_image: The image containing the subject.
        description: Free-text description of the subject to isolate.

    Returns:
        The mask converted to single-channel grayscale ("L") mode.

    Exits the process with status 1 (after printing an error) when the
    request fails or the response cannot be decoded as an image.
    """
    print(f"🤖 Generating mask for: '{description}'...")
    model = genai.GenerativeModel(MODEL_NAME)

    prompt = (
        "Analyze the provided image to identify the subject described as: "
        f'"{description}".\n'
        "Your task is to create a new image based on this analysis.\n"
        "In this new image, the area that corresponds to the identified "
        "subject MUST be solid white (#FFFFFF).\n"
        "Every other part of the image, which is the background, MUST be "
        "solid black (#000000).\n"
        "The final output must ONLY be the image file. Do not include any "
        "text, explanations, or any other content in your response.\n"
    )

    try:
        response = model.generate_content([prompt, original_image])
        img_data = _extract_image_bytes(response)
        # Grayscale is what putalpha() expects for a per-pixel alpha band.
        return Image.open(io.BytesIO(img_data)).convert("L")
    except Exception as e:
        print(f"🛑 Error during mask generation: {e}")
        print(" The API might have blocked the request or returned an unexpected format.")
        sys.exit(1)


def apply_mask(original_image: Image.Image, mask: Image.Image) -> Image.Image:
    """Use ``mask`` as the alpha channel of ``original_image``.

    Args:
        original_image: The image to cut the subject out of.
        mask: Grayscale mask; it is resized to the image size when the two
            sizes differ.

    Returns:
        A new RGBA image whose alpha channel is the mask.
    """
    print(" applying mask...")

    # convert() returns a new image, so the caller's image is not modified.
    original_rgba = original_image.convert("RGBA")

    # putalpha() requires the band to have exactly the image's size.
    if original_rgba.size != mask.size:
        mask = mask.resize(original_rgba.size, Image.LANCZOS)

    original_rgba.putalpha(mask)
    return original_rgba


def generate_final_image(image1: Image.Image, image2: Image.Image, prompt: str) -> Image.Image:
    """Ask the model to compose two segmented subjects into a new scene.

    Args:
        image1: First subject.
        image2: Second subject.
        prompt: User instructions describing the desired composition.

    Returns:
        The image decoded from the model response.

    Exits the process with status 1 (after printing an error) when the
    request fails or the response cannot be decoded as an image.
    """
    print("🎨 Generating the final masterpiece...")
    model = genai.GenerativeModel(MODEL_NAME)

    full_prompt = (
        "Task: Create a new photorealistic 16:9 image by composing the "
        "subjects from the two provided images into a new scene. The "
        "subjects are provided as separate images with transparent "
        "backgrounds.\n"
        "\n"
        f"Instructions: {prompt}\n"
    )

    try:
        response = model.generate_content([full_prompt, image1, image2])
        img_data = _extract_image_bytes(response)
        return Image.open(io.BytesIO(img_data))
    except Exception as e:
        print(f"🛑 Error during final image generation: {e}")
        sys.exit(1)


def main():
    """Run the interactive CLI: load, segment, compose, and save."""
    parser = argparse.ArgumentParser(description="Merge two images using AI based on a prompt.")
    parser.add_argument("image1", help="Path to the first image file.")
    parser.add_argument("image2", help="Path to the second image file.")
    parser.add_argument("-o", "--output_dir", default="output", help="Directory to save the final image.")
    args = parser.parse_args()

    # 1. Load images
    print("--- Step 1: Loading Images ---")
    img1 = get_image_from_path(args.image1)
    img2 = get_image_from_path(args.image2)
    print(f"✅ Loaded '{args.image1}' and '{args.image2}'.\n")

    # 2. Get descriptions and create segmented images
    print("--- Step 2: Describing & Segmenting Subjects ---")
    desc1 = input("➡️ Describe the main subject in the first image (e.g., 'the person on the left'): ")
    mask1 = generate_mask(img1, desc1)
    segmented1 = apply_mask(img1, mask1)

    desc2 = input("➡️ Describe the main subject in the second image (e.g., 'the person with the hat'): ")
    mask2 = generate_mask(img2, desc2)
    segmented2 = apply_mask(img2, mask2)
    print("✅ Subjects segmented.\n")

    # 3. Get final prompt
    print("--- Step 3: Final Composition ---")
    final_prompt = input("➡️ Describe how to combine these subjects into a new scene: ")
    print("✅ Prompt received.\n")

    # 4. Generate final image
    print("--- Step 4: Generating Final Image ---")
    final_image = generate_final_image(segmented1, segmented2, final_prompt)

    # 5. Save the result
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_filename = f"result_{timestamp}.png"
    output_path = os.path.join(args.output_dir, output_filename)

    try:
        # The output directory (default "output") may not exist yet; without
        # this, save() fails on a fresh checkout.
        os.makedirs(args.output_dir, exist_ok=True)
        final_image.save(output_path)
        print(f"\n🎉 Success! Your image has been saved to: {output_path}")
    except Exception as e:
        print(f"🛑 Error saving the final image: {e}")
        # Report the failure to the calling shell instead of exiting with 0.
        sys.exit(1)


if __name__ == "__main__":
    main()