# Brancheneinstufung2/python_image_merger/merger.py

import os
import argparse
from datetime import datetime
from dotenv import load_dotenv
import google.generativeai as genai
from PIL import Image
import io

# --- Configuration ---
# Read GEMINI_API_KEY from the environment (optionally populated from a .env
# file) and configure the Gemini client once, at import time.
load_dotenv()
API_KEY = os.getenv("GEMINI_API_KEY")
if not API_KEY or API_KEY == "YOUR_API_KEY":
    print("🛑 Error: GEMINI_API_KEY not found or not set.")
    print("   Please create a .env file and add your key: GEMINI_API_KEY='...'")
    # `exit()` is an interactive helper injected by the `site` module and is
    # not guaranteed to exist (e.g. `python -S`, frozen executables), so
    # terminate by raising SystemExit directly.
    raise SystemExit(1)
genai.configure(api_key=API_KEY)

# --- Main Functions ---

def get_image_from_path(path: str) -> Image.Image:
    """Open and fully decode the image stored at ``path``.

    Args:
        path: Filesystem path of the image to load.

    Returns:
        The decoded image.

    Raises:
        SystemExit: With status 1 (after printing a message) if the file is
            missing or cannot be opened/decoded as an image.
    """
    try:
        img = Image.open(path)
        # Image.open is lazy: it only identifies the file. Force decoding here
        # so corrupt or truncated files are reported by this function instead
        # of failing later, deep inside an API call.
        img.load()
        return img
    except FileNotFoundError:
        print(f"🛑 Error: The file '{path}' was not found.")
        raise SystemExit(1)
    except Exception as e:
        print(f"🛑 Error: Could not open or process the image at '{path}'. Reason: {e}")
        raise SystemExit(1)

def generate_mask(original_image: Image.Image, description: str) -> Image.Image:
    """Ask the Gemini API for a black-and-white mask of the described subject.

    The prompt requests an image in which the subject is solid white and
    everything else is solid black.

    Args:
        original_image: Image containing the subject to isolate.
        description: Free-text description of the subject.

    Returns:
        The image returned by the API, converted to grayscale (mode "L").

    Raises:
        SystemExit: With status 1 (after printing a message) if the request
            fails or the response contains no decodable image data.
    """
    print(f"🤖 Generating mask for: '{description}'...")
    # NOTE(review): this relies on the configured model being able to return
    # image output — confirm for the model name used here.
    model = genai.GenerativeModel('gemini-1.5-flash-latest')
    prompt = f"""
      Analyze the provided image to identify the subject described as: "{description}".
      Your task is to create a new image based on this analysis.
      In this new image, the area that corresponds to the identified subject MUST be solid white (#FFFFFF).
      Every other part of the image, which is the background, MUST be solid black (#000000).
      The final output must ONLY be the image file. Do not include any text, explanations, or any other content in your response.
    """
    try:
        response = model.generate_content([prompt, original_image])
        # Binary payloads are carried in a part's `inline_data` blob. Scan
        # every part rather than assuming the first one is the image, since a
        # text part may precede it.
        img_data = None
        for part in response.parts:
            inline = getattr(part, "inline_data", None)
            if inline is not None and inline.data:
                img_data = inline.data
                break
        if img_data is None:
            raise ValueError("the response did not contain any image data")
        mask = Image.open(io.BytesIO(img_data)).convert("L")  # Convert to grayscale
        return mask
    except Exception as e:
        print(f"🛑 Error during mask generation: {e}")
        print("   The API might have blocked the request or returned an unexpected format.")
        raise SystemExit(1)

def apply_mask(original_image: Image, mask: Image) -> Image:
    """
    Returns an RGBA copy of ``original_image`` whose alpha channel is ``mask``.

    The mask is resized (LANCZOS) to the image's dimensions when the two
    sizes differ. The input image itself is left untouched.
    """
    print(" applying mask...")
    # convert() yields a new image, so adding alpha does not mutate the input.
    cutout = original_image.convert("RGBA")
    target_size = cutout.size

    # putalpha needs a band with exactly the same dimensions as the image.
    if mask.size == target_size:
        alpha_band = mask
    else:
        alpha_band = mask.resize(target_size, Image.LANCZOS)

    cutout.putalpha(alpha_band)
    return cutout

def generate_final_image(image1: Image.Image, image2: Image.Image, prompt: str) -> Image.Image:
    """Ask the Gemini API to compose two segmented subjects into a new scene.

    Args:
        image1: First subject image (the caller passes a transparent-background cut-out).
        image2: Second subject image (same expectation as ``image1``).
        prompt: User instructions describing the desired composition.

    Returns:
        The image decoded from the API response.

    Raises:
        SystemExit: With status 1 (after printing a message) if the request
            fails or the response contains no decodable image data.
    """
    print("🎨 Generating the final masterpiece...")
    # NOTE(review): this relies on the configured model being able to return
    # image output — confirm for the model name used here.
    model = genai.GenerativeModel('gemini-1.5-flash-latest')

    full_prompt = f"""
    Task: Create a new photorealistic 16:9 image by composing the subjects from the two provided images into a new scene. The subjects are provided as separate images with transparent backgrounds.
    Instructions: {prompt}
    """

    try:
        response = model.generate_content([full_prompt, image1, image2])
        # Binary payloads are carried in a part's `inline_data` blob. Scan
        # every part rather than assuming the first one is the image, since a
        # text part may precede it.
        img_data = None
        for part in response.parts:
            inline = getattr(part, "inline_data", None)
            if inline is not None and inline.data:
                img_data = inline.data
                break
        if img_data is None:
            raise ValueError("the response did not contain any image data")
        final_image = Image.open(io.BytesIO(img_data))
        return final_image
    except Exception as e:
        print(f"🛑 Error during final image generation: {e}")
        raise SystemExit(1)

def main():
    """Run the interactive CLI: load, segment, compose, and save.

    Command-line arguments:
        image1, image2: Paths of the two input images.
        -o / --output_dir: Directory for the result (default: "output");
            created if it does not exist.

    The subject descriptions and the composition prompt are read
    interactively from standard input.

    Raises:
        SystemExit: With status 1 if the final image cannot be saved.
    """
    parser = argparse.ArgumentParser(description="Merge two images using AI based on a prompt.")
    parser.add_argument("image1", help="Path to the first image file.")
    parser.add_argument("image2", help="Path to the second image file.")
    parser.add_argument("-o", "--output_dir", default="output", help="Directory to save the final image.")
    args = parser.parse_args()

    # 1. Load images
    print("--- Step 1: Loading Images ---")
    img1 = get_image_from_path(args.image1)
    img2 = get_image_from_path(args.image2)
    print(f"✅ Loaded '{args.image1}' and '{args.image2}'.\n")

    # 2. Get descriptions and create segmented images
    print("--- Step 2: Describing & Segmenting Subjects ---")
    desc1 = input("➡️ Describe the main subject in the first image (e.g., 'the person on the left'): ")
    mask1 = generate_mask(img1, desc1)
    segmented1 = apply_mask(img1, mask1)

    desc2 = input("➡️ Describe the main subject in the second image (e.g., 'the person with the hat'): ")
    mask2 = generate_mask(img2, desc2)
    segmented2 = apply_mask(img2, mask2)
    print("✅ Subjects segmented.\n")

    # 3. Get final prompt
    print("--- Step 3: Final Composition ---")
    final_prompt = input("➡️ Describe how to combine these subjects into a new scene: ")
    print("✅ Prompt received.\n")

    # 4. Generate final image
    print("--- Step 4: Generating Final Image ---")
    final_image = generate_final_image(segmented1, segmented2, final_prompt)

    # 5. Save the result
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_filename = f"result_{timestamp}.png"
    output_path = os.path.join(args.output_dir, output_filename)

    try:
        # Saving does not create missing parent directories, and the default
        # "output" directory usually does not exist on a first run. An empty
        # output_dir means "current directory", which needs no creation.
        if args.output_dir:
            os.makedirs(args.output_dir, exist_ok=True)
        final_image.save(output_path)
    except Exception as e:
        print(f"🛑 Error saving the final image: {e}")
        # Report the failure through the exit status as well, consistent with
        # the other error paths in this script.
        raise SystemExit(1)
    else:
        print(f"\n🎉 Success! Your image has been saved to: {output_path}")

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()