Refactor GTM Architect to v2: Python-driven architecture, 9-phase process, new DB and Docker setup

2026-01-02 19:00:05 +00:00
parent a3dc012da8
commit b47a65eb83
300 changed files with 68128 additions and 4782 deletions
--- a/python_image_merger/merger.py
+++ b/python_image_merger/merger.py
@@ -0,0 +1,137 @@
+import os
+import argparse
+from datetime import datetime
+from dotenv import load_dotenv
+import google.generativeai as genai
+from PIL import Image
+import io
+
+# --- Configuration ---
+load_dotenv()
+API_KEY = os.getenv("GEMINI_API_KEY")
+if not API_KEY or API_KEY == "YOUR_API_KEY":
+    print("🛑 Error: GEMINI_API_KEY not found or not set.")
+    print("   Please create a .env file and add your key: GEMINI_API_KEY='...'")
+    exit(1)
+genai.configure(api_key=API_KEY)
+
+# --- Main Functions ---
+
+def get_image_from_path(path: str) -> Image:
+    """Safely opens an image from a given path."""
+    try:
+        img = Image.open(path)
+        return img
+    except FileNotFoundError:
+        print(f"🛑 Error: The file '{path}' was not found.")
+        exit(1)
+    except Exception as e:
+        print(f"🛑 Error: Could not open or process the image at '{path}'. Reason: {e}")
+        exit(1)
+
+def generate_mask(original_image: Image, description: str) -> Image:
+    """
+    Generates a black and white mask for a subject in an image using the Gemini API.
+    """
+    print(f"🤖 Generating mask for: '{description}'...")
+    model = genai.GenerativeModel('gemini-1.5-flash-latest')
+    prompt = f"""
+      Analyze the provided image to identify the subject described as: "{description}".
+      Your task is to create a new image based on this analysis.
+      In this new image, the area that corresponds to the identified subject MUST be solid white (#FFFFFF).
+      Every other part of the image, which is the background, MUST be solid black (#000000).
+      The final output must ONLY be the image file. Do not include any text, explanations, or any other content in your response.
+    """
+    try:
+        response = model.generate_content([prompt, original_image])
+        # Assuming the API returns the image directly in the first part
+        img_data = response.parts[0].blob.data
+        mask = Image.open(io.BytesIO(img_data)).convert("L") # Convert to grayscale
+        return mask
+    except Exception as e:
+        print(f"🛑 Error during mask generation: {e}")
+        print("   The API might have blocked the request or returned an unexpected format.")
+        exit(1)
+
+def apply_mask(original_image: Image, mask: Image) -> Image:
+    """
+    Applies a mask to an image to create a new image with a transparent background.
+    """
+    print(" applying mask...")
+    # Ensure the original image has an alpha channel
+    original_rgba = original_image.convert("RGBA")
+    # Resize mask to match original image if necessary
+    if original_rgba.size != mask.size:
+        mask = mask.resize(original_rgba.size, Image.LANCZOS)
+    
+    original_rgba.putalpha(mask)
+    return original_rgba
+
+def generate_final_image(image1: Image, image2: Image, prompt: str) -> Image:
+    """
+    Generates the final composite image from two segmented images and a prompt.
+    """
+    print("🎨 Generating the final masterpiece...")
+    model = genai.GenerativeModel('gemini-1.5-flash-latest')
+    
+    full_prompt = f"""
+    Task: Create a new photorealistic 16:9 image by composing the subjects from the two provided images into a new scene. The subjects are provided as separate images with transparent backgrounds.
+    Instructions: {prompt}
+    """
+    
+    try:
+        response = model.generate_content([full_prompt, image1, image2])
+        img_data = response.parts[0].blob.data
+        final_image = Image.open(io.BytesIO(img_data))
+        return final_image
+    except Exception as e:
+        print(f"🛑 Error during final image generation: {e}")
+        exit(1)
+
+def main():
+    """Main CLI application logic."""
+    parser = argparse.ArgumentParser(description="Merge two images using AI based on a prompt.")
+    parser.add_argument("image1", help="Path to the first image file.")
+    parser.add_argument("image2", help="Path to the second image file.")
+    parser.add_argument("-o", "--output_dir", default="output", help="Directory to save the final image.")
+    args = parser.parse_args()
+
+    # 1. Load images
+    print("--- Step 1: Loading Images ---")
+    img1 = get_image_from_path(args.image1)
+    img2 = get_image_from_path(args.image2)
+    print(f"✅ Loaded '{args.image1}' and '{args.image2}'.\n")
+
+    # 2. Get descriptions and create segmented images
+    print("--- Step 2: Describing & Segmenting Subjects ---")
+    desc1 = input("➡️ Describe the main subject in the first image (e.g., 'the person on the left'): ")
+    mask1 = generate_mask(img1, desc1)
+    segmented1 = apply_mask(img1, mask1)
+
+    desc2 = input("➡️ Describe the main subject in the second image (e.g., 'the person with the hat'): ")
+    mask2 = generate_mask(img2, desc2)
+    segmented2 = apply_mask(img2, mask2)
+    print("✅ Subjects segmented.\n")
+
+    # 3. Get final prompt
+    print("--- Step 3: Final Composition ---")
+    final_prompt = input("➡️ Describe how to combine these subjects into a new scene: ")
+    print("✅ Prompt received.\n")
+
+    # 4. Generate final image
+    print("--- Step 4: Generating Final Image ---")
+    final_image = generate_final_image(segmented1, segmented2, final_prompt)
+    
+    # 5. Save the result
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    output_filename = f"result_{timestamp}.png"
+    output_path = os.path.join(args.output_dir, output_filename)
+    
+    try:
+        final_image.save(output_path)
+        print(f"\n🎉 Success! Your image has been saved to: {output_path}")
+    except Exception as e:
+        print(f"🛑 Error saving the final image: {e}")
+
+# Run the CLI only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()