Refactor GTM Architect to v2: Python-driven architecture, 9-phase process, new DB and Docker setup

This commit is contained in:
2026-01-02 19:00:05 +00:00
parent a3dc012da8
commit b47a65eb83
300 changed files with 68128 additions and 4782 deletions

View File

@@ -0,0 +1,137 @@
import os
import argparse
from datetime import datetime
from dotenv import load_dotenv
import google.generativeai as genai
from PIL import Image
import io
# --- Configuration ---
# Pull variables from a local .env file (if one exists) into the environment
# before reading the key.
load_dotenv()
API_KEY = os.getenv("GEMINI_API_KEY")
# Refuse to start without a usable key: either the variable is missing/empty
# or it still holds the literal placeholder value "YOUR_API_KEY".
if not API_KEY or API_KEY == "YOUR_API_KEY":
    print("🛑 Error: GEMINI_API_KEY not found or not set.")
    print(" Please create a .env file and add your key: GEMINI_API_KEY='...'")
    # Raise SystemExit directly instead of calling the bare `exit()` helper:
    # `exit` is injected by the `site` module for interactive use and is not
    # guaranteed to exist (e.g. `python -S`, frozen/embedded interpreters).
    raise SystemExit(1)
genai.configure(api_key=API_KEY)
# --- Main Functions ---
def get_image_from_path(path: str) -> Image.Image:
    """Open the image file at ``path`` or terminate the program.

    This is a CLI helper: instead of propagating errors it prints a
    human-readable message and exits with status 1.

    Args:
        path: Filesystem path of the image to open.

    Returns:
        The image object returned by ``Image.open``.

    Raises:
        SystemExit: With code 1 when the file does not exist or cannot be
            opened as an image.
    """
    try:
        return Image.open(path)
    except FileNotFoundError:
        print(f"🛑 Error: The file '{path}' was not found.")
    except Exception as e:
        # Broad on purpose: any other failure to open the file is reported to
        # the user the same way rather than surfacing a traceback.
        print(f"🛑 Error: Could not open or process the image at '{path}'. Reason: {e}")
    # Only reached after one of the handlers above printed its message.
    # SystemExit is raised directly because the bare `exit()` helper comes
    # from the `site` module and is not available in every interpreter setup.
    raise SystemExit(1)
def generate_mask(original_image: Image.Image, description: str) -> Image.Image:
    """Ask the Gemini model for a black-and-white mask of a described subject.

    The prompt instructs the model to paint the subject solid white and
    everything else solid black. The returned image bytes are decoded and
    converted to single-channel grayscale.

    Args:
        original_image: The image containing the subject.
        description: Free-text description of the subject to isolate.

    Returns:
        The mask as a mode ``"L"`` (grayscale) image.

    Raises:
        SystemExit: With code 1 if the request fails or the response does not
            contain decodable image data.
    """
    print(f"🤖 Generating mask for: '{description}'...")
    model = genai.GenerativeModel('gemini-1.5-flash-latest')
    # The prompt lines are kept flush-left so the text sent to the model
    # carries no extra leading whitespace.
    prompt = f"""
Analyze the provided image to identify the subject described as: "{description}".
Your task is to create a new image based on this analysis.
In this new image, the area that corresponds to the identified subject MUST be solid white (#FFFFFF).
Every other part of the image, which is the background, MUST be solid black (#000000).
The final output must ONLY be the image file. Do not include any text, explanations, or any other content in your response.
"""
    try:
        response = model.generate_content([prompt, original_image])
        # Binary payloads are carried in a part's `inline_data` blob (there is
        # no `blob` attribute on a part). Take the first part that actually
        # holds bytes instead of assuming it is part 0, since the model may
        # emit a text part before the image.
        img_data = next(
            (part.inline_data.data for part in response.parts if part.inline_data.data),
            None,
        )
        if not img_data:
            raise ValueError("the response did not contain any image data")
        # Convert to grayscale so the result can be used as an alpha channel.
        return Image.open(io.BytesIO(img_data)).convert("L")
    except Exception as e:
        # Broad on purpose: network, safety-block and decoding failures are all
        # reported to the user the same way before the program stops.
        print(f"🛑 Error during mask generation: {e}")
        print(" The API might have blocked the request or returned an unexpected format.")
        # Raise SystemExit directly; the bare `exit()` helper is provided by
        # the `site` module and is not guaranteed to exist.
        raise SystemExit(1)
def apply_mask(original_image: Image.Image, mask: Image.Image) -> Image.Image:
    """Use ``mask`` as the alpha channel of ``original_image``.

    White areas of the mask stay opaque and black areas become transparent.

    Args:
        original_image: The source image; it is converted to RGBA first.
        mask: The mask image. It is converted to grayscale and resized to the
            source image's size when necessary.

    Returns:
        The RGBA conversion of ``original_image`` with ``mask`` installed as
        its alpha channel.
    """
    print(" applying mask...")
    # Ensure there is an alpha channel to replace.
    original_rgba = original_image.convert("RGBA")
    # putalpha() only accepts "L" or "1" images and raises ValueError for any
    # other mode, so normalise masks that arrive as e.g. RGB.
    if mask.mode not in ("L", "1"):
        mask = mask.convert("L")
    # The alpha layer must have exactly the same dimensions as the image.
    if mask.size != original_rgba.size:
        mask = mask.resize(original_rgba.size, Image.LANCZOS)
    original_rgba.putalpha(mask)
    return original_rgba
def generate_final_image(image1: Image.Image, image2: Image.Image, prompt: str) -> Image.Image:
    """Ask the Gemini model to compose two subject images into one scene.

    Args:
        image1: First subject image (the prompt tells the model it has a
            transparent background).
        image2: Second subject image, same expectations as ``image1``.
        prompt: The user's instructions for how to combine the subjects.

    Returns:
        The image decoded from the model's response.

    Raises:
        SystemExit: With code 1 if the request fails or the response does not
            contain decodable image data.
    """
    print("🎨 Generating the final masterpiece...")
    model = genai.GenerativeModel('gemini-1.5-flash-latest')
    # The prompt lines are kept flush-left so the text sent to the model
    # carries no extra leading whitespace.
    full_prompt = f"""
Task: Create a new photorealistic 16:9 image by composing the subjects from the two provided images into a new scene. The subjects are provided as separate images with transparent backgrounds.
Instructions: {prompt}
"""
    try:
        response = model.generate_content([full_prompt, image1, image2])
        # Binary payloads are carried in a part's `inline_data` blob (there is
        # no `blob` attribute on a part). Take the first part that actually
        # holds bytes instead of assuming it is part 0, since the model may
        # emit a text part before the image.
        img_data = next(
            (part.inline_data.data for part in response.parts if part.inline_data.data),
            None,
        )
        if not img_data:
            raise ValueError("the response did not contain any image data")
        return Image.open(io.BytesIO(img_data))
    except Exception as e:
        # Broad on purpose: any failure here is reported to the user and ends
        # the run.
        print(f"🛑 Error during final image generation: {e}")
        # Raise SystemExit directly; the bare `exit()` helper is provided by
        # the `site` module and is not guaranteed to exist.
        raise SystemExit(1)
def main():
    """Run the interactive CLI: load, segment, compose and save.

    Steps:
        1. Load the two images named on the command line.
        2. Ask the user to describe the subject of each image, generate a mask
           for it and cut the subject out.
        3. Ask the user how the two subjects should be combined.
        4. Generate the final composite image.
        5. Save it as ``result_<timestamp>.png`` in the output directory.

    Raises:
        SystemExit: With code 1 if the final image cannot be saved. The helper
            functions called along the way also exit with code 1 on failure.
    """
    parser = argparse.ArgumentParser(description="Merge two images using AI based on a prompt.")
    parser.add_argument("image1", help="Path to the first image file.")
    parser.add_argument("image2", help="Path to the second image file.")
    parser.add_argument("-o", "--output_dir", default="output", help="Directory to save the final image.")
    args = parser.parse_args()

    # 1. Load images
    print("--- Step 1: Loading Images ---")
    img1 = get_image_from_path(args.image1)
    img2 = get_image_from_path(args.image2)
    print(f"✅ Loaded '{args.image1}' and '{args.image2}'.\n")

    # 2. Get descriptions and create segmented images
    print("--- Step 2: Describing & Segmenting Subjects ---")
    desc1 = input("➡️ Describe the main subject in the first image (e.g., 'the person on the left'): ")
    mask1 = generate_mask(img1, desc1)
    segmented1 = apply_mask(img1, mask1)
    desc2 = input("➡️ Describe the main subject in the second image (e.g., 'the person with the hat'): ")
    mask2 = generate_mask(img2, desc2)
    segmented2 = apply_mask(img2, mask2)
    print("✅ Subjects segmented.\n")

    # 3. Get final prompt
    print("--- Step 3: Final Composition ---")
    final_prompt = input("➡️ Describe how to combine these subjects into a new scene: ")
    print("✅ Prompt received.\n")

    # 4. Generate final image
    print("--- Step 4: Generating Final Image ---")
    final_image = generate_final_image(segmented1, segmented2, final_prompt)

    # 5. Save the result
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_filename = f"result_{timestamp}.png"
    output_path = os.path.join(args.output_dir, output_filename)
    try:
        # Saving does not create missing directories, and the default
        # "output" folder will not exist on a fresh checkout. An empty
        # directory argument means "current directory" and needs no creation.
        if args.output_dir:
            os.makedirs(args.output_dir, exist_ok=True)
        final_image.save(output_path)
    except Exception as e:
        print(f"🛑 Error saving the final image: {e}")
        # Report the failure through the exit status as well, consistent with
        # every other error path in this script.
        raise SystemExit(1)
    print(f"\n🎉 Success! Your image has been saved to: {output_path}")


# Run the CLI only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()