Spaces:

kevin1kevin1k
/

WeavePrompt

Runtime error

App Files Files Community

kevin1kevin1k committed on Oct 12

Commit

c6eb9ce

verified ·

1 Parent(s): 0514939

Upload folder using huggingface_hub

Browse files

Files changed (16) hide show

.dockerignore +28 -0
.gitignore +23 -0
Dockerfile +42 -9
README.md +30 -16
README_HF.md +41 -0
app.py +115 -0
fal_image_generator.py +47 -0
image_to_text.py +273 -0
lpips_evaluator.py +25 -0
mock_components.py +53 -0
prompt_refiner.py +49 -0
pyproject.toml +17 -0
requirements.txt +9 -3
spaces_config.yml +2 -0
uv.lock +0 -0
weave_prompt.py +166 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,28 @@

+.git
+.github
+__pycache__
+*.pyc
+*.pyo
+*.pyd
+.Python
+env
+.env
+.venv/
+venv/
+pip-log.txt
+pip-delete-this-directory.txt
+.tox
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.log
+.gitignore
+.vscode
+.idea
+*.swp
+*.swo
+*~
+.DS_Store

.gitignore ADDED Viewed

	@@ -0,0 +1,23 @@

+# Python-generated files
+__pycache__/
+*.py[oc]
+build/
+dist/
+wheels/
+*.egg-info
+# Virtual environments
+.venv
+.python-version
+# Environment variables and secrets
+.env
+*.env
+# API keys and sensitive data
+config.json
+secrets.json
+# History files
+.history/

Dockerfile CHANGED Viewed

@@ -1,20 +1,53 @@
-FROM python:3.13.5-slim
 WORKDIR /app
 RUN apt-get update && apt-get install -y \
-    build-essential \
-    curl \
     git \
     && rm -rf /var/lib/apt/lists/*
-COPY requirements.txt ./
-COPY src/ ./src/
-RUN pip3 install -r requirements.txt
-EXPOSE 8501
-HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
-ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]

+# Use a Python 3.13 slim image (pyproject.toml declares requires-python >= 3.13)
+FROM python:3.13-slim
+# Set working directory
 WORKDIR /app
+# Install system dependencies
 RUN apt-get update && apt-get install -y \
     git \
+    curl \
+    build-essential \
     && rm -rf /var/lib/apt/lists/*
+# Install UV package manager
+RUN pip install uv
+# Tell uv to build the project environment in /opt/venv. By default `uv sync`
+# creates ./.venv, which would leave the /opt/venv put on PATH below empty.
+ENV UV_PROJECT_ENVIRONMENT=/opt/venv
+# Copy UV configuration files first for better caching
+COPY pyproject.toml uv.lock ./
+# Install only the locked dependencies; the application code is copied later
+RUN uv sync --frozen --no-install-project
+# Set the virtual environment as the default Python
+ENV PATH="/opt/venv/bin:$PATH"
+# Create a non-root user
+RUN useradd -m -u 1000 user
+USER user
+# Set environment variables (PYTHONPATH points at the directory the app is copied to)
+ENV HOME=/home/user \
+    PATH="/opt/venv/bin:/home/user/.local/bin:$PATH" \
+    PYTHONPATH=/home/user/app
+# Change to user's home directory
+WORKDIR $HOME/app
+# Copy the application once, owned by the non-root user
+COPY --chown=user . $HOME/app
+# Expose the port Streamlit runs on
+EXPOSE 7860
+# Health check
+HEALTHCHECK CMD curl --fail http://localhost:7860/_stcore/health
+# Run the Streamlit application
+CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0", "--server.headless=true", "--server.fileWatcherType=none", "--browser.gatherUsageStats=false"]

README.md CHANGED Viewed

@@ -1,19 +1,33 @@
----
-title: WeavePrompt
-emoji: 🚀
-colorFrom: red
-colorTo: red
-sdk: docker
-app_port: 8501
-tags:
-- streamlit
-pinned: false
-short_description: Streamlit template space
----
-# Welcome to Streamlit!
-Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).

+# WeavePrompt
+Iterative prompt refinement for text-to-image models.
+Given a target image, WeavePrompt automatically generates and refines text prompts to make a model's output resemble the target image, using vision-language models and perceptual metrics.
+## Features
+- Upload a target image
+- Step-by-step prompt optimization
+- View prompt and generated image at each iteration
+- Full optimization history
+## Installation
+1. Clone the repository:
+	```bash
+	git clone <repo-url>
+	cd WeavePrompt
+	```
+2. Install dependencies:
+	```bash
+	uv venv
+	uv sync
+	source .venv/bin/activate
+	```
+## Usage
+Run the demo app:
+```bash
+streamlit run app.py
+```
+Follow the instructions in the browser to upload an image and step through the optimization process.

README_HF.md ADDED Viewed

	@@ -0,0 +1,41 @@

+---
+title: WeavePrompt
+emoji: 🎨
+colorFrom: blue
+colorTo: purple
+sdk: docker
+pinned: false
+license: mit
+app_port: 7860
+---
+# WeavePrompt
+An intelligent prompt optimization system that iteratively refines text-to-image generation prompts to better match target images.
+## Features
+- 🎯 **Target-driven optimization**: Upload a target image and get optimized prompts
+- 🔄 **Iterative refinement**: Automatically improves prompts through multiple iterations
+- 📊 **Similarity tracking**: Monitor progress with visual similarity metrics
+- 🎨 **High-quality generation**: Uses advanced text-to-image models
+## How it works
+1. Upload your target image
+2. Provide an initial prompt (or let the system generate one)
+3. Watch as the system iteratively refines the prompt
+4. Get optimized prompts that better match your target image
+## Usage
+Simply run the Streamlit app and follow the interactive interface to optimize your prompts!
+## Configuration
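+Set your API keys as environment variables:
+- `FAL_KEY`: Your FAL AI API key for image generation
+- `WANDB_API_KEY`: Your Weights & Biases API key, used for the W&B Inference calls that analyze images and refine prompts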
+---
+Built with ❤️ using Streamlit and advanced AI models.

app.py ADDED Viewed

	@@ -0,0 +1,115 @@

+import streamlit as st
+from PIL import Image
+import time
+from image_to_text import LlamaEvaluator
+from prompt_refiner import LlamaPromptRefiner
+from weave_prompt import PromptOptimizer
+from mock_components import MockTextToImageModel, MockImageEvaluator, MockPromptRefiner
+from lpips_evaluator import LPIPSImageSimilarityMetric
+from fal_image_generator import FalImageGenerator
+import io
+st.set_page_config(
+    page_title="WeavePrompt Demo",
+    page_icon="🎨",
+    layout="wide"
+)
+def main():
+    st.title("🎨 WeavePrompt: Iterative Prompt Optimization")
+    st.markdown("""
+    Upload a target image and watch as WeavePrompt iteratively optimizes a text prompt to recreate it.
+    This demo uses mock components for illustration.
+    """)
+    # Initialize session state
+    if 'optimizer' not in st.session_state:
+        st.session_state.optimizer = PromptOptimizer(
+            model=FalImageGenerator(),
+            evaluator=LlamaEvaluator(),
+            refiner=LlamaPromptRefiner(),
+            similarity_metric=LPIPSImageSimilarityMetric(),
+            max_iterations=10,
+            similarity_threshold=0.95
+        )
+    if 'optimization_started' not in st.session_state:
+        st.session_state.optimization_started = False
+    if 'current_results' not in st.session_state:
+        st.session_state.current_results = None
+    # File uploader
+    uploaded_file = st.file_uploader("Choose a target image", type=['png', 'jpg', 'jpeg'])
+    if uploaded_file is not None:
+        # Display target image
+        target_image = Image.open(uploaded_file)
+        col1, col2 = st.columns(2)
+        with col1:
+            st.subheader("Target Image")
+            st.image(target_image, width='stretch')
+        # Start button
+        if not st.session_state.optimization_started:
+            if st.button("Start Optimization"):
+                st.session_state.optimization_started = True
+                # Initialize optimization
+                is_completed, prompt, generated_image = st.session_state.optimizer.initialize(target_image)
+                st.session_state.current_results = (is_completed, prompt, generated_image)
+        # Display optimization progress
+        if st.session_state.optimization_started:
+            with col2:
+                st.subheader("Generated Image")
+                is_completed, prompt, generated_image = st.session_state.current_results
+                st.image(generated_image, width='stretch')
+            # Display prompt and controls
+            st.text_area("Current Prompt", prompt, height=100)
+            # Progress metrics
+            col1, col2, col3 = st.columns(3)
+            with col1:
+                st.metric("Iteration", len(st.session_state.optimizer.history))
+            with col2:
+                if len(st.session_state.optimizer.history) > 0:
+                    similarity = st.session_state.optimizer.history[-1]['similarity']
+                    st.metric("Similarity", f"{similarity:.2%}")
+            with col3:
+                st.metric("Status", "Completed" if is_completed else "In Progress")
+            # Next step button
+            if not is_completed:
+                if st.button("Next Step"):
+                    is_completed, prompt, generated_image = st.session_state.optimizer.step()
+                    st.session_state.current_results = (is_completed, prompt, generated_image)
+                    st.rerun()
+            else:
+                st.success("Optimization completed! Click 'Reset' to try another image.")
+            # Reset button
+            if st.button("Reset"):
+                st.session_state.optimization_started = False
+                st.session_state.current_results = None
+                st.rerun()
+            # Display history
+            if len(st.session_state.optimizer.history) > 0:
+                st.subheader("Optimization History")
+                for idx, hist_entry in enumerate(st.session_state.optimizer.history):
+                    st.markdown(f"### Step {idx + 1}")
+                    col1, col2 = st.columns([2, 3])
+                    with col1:
+                        st.image(hist_entry['image'], width='stretch')
+                    with col2:
+                        st.text(f"Similarity: {hist_entry['similarity']:.2%}")
+                        st.text("Prompt:")
+                        st.text(hist_entry['prompt'])
+                        st.text("\nAnalysis:")
+                        for key, value in hist_entry['analysis'].items():
+                            st.text(f"{key}: {value}")
+if __name__ == "__main__":
+    main()

fal_image_generator.py ADDED Viewed

	@@ -0,0 +1,47 @@

+import fal_client
+from PIL import Image
+from typing import Dict, Any
+import requests
+from io import BytesIO
+from weave_prompt import TextToImageModel
+import load_keys
+class FalImageGenerator(TextToImageModel):
+    """Handles image generation using fal_client."""
+    def __init__(self, model_name: str = "fal-ai/flux-pro"):
+        self.model_name = model_name
+    def _on_queue_update(self, update):
+        """Handle queue updates during image generation."""
+        if isinstance(update, fal_client.InProgress):
+            for log in update.logs:
+                print(log["message"])
+    def generate(self, prompt: str, **kwargs) -> Image.Image:
+        """Generate an image from a text prompt using fal_client."""
+        result = fal_client.subscribe(
+            self.model_name,
+            arguments={
+                "prompt": prompt,
+                **kwargs
+            },
+            with_logs=True,
+            on_queue_update=self._on_queue_update,
+        )
+        print(result)
+        return self._extract_image_from_result(result)
+    def _extract_image_from_result(self, result: Dict[str, Any]) -> Image.Image:
+        """Extract and download image from fal_client result."""
+        if result and 'images' in result and len(result['images']) > 0:
+            image_url = result['images'][0]['url']
+            response = requests.get(image_url)
+            response.raise_for_status()  # Raise an exception for bad status codes
+            image = Image.open(BytesIO(response.content))
+            return image
+        else:
+            raise ValueError("No image found in the result")

image_to_text.py ADDED Viewed

	@@ -0,0 +1,273 @@

+import openai
+import weave
+import base64
+import json
+import tempfile
+import os
+from pathlib import Path
+from PIL import Image
+from typing import Dict, Any, Optional
+from weave_prompt import ImageEvaluator
+import load_keys
+# Weave autopatches OpenAI to log LLM calls to W&B
+weave.init("meta-llama")
+class LlamaEvaluator(ImageEvaluator):
+    """Llama-based image evaluator using W&B Inference."""
+    def __init__(self, api_key: Optional[str] = None):
+        """
+        Initialize the Llama evaluator with OpenAI client.
+        Args:
+            api_key: Optional API key. If not provided, will look for OPENAI_API_KEY
+                    or WANDB_API_KEY environment variables.
+        """
+        # Get API key from parameter, environment variables, or raise error
+        if api_key is None:
+            api_key = os.getenv('WANDB_API_KEY')
+            if api_key is None:
+                raise ValueError(
+                    "API key not provided. Please either:\n"
+                    "1. Pass api_key parameter to LlamaEvaluator()\n"
+                    "2. Set OPENAI_API_KEY environment variable\n"
+                    "3. Set WANDB_API_KEY environment variable\n"
+                    "Get your API key from https://wandb.ai/authorize"
+                )
+        self.client = openai.OpenAI(
+            # The custom base URL points to W&B Inference
+            base_url='https://api.inference.wandb.ai/v1',
+            # Get your API key from https://wandb.ai/authorize
+            api_key=api_key,
+        )
+        self.model = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
+    def _encode_image(self, image: Image.Image) -> str:
+        """Encode PIL Image to base64 string."""
+        try:
+            # Save image to temporary file and encode
+            with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as tmp_file:
+                image.save(tmp_file.name, format='JPEG')
+                with open(tmp_file.name, "rb") as image_file:
+                    encoded = base64.b64encode(image_file.read()).decode('utf-8')
+                # Clean up temp file
+                Path(tmp_file.name).unlink()
+                return encoded
+        except Exception as e:
+            print(f"Error encoding image: {e}")
+            return None
+    def _call_vision_model(self, prompt: str, images: list) -> str:
+        """Call the vision model with prompt and images."""
+        try:
+            # Prepare content with text and images
+            content = [{"type": "text", "text": prompt}]
+            for i, img in enumerate(images):
+                base64_image = self._encode_image(img)
+                if base64_image:
+                    if i > 0:  # Add label for multiple images
+                        content.append({
+                            "type": "text",
+                            "text": f"Image {i+1}:"
+                        })
+                    content.append({
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:image/jpeg;base64,{base64_image}"
+                        }
+                    })
+            response = self.client.chat.completions.create(
+                model=self.model,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": "You are an expert image analyst. Provide detailed, accurate analysis."
+                    },
+                    {
+                        "role": "user",
+                        "content": content
+                    }
+                ],
+                max_tokens=1000
+            )
+            return response.choices[0].message.content
+        except Exception as e:
+            print(f"Error calling vision model: {e}")
+            return None
+    def generate_initial_prompt(self, generated_img: Image.Image) -> str:
+        """Generate an initial prompt by describing the generated_img image."""
+        prompt = """
+        Analyze this image and generate a detailed text prompt that could be used to recreate it.
+        Focus on:
+        - Main subjects and objects
+        - Visual style and artistic technique
+        - Colors, lighting, and mood
+        - Composition and layout
+        - Important details and textures
+        Provide a concise but comprehensive prompt suitable for image generation.
+        """
+        description = self._call_vision_model(prompt, [generated_img])
+        if description:
+            return description.strip()
+        else:
+            # Fallback prompt
+            return "A beautiful image with vibrant colors and detailed composition"
+    def analyze_differences(self, generated_img: Image.Image, target_img: Image.Image) -> Dict[str, Any]:
+        """Analyze differences between generated and target images."""
+        analysis_prompt = """
+        Compare these two images and analyze their differences. The first image is generated, the second is the target.
+        Please provide a detailed analysis in JSON format with the following structure:
+        {
+            "missing_elements": ["list of elements present in target but missing in generated"],
+            "style_differences": ["list of style differences between the images"],
+            "color_differences": ["differences in color, lighting, or tone"],
+            "composition_differences": ["differences in layout, positioning, or framing"],
+            "quality_differences": ["differences in detail, sharpness, or overall quality"],
+            "similarity_score": "percentage of how similar the images are (0-100)",
+            "overall_assessment": "brief summary of the main differences"
+        }
+        Focus on identifying what elements, styles, or qualities are present in the target image but missing or different in the generated image.
+        """
+        response_text = self._call_vision_model(analysis_prompt, [generated_img, target_img])
+        if not response_text:
+            return {
+                "missing_elements": ["texture", "details"],
+                "style_differences": ["color intensity", "composition"],
+                "error": "Failed to analyze images"
+            }
+        try:
+            # Extract JSON from the response if it's wrapped in markdown
+            if "```json" in response_text:
+                json_start = response_text.find("```json") + 7
+                json_end = response_text.find("```", json_start)
+                json_text = response_text[json_start:json_end].strip()
+            elif "{" in response_text and "}" in response_text:
+                # Find the JSON object in the response
+                json_start = response_text.find("{")
+                json_end = response_text.rfind("}") + 1
+                json_text = response_text[json_start:json_end]
+            else:
+                json_text = response_text
+            analysis_result = json.loads(json_text)
+            # Ensure required keys exist with fallback values
+            if "missing_elements" not in analysis_result:
+                analysis_result["missing_elements"] = ["texture", "details"]
+            if "style_differences" not in analysis_result:
+                analysis_result["style_differences"] = ["color intensity", "composition"]
+            return analysis_result
+        except json.JSONDecodeError:
+            # If JSON parsing fails, return a structured response with fallback values
+            return {
+                "missing_elements": ["texture", "details"],
+                "style_differences": ["color intensity", "composition"],
+                "raw_analysis": response_text,
+                "note": "JSON parsing failed, using fallback analysis"
+            }
+    def describe_image(self, image: Image.Image, custom_prompt: str = None) -> str:
+        """Generate a detailed description of an image."""
+        if not custom_prompt:
+            custom_prompt = "Please describe this image in detail, including objects, people, colors, setting, and any notable features."
+        description = self._call_vision_model(custom_prompt, [image])
+        return description if description else "Failed to generate description"
+# Utility functions for backward compatibility
+def encode_image_from_path(image_path: str) -> str:
+    """Encode image from file path to base64 string."""
+    try:
+        with open(image_path, "rb") as image_file:
+            return base64.b64encode(image_file.read()).decode('utf-8')
+    except FileNotFoundError:
+        print(f"Error: Image file not found at {image_path}")
+        return None
+    except Exception as e:
+        print(f"Error encoding image: {e}")
+        return None
+def describe_image_from_path(image_path: str, custom_prompt: str = None) -> str:
+    """Generate description for an image from file path."""
+    if not Path(image_path).exists():
+        print(f"Error: Image file does not exist at {image_path}")
+        return None
+    # Load image and use evaluator
+    image = Image.open(image_path)
+    evaluator = LlamaEvaluator()
+    return evaluator.describe_image(image, custom_prompt)
+def analyze_differences_from_paths(generated_img_path: str, target_img_path: str) -> Dict[str, Any]:
+    """Analyze differences between two images from file paths."""
+    try:
+        generated_img = Image.open(generated_img_path)
+        target_img = Image.open(target_img_path)
+        evaluator = LlamaEvaluator()
+        return evaluator.analyze_differences(generated_img, target_img)
+    except Exception as e:
+        return {
+            "missing_elements": ["texture", "details"],
+            "style_differences": ["color intensity", "composition"],
+            "error": str(e)
+        }
+# Example usage
+if __name__ == "__main__":
+    # Example 1: Using the class directly
+    evaluator = LlamaEvaluator()
+    # Load images
+    try:
+        image_path = "/Users/chuchwu/Downloads/happy-190806.jpg"
+        target_image = Image.open(image_path)
+        # Generate initial prompt
+        print("Generating initial prompt...")
+        initial_prompt = evaluator.generate_initial_prompt(target_image)
+        print(f"Initial Prompt: {initial_prompt}")
+        print("\n" + "="*50 + "\n")
+        # Describe the image
+        print("Describing image...")
+        description = evaluator.describe_image(target_image)
+        print(f"Description: {description}")
+        print("\n" + "="*50 + "\n")
+        # Example 2: Analyze differences (using same image for demo)
+        print("Analyzing differences...")
+        differences = evaluator.analyze_differences(target_image, target_image)
+        print("Difference Analysis:")
+        print(f"Missing Elements: {differences.get('missing_elements', [])}")
+        print(f"Style Differences: {differences.get('style_differences', [])}")
+        if 'similarity_score' in differences:
+            print(f"Similarity Score: {differences['similarity_score']}%")
+        if 'overall_assessment' in differences:
+            print(f"Overall Assessment: {differences['overall_assessment']}")
+    except FileNotFoundError:
+        print("Image file not found. Please update the image_path variable.")
+    except Exception as e:
+        print(f"Error: {e}")

lpips_evaluator.py ADDED Viewed

	@@ -0,0 +1,25 @@

+from weave_prompt import ImageSimilarityMetric
+from PIL import Image
+import lpips
+import torch
+import numpy as np
+class LPIPSImageSimilarityMetric(ImageSimilarityMetric):
+    """Image similarity metric using LPIPS perceptual similarity."""
+    def __init__(self, net: str = 'alex', device: str = 'cpu'):
+        self.lpips_model = lpips.LPIPS(net=net).to(device)
+        self.device = device
+    def compute(self, generated_img: Image.Image, target_img: Image.Image) -> float:
+        def img_to_tensor(img):
+            img = img.convert('RGB')  # Ensure image has 3 channels for handling PNG
+            arr = np.array(img.resize((256, 256))).astype(np.float32) / 255.0
+            arr = arr.transpose(2, 0, 1)  # HWC to CHW
+            tensor = torch.tensor(arr).unsqueeze(0)
+            return tensor * 2 - 1  # LPIPS expects [-1, 1]
+        gen_tensor = img_to_tensor(generated_img).to(self.device)
+        tgt_tensor = img_to_tensor(target_img).to(self.device)
+        distance = self.lpips_model(gen_tensor, tgt_tensor).item()
+        similarity = max(0.0, 1.0 - distance)
+        return similarity

mock_components.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import fal_client
+from weave_prompt import TextToImageModel, ImageEvaluator, PromptRefiner
+from PIL import Image
+import numpy as np
+from typing import Dict, Any
+import os
+from fal_image_generator import FalImageGenerator
+class MockTextToImageModel(TextToImageModel):
+    """Mock text-to-image model for demonstration."""
+    def __init__(self):
+        self.image_generator = FalImageGenerator()
+    def generate(self, prompt: str, **kwargs) -> Image.Image:
+        """Generate an image using the fal image generator."""
+        return self.image_generator.generate_image(prompt, **kwargs)
+class MockImageEvaluator(ImageEvaluator):
+    """Mock image evaluator for demonstration."""
+    def generate_initial_prompt(self, target_img: Image.Image) -> str:
+        """Generate a mock initial prompt."""
+        return "A beautiful image with vibrant colors"
+    def analyze_differences(self, generated_img: Image.Image, target_img: Image.Image) -> Dict[str, Any]:
+        """Mock difference analysis."""
+        return {
+            "missing_elements": ["texture", "details"],
+            "style_differences": ["color intensity", "composition"]
+        }
+class MockSimilarityMetric:
+    """Mock similarity metric that gradually increases."""
+    def compute(self, generated_img: Image.Image, target_img: Image.Image) -> float:
+        """Mock similarity computation that gradually increases."""
+        # Randomly increase similarity over time
+        return np.random.uniform(0.5, 0.95)
+class MockPromptRefiner(PromptRefiner):
+    """Mock prompt refiner for demonstration."""
+    def refine_prompt(self, current_prompt: str, analysis: Dict[str, Any], similarity_score: float) -> str:
+        """Mock prompt refinement by adding random modifiers."""
+        modifiers = [
+            "with more detail",
+            "in vibrant colors",
+            "with better composition",
+            "high quality",
+            "masterfully crafted"
+        ]
+        return f"{current_prompt}, {np.random.choice(modifiers)}"

prompt_refiner.py ADDED Viewed

	@@ -0,0 +1,49 @@

+from typing import Any, Dict
+import openai
+import weave
+import os
+from weave_prompt import PromptRefiner
+import load_keys
+# Weave autopatches OpenAI to log LLM calls to W&B
+weave.init(project_name="meta-llama")
+class LlamaPromptRefiner(PromptRefiner):
+    @weave.op()
+    def refine_prompt(self, current_prompt: str, analysis: Dict[str, Any], similarity_score):
+        client = openai.OpenAI(
+            # The custom base URL points to W&B Inference
+            base_url='https://api.inference.wandb.ai/v1',
+            # Get your API key from https://wandb.ai/authorize
+            # Consider setting it in the environment as OPENAI_API_KEY instead for safety
+            api_key=os.getenv("WANDB_API_KEY"),
+        )
+        response = client.chat.completions.create(
+            model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
+            messages=[
+                {
+                    "role": "system",
+                    "content": (
+                        "You are an expert at prompt engineering for text-to-image models. "
+                        "Given a current prompt and an analysis of the differences between a generated image and a target image, "
+                        "your job is to suggest a new prompt that will make the generated image more similar to the target. "
+                        "Limit the new prompt to 100 words at most. "
+                        "The user message will contain two sections: one for the current prompt and one for the analysis, each delimited by 'START OF CURRENT PROMPT'/'END OF CURRENT PROMPT' and 'START OF ANALYSIS'/'END OF ANALYSIS'. "
+                        "Only return the improved prompt."
+                    )
+                },
+                {
+                    "role": "user",
+                    "content": (
+                        f"<START OF CURRENT PROMPT>\n{current_prompt}\n<END OF CURRENT PROMPT>\n"
+                        f"<START OF ANALYSIS>\n{str(analysis)}\n<END OF ANALYSIS>\n"
+                        "Suggest a new, improved prompt. Only return the prompt. Do not exceed 100 words."
+                    )
+                }
+            ],
+        )
+        return response.choices[0].message.content

pyproject.toml ADDED Viewed

	@@ -0,0 +1,17 @@

+[project]
+name = "weaveprompt"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.13"
+dependencies = [
+    "lpips>=0.1.4",
+    "numpy>=2.3.3",
+    "openai>=2.3.0",
+    "pillow>=11.3.0",
+    "streamlit>=1.50.0",
+    "wandb>=0.22.2",
+    "weave>=0.52.9",
+    "fal-client",
+    "python-dotenv>=1.1.1",
+]

requirements.txt CHANGED Viewed

@@ -1,3 +1,9 @@
-altair
-pandas
-streamlit

+lpips>=0.1.4
+numpy>=2.3.3
+openai>=2.3.0
+pillow>=11.3.0
+streamlit>=1.50.0
+wandb>=0.22.2
+weave>=0.52.9
+fal-client
+requests

spaces_config.yml ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ sdk: docker
2	+ app_port: 7860

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff

weave_prompt.py ADDED Viewed

	@@ -0,0 +1,166 @@

+from abc import ABC, abstractmethod
+from typing import Any, Dict, List, Optional, Union
+import PIL.Image as Image
+class TextToImageModel(ABC):
+    """Abstract base class for text-to-image models."""
+    @abstractmethod
+    def generate(self, prompt: str, **kwargs) -> Image.Image:
+        """Generate an image from a text prompt.
+        Args:
+            prompt: The text prompt to generate from
+            **kwargs: Additional model-specific parameters
+        Returns:
+            A PIL Image object
+        """
+        pass
+class ImageSimilarityMetric(ABC):
+    """Abstract base class for image similarity metrics."""
+    @abstractmethod
+    def compute(self, generated_img: Image.Image, target_img: Image.Image) -> float:
+        """Compute similarity score between generated and target images.
+        Args:
+            generated_img: The generated image to evaluate
+            target_img: The target image to compare against
+        Returns:
+            Similarity score (higher means more similar)
+        """
+        pass
+class ImageEvaluator(ABC):
+    """Abstract base class for evaluating image similarity."""
+    @abstractmethod
+    def generate_initial_prompt(self, target_img: Image.Image) -> str:
+        """Generate initial prompt from target image using VLM.
+        Args:
+            target_img: The target image to analyze
+        Returns:
+            Initial prompt describing the target image
+        """
+        pass
+    @abstractmethod
+    def analyze_differences(self, generated_img: Image.Image, target_img: Image.Image) -> Dict[str, Any]:
+        """Analyze differences between generated and target images using VLM.
+        Args:
+            generated_img: The generated image to analyze
+            target_img: The target image to compare against
+        Returns:
+            Dictionary containing analysis results (e.g. missing elements, style differences)
+        """
+        pass
+class PromptRefiner(ABC):
+    """Abstract base class for prompt refinement strategies."""
+    @abstractmethod
+    def refine_prompt(self,
+                     current_prompt: str,
+                     analysis: Dict[str, Any],
+                     similarity_score: float) -> str:
+        """Refine the current prompt based on image analysis.
+        Args:
+            current_prompt: The current prompt PMT_i
+            analysis: Analysis results from ImageEvaluator
+            similarity_score: Current similarity score
+        Returns:
+            Refined prompt PMT_{i+1}
+        """
+        pass
+class PromptOptimizer:
+    """Main class that orchestrates the prompt optimization process."""
+    def __init__(self,
+                 model: TextToImageModel,
+                 evaluator: ImageEvaluator,
+                 refiner: PromptRefiner,
+                 similarity_metric: ImageSimilarityMetric,
+                 max_iterations: int = 10,
+                 similarity_threshold: float = 0.95):
+        """Initialize the optimizer.
+        Args:
+            model: Text-to-image model to use
+            evaluator: Image evaluator for generating initial prompt and analysis
+            refiner: Prompt refinement strategy
+            similarity_metric: Image similarity metric
+            max_iterations: Maximum number of optimization iterations
+            similarity_threshold: Target similarity threshold for early stopping
+        """
+        # Configuration
+        self.model = model
+        self.evaluator = evaluator
+        self.refiner = refiner
+        self.similarity_metric = similarity_metric
+        self.max_iterations = max_iterations
+        self.similarity_threshold = similarity_threshold
+        # Optimization state
+        self.target_img: Optional[Image.Image] = None
+        self.current_prompt: Optional[str] = None
+        self.iteration: int = 0
+        # Progress tracking
+        self.history: List[Dict[str, Any]] = []
+    def initialize(self, target_img: Image.Image) -> tuple[bool, str, Image.Image]:
+        """Initialize the optimization process with a target image.
+        Args:
+            target_img: Target image to optimize towards
+        Returns:
+            Tuple of (is_completed, current_prompt, current_generated_image)
+        """
+        self.target_img = target_img
+        self.current_prompt = self.evaluator.generate_initial_prompt(target_img)
+        self.iteration = 0
+        self.history = []
+        return self.step()
+    def step(self) -> tuple[bool, str, Image.Image]:
+        """Perform one optimization step.
+        Returns:
+            Tuple of (is_completed, current_prompt, current_generated_image)
+            is_completed: True if optimization is complete (reached threshold or max iterations)
+            current_prompt: The current prompt
+            current_generated_image: The image generated from current prompt
+        """
+        if self.target_img is None or self.current_prompt is None:
+            raise RuntimeError("Must call initialize() before step()")
+        if self.iteration >= self.max_iterations:
+            return True, self.current_prompt, self.model.generate(self.current_prompt)
+        # Generate image with current prompt
+        generated_img = self.model.generate(self.current_prompt)
+        # Evaluate similarity
+        similarity = self.similarity_metric.compute(generated_img, self.target_img)
+        # Analyze differences
+        analysis = self.evaluator.analyze_differences(generated_img, self.target_img)
+        # Track progress
+        self.history.append({
+            'iteration': self.iteration,
+            'prompt': self.current_prompt,
+            'similarity': similarity,
+            'analysis': analysis,
+            'image': generated_img
+        })
+        # Check if we've reached target similarity
+        is_completed = similarity >= self.similarity_threshold
+        if not is_completed:
+            # Refine prompt
+            self.current_prompt = self.refiner.refine_prompt(
+                self.current_prompt, analysis, similarity)
+            self.iteration += 1
+        return is_completed, self.current_prompt, generated_img