kevin1kevin1k committed on
Commit fb2f0a7 · verified · 1 Parent(s): 4584c11

Upload folder using huggingface_hub

app.py CHANGED
@@ -1,11 +1,11 @@
 import streamlit as st
 from PIL import Image
 from dotenv import load_dotenv
-from image_to_text import LlamaEvaluator
-from prompt_refiner import LlamaPromptRefiner
+from image_evaluators import LlamaEvaluator
+from prompt_refiners import LlamaPromptRefiner
 from weave_prompt import PromptOptimizer
-from lpips_evaluator import LPIPSImageSimilarityMetric
-from fal_image_generator import FalImageGenerator
+from similarity_metrics import LPIPSImageSimilarityMetric
+from image_generators import FalImageGenerator
 
 # Load environment variables from .env file
 load_dotenv()
image_evaluators.py ADDED
@@ -0,0 +1,276 @@
+import openai
+import weave
+import base64
+import json
+import tempfile
+import os
+from pathlib import Path
+from PIL import Image
+from typing import Dict, Any, Optional
+from weave_prompt import ImageEvaluator
+from dotenv import load_dotenv
+
+# Load environment variables from .env file
+load_dotenv()
+
+# Weave autopatches OpenAI to log LLM calls to W&B
+weave.init("meta-llama")
+
+class LlamaEvaluator(ImageEvaluator):
+    """Llama-based image evaluator using W&B Inference."""
+
+    def __init__(self, api_key: Optional[str] = None):
+        """
+        Initialize the Llama evaluator with an OpenAI client.
+
+        Args:
+            api_key: Optional API key. If not provided, will look for the OPENAI_API_KEY
+                     or WANDB_API_KEY environment variables.
+        """
+        # Get API key from parameter, environment variables, or raise error
+        if api_key is None:
+            api_key = os.getenv('OPENAI_API_KEY') or os.getenv('WANDB_API_KEY')
+        if api_key is None:
+            raise ValueError(
+                "API key not provided. Please either:\n"
+                "1. Pass api_key parameter to LlamaEvaluator()\n"
+                "2. Set OPENAI_API_KEY environment variable\n"
+                "3. Set WANDB_API_KEY environment variable\n"
+                "Get your API key from https://wandb.ai/authorize"
+            )
+
+        self.client = openai.OpenAI(
+            # The custom base URL points to W&B Inference
+            base_url='https://api.inference.wandb.ai/v1',
+
+            # Get your API key from https://wandb.ai/authorize
+            api_key=api_key,
+        )
+        self.model = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
+
+    def _encode_image(self, image: Image.Image) -> Optional[str]:
+        """Encode a PIL Image to a base64 string; return None on failure."""
+        try:
+            # Save image to a temporary file and encode it
+            with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as tmp_file:
+                image.save(tmp_file.name, format='JPEG')
+                with open(tmp_file.name, "rb") as image_file:
+                    encoded = base64.b64encode(image_file.read()).decode('utf-8')
+                # Clean up temp file
+                Path(tmp_file.name).unlink()
+                return encoded
+        except Exception as e:
+            print(f"Error encoding image: {e}")
+            return None
+
+    def _call_vision_model(self, prompt: str, images: list) -> Optional[str]:
+        """Call the vision model with a prompt and a list of images."""
+        try:
+            # Prepare content with text and images
+            content = [{"type": "text", "text": prompt}]
+
+            for i, img in enumerate(images):
+                base64_image = self._encode_image(img)
+                if base64_image:
+                    if i > 0:  # Add label for multiple images
+                        content.append({
+                            "type": "text",
+                            "text": f"Image {i+1}:"
+                        })
+                    content.append({
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:image/jpeg;base64,{base64_image}"
+                        }
+                    })
+
+            response = self.client.chat.completions.create(
+                model=self.model,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": "You are an expert image analyst. Provide detailed, accurate analysis."
+                    },
+                    {
+                        "role": "user",
+                        "content": content
+                    }
+                ],
+                max_tokens=1000
+            )
+            return response.choices[0].message.content
+        except Exception as e:
+            print(f"Error calling vision model: {e}")
+            return None
+
+    def generate_initial_prompt(self, generated_img: Image.Image) -> str:
+        """Generate an initial prompt by describing the given image."""
+        prompt = """
+        Analyze this image and generate a detailed text prompt that could be used to recreate it.
+        Focus on:
+        - Main subjects and objects
+        - Visual style and artistic technique
+        - Colors, lighting, and mood
+        - Composition and layout
+        - Important details and textures
+
+        Provide a concise but comprehensive prompt suitable for image generation.
+        """
+
+        description = self._call_vision_model(prompt, [generated_img])
+
+        if description:
+            return description.strip()
+        else:
+            # Fallback prompt
+            return "A beautiful image with vibrant colors and detailed composition"
+    @weave.op()
+    def analyze_differences(self, generated_img: Image.Image, target_img: Image.Image) -> Dict[str, Any]:
+        """Analyze differences between generated and target images."""
+        analysis_prompt = """
+        Compare these two images and analyze their differences. The first image is generated, the second is the target.
+
+        Please provide a detailed analysis in JSON format with the following structure:
+        {
+            "missing_elements": ["list of elements present in target but missing in generated"],
+            "style_differences": ["list of style differences between the images"],
+            "color_differences": ["differences in color, lighting, or tone"],
+            "composition_differences": ["differences in layout, positioning, or framing"],
+            "quality_differences": ["differences in detail, sharpness, or overall quality"],
+            "similarity_score": "percentage of how similar the images are (0-100)",
+            "overall_assessment": "brief summary of the main differences"
+        }
+
+        Focus on identifying what elements, styles, or qualities are present in the target image but missing or different in the generated image.
+        """
+
+        response_text = self._call_vision_model(analysis_prompt, [generated_img, target_img])
+
+        if not response_text:
+            return {
+                "missing_elements": ["texture", "details"],
+                "style_differences": ["color intensity", "composition"],
+                "error": "Failed to analyze images"
+            }
+
+        try:
+            # Extract JSON from the response if it's wrapped in markdown
+            if "```json" in response_text:
+                json_start = response_text.find("```json") + 7
+                json_end = response_text.find("```", json_start)
+                json_text = response_text[json_start:json_end].strip()
+            elif "{" in response_text and "}" in response_text:
+                # Find the JSON object in the response
+                json_start = response_text.find("{")
+                json_end = response_text.rfind("}") + 1
+                json_text = response_text[json_start:json_end]
+            else:
+                json_text = response_text
+
+            analysis_result = json.loads(json_text)
+
+            # Ensure required keys exist with fallback values
+            if "missing_elements" not in analysis_result:
+                analysis_result["missing_elements"] = ["texture", "details"]
+            if "style_differences" not in analysis_result:
+                analysis_result["style_differences"] = ["color intensity", "composition"]
+
+            return analysis_result
+
+        except json.JSONDecodeError:
+            # If JSON parsing fails, return a structured response with fallback values
+            return {
+                "missing_elements": ["texture", "details"],
+                "style_differences": ["color intensity", "composition"],
+                "raw_analysis": response_text,
+                "note": "JSON parsing failed, using fallback analysis"
+            }
+    @weave.op()
+    def describe_image(self, image: Image.Image, custom_prompt: Optional[str] = None) -> str:
+        """Generate a detailed description of an image."""
+        if not custom_prompt:
+            custom_prompt = "Please describe this image in detail, including objects, people, colors, setting, and any notable features."
+
+        description = self._call_vision_model(custom_prompt, [image])
+        return description if description else "Failed to generate description"
+
+
+# Utility functions for backward compatibility
+def encode_image_from_path(image_path: str) -> Optional[str]:
+    """Encode image from file path to base64 string."""
+    try:
+        with open(image_path, "rb") as image_file:
+            return base64.b64encode(image_file.read()).decode('utf-8')
+    except FileNotFoundError:
+        print(f"Error: Image file not found at {image_path}")
+        return None
+    except Exception as e:
+        print(f"Error encoding image: {e}")
+        return None
+
+def describe_image_from_path(image_path: str, custom_prompt: Optional[str] = None) -> Optional[str]:
+    """Generate description for an image from file path."""
+    if not Path(image_path).exists():
+        print(f"Error: Image file does not exist at {image_path}")
+        return None
+
+    # Load image and use evaluator
+    image = Image.open(image_path)
+    evaluator = LlamaEvaluator()
+    return evaluator.describe_image(image, custom_prompt)
+
+def analyze_differences_from_paths(generated_img_path: str, target_img_path: str) -> Dict[str, Any]:
+    """Analyze differences between two images from file paths."""
+    try:
+        generated_img = Image.open(generated_img_path)
+        target_img = Image.open(target_img_path)
+
+        evaluator = LlamaEvaluator()
+        return evaluator.analyze_differences(generated_img, target_img)
+    except Exception as e:
+        return {
+            "missing_elements": ["texture", "details"],
+            "style_differences": ["color intensity", "composition"],
+            "error": str(e)
+        }
+
+
+# Example usage
+if __name__ == "__main__":
+    # Example 1: Using the class directly
+    evaluator = LlamaEvaluator()
+
+    # Load images
+    try:
+        image_path = "/Users/chuchwu/Downloads/happy-190806.jpg"
+        target_image = Image.open(image_path)
+
+        # Generate initial prompt
+        print("Generating initial prompt...")
+        initial_prompt = evaluator.generate_initial_prompt(target_image)
+        print(f"Initial Prompt: {initial_prompt}")
+        print("\n" + "="*50 + "\n")
+
+        # Describe the image
+        print("Describing image...")
+        description = evaluator.describe_image(target_image)
+        print(f"Description: {description}")
+        print("\n" + "="*50 + "\n")
+
+        # Example 2: Analyze differences (using same image for demo)
+        print("Analyzing differences...")
+        differences = evaluator.analyze_differences(target_image, target_image)
+        print("Difference Analysis:")
+        print(f"Missing Elements: {differences.get('missing_elements', [])}")
+        print(f"Style Differences: {differences.get('style_differences', [])}")
+
+        if 'similarity_score' in differences:
+            print(f"Similarity Score: {differences['similarity_score']}%")
+
+        if 'overall_assessment' in differences:
+            print(f"Overall Assessment: {differences['overall_assessment']}")
+
+    except FileNotFoundError:
+        print("Image file not found. Please update the image_path variable.")
+    except Exception as e:
+        print(f"Error: {e}")
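The module-level helpers at the end of image_evaluators.py can also be exercised straight from disk. A minimal sketch, assuming an API key is set in the environment and that the two image files exist locally (the file names below are placeholders):

from image_evaluators import analyze_differences_from_paths

# analyze_differences_from_paths opens both images, builds a LlamaEvaluator,
# and returns the structured difference report shown above.
report = analyze_differences_from_paths("generated.png", "target.png")
print(report.get("overall_assessment", report))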
image_generators.py ADDED
@@ -0,0 +1,48 @@
+import fal_client
+from PIL import Image
+from typing import Dict, Any
+import requests
+from io import BytesIO
+
+from weave_prompt import ImageGenerator
+
+from dotenv import load_dotenv
+load_dotenv()
+
+class FalImageGenerator(ImageGenerator):
+    """Handles image generation using fal_client."""
+
+    def __init__(self, model_name: str = "fal-ai/flux-pro"):
+        self.model_name = model_name
+
+    def _on_queue_update(self, update):
+        """Handle queue updates during image generation."""
+        if isinstance(update, fal_client.InProgress):
+            for log in update.logs:
+                print(log["message"])
+
+    def generate(self, prompt: str, **kwargs) -> Image.Image:
+        """Generate an image from a text prompt using fal_client."""
+        result = fal_client.subscribe(
+            self.model_name,
+            arguments={
+                "prompt": prompt,
+                **kwargs
+            },
+            with_logs=True,
+            on_queue_update=self._on_queue_update,
+        )
+        print(result)
+
+        return self._extract_image_from_result(result)
+
+    def _extract_image_from_result(self, result: Dict[str, Any]) -> Image.Image:
+        """Extract and download image from fal_client result."""
+        if result and 'images' in result and len(result['images']) > 0:
+            image_url = result['images'][0]['url']
+            response = requests.get(image_url)
+            response.raise_for_status()  # Raise an exception for bad status codes
+            image = Image.open(BytesIO(response.content))
+            return image
+        else:
+            raise ValueError("No image found in the result")
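A quick sketch of how FalImageGenerator is meant to be called, assuming fal_client picks up its credential (FAL_KEY) from the environment and the default fal-ai/flux-pro model is acceptable; the output path is a placeholder:

from image_generators import FalImageGenerator

generator = FalImageGenerator()  # defaults to "fal-ai/flux-pro"
# generate() forwards extra keyword arguments straight to fal_client.subscribe
image = generator.generate("a watercolor lighthouse at dusk")
image.save("generated.png")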
prompt_refiners.py ADDED
@@ -0,0 +1,52 @@
+from typing import Any, Dict
+import openai
+import weave
+import os
+from dotenv import load_dotenv
+
+from weave_prompt import PromptRefiner
+
+# Load environment variables from .env file
+load_dotenv()
+
+# Weave autopatches OpenAI to log LLM calls to W&B
+weave.init(project_name="meta-llama")
+
+
+class LlamaPromptRefiner(PromptRefiner):
+    @weave.op()
+    def refine_prompt(self, current_prompt: str, analysis: Dict[str, Any], similarity_score: float) -> str:
+        client = openai.OpenAI(
+            # The custom base URL points to W&B Inference
+            base_url='https://api.inference.wandb.ai/v1',
+
+            # Get your API key from https://wandb.ai/authorize
+            # Consider setting it in the environment as OPENAI_API_KEY instead for safety
+            api_key=os.getenv("WANDB_API_KEY"),
+        )
+
+        response = client.chat.completions.create(
+            model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
+            messages=[
+                {
+                    "role": "system",
+                    "content": (
+                        "You are an expert at prompt engineering for text-to-image models. "
+                        "Given a current prompt and an analysis of the differences between a generated image and a target image, "
+                        "your job is to suggest a new prompt that will make the generated image more similar to the target. "
+                        "Limit the new prompt to 100 words at most. "
+                        "The user message will contain two sections: one for the current prompt and one for the analysis, each delimited by 'START OF CURRENT PROMPT'/'END OF CURRENT PROMPT' and 'START OF ANALYSIS'/'END OF ANALYSIS'. "
+                        "Only return the improved prompt."
+                    )
+                },
+                {
+                    "role": "user",
+                    "content": (
+                        f"<START OF CURRENT PROMPT>\n{current_prompt}\n<END OF CURRENT PROMPT>\n"
+                        f"<START OF ANALYSIS>\n{str(analysis)}\n<END OF ANALYSIS>\n"
+                        "Suggest a new, improved prompt. Only return the prompt. Do not exceed 100 words."
+                    )
+                }
+            ],
+        )
+        return response.choices[0].message.content
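A hedged example of calling the refiner on its own; the analysis dict below only mirrors the shape produced by LlamaEvaluator.analyze_differences, and its values are invented for illustration:

from prompt_refiners import LlamaPromptRefiner

refiner = LlamaPromptRefiner()
analysis = {  # invented example values in the analyze_differences format
    "missing_elements": ["sailboat on the horizon"],
    "style_differences": ["flatter lighting than the target"],
}
new_prompt = refiner.refine_prompt(
    current_prompt="a lighthouse at dusk",
    analysis=analysis,
    similarity_score=0.42,
)
print(new_prompt)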
similarity_metrics.py ADDED
@@ -0,0 +1,25 @@
+
+from weave_prompt import ImageSimilarityMetric
+from PIL import Image
+import lpips
+import torch
+import numpy as np
+
+class LPIPSImageSimilarityMetric(ImageSimilarityMetric):
+    """Image similarity metric using LPIPS perceptual similarity."""
+    def __init__(self, net: str = 'alex', device: str = 'cpu'):
+        self.lpips_model = lpips.LPIPS(net=net).to(device)
+        self.device = device
+
+    def compute(self, generated_img: Image.Image, target_img: Image.Image) -> float:
+        def img_to_tensor(img):
+            img = img.convert('RGB')  # Ensure image has 3 channels for handling PNG
+            arr = np.array(img.resize((256, 256))).astype(np.float32) / 255.0
+            arr = arr.transpose(2, 0, 1)  # HWC to CHW
+            tensor = torch.tensor(arr).unsqueeze(0)
+            return tensor * 2 - 1  # LPIPS expects [-1, 1]
+        gen_tensor = img_to_tensor(generated_img).to(self.device)
+        tgt_tensor = img_to_tensor(target_img).to(self.device)
+        distance = self.lpips_model(gen_tensor, tgt_tensor).item()
+        similarity = max(0.0, 1.0 - distance)
+        return similarity
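A short usage sketch for the LPIPS metric; the file names are placeholders, and the returned score is clamped to [0, 1] with 1.0 meaning perceptually identical:

from PIL import Image
from similarity_metrics import LPIPSImageSimilarityMetric

metric = LPIPSImageSimilarityMetric(net='alex', device='cpu')
# compute() resizes both images to 256x256 and converts them to RGB internally
score = metric.compute(Image.open("generated.png"), Image.open("target.png"))
print(f"Similarity: {score:.3f}")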
weave_prompt.py CHANGED
@@ -3,7 +3,7 @@ from abc import ABC, abstractmethod
 from typing import Any, Dict, List, Optional, Union
 import PIL.Image as Image
 
-class TextToImageModel(ABC):
+class ImageGenerator(ABC):
     """Abstract base class for text-to-image models."""
 
     @abstractmethod
@@ -85,7 +85,7 @@ class PromptOptimizer:
     """Main class that orchestrates the prompt optimization process."""
 
     def __init__(self,
-                 model: TextToImageModel,
+                 image_generator: ImageGenerator,
                  evaluator: ImageEvaluator,
                  refiner: PromptRefiner,
                  similarity_metric: ImageSimilarityMetric,
@@ -94,7 +94,7 @@ class PromptOptimizer:
         """Initialize the optimizer.
 
         Args:
-            model: Text-to-image model to use
+            image_generator: Text-to-image generator to use
             evaluator: Image evaluator for generating initial prompt and analysis
             refiner: Prompt refinement strategy
             similarity_metric: Image similarity metric
@@ -102,7 +102,7 @@ class PromptOptimizer:
             similarity_threshold: Target similarity threshold for early stopping
         """
         # Configuration
-        self.model = model
+        self.image_generator = image_generator
         self.evaluator = evaluator
         self.refiner = refiner
         self.similarity_metric = similarity_metric
@@ -141,9 +141,9 @@
         if self.target_img is None or self.current_prompt is None:
             raise RuntimeError("Must call initialize() before step()")
         if self.iteration >= self.max_iterations:
-            return True, self.current_prompt, self.model.generate(self.current_prompt)
+            return True, self.current_prompt, self.image_generator.generate(self.current_prompt)
         # Generate image with current prompt
-        generated_img = self.model.generate(self.current_prompt)
+        generated_img = self.image_generator.generate(self.current_prompt)
         # Evaluate similarity
         similarity = self.similarity_metric.compute(generated_img, self.target_img)
         # Analyze differences
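Taken together, the renamed modules plug into PromptOptimizer roughly as follows. The constructor keywords come from the weave_prompt.py diff above; the initialize(target_img) call and the step() loop are assumptions inferred from the step() excerpt, since the full weave_prompt.py is not shown in this commit:

from PIL import Image
from weave_prompt import PromptOptimizer
from image_generators import FalImageGenerator
from image_evaluators import LlamaEvaluator
from prompt_refiners import LlamaPromptRefiner
from similarity_metrics import LPIPSImageSimilarityMetric

optimizer = PromptOptimizer(
    image_generator=FalImageGenerator(),
    evaluator=LlamaEvaluator(),
    refiner=LlamaPromptRefiner(),
    similarity_metric=LPIPSImageSimilarityMetric(),
)
optimizer.initialize(Image.open("target.png"))  # assumed signature; sets target_img and the initial prompt
done = False
while not done:
    # step() returns (done, current_prompt, generated_img) per the excerpt above
    done, prompt, generated_img = optimizer.step()
print(prompt)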