Files changed (5)
  1. Dockerfile +2 -8
  2. docker-compose.yaml +2 -15
  3. requirements.txt +1 -4
  4. src/embeddings.py +0 -360
  5. src/main.py +1 -193
Dockerfile CHANGED
@@ -4,20 +4,14 @@ RUN useradd -m -u 1000 user
 USER user
 
 ENV HOME=/home/user \
-    PATH=/home/user/.local/bin:$PATH \
-    PYTHONDONTWRITEBYTECODE=1 \
-    PYTHONUNBUFFERED=1
+    PATH=/home/user/.local/bin:$PATH
 
 WORKDIR $HOME/app
 
-# Copy requirements first for better caching
 COPY --chown=user requirements.txt requirements.txt
 
-# Install dependencies with caching
-RUN pip install --upgrade pip && \
-    pip install --no-cache-dir --user -r requirements.txt
+RUN pip install --upgrade -r requirements.txt
 
-# Copy application code
 COPY --chown=user . .
 
 CMD ["uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "7860"]
docker-compose.yaml CHANGED
@@ -2,27 +2,14 @@ services:
   server:
     build:
       context: .
-      # Enable BuildKit for better caching
-      cache_from:
-        - python:3.9
     ports:
       - 7860:7860
     develop:
       watch:
-        # Only rebuild on requirements.txt changes, sync code changes otherwise
         - action: rebuild
-          path: ./requirements.txt
-        - action: sync
-          path: ./src
-          target: /home/user/app/src
-        - action: sync
-          path: ./README.md
-          target: /home/user/app/README.md
+          path: .
     volumes:
       - python-cache:/home/user/.cache
-      # Cache pip packages
-      - pip-cache:/home/user/.cache/pip
 
 volumes:
-  python-cache:
-  pip-cache:
+  python-cache:
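For orientation, a sketch of docker-compose.yaml as it reads after this change, with indentation reconstructed since the diff viewer flattened it. With the sync rules gone, docker compose watch rebuilds the image on any change under the build context instead of syncing ./src and README.md into the running container:

services:
  server:
    build:
      context: .
    ports:
      - 7860:7860
    develop:
      watch:
        # Rebuild on any change in the build context
        # (was: rebuild only on requirements.txt, sync for ./src and README.md)
        - action: rebuild
          path: .
    volumes:
      - python-cache:/home/user/.cache

volumes:
  python-cache: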
 
requirements.txt CHANGED
@@ -6,7 +6,4 @@ sentencepiece
 sacremoses
 torch
 pillow
-protobuf
-
-# Optional dependencies for specific features
-einops
+# Optional dependencies for specific features
src/embeddings.py DELETED
@@ -1,360 +0,0 @@
-# -------------------------------------------------------------------
-# This source file is available under the terms of the
-# Pimcore Open Core License (POCL)
-# Full copyright and license information is available in
-# LICENSE.md which is distributed with this source code.
-#
-# @copyright Copyright (c) Pimcore GmbH (https://www.pimcore.com)
-# @license Pimcore Open Core License (POCL)
-# -------------------------------------------------------------------
-
-import torch
-import base64
-import io
-import logging
-from PIL import Image
-from pydantic import BaseModel
-from fastapi import Request, HTTPException
-import json
-from typing import Optional, Union, Dict, Any
-from transformers import AutoProcessor, AutoModel
-
-
-class EmbeddingRequest(BaseModel):
-    inputs: str
-    parameters: Optional[dict] = None
-
-
-class BaseEmbeddingTaskService:
-    """Base class for embedding services with common functionality"""
-
-    def __init__(self, logger: logging.Logger):
-        self._logger = logger
-        self._model_cache = {}
-        self._processor_cache = {}
-
-    async def get_embedding_request(self, request: Request) -> EmbeddingRequest:
-        """Parse request body into EmbeddingRequest"""
-        content_type = request.headers.get("content-type", "")
-        if content_type.startswith("application/json"):
-            data = await request.json()
-            return EmbeddingRequest(**data)
-        if content_type.startswith("application/x-www-form-urlencoded"):
-            raw = await request.body()
-            try:
-                data = json.loads(raw)
-                return EmbeddingRequest(**data)
-            except Exception:
-                try:
-                    data = json.loads(raw.decode("utf-8"))
-                    return EmbeddingRequest(**data)
-                except Exception:
-                    raise HTTPException(status_code=400, detail="Invalid request body")
-        raise HTTPException(status_code=400, detail="Unsupported content type")
-
-    def _get_device(self) -> torch.device:
-        """Get the appropriate device (GPU if available, otherwise CPU)"""
-        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        self._logger.info(f"Using device: {device}")
-        return device
-
-    def _load_processor(self, model_name: str):
-        """Load and cache processor for the model using AutoProcessor"""
-        if model_name not in self._processor_cache:
-            try:
-                self._processor_cache[model_name] = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
-                self._logger.info(f"Loaded processor for model: {model_name}")
-            except Exception as e:
-                self._logger.error(f"Failed to load processor for model '{model_name}': {str(e)}")
-                raise HTTPException(
-                    status_code=404,
-                    detail=f"Processor for model '{model_name}' could not be loaded: {str(e)}"
-                )
-        else:
-            self._logger.info(f"Using cached processor for model: {model_name}")
-        return self._processor_cache[model_name]
-
-    def _load_model(self, model_name: str, cache_suffix: str = ""):
-        """Load and cache model using AutoModel"""
-        cache_key = f"{model_name}{cache_suffix}"
-        if cache_key not in self._model_cache:
-            try:
-                device = self._get_device()
-                model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
-                model.to(device)
-                self._model_cache[cache_key] = model
-                self._logger.info(f"Loaded model: {model_name} on {device}")
-            except Exception as e:
-                self._logger.error(f"Failed to load model '{model_name}': {str(e)}")
-                raise HTTPException(
-                    status_code=404,
-                    detail=f"Model '{model_name}' could not be loaded: {str(e)}"
-                )
-        else:
-            self._logger.info(f"Using cached model: {model_name} (cache key: {cache_key})")
-        return self._model_cache[cache_key]
-
-    async def get_embedding_vector_size(self, model_name: str) -> dict:
-        """Get the vector size of embeddings for a given model"""
-        try:
-            # Load the model to get its configuration
-            model = self._load_model(model_name)
-
-            # Try to get the embedding dimension from the model configuration
-            used_attribute = None
-            if hasattr(model.config, 'hidden_size'):
-                vector_size = model.config.hidden_size
-                used_attribute = "hidden_size"
-            elif hasattr(model.config, 'projection_dim'):
-                vector_size = model.config.projection_dim
-                used_attribute = "projection_dim"
-            elif hasattr(model.config, 'd_model'):
-                vector_size = model.config.d_model
-                used_attribute = "d_model"
-            elif hasattr(model.config, 'text_config') and hasattr(model.config.text_config, 'hidden_size'):
-                vector_size = model.config.text_config.hidden_size
-                used_attribute = "text_config.hidden_size"
-            elif hasattr(model.config, 'vision_config') and hasattr(model.config.vision_config, 'hidden_size'):
-                vector_size = model.config.vision_config.hidden_size
-                used_attribute = "vision_config.hidden_size"
-            else:
-                # If we can't determine from config, we'll need to run a dummy inference
-                raise AttributeError("Could not determine vector size from model configuration")
-
-            self._logger.info(f"Model {model_name} has embedding vector size: {vector_size}")
-            return {
-                "model_name": model_name,
-                "vector_size": vector_size,
-                "config_attribute_used": used_attribute
-            }
-
-        except Exception as e:
-            self._logger.error(f"Failed to get vector size for model '{model_name}': {str(e)}")
-            raise HTTPException(
-                status_code=404,
-                detail=f"Could not determine vector size for model '{model_name}': {str(e)}"
-            )
-
-    def _extract_embeddings(self, model_output, model_name: str) -> torch.Tensor:
-        """Extract embeddings from model output with fallback strategies"""
-
-        # Try different embedding extraction methods in order of preference
-
-        # 1. Check for pooler_output (most common)
-        if hasattr(model_output, 'pooler_output') and model_output.pooler_output is not None:
-            self._logger.debug(f"Using pooler_output for {model_name}")
-            return model_output.pooler_output
-
-        # 2. Check for last_hidden_state and pool it
-        if hasattr(model_output, 'last_hidden_state') and model_output.last_hidden_state is not None:
-            self._logger.debug(f"Using pooled last_hidden_state for {model_name}")
-            # Mean pooling over sequence dimension
-            return model_output.last_hidden_state.mean(dim=1)
-
-        # 3. Check for image_embeds (CLIP-style models)
-        if hasattr(model_output, 'image_embeds') and model_output.image_embeds is not None:
-            self._logger.debug(f"Using image_embeds for {model_name}")
-            return model_output.image_embeds
-
-        # 4. Check for text_embeds (CLIP-style models)
-        if hasattr(model_output, 'text_embeds') and model_output.text_embeds is not None:
-            self._logger.debug(f"Using text_embeds for {model_name}")
-            return model_output.text_embeds
-
-        # 5. Fallback: try to use the output directly if it's a tensor
-        if isinstance(model_output, torch.Tensor):
-            self._logger.debug(f"Using direct tensor output for {model_name}")
-            return model_output
-
-        # 6. Last resort: check if output is a tuple and use the first element
-        if isinstance(model_output, tuple) and len(model_output) > 0:
-            self._logger.debug(f"Using first element of tuple output for {model_name}")
-            return model_output[0]
-
-        # If none of the above work, raise an error
-        raise HTTPException(
-            status_code=500,
-            detail=f"Could not extract embeddings from model output for {model_name}. "
-                   f"Available attributes: {dir(model_output) if hasattr(model_output, '__dict__') else 'Unknown'}"
-        )
-
-
-class ImageEmbeddingTaskService(BaseEmbeddingTaskService):
-    """Service for generating image embeddings"""
-
-    def _decode_base64_image(self, base64_string: str) -> Image.Image:
-        """Decode base64 string to PIL Image"""
-        try:
-            # Remove data URL prefix if present
-            if base64_string.startswith('data:image'):
-                base64_string = base64_string.split(',')[1]
-
-            image_data = base64.b64decode(base64_string)
-            image = Image.open(io.BytesIO(image_data))
-
-            # Convert to RGB if necessary
-            if image.mode != 'RGB':
-                image = image.convert('RGB')
-
-            return image
-        except Exception as e:
-            raise HTTPException(status_code=400, detail=f"Invalid image data: {str(e)}")
-
-    def _generate_image_embeddings(self, image: Image.Image, model, processor, model_name: str) -> list:
-        """Generate embeddings for an image"""
-        device = self._get_device()
-
-        # Process the image
-        inputs = processor(images=image, return_tensors="pt", padding=True)
-
-        # Move inputs to the same device as the model
-        inputs = {k: v.to(device) for k, v in inputs.items()}
-
-        # Get the embeddings
-        with torch.no_grad():
-            # Try using specialized methods first for CLIP-like models
-            if hasattr(model, 'get_image_features'):
-                self._logger.debug(f"Using get_image_features for {model_name}")
-                embeddings = model.get_image_features(pixel_values=inputs.get('pixel_values'))
-            elif hasattr(model, 'vision_model'):
-                self._logger.debug(f"Using vision_model for {model_name}")
-                vision_outputs = model.vision_model(**inputs)
-                embeddings = self._extract_embeddings(vision_outputs, model_name)
-            else:
-                self._logger.debug(f"Using full model for {model_name}")
-                outputs = model(**inputs)
-                embeddings = self._extract_embeddings(outputs, model_name)
-
-        self._logger.info(f"Image embedding shape: {embeddings.shape}")
-
-        # Move back to CPU before converting to numpy
-        embeddings_array = embeddings.cpu().numpy()
-
-        return embeddings_array[0].tolist()
-
-    async def generate_embedding(self, request: Request, model_name: str):
-        """Main method to generate image embeddings"""
-        embedding_request: EmbeddingRequest = await self.get_embedding_request(request)
-
-        self._logger.info(f"Generating image embedding for model: {model_name}")
-
-        # Load processor and model using auto-detection
-        processor = self._load_processor(model_name)
-        model = self._load_model(model_name, "_image")
-
-        # Decode image from base64
-        image = self._decode_base64_image(embedding_request.inputs)
-
-        try:
-            # Generate embeddings
-            embeddings = self._generate_image_embeddings(image, model, processor, model_name)
-
-            self._logger.info("Image embedding generation completed")
-            return {"embeddings": embeddings}
-
-        except Exception as e:
-            self._logger.error(f"Embedding generation failed for model '{model_name}': {str(e)}")
-            raise HTTPException(
-                status_code=500,
-                detail=f"Embedding generation failed: {str(e)}"
-            )
-
-    async def generate_embedding_from_upload(self, uploaded_file, model_name: str):
-        """Generate image embeddings from uploaded file"""
-        from fastapi import UploadFile
-
-        self._logger.info(f"Generating image embedding from uploaded file for model: {model_name}")
-
-        # Validate file type
-        if not uploaded_file.content_type.startswith('image/'):
-            raise HTTPException(
-                status_code=400,
-                detail=f"Invalid file type: {uploaded_file.content_type}. Only image files are supported."
-            )
-
-        try:
-            # Read file content
-            file_content = await uploaded_file.read()
-
-            # Convert to PIL Image
-            image = Image.open(io.BytesIO(file_content)).convert('RGB')
-
-            # Load processor and model using auto-detection
-            processor = self._load_processor(model_name)
-            model = self._load_model(model_name, "_image")
-
-            # Generate embeddings
-            embeddings = self._generate_image_embeddings(image, model, processor, model_name)
-
-            self._logger.info("Image embedding generation from upload completed")
-            return {"embeddings": embeddings}
-
-        except Exception as e:
-            self._logger.error(f"Embedding generation from upload failed for model '{model_name}': {str(e)}")
-            raise HTTPException(
-                status_code=500,
-                detail=f"Embedding generation from upload failed: {str(e)}"
-            )
-
-
-class TextEmbeddingTaskService(BaseEmbeddingTaskService):
-    """Service for generating text embeddings"""
-
-    def _generate_text_embeddings(self, text: str, model, processor, model_name: str) -> list:
-        """Generate embeddings for text"""
-        device = self._get_device()
-
-        # Process the text
-        inputs = processor(text=[text], return_tensors="pt", padding=True, truncation=True)
-
-        # Move inputs to the same device as the model
-        inputs = {k: v.to(device) for k, v in inputs.items()}
-
-        # Get the embeddings
-        with torch.no_grad():
-            # Try using specialized methods first for CLIP-like models
-            if hasattr(model, 'get_text_features'):
-                self._logger.debug(f"Using get_text_features for {model_name}")
-                embeddings = model.get_text_features(
-                    input_ids=inputs.get('input_ids'),
-                    attention_mask=inputs.get('attention_mask')
-                )
-            elif hasattr(model, 'text_model'):
-                self._logger.debug(f"Using text_model for {model_name}")
-                text_outputs = model.text_model(**inputs)
-                embeddings = self._extract_embeddings(text_outputs, model_name)
-            else:
-                self._logger.debug(f"Using full model for {model_name}")
-                outputs = model(**inputs)
-                embeddings = self._extract_embeddings(outputs, model_name)
-
-        self._logger.info(f"Text embedding shape: {embeddings.shape}")
-
-        # Move back to CPU before converting to numpy
-        embeddings_array = embeddings.cpu().numpy()
-
-        return embeddings_array[0].tolist()
-
-    async def generate_embedding(self, request: Request, model_name: str):
-        """Main method to generate text embeddings"""
-        embedding_request: EmbeddingRequest = await self.get_embedding_request(request)
-
-        self._logger.info(f"Generating text embedding for: {embedding_request.inputs[:500]}...")
-
-        # Load processor and model using auto-detection
-        processor = self._load_processor(model_name)
-        model = self._load_model(model_name, "_text")
-
-        try:
-            # Generate embeddings
-            embeddings = self._generate_text_embeddings(embedding_request.inputs, model, processor, model_name)
-
-            self._logger.info("Text embedding generation completed")
-            return {"embeddings": embeddings}
-
-        except Exception as e:
-            self._logger.error(f"Embedding generation failed for model '{model_name}': {str(e)}")
-            raise HTTPException(
-                status_code=500,
-                detail=f"Embedding generation failed: {str(e)}"
-            )
src/main.py CHANGED
@@ -10,14 +10,13 @@
 
 import torch
 
-from fastapi import FastAPI, Path, Request, File, UploadFile
+from fastapi import FastAPI, Path, Request
 import logging
 import sys
 
 from .translation_task import TranslationTaskService
 from .classification import ClassificationTaskService
 from .text_to_image import TextToImageTaskService
-from .embeddings import ImageEmbeddingTaskService, TextEmbeddingTaskService
 
 app = FastAPI(
     title="Pimcore Local Inference Service",
@@ -29,10 +28,6 @@ logging.basicConfig(format='%(asctime)s %(levelname)-8s %(message)s')
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.DEBUG)
 
-# Create singleton instances of embedding services to enable model caching across requests
-image_embedding_service = ImageEmbeddingTaskService(logger)
-text_embedding_service = TextEmbeddingTaskService(logger)
-
 
 class StreamToLogger(object):
     def __init__(self, logger, log_level):
@@ -299,190 +294,3 @@ async def image_to_text(
     model_name = model_name.rstrip("/")
     imageToTextTask = TextToImageTaskService(logger)
     return await imageToTextTask.extract(request, model_name)
-
-
-# =========================
-# Image Embedding Task
-# =========================
-@app.post(
-    "/image-embedding/{model_name:path}",
-    openapi_extra={
-        "requestBody": {
-            "content": {
-                "application/json": {
-                    "example": {
-                        "inputs": "base64_encoded_image_string"
-                    }
-                }
-            }
-        }
-    }
-)
-async def image_embedding(
-    request: Request,
-    model_name: str = Path(
-        ...,
-        description="The name of the image embedding model. Supported models include: google/siglip-so400m-patch14-384, openai/clip-vit-large-patch14, openai/clip-vit-base-patch16, laion/CLIP-ViT-bigG-14-laion2B-39B-b160k, Salesforce/blip-itm-large-flickr",
-        example="google/siglip-so400m-patch14-384"
-    )
-):
-    """
-    Generate embedding vectors for image data.
-
-    The service supports multiple model types including SigLIP, CLIP, and BLIP models.
-    Returns a dense vector representation of the input image.
-
-    Returns:
-        list: The embedding vector as a list of float values.
-    """
-
-    model_name = model_name.rstrip("/")
-    return await image_embedding_service.generate_embedding(request, model_name)
-
-
-# =========================
-# Image Embedding Upload Task (Development/Testing)
-# =========================
-@app.post(
-    "/image-embedding-upload/{model_name:path}",
-    openapi_extra={
-        "requestBody": {
-            "content": {
-                "multipart/form-data": {
-                    "schema": {
-                        "type": "object",
-                        "properties": {
-                            "image": {
-                                "type": "string",
-                                "format": "binary",
-                                "description": "Image file to upload for embedding generation"
-                            }
-                        },
-                        "required": ["image"]
-                    }
-                }
-            }
-        },
-        "responses": {
-            "200": {
-                "description": "Image embedding vector",
-                "content": {
-                    "application/json": {
-                        "example": {
-                            "embeddings": [0.1, -0.2, 0.3, "..."]
-                        }
-                    }
-                }
-            }
-        }
-    }
-)
-async def image_embedding_upload(
-    image: UploadFile = File(..., description="Image file to generate embeddings for"),
-    model_name: str = Path(
-        ...,
-        description="The name of the image embedding model. Supported models include: google/siglip-so400m-patch14-384, openai/clip-vit-large-patch14, openai/clip-vit-base-patch16, laion/CLIP-ViT-bigG-14-laion2B-39B-b160k, Salesforce/blip-itm-large-flickr",
-        example="google/siglip-so400m-patch14-384"
-    )
-):
-    """
-    Generate embedding vectors for uploaded image data (Development/Testing endpoint).
-
-    This endpoint allows you to upload an image file directly through the Swagger UI
-    for development and testing purposes. The image is processed and converted to
-    embedding vectors using the specified model.
-
-    Supported formats: JPEG, PNG, GIF, BMP, TIFF
-
-    The service supports multiple model types including SigLIP, CLIP, and BLIP models.
-    Returns a dense vector representation of the uploaded image.
-
-    Returns:
-        dict: The embedding vector as a list of float values.
-    """
-
-    model_name = model_name.rstrip("/")
-    return await image_embedding_service.generate_embedding_from_upload(image, model_name)
-
-
-# =========================
-# Text Embedding Task
-# =========================
-@app.post(
-    "/text-embedding/{model_name:path}",
-    openapi_extra={
-        "requestBody": {
-            "content": {
-                "application/json": {
-                    "example": {
-                        "inputs": "text to embed"
-                    }
-                }
-            }
-        }
-    }
-)
-async def text_embedding(
-    request: Request,
-    model_name: str = Path(
-        ...,
-        description="The name of the text embedding model. Supported models include: google/siglip-so400m-patch14-384, openai/clip-vit-large-patch14, openai/clip-vit-base-patch16, laion/CLIP-ViT-bigG-14-laion2B-39B-b160k, Salesforce/blip-itm-large-flickr",
-        example="google/siglip-so400m-patch14-384"
-    )
-):
-    """
-    Generate embedding vectors for text data.
-
-    The service supports multiple model types including SigLIP, CLIP, and BLIP models.
-    Returns a dense vector representation of the input text.
-
-    Returns:
-        list: The embedding vector as a list of float values.
-    """
-
-    model_name = model_name.rstrip("/")
-    return await text_embedding_service.generate_embedding(request, model_name)
-
-
-# =========================
-# Embedding Vector Size
-# =========================
-@app.get(
-    "/embedding-vector-size/{model_name:path}",
-    openapi_extra={
-        "responses": {
-            "200": {
-                "description": "Vector size information",
-                "content": {
-                    "application/json": {
-                        "example": {
-                            "model_name": "google/siglip-so400m-patch14-384",
-                            "vector_size": 1152,
-                            "config_attribute_used": "hidden_size"
-                        }
-                    }
-                }
-            }
-        }
-    }
-)
-async def embedding_vector_size(
-    model_name: str = Path(
-        ...,
-        description="The name of the embedding model. Supported models include: google/siglip-so400m-patch14-384, openai/clip-vit-large-patch14, openai/clip-vit-base-patch16, laion/CLIP-ViT-bigG-14-laion2B-39B-b160k, Salesforce/blip-itm-large-flickr",
-        example="google/siglip-so400m-patch14-384"
-    )
-):
-    """
-    Get the vector size of embeddings for a given model.
-
-    This endpoint returns the dimensionality of the embedding vectors that the model produces.
-    Useful for understanding the output format before generating embeddings.
-
-    Returns:
-        dict: Information about the vector size including model name, vector size, and configuration attribute used.
-    """
-
-    model_name = model_name.rstrip("/")
-    # We can use either embedding service as they inherit from the same base class
-    return await image_embedding_service.get_embedding_vector_size(model_name)