Files changed (5)
  1. Dockerfile +2 -8
  2. docker-compose.yaml +2 -15
  3. requirements.txt +1 -4
  4. src/embeddings.py +0 -360
  5. src/main.py +1 -193
Dockerfile CHANGED
@@ -4,20 +4,14 @@ RUN useradd -m -u 1000 user
 USER user
 
 ENV HOME=/home/user \
-    PATH=/home/user/.local/bin:$PATH \
-    PYTHONDONTWRITEBYTECODE=1 \
-    PYTHONUNBUFFERED=1
+    PATH=/home/user/.local/bin:$PATH
 
 WORKDIR $HOME/app
 
-# Copy requirements first for better caching
 COPY --chown=user requirements.txt requirements.txt
 
-# Install dependencies with caching
-RUN pip install --upgrade pip && \
-    pip install --no-cache-dir --user -r requirements.txt
+RUN pip install --upgrade -r requirements.txt
 
-# Copy application code
 COPY --chown=user . .
 
 CMD ["uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "7860"]
docker-compose.yaml CHANGED
@@ -2,27 +2,14 @@ services:
   server:
     build:
       context: .
-      # Enable BuildKit for better caching
-      cache_from:
-        - python:3.9
     ports:
       - 7860:7860
     develop:
       watch:
-        # Only rebuild on requirements.txt changes, sync code changes otherwise
         - action: rebuild
-          path: ./requirements.txt
-        - action: sync
-          path: ./src
-          target: /home/user/app/src
-        - action: sync
-          path: ./README.md
-          target: /home/user/app/README.md
+          path: .
     volumes:
       - python-cache:/home/user/.cache
-      # Cache pip packages
-      - pip-cache:/home/user/.cache/pip
 
 volumes:
-  python-cache:
-  pip-cache:
+  python-cache:
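For orientation, a sketch of docker-compose.yaml as it reads after this change, with indentation reconstructed since the diff viewer flattened it. With the sync rules gone, docker compose watch rebuilds the image on any change under the build context instead of syncing ./src and README.md into the running container:

services:
  server:
    build:
      context: .
    ports:
      - 7860:7860
    develop:
      watch:
        # Rebuild on any change in the build context
        # (was: rebuild only on requirements.txt, sync for ./src and README.md)
        - action: rebuild
          path: .
    volumes:
      - python-cache:/home/user/.cache

volumes:
  python-cache: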
 
requirements.txt CHANGED
@@ -6,7 +6,4 @@ sentencepiece
 sacremoses
 torch
 pillow
-protobuf
-
-# Optional dependencies for specific features
-einops
+# Optional dependencies for specific features
src/embeddings.py DELETED
@@ -1,360 +0,0 @@
-# -------------------------------------------------------------------
-# This source file is available under the terms of the
-# Pimcore Open Core License (POCL)
-# Full copyright and license information is available in
-# LICENSE.md which is distributed with this source code.
-#
-# @copyright Copyright (c) Pimcore GmbH (https://www.pimcore.com)
-# @license Pimcore Open Core License (POCL)
-# -------------------------------------------------------------------
-
-import torch
-import base64
-import io
-import logging
-from PIL import Image
-from pydantic import BaseModel
-from fastapi import Request, HTTPException
-import json
-from typing import Optional, Union, Dict, Any
-from transformers import AutoProcessor, AutoModel
-
-
-class EmbeddingRequest(BaseModel):
-    inputs: str
-    parameters: Optional[dict] = None
-
-
-class BaseEmbeddingTaskService:
-    """Base class for embedding services with common functionality"""
-
-    def __init__(self, logger: logging.Logger):
-        self._logger = logger
-        self._model_cache = {}
-        self._processor_cache = {}
-
-    async def get_embedding_request(self, request: Request) -> EmbeddingRequest:
-        """Parse request body into EmbeddingRequest"""
-        content_type = request.headers.get("content-type", "")
-        if content_type.startswith("application/json"):
-            data = await request.json()
-            return EmbeddingRequest(**data)
-        if content_type.startswith("application/x-www-form-urlencoded"):
-            raw = await request.body()
-            try:
-                data = json.loads(raw)
-                return EmbeddingRequest(**data)
-            except Exception:
-                try:
-                    data = json.loads(raw.decode("utf-8"))
-                    return EmbeddingRequest(**data)
-                except Exception:
-                    raise HTTPException(status_code=400, detail="Invalid request body")
-        raise HTTPException(status_code=400, detail="Unsupported content type")
-
-    def _get_device(self) -> torch.device:
-        """Get the appropriate device (GPU if available, otherwise CPU)"""
-        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        self._logger.info(f"Using device: {device}")
-        return device
-
-    def _load_processor(self, model_name: str):
-        """Load and cache processor for the model using AutoProcessor"""
-        if model_name not in self._processor_cache:
-            try:
-                self._processor_cache[model_name] = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
-                self._logger.info(f"Loaded processor for model: {model_name}")
-            except Exception as e:
-                self._logger.error(f"Failed to load processor for model '{model_name}': {str(e)}")
-                raise HTTPException(
-                    status_code=404,
-                    detail=f"Processor for model '{model_name}' could not be loaded: {str(e)}"
-                )
-        else:
-            self._logger.info(f"Using cached processor for model: {model_name}")
-        return self._processor_cache[model_name]
-
-    def _load_model(self, model_name: str, cache_suffix: str = ""):
-        """Load and cache model using AutoModel"""
-        cache_key = f"{model_name}{cache_suffix}"
-        if cache_key not in self._model_cache:
-            try:
-                device = self._get_device()
-                model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
-                model.to(device)
-                self._model_cache[cache_key] = model
-                self._logger.info(f"Loaded model: {model_name} on {device}")
-            except Exception as e:
-                self._logger.error(f"Failed to load model '{model_name}': {str(e)}")
-                raise HTTPException(
-                    status_code=404,
-                    detail=f"Model '{model_name}' could not be loaded: {str(e)}"
-                )
-        else:
-            self._logger.info(f"Using cached model: {model_name} (cache key: {cache_key})")
-        return self._model_cache[cache_key]
-
-    async def get_embedding_vector_size(self, model_name: str) -> dict:
-        """Get the vector size of embeddings for a given model"""
-        try:
-            # Load the model to get its configuration
-            model = self._load_model(model_name)
-
-            # Try to get the embedding dimension from the model configuration
-            used_attribute = None
-            if hasattr(model.config, 'hidden_size'):
-                vector_size = model.config.hidden_size
-                used_attribute = "hidden_size"
-            elif hasattr(model.config, 'projection_dim'):
-                vector_size = model.config.projection_dim
-                used_attribute = "projection_dim"
-            elif hasattr(model.config, 'd_model'):
-                vector_size = model.config.d_model
-                used_attribute = "d_model"
-            elif hasattr(model.config, 'text_config') and hasattr(model.config.text_config, 'hidden_size'):
-                vector_size = model.config.text_config.hidden_size
-                used_attribute = "text_config.hidden_size"
-            elif hasattr(model.config, 'vision_config') and hasattr(model.config.vision_config, 'hidden_size'):
-                vector_size = model.config.vision_config.hidden_size
-                used_attribute = "vision_config.hidden_size"
-            else:
-                # If we can't determine from config, we'll need to run a dummy inference
-                raise AttributeError("Could not determine vector size from model configuration")
-
-            self._logger.info(f"Model {model_name} has embedding vector size: {vector_size}")
-            return {
-                "model_name": model_name,
-                "vector_size": vector_size,
-                "config_attribute_used": used_attribute
-            }
-
-        except Exception as e:
-            self._logger.error(f"Failed to get vector size for model '{model_name}': {str(e)}")
-            raise HTTPException(
-                status_code=404,
-                detail=f"Could not determine vector size for model '{model_name}': {str(e)}"
-            )
-
-    def _extract_embeddings(self, model_output, model_name: str) -> torch.Tensor:
-        """Extract embeddings from model output with fallback strategies"""
-
-        # Try different embedding extraction methods in order of preference
-
-        # 1. Check for pooler_output (most common)
-        if hasattr(model_output, 'pooler_output') and model_output.pooler_output is not None:
-            self._logger.debug(f"Using pooler_output for {model_name}")
-            return model_output.pooler_output
-
-        # 2. Check for last_hidden_state and pool it
-        if hasattr(model_output, 'last_hidden_state') and model_output.last_hidden_state is not None:
-            self._logger.debug(f"Using pooled last_hidden_state for {model_name}")
-            # Mean pooling over sequence dimension
-            return model_output.last_hidden_state.mean(dim=1)
-
-        # 3. Check for image_embeds (CLIP-style models)
-        if hasattr(model_output, 'image_embeds') and model_output.image_embeds is not None:
-            self._logger.debug(f"Using image_embeds for {model_name}")
-            return model_output.image_embeds
-
-        # 4. Check for text_embeds (CLIP-style models)
-        if hasattr(model_output, 'text_embeds') and model_output.text_embeds is not None:
-            self._logger.debug(f"Using text_embeds for {model_name}")
-            return model_output.text_embeds
-
-        # 5. Fallback: try to use the output directly if it's a tensor
-        if isinstance(model_output, torch.Tensor):
-            self._logger.debug(f"Using direct tensor output for {model_name}")
-            return model_output
-
-        # 6. Last resort: check if output is a tuple and use the first element
-        if isinstance(model_output, tuple) and len(model_output) > 0:
-            self._logger.debug(f"Using first element of tuple output for {model_name}")
-            return model_output[0]
-
-        # If none of the above work, raise an error
-        raise HTTPException(
-            status_code=500,
-            detail=f"Could not extract embeddings from model output for {model_name}. "
-                   f"Available attributes: {dir(model_output) if hasattr(model_output, '__dict__') else 'Unknown'}"
-        )
-
-
-class ImageEmbeddingTaskService(BaseEmbeddingTaskService):
-    """Service for generating image embeddings"""
-
-    def _decode_base64_image(self, base64_string: str) -> Image.Image:
-        """Decode base64 string to PIL Image"""
-        try:
-            # Remove data URL prefix if present
-            if base64_string.startswith('data:image'):
-                base64_string = base64_string.split(',')[1]
-
-            image_data = base64.b64decode(base64_string)
-            image = Image.open(io.BytesIO(image_data))
-
-            # Convert to RGB if necessary
-            if image.mode != 'RGB':
-                image = image.convert('RGB')
-
-            return image
-        except Exception as e:
-            raise HTTPException(status_code=400, detail=f"Invalid image data: {str(e)}")
-
-    def _generate_image_embeddings(self, image: Image.Image, model, processor, model_name: str) -> list:
-        """Generate embeddings for an image"""
-        device = self._get_device()
-
-        # Process the image
-        inputs = processor(images=image, return_tensors="pt", padding=True)
-
-        # Move inputs to the same device as the model
-        inputs = {k: v.to(device) for k, v in inputs.items()}
-
-        # Get the embeddings
-        with torch.no_grad():
-            # Try using specialized methods first for CLIP-like models
-            if hasattr(model, 'get_image_features'):
-                self._logger.debug(f"Using get_image_features for {model_name}")
-                embeddings = model.get_image_features(pixel_values=inputs.get('pixel_values'))
-            elif hasattr(model, 'vision_model'):
-                self._logger.debug(f"Using vision_model for {model_name}")
-                vision_outputs = model.vision_model(**inputs)
-                embeddings = self._extract_embeddings(vision_outputs, model_name)
-            else:
-                self._logger.debug(f"Using full model for {model_name}")
-                outputs = model(**inputs)
-                embeddings = self._extract_embeddings(outputs, model_name)
-
-        self._logger.info(f"Image embedding shape: {embeddings.shape}")
-
-        # Move back to CPU before converting to numpy
-        embeddings_array = embeddings.cpu().numpy()
-
-        return embeddings_array[0].tolist()
-
-    async def generate_embedding(self, request: Request, model_name: str):
-        """Main method to generate image embeddings"""
-        embedding_request: EmbeddingRequest = await self.get_embedding_request(request)
-
-        self._logger.info(f"Generating image embedding for model: {model_name}")
-
-        # Load processor and model using auto-detection
-        processor = self._load_processor(model_name)
-        model = self._load_model(model_name, "_image")
-
-        # Decode image from base64
-        image = self._decode_base64_image(embedding_request.inputs)
-
-        try:
-            # Generate embeddings
-            embeddings = self._generate_image_embeddings(image, model, processor, model_name)
-
-            self._logger.info("Image embedding generation completed")
-            return {"embeddings": embeddings}
-
-        except Exception as e:
-            self._logger.error(f"Embedding generation failed for model '{model_name}': {str(e)}")
-            raise HTTPException(
-                status_code=500,
-                detail=f"Embedding generation failed: {str(e)}"
-            )
-
-    async def generate_embedding_from_upload(self, uploaded_file, model_name: str):
-        """Generate image embeddings from uploaded file"""
-        from fastapi import UploadFile
-
-        self._logger.info(f"Generating image embedding from uploaded file for model: {model_name}")
-
-        # Validate file type
-        if not uploaded_file.content_type.startswith('image/'):
-            raise HTTPException(
-                status_code=400,
-                detail=f"Invalid file type: {uploaded_file.content_type}. Only image files are supported."
-            )
-
-        try:
-            # Read file content
-            file_content = await uploaded_file.read()
-
-            # Convert to PIL Image
-            image = Image.open(io.BytesIO(file_content)).convert('RGB')
-
-            # Load processor and model using auto-detection
-            processor = self._load_processor(model_name)
-            model = self._load_model(model_name, "_image")
-
-            # Generate embeddings
-            embeddings = self._generate_image_embeddings(image, model, processor, model_name)
-
-            self._logger.info("Image embedding generation from upload completed")
-            return {"embeddings": embeddings}
-
-        except Exception as e:
-            self._logger.error(f"Embedding generation from upload failed for model '{model_name}': {str(e)}")
-            raise HTTPException(
-                status_code=500,
-                detail=f"Embedding generation from upload failed: {str(e)}"
-            )
-
-
-class TextEmbeddingTaskService(BaseEmbeddingTaskService):
-    """Service for generating text embeddings"""
-
-    def _generate_text_embeddings(self, text: str, model, processor, model_name: str) -> list:
-        """Generate embeddings for text"""
-        device = self._get_device()
-
-        # Process the text
-        inputs = processor(text=[text], return_tensors="pt", padding=True, truncation=True)
-
-        # Move inputs to the same device as the model
-        inputs = {k: v.to(device) for k, v in inputs.items()}
-
-        # Get the embeddings
-        with torch.no_grad():
-            # Try using specialized methods first for CLIP-like models
-            if hasattr(model, 'get_text_features'):
-                self._logger.debug(f"Using get_text_features for {model_name}")
-                embeddings = model.get_text_features(
-                    input_ids=inputs.get('input_ids'),
-                    attention_mask=inputs.get('attention_mask')
-                )
-            elif hasattr(model, 'text_model'):
-                self._logger.debug(f"Using text_model for {model_name}")
-                text_outputs = model.text_model(**inputs)
-                embeddings = self._extract_embeddings(text_outputs, model_name)
-            else:
-                self._logger.debug(f"Using full model for {model_name}")
-                outputs = model(**inputs)
-                embeddings = self._extract_embeddings(outputs, model_name)
-
-        self._logger.info(f"Text embedding shape: {embeddings.shape}")
-
-        # Move back to CPU before converting to numpy
-        embeddings_array = embeddings.cpu().numpy()
-
-        return embeddings_array[0].tolist()
-
-    async def generate_embedding(self, request: Request, model_name: str):
-        """Main method to generate text embeddings"""
-        embedding_request: EmbeddingRequest = await self.get_embedding_request(request)
-
-        self._logger.info(f"Generating text embedding for: {embedding_request.inputs[:500]}...")
-
-        # Load processor and model using auto-detection
-        processor = self._load_processor(model_name)
-        model = self._load_model(model_name, "_text")
-
-        try:
-            # Generate embeddings
-            embeddings = self._generate_text_embeddings(embedding_request.inputs, model, processor, model_name)
-
-            self._logger.info("Text embedding generation completed")
-            return {"embeddings": embeddings}
-
-        except Exception as e:
-            self._logger.error(f"Embedding generation failed for model '{model_name}': {str(e)}")
-            raise HTTPException(
-                status_code=500,
-                detail=f"Embedding generation failed: {str(e)}"
-            )
src/main.py CHANGED
@@ -10,14 +10,13 @@
 
 import torch
 
-from fastapi import FastAPI, Path, Request, File, UploadFile
+from fastapi import FastAPI, Path, Request
 import logging
 import sys
 
 from .translation_task import TranslationTaskService
 from .classification import ClassificationTaskService
 from .text_to_image import TextToImageTaskService
-from .embeddings import ImageEmbeddingTaskService, TextEmbeddingTaskService
 
 app = FastAPI(
     title="Pimcore Local Inference Service",
@@ -29,10 +28,6 @@ logging.basicConfig(format='%(asctime)s %(levelname)-8s %(message)s')
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.DEBUG)
 
-# Create singleton instances of embedding services to enable model caching across requests
-image_embedding_service = ImageEmbeddingTaskService(logger)
-text_embedding_service = TextEmbeddingTaskService(logger)
-
 
 class StreamToLogger(object):
     def __init__(self, logger, log_level):
@@ -299,190 +294,3 @@ async def image_to_text(
     model_name = model_name.rstrip("/")
     imageToTextTask = TextToImageTaskService(logger)
     return await imageToTextTask.extract(request, model_name)
-
-
-# =========================
-# Image Embedding Task
-# =========================
-@app.post(
-    "/image-embedding/{model_name:path}",
-    openapi_extra={
-        "requestBody": {
-            "content": {
-                "application/json": {
-                    "example": {
-                        "inputs": "base64_encoded_image_string"
-                    }
-                }
-            }
-        }
-    }
-)
-async def image_embedding(
-    request: Request,
-    model_name: str = Path(
-        ...,
-        description="The name of the image embedding model. Supported models include: google/siglip-so400m-patch14-384, openai/clip-vit-large-patch14, openai/clip-vit-base-patch16, laion/CLIP-ViT-bigG-14-laion2B-39B-b160k, Salesforce/blip-itm-large-flickr",
-        example="google/siglip-so400m-patch14-384"
-    )
-):
-    """
-    Generate embedding vectors for image data.
-
-    The service supports multiple model types including SigLIP, CLIP, and BLIP models.
-    Returns a dense vector representation of the input image.
-
-    Returns:
-        list: The embedding vector as a list of float values.
-    """
-
-    model_name = model_name.rstrip("/")
-    return await image_embedding_service.generate_embedding(request, model_name)
-
-
-# =========================
-# Image Embedding Upload Task (Development/Testing)
-# =========================
-@app.post(
-    "/image-embedding-upload/{model_name:path}",
-    openapi_extra={
-        "requestBody": {
-            "content": {
-                "multipart/form-data": {
-                    "schema": {
-                        "type": "object",
-                        "properties": {
-                            "image": {
-                                "type": "string",
-                                "format": "binary",
-                                "description": "Image file to upload for embedding generation"
-                            }
-                        },
-                        "required": ["image"]
-                    }
-                }
-            }
-        },
-        "responses": {
-            "200": {
-                "description": "Image embedding vector",
-                "content": {
-                    "application/json": {
-                        "example": {
-                            "embeddings": [0.1, -0.2, 0.3, "..."]
-                        }
-                    }
-                }
-            }
-        }
-    }
-)
-async def image_embedding_upload(
-    image: UploadFile = File(..., description="Image file to generate embeddings for"),
-    model_name: str = Path(
-        ...,
-        description="The name of the image embedding model. Supported models include: google/siglip-so400m-patch14-384, openai/clip-vit-large-patch14, openai/clip-vit-base-patch16, laion/CLIP-ViT-bigG-14-laion2B-39B-b160k, Salesforce/blip-itm-large-flickr",
-        example="google/siglip-so400m-patch14-384"
-    )
-):
-    """
-    Generate embedding vectors for uploaded image data (Development/Testing endpoint).
-
-    This endpoint allows you to upload an image file directly through the Swagger UI
-    for development and testing purposes. The image is processed and converted to
-    embedding vectors using the specified model.
-
-    Supported formats: JPEG, PNG, GIF, BMP, TIFF
-
-    The service supports multiple model types including SigLIP, CLIP, and BLIP models.
-    Returns a dense vector representation of the uploaded image.
-
-    Returns:
-        dict: The embedding vector as a list of float values.
-    """
-
-    model_name = model_name.rstrip("/")
-    return await image_embedding_service.generate_embedding_from_upload(image, model_name)
-
-
-# =========================
-# Text Embedding Task
-# =========================
-@app.post(
-    "/text-embedding/{model_name:path}",
-    openapi_extra={
-        "requestBody": {
-            "content": {
-                "application/json": {
-                    "example": {
-                        "inputs": "text to embed"
-                    }
-                }
-            }
-        }
-    }
-)
-async def text_embedding(
-    request: Request,
-    model_name: str = Path(
-        ...,
-        description="The name of the text embedding model. Supported models include: google/siglip-so400m-patch14-384, openai/clip-vit-large-patch14, openai/clip-vit-base-patch16, laion/CLIP-ViT-bigG-14-laion2B-39B-b160k, Salesforce/blip-itm-large-flickr",
-        example="google/siglip-so400m-patch14-384"
-    )
-):
-    """
-    Generate embedding vectors for text data.
-
-    The service supports multiple model types including SigLIP, CLIP, and BLIP models.
-    Returns a dense vector representation of the input text.
-
-    Returns:
-        list: The embedding vector as a list of float values.
-    """
-
-    model_name = model_name.rstrip("/")
-    return await text_embedding_service.generate_embedding(request, model_name)
-
-
-# =========================
-# Embedding Vector Size
-# =========================
-@app.get(
-    "/embedding-vector-size/{model_name:path}",
-    openapi_extra={
-        "responses": {
-            "200": {
-                "description": "Vector size information",
-                "content": {
-                    "application/json": {
-                        "example": {
-                            "model_name": "google/siglip-so400m-patch14-384",
-                            "vector_size": 1152,
-                            "config_attribute_used": "hidden_size"
-                        }
-                    }
-                }
-            }
-        }
-    }
-)
-async def embedding_vector_size(
-    model_name: str = Path(
-        ...,
-        description="The name of the embedding model. Supported models include: google/siglip-so400m-patch14-384, openai/clip-vit-large-patch14, openai/clip-vit-base-patch16, laion/CLIP-ViT-bigG-14-laion2B-39B-b160k, Salesforce/blip-itm-large-flickr",
-        example="google/siglip-so400m-patch14-384"
-    )
-):
-    """
-    Get the vector size of embeddings for a given model.
-
-    This endpoint returns the dimensionality of the embedding vectors that the model produces.
-    Useful for understanding the output format before generating embeddings.
-
-    Returns:
-        dict: Information about the vector size including model name, vector size, and configuration attribute used.
-    """
-
-    model_name = model_name.rstrip("/")
-    # We can use either embedding service as they inherit from the same base class
-    return await image_embedding_service.get_embedding_vector_size(model_name)