Update app.py
app.py CHANGED
@@ -23,7 +23,7 @@ from sentence_transformers import SentenceTransformer
 import faiss
 import numpy as np
 from PIL import Image
-
+from transformers import BlipForConditionalGeneration
 
 
 
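For context, the added import brings in the BLIP captioning model class used by the image_processor branch below. The diff only shows that ModelManager.model_configs entries expose model, processor, model_id, and kwargs keys, so the following is a minimal sketch of what that entry presumably looks like; the checkpoint name and empty kwargs are assumptions, not taken from this commit.

from transformers import BlipForConditionalGeneration, BlipProcessor

# Hypothetical config entry mirroring the keys the diff reads:
# config["model"], config["processor"], config["model_id"], config["kwargs"].
model_configs = {
    "image_processor": {
        "model": BlipForConditionalGeneration,
        "processor": BlipProcessor,
        "model_id": "Salesforce/blip-image-captioning-base",  # assumed checkpoint
        "kwargs": {},                                         # assumed empty
    },
}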
@@ -213,29 +213,19 @@ class ModelManager:
     }
 
     def load_model(self, model_type: str):
-
-
-
-
+        """Load a model by type"""
+        try:
+            if model_type not in self.model_configs:
+                raise ModelError(f"Unknown model type: {model_type}")
 
-
-
+            if model_type in self.loaded_models:
+                return self.loaded_models[model_type]
 
-
-
+            config = self.model_configs[model_type]
+            logger.info(f"Loading {model_type} model...")
 
-
-
-                    config["model_id"],
-                    **config["kwargs"]
-                )
-                model = config["model"].from_pretrained(
-                    config["model_id"],
-                    **config["kwargs"]
-                )
-                self.loaded_models[model_type] = (model, tokenizer)
-
-            elif model_type == "image_processor":
+            if model_type == "image_processor":
+                try:
                     processor = config["processor"].from_pretrained(
                         config["model_id"],
                         **config["kwargs"]
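The rewritten load_model follows a validate-then-memoize pattern: unknown types fail fast, repeat requests are served from loaded_models, and only the first request actually loads weights. A self-contained sketch of that pattern with hypothetical names (CachingLoader and factory stand in for the real class and its from_pretrained calls):

import logging

logger = logging.getLogger(__name__)

class ModelError(Exception):
    """Raised when a model cannot be resolved or loaded."""

class CachingLoader:
    def __init__(self, configs):
        self.model_configs = configs   # model_type -> {"factory": callable}
        self.loaded_models = {}        # model_type -> loaded object

    def load_model(self, model_type: str):
        # Fail fast on unknown types, as the diff does.
        if model_type not in self.model_configs:
            raise ModelError(f"Unknown model type: {model_type}")
        # Serve repeat requests from the cache.
        if model_type in self.loaded_models:
            return self.loaded_models[model_type]
        # First request: build, cache, return.
        logger.info(f"Loading {model_type} model...")
        self.loaded_models[model_type] = self.model_configs[model_type]["factory"]()
        return self.loaded_models[model_type]

# Usage:
# loader = CachingLoader({"greeter": {"factory": lambda: "hello"}})
# loader.load_model("greeter")  # loads once, then hits the cache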
@@ -244,14 +234,14 @@ class ModelManager:
                         config["model_id"],
                         **config["kwargs"]
                     )
+                    if torch.cuda.is_available():
+                        model = model.to("cuda")
                     self.loaded_models[model_type] = (model, processor)
-
-
-
-
-
-            raise ModelError(f"Error loading {model_type} model: {str(e)}")
-
+                    logger.info(f"{model_type} model loaded successfully")
+
+        except Exception as e:
+            logger.error(f"Error loading {model_type} model: {e}")
+            raise ModelError(f"Failed to load {model_type} model: {e}")
     def unload_model(self, model_type: str):
         """Unload a model to free memory"""
         if model_type in self.loaded_models:
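The new CUDA guard keeps the Space runnable on CPU-only hardware while using a GPU when present; the companion requirement, handled by encode_image below via .to(model.device), is that inputs live on the same device as the weights. A runnable sketch of the guard with a stand-in module:

import torch
from torch import nn

# Same guard as the diff: prefer the GPU, fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

model = nn.Linear(4, 2).to(device)  # stand-in for the loaded HF model
x = torch.randn(1, 4).to(device)    # inputs must match the model's device
with torch.no_grad():
    print(model(x).device)          # cuda:0 on GPU machines, cpu otherwise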
@@ -309,21 +299,27 @@ class MultimodalRAG:
             logger.error(f"Error saving template embeddings: {e}")
 
     def encode_image(self, image: Image.Image) -> np.ndarray:
-
-
-
-
-
-
-
-
-
+        """Encode image using BLIP"""
+        try:
+            model, processor = self.model_manager.load_model("image_processor")
+
+            # Process image
+            inputs = processor(images=image, return_tensors="pt").to(model.device)
+
+            # Get image features using the proper method
+            with torch.no_grad():
+                outputs = model.get_image_features(**inputs)
+                image_features = outputs.last_hidden_state.mean(dim=1)  # Average pooling
+
+            return image_features.cpu().numpy()
 
-
-
-
+        except Exception as e:
+            logger.error(f"Error encoding image: {str(e)}")
+            raise ModelError(f"Error encoding image: {str(e)}")
+
     def encode_text(self, text: str) -> np.ndarray:
         """Encode text using sentence-transformers"""
+
         try:
             return self.text_encoder.encode(text)
         except Exception as e:
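The new encode_image expects model.get_image_features(...) to return an object exposing last_hidden_state. An equivalent, more explicit route is to call the BLIP vision tower directly, which BlipForConditionalGeneration exposes as vision_model; this sidesteps any doubt about what get_image_features returns on this class. A minimal sketch; the checkpoint name and placeholder image are assumptions:

import torch
from PIL import Image
from transformers import BlipForConditionalGeneration, BlipProcessor

ckpt = "Salesforce/blip-image-captioning-base"  # assumed checkpoint
processor = BlipProcessor.from_pretrained(ckpt)
model = BlipForConditionalGeneration.from_pretrained(ckpt)

image = Image.new("RGB", (224, 224))  # placeholder; any PIL image works
inputs = processor(images=image, return_tensors="pt")

with torch.no_grad():
    # The vision tower returns an output whose last_hidden_state we can
    # mean-pool over patches, matching the pooling in the diff.
    vision_out = model.vision_model(pixel_values=inputs["pixel_values"])
    features = vision_out.last_hidden_state.mean(dim=1)

print(features.shape)  # (1, hidden_size)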
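Downstream of these encoders, the import faiss visible in the first hunk suggests the embeddings feed a FAISS index, though this commit shows none of that code. Purely as an illustration of the usual pattern (the dimension, index type, and normalization choice are all assumptions): L2-normalize, add to an inner-product index, then search.

import faiss
import numpy as np

dim = 384                              # assumed embedding width (e.g. MiniLM)
index = faiss.IndexFlatIP(dim)         # inner product == cosine after normalizing

vecs = np.random.rand(10, dim).astype("float32")  # stand-ins for template embeddings
faiss.normalize_L2(vecs)
index.add(vecs)

query = np.random.rand(1, dim).astype("float32")
faiss.normalize_L2(query)
scores, ids = index.search(query, 3)   # top-3 nearest templates
print(ids, scores)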