import os
import numpy as np
from huggingface_hub import InferenceClient
from typing import List, Dict, Tuple
import re
from dotenv import load_dotenv
load_dotenv()


class ImageEmbeddingGenerator:
    def __init__(self, model_name: str = "Qwen/Qwen3-Embedding-8B"):
        """
        Initialize the embedding generator with a Hugging Face model.
        """
        self.client = InferenceClient(
            provider="nebius",
            api_key=os.environ["HF_TOKEN_1"],
        )
        self.model_name = model_name
    def generate_embedding(self, tags: list[str], description: str, caption: str) -> np.ndarray:
        """
        Generate a 4096-d embedding for an image using its tags, description, and caption.

        Args:
            tags: List of tags related to the image
            description: Long descriptive text of the image
            caption: Short caption for the image

        Returns:
            embedding: 1D numpy array of shape (4096,), normalized to unit length
        """
        # Combine text fields into a single string
        text = " ".join(tags) + " " + description + " " + caption

        # Request embedding from Hugging Face
        result = self.client.feature_extraction(
            text,
            model=self.model_name,
        )

        # Convert to numpy array
        embedding = np.array(result, dtype=np.float32).reshape(-1)

        # Ensure shape is (4096,)
        if embedding.shape[0] != 4096:
            raise ValueError(f"Expected embedding of size 4096, got {embedding.shape[0]}")

        # Normalize to unit length (L2 normalization)
        # This ensures distances stay consistent across models and dimensions
        norm = np.linalg.norm(embedding)
        if norm > 0:
            embedding = embedding / norm

        return embedding
    def _embed_text(self, text: str) -> np.ndarray:
        """
        Internal helper to call Hugging Face feature_extraction and return a numpy array.

        Embeddings are normalized to unit length for consistent distance calculations.
        """
        result = self.client.feature_extraction(
            text,
            model=self.model_name,
        )
        embedding = np.array(result, dtype=np.float32).reshape(-1)

        if embedding.shape[0] != 4096:
            raise ValueError(f"Expected embedding of size 4096, got {embedding.shape[0]}")

        # Normalize to unit length (L2 normalization)
        norm = np.linalg.norm(embedding)
        if norm > 0:
            embedding = embedding / norm

        return embedding


class TextSummarizer:
    def __init__(self, model_name: str = "facebook/bart-large-cnn"):
        """
        Initialize the text summarizer with a Hugging Face model.
        """
        self.client = InferenceClient(
            provider="hf-inference",
            api_key=os.environ["HF_TOKEN_1"],
        )
        self.model_name = model_name
    def summarize(self, text: str) -> str:
        """
        Generate a summary of the given text.

        Args:
            text: Text to summarize

        Returns:
            summary: Generated summary string
        """
        if not text or text.strip() == "":
            return "Album of photos"

        try:
            result = self.client.summarization(
                text,
                model=self.model_name,
            )
            # Extract the summary text from the result object
            if isinstance(result, list) and len(result) > 0:
                return result[0].get("summary_text", str(result[0]))
            elif isinstance(result, dict):
                return result.get("summary_text", str(result))
            else:
                return str(result)
        except Exception:
            # Fallback if summarization fails
            return f"Collection: {text[:80]}..."


# Example usage:
if __name__ == "__main__":
    generator = ImageEmbeddingGenerator()

    tags = ["nature", "sun", "ice cream"]
    description = "A sunny day in the park with children enjoying ice cream."
    caption = "Sunny day with ice cream."

    embedding = generator.generate_embedding(tags, description, caption)
    print("Embedding shape:", embedding.shape)