# NOTE: this file was exported from a Hugging Face Space; the Space UI status
# banner ("Spaces: Running") is preserved here as a comment so the file stays
# valid Python.
import os
import re
from typing import Dict, List, Tuple

import numpy as np
from dotenv import load_dotenv
from huggingface_hub import InferenceClient

load_dotenv()
class ImageEmbeddingGenerator:
    """Generate fixed-size text embeddings for images via the Hugging Face Inference API."""

    def __init__(self, model_name: str = "Qwen/Qwen3-Embedding-8B",
                 embedding_dim: int = 4096):
        """
        Initialize the embedding generator with a Hugging Face model.

        Args:
            model_name: Hugging Face model id used for feature extraction.
            embedding_dim: Expected embedding size; a response of any other
                size raises ValueError. Defaults to 4096 (Qwen3-Embedding-8B).

        Raises:
            KeyError: If the HF_TOKEN_1 environment variable is not set.
        """
        self.client = InferenceClient(
            provider="nebius",
            api_key=os.environ["HF_TOKEN_1"],
        )
        self.model_name = model_name
        self.embedding_dim = embedding_dim

    def generate_embedding(self, tags: list[str], description: str, caption: str) -> np.ndarray:
        """
        Generate an embedding for an image using its tags, description, and caption.

        Args:
            tags: List of tags related to the image
            description: Long descriptive text of the image
            caption: Short caption for the image

        Returns:
            embedding: 1D numpy array of shape (embedding_dim,), normalized
            to unit length.

        Raises:
            ValueError: If the service returns an embedding of unexpected size.
        """
        # Combine text fields into a single string, then delegate to the
        # shared helper so request/validation/normalization logic lives in
        # exactly one place (previously duplicated here verbatim).
        text = " ".join(tags) + " " + description + " " + caption
        return self._embed_text(text)

    def _embed_text(self, text: str) -> np.ndarray:
        """
        Internal helper to call Hugging Face feature_extraction and return a numpy array.

        Embeddings are L2-normalized to unit length for consistent distance
        calculations across models and dimensions.

        Raises:
            ValueError: If the returned vector is not of size embedding_dim.
        """
        result = self.client.feature_extraction(
            text,
            model=self.model_name,
        )
        embedding = np.asarray(result, dtype=np.float32).reshape(-1)
        # getattr fallback keeps instances created before `embedding_dim`
        # existed (e.g. unpickled objects) working with the old 4096 default.
        expected = getattr(self, "embedding_dim", 4096)
        if embedding.shape[0] != expected:
            raise ValueError(f"Expected embedding of size {expected}, got {embedding.shape[0]}")
        # Normalize to unit length (L2); guard against an all-zero vector,
        # which would otherwise produce NaNs on division.
        norm = np.linalg.norm(embedding)
        if norm > 0:
            embedding = embedding / norm
        return embedding
class TextSummarizer:
    """Summarize free-form text via the Hugging Face Inference API."""

    def __init__(self, model_name: str = "facebook/bart-large-cnn"):
        """
        Initialize the text summarizer with a Hugging Face model.

        Args:
            model_name: Hugging Face model id used for summarization.

        Raises:
            KeyError: If the HF_TOKEN_1 environment variable is not set.
        """
        self.client = InferenceClient(
            provider="hf-inference",
            api_key=os.environ["HF_TOKEN_1"],
        )
        self.model_name = model_name

    def summarize(self, text: str) -> str:
        """
        Generate a summary of the given text.

        Args:
            text: Text to summarize

        Returns:
            summary: Generated summary string. Empty/whitespace-only input
            yields a generic placeholder; a failed remote call falls back to
            a truncated echo of the input.
        """
        if not text or text.strip() == "":
            return "Album of photos"
        try:
            result = self.client.summarization(
                text,
                model=self.model_name,
            )
            # The result shape varies by huggingface_hub version: a
            # SummarizationOutput object with a `summary_text` attribute,
            # a dict, or a list of either. Handle all of them instead of
            # assuming dict elements (which previously raised and was
            # silently swallowed into the fallback).
            if isinstance(result, list) and len(result) > 0:
                first = result[0]
                if isinstance(first, dict):
                    return first.get("summary_text", str(first))
                return getattr(first, "summary_text", str(first))
            elif isinstance(result, dict):
                return result.get("summary_text", str(result))
            else:
                summary = getattr(result, "summary_text", None)
                return summary if summary is not None else str(result)
        except Exception:
            # Best-effort fallback: never let a summarization failure
            # propagate to the caller.
            return f"Collection: {text[:80]}..."
# Example usage: build one embedding from sample metadata and report its shape.
if __name__ == "__main__":
    embedder = ImageEmbeddingGenerator()
    sample_tags = ["nature", "sun", "ice cream"]
    sample_description = "A sunny day in the park with children enjoying ice cream."
    sample_caption = "Sunny day with ice cream."
    vector = embedder.generate_embedding(sample_tags, sample_description, sample_caption)
    print("Embedding shape:", vector.shape)