import os
import numpy as np
from huggingface_hub import InferenceClient
from typing import List, Dict, Tuple
import re
from dotenv import load_dotenv
load_dotenv()


class ImageEmbeddingGenerator:
    def __init__(self, model_name: str = "Qwen/Qwen3-Embedding-8B"):
        """
        Initialize the embedding generator with a Hugging Face model.
        """
        self.client = InferenceClient(
            provider="nebius",
            api_key=os.environ["HF_TOKEN_1"],
        )
        self.model_name = model_name
    def generate_embedding(self, tags: list[str], description: str, caption: str) -> np.ndarray:
        """
        Generate a 4096-d embedding for an image using its tags, description, and caption.

        Args:
            tags: List of tags related to the image
            description: Long descriptive text of the image
            caption: Short caption for the image

        Returns:
            embedding: 1D numpy array of shape (4096,), normalized to unit length
        """
        # Combine text fields into a single string
        text = " ".join(tags) + " " + description + " " + caption

        # Request embedding from Hugging Face
        result = self.client.feature_extraction(
            text,
            model=self.model_name,
        )

        # Convert to numpy array
        embedding = np.array(result, dtype=np.float32).reshape(-1)

        # Ensure shape is (4096,)
        if embedding.shape[0] != 4096:
            raise ValueError(f"Expected embedding of size 4096, got {embedding.shape[0]}")

        # Normalize to unit length (L2 normalization)
        # This ensures distances stay consistent across models and dimensions
        norm = np.linalg.norm(embedding)
        if norm > 0:
            embedding = embedding / norm

        return embedding
    def _embed_text(self, text: str) -> np.ndarray:
        """
        Internal helper to call Hugging Face feature_extraction and return a numpy array.

        Embeddings are normalized to unit length for consistent distance calculations.
        """
        result = self.client.feature_extraction(
            text,
            model=self.model_name,
        )
        embedding = np.array(result, dtype=np.float32).reshape(-1)

        if embedding.shape[0] != 4096:
            raise ValueError(f"Expected embedding of size 4096, got {embedding.shape[0]}")

        # Normalize to unit length (L2 normalization)
        norm = np.linalg.norm(embedding)
        if norm > 0:
            embedding = embedding / norm

        return embedding


class TextSummarizer:
    def __init__(self, model_name: str = "facebook/bart-large-cnn"):
        """
        Initialize the text summarizer with a Hugging Face model.
        """
        self.client = InferenceClient(
            provider="hf-inference",
            api_key=os.environ["HF_TOKEN_1"],
        )
        self.model_name = model_name
    def summarize(self, text: str) -> str:
        """
        Generate a summary of the given text.

        Args:
            text: Text to summarize

        Returns:
            summary: Generated summary string
        """
        if not text or text.strip() == "":
            return "Album of photos"

        try:
            result = self.client.summarization(
                text,
                model=self.model_name,
            )
            # Extract the summary text from the result object
            if isinstance(result, list) and len(result) > 0:
                return result[0].get("summary_text", str(result[0]))
            elif isinstance(result, dict):
                return result.get("summary_text", str(result))
            else:
                return str(result)
        except Exception:
            # Fallback if summarization fails
            return f"Collection: {text[:80]}..."


# Example usage:
if __name__ == "__main__":
    generator = ImageEmbeddingGenerator()

    tags = ["nature", "sun", "ice cream"]
    description = "A sunny day in the park with children enjoying ice cream."
    caption = "Sunny day with ice cream."

    embedding = generator.generate_embedding(tags, description, caption)
    print("Embedding shape:", embedding.shape)