import os
import numpy as np
from huggingface_hub import InferenceClient
from typing import List, Dict, Tuple
import re

from dotenv import load_dotenv
load_dotenv()


class ImageEmbeddingGenerator:
    def __init__(self, model_name: str = "Qwen/Qwen3-Embedding-8B"):
        """
        Initialize the embedding generator with a Hugging Face model.
        """
        self.client = InferenceClient(
            provider="nebius",
            api_key=os.environ["HF_TOKEN_1"],
        )
        self.model_name = model_name

    def generate_embedding(self, tags: list[str], description: str, caption: str) -> np.ndarray:
        """
        Generate a 4096-d embedding for an image using its tags, description, and caption.

        Args:
            tags: List of tags related to the image
            description: Long descriptive text of the image
            caption: Short caption for the image

        Returns:
            embedding: 1D numpy array of shape (4096,), normalized to unit length
        """
        # Combine text fields into a single string
        text = " ".join(tags) + " " + description + " " + caption
        
        # Request embedding from Hugging Face
        result = self.client.feature_extraction(
            text,
            model=self.model_name,
        )
        
        # Convert to numpy array
        embedding = np.array(result, dtype=np.float32).reshape(-1)
        
        # Ensure shape is (4096,)
        if embedding.shape[0] != 4096:
            raise ValueError(f"Expected embedding of size 4096, got {embedding.shape[0]}")
        
        # Normalize to unit length (L2 normalization)
        # This ensures distances stay consistent across models and dimensions
        norm = np.linalg.norm(embedding)
        if norm > 0:
            embedding = embedding / norm
        
        return embedding
    

    def _embed_text(self, text: str) -> np.ndarray:
        """
        Internal helper to call Hugging Face feature_extraction and return a numpy array.
        Embeddings are normalized to unit length for consistent distance calculations.
        """
        result = self.client.feature_extraction(
            text,
            model=self.model_name,
        )
        embedding = np.array(result, dtype=np.float32).reshape(-1)

        if embedding.shape[0] != 4096:
            raise ValueError(f"Expected embedding of size 4096, got {embedding.shape[0]}")
        
        # Normalize to unit length (L2 normalization)
        norm = np.linalg.norm(embedding)
        if norm > 0:
            embedding = embedding / norm
        
        return embedding


class TextSummarizer:
    def __init__(self, model_name: str = "facebook/bart-large-cnn"):
        """
        Initialize the text summarizer with a Hugging Face model.
        """
        self.client = InferenceClient(
            provider="hf-inference",
            api_key=os.environ["HF_TOKEN_1"],
        )
        self.model_name = model_name

    def summarize(self, text: str) -> str:
        """
        Generate a summary of the given text.
        
        Args:
            text: Text to summarize
            
        Returns:
            summary: Generated summary string
        """
        if not text or text.strip() == "":
            return "Album of photos"
        
        try:
            result = self.client.summarization(
                text,
                model=self.model_name,
            )
            # Extract the summary text from the result object
            if isinstance(result, list) and len(result) > 0:
                return result[0].get("summary_text", str(result[0]))
            elif isinstance(result, dict):
                return result.get("summary_text", str(result))
            else:
                return str(result)
        except Exception as e:
            # Fallback if summarization fails
            return f"Collection: {text[:80]}..."

# Example usage:
if __name__ == "__main__":
    generator = ImageEmbeddingGenerator()
    
    tags = ["nature", "sun", "ice cream"]
    description = "A sunny day in the park with children enjoying ice cream."
    caption = "Sunny day with ice cream."
    
    embedding = generator.generate_embedding(tags, description, caption)
    print("Embedding shape:", embedding.shape)