import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from huggingface_hub import HfApi, login


class MultimodalAI:
    def __init__(self):
        # Read the Hugging Face token from the environment (e.g. populated from a .env file).
        # Use .get() so a missing variable yields None instead of raising a KeyError,
        # letting the explicit check below produce a clearer error message.
        self.HUGGINGFACE_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
        if self.HUGGINGFACE_TOKEN is None:
            raise ValueError("HUGGINGFACE_TOKEN environment variable is not set.")
        # Authenticate with Hugging Face
        self.api = HfApi()
        login(token=self.HUGGINGFACE_TOKEN)
        # Model selection
        self.model_name = "Lin-Chen/sharegpt4video-8b"
        # Use a CUDA-enabled GPU if one is available for faster computation;
        # otherwise fall back to the CPU.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        # Load the model and tokenizer
        self._load_model_and_tokenizer()

    def _load_model_and_tokenizer(self):
        # Load the causal language model and its tokenizer
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name, token=self.HUGGINGFACE_TOKEN
        ).to(self.device)
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.model_name, token=self.HUGGINGFACE_TOKEN
        )
        # LLaMA-family tokenizers often ship without a pad token; reuse the EOS
        # token so generate() receives a valid pad_token_id.
        if self.tokenizer.pad_token_id is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

    def generate_response(self, text_input, max_new_tokens=50):
        # Tokenize the input text and move the tensors to the model's device
        inputs = self.tokenizer(text_input, return_tensors="pt").to(self.device)
        # Generate a response without tracking gradients
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                pad_token_id=self.tokenizer.pad_token_id,
            )
        # Decode and return the generated text
        response_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        return response_text
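
A minimal usage sketch follows. It assumes HUGGINGFACE_TOKEN is defined in a local .env file loaded with the python-dotenv package and that the authenticated account can download the model weights; the prompt string and variable names are purely illustrative.

from dotenv import load_dotenv

# Hypothetical usage: load .env into os.environ, then instantiate the class.
load_dotenv()
ai = MultimodalAI()
print(ai.generate_response("Summarize what a video captioning model does.", max_new_tokens=64))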