Hugging Face Spaces: Runtime error
import transformers
import torch

# Model to use: NVIDIA's 4M-context UltraLong Llama 3.1 8B instruct variant.
model_id = "nvidia/Llama-3.1-Nemotron-8B-UltraLong-4M-Instruct"

# Build the text-generation pipeline once.
# device_map="auto" places the model on available hardware (GPU if present,
# otherwise CPU); bfloat16 halves memory versus float32.
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)

# Chat-style conversation for the model to handle.
messages = [
    {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
    {"role": "user", "content": "Who are you?"},
]

# Generate a response. Pass the FULL message list, not just the user turn:
# the pipeline then applies the model's chat template, so the system prompt
# is actually honored. (The original passed only messages[1]["content"],
# which silently dropped the system prompt.)
outputs = pipeline(
    messages,
    max_new_tokens=256,  # cap the length of the generated reply
)

# NOTE(review): the original re-created the pipeline on CPU (device=-1)
# *after* generation; that had no effect on `outputs` and only duplicated
# the model in memory, so the dead re-creation has been removed. If you
# need a CPU-only run, set device=-1 (and drop device_map) in the single
# pipeline construction above instead.

# Print the generated text from the output.
print(outputs[0]["generated_text"])