Hugging Face Spaces: Runtime error
import transformers
import torch

# Model to use: NVIDIA's 4M-context UltraLong Llama 3.1 8B instruct variant.
model_id = "nvidia/Llama-3.1-Nemotron-8B-UltraLong-4M-Instruct"

# Build the text-generation pipeline once.
# device_map="auto" places the model on available hardware (GPU if present,
# otherwise CPU); bfloat16 halves memory versus float32.
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)

# Chat-style conversation for the model to handle.
messages = [
    {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
    {"role": "user", "content": "Who are you?"},
]

# Generate a response. Pass the FULL message list, not just the user turn:
# the pipeline then applies the model's chat template, so the system prompt
# is actually honored. (The original passed only messages[1]["content"],
# which silently dropped the system prompt.)
outputs = pipeline(
    messages,
    max_new_tokens=256,  # cap the length of the generated reply
)

# NOTE(review): the original re-created the pipeline on CPU (device=-1)
# *after* generation; that had no effect on `outputs` and only duplicated
# the model in memory, so the dead re-creation has been removed. If you
# need a CPU-only run, set device=-1 (and drop device_map) in the single
# pipeline construction above instead.

# Print the generated text from the output.
print(outputs[0]["generated_text"])