# testing / app.py
# Ramaravind's picture — Update app.py (commit ff2617c, verified)
# raw · history · blame · 375 Bytes
# NOTE: the three lines above are Hugging Face Hub page residue captured by
# the scrape; commented out so this file parses as Python.
import os
from huggingface_hub import InferenceClient
# Build the inference client. HF_API_TOKEN must be set in the environment;
# os.environ[...] raises KeyError at startup if it is missing.
client = InferenceClient(
    provider="auto",
    api_key=os.environ["HF_API_TOKEN"],
)

# Request a streamed chat completion from the Llama model.
# NOTE(review): messages=[] is almost certainly invalid — chat-completion
# providers reject an empty message list. Populate it with at least one
# {"role": "user", "content": ...} entry; verify against the original app.
stream = client.chat.completions.create(
    model="meta-llama/Llama-3.2-1B-Instruct",
    messages=[],
    temperature=0.5,
    top_p=0.7,
    stream=True,
)

# Print tokens as they arrive. In OpenAI-style streaming the final chunk
# carries delta.content == None, so guard before printing — otherwise the
# literal string "None" is appended to the output. flush=True makes the
# tokens appear incrementally instead of sitting in the stdout buffer.
for chunk in stream:
    if chunk.choices and chunk.choices[0].delta.content is not None:
        print(chunk.choices[0].delta.content, end="", flush=True)
print()  # terminate the streamed output with a newline