SmolLM2-chatbot

Sleeping

SmolLM2-chatbot / app.py

Update app.py to start with 135M model

d163488 verified 5 months ago

1.83 kB

	import gradio as gr


	from transformers import pipeline
	import torch

	# Checkpoint loaded by the chatbot. Other SmolLM2 instruct checkpoints that
	# can be swapped in by changing this value:
	#   "HuggingFaceTB/SmolLM2-360M-Instruct"
	#   "HuggingFaceTB/SmolLM2-1.7B-Instruct"
	MODEL = "HuggingFaceTB/SmolLM2-135M-Instruct"

	# Sampling settings forwarded to the pipeline on every generation call.
	MAX_NEW_TOKENS = 250
	TEMPERATURE = 0.6
	TOP_P = 0.95
	REPETITION_PENALTY = 1.2

	# Text-generation pipeline, created once when the module is loaded.
	pipe = pipeline(task="text-generation", model=MODEL)


	def message_fx(message, history):
	    """Generate the assistant's reply to ``message`` given the chat ``history``.

	    Args:
	        message: Text of the newest user turn.
	        history: Earlier turns as a list of ``{'role': ..., 'content': ...}``
	            dicts (the ``type="messages"`` chat format); empty or ``None``
	            on the first turn.

	    Returns:
	        The ``content`` of the last message in the conversation returned by
	        the pipeline, i.e. the newly generated assistant reply.
	    """
	    # An empty history simply yields a one-message conversation, so a single
	    # code path covers both the first turn and every follow-up turn.
	    send_to_api = list(history or []) + [{'role': 'user', 'content': message}]
	    print(send_to_api)
	    with torch.no_grad():
	        outputs = pipe(
	            send_to_api,
	            do_sample=True,
	            max_new_tokens=MAX_NEW_TOKENS,
	            # Higher temperature -> more creative text but higher odds of
	            # hallucination; lower -> more specific, focused writing.
	            temperature=TEMPERATURE,
	            top_p=TOP_P,
	            # Discourages the model from repeating itself.
	            repetition_penalty=REPETITION_PENALTY,
	        )
	    # The generated turn is the last message of the returned conversation.
	    return outputs[0]['generated_text'][-1]['content']


	# Build the chat UI around the reply handler (history is exchanged in the
	# role/content "messages" format), then start serving it.
	chat_ui = gr.ChatInterface(type="messages", fn=message_fx)
	chat_ui.launch()