import os

import streamlit as st
import torch
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hugging Face repository details
# MODEL_ID = "meta-llama/CodeLlama-7b-Instruct-hf"
MODEL_ID = "meta-llama/Llama-3.1-8B"

# Authenticate with the Hugging Face Hub (the model repo is gated). The token
# is read from the HF_TOKEN environment variable / Space secret; a bare
# login() call would try to prompt interactively, which fails in a headless
# Streamlit app.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)
@st.cache_resource  # cache so the model loads once, not on every Streamlit rerun
def load_model():
    """Load the Hugging Face model and tokenizer."""
    try:
        st.write("Loading model and tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID, device_map="auto", torch_dtype=torch.float16
        )
        st.write("Model and tokenizer successfully loaded.")
        return tokenizer, model
    except Exception as e:
        st.error(f"Error loading model: {e}")
        return None, None
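# Optional: Llama-3.1-8B in float16 needs roughly 16 GB of GPU memory. On
# smaller GPUs, 4-bit quantization is a common workaround. A minimal sketch
# (assumes the bitsandbytes package is installed; not used above):
#
# from transformers import BitsAndBytesConfig
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16
# )
# model = AutoModelForCausalLM.from_pretrained(
#     MODEL_ID, device_map="auto", quantization_config=bnb_config
# )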
# Load the (cached) model and tokenizer
tokenizer, model = load_model()
# Streamlit UI
st.title("LowCode Chatbot")
st.write("This chatbot lets you interact with an LLM. Type your question below!")

if model is None or tokenizer is None:
    st.error("Model failed to load. Please check the Hugging Face model path or environment configuration.")
else:
    user_input = st.text_input("You:", placeholder="Enter your question here...", key="input_box")
    if st.button("Send"):
        if user_input.strip():
            # Construct the prompt
            SYSTEM_PROMPT = "You are a helpful assistant. Provide accurate and concise answers."
            full_prompt = f"{SYSTEM_PROMPT}\nUser: {user_input}\nAssistant:"
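            # Note: MODEL_ID points at the base Llama-3.1-8B model, which does
            # plain text continuation, so the hand-rolled "User:/Assistant:"
            # format above is just a convention. For an -Instruct variant, the
            # tokenizer's built-in chat template is the safer choice
            # (a sketch, not used here):
            #
            # messages = [
            #     {"role": "system", "content": SYSTEM_PROMPT},
            #     {"role": "user", "content": user_input},
            # ]
            # full_prompt = tokenizer.apply_chat_template(
            #     messages, tokenize=False, add_generation_prompt=True
            # )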
            # Tokenize the input and move it to the same device as the model
            # (hard-coding "cuda" would crash on CPU-only hosts)
            inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True).to(model.device)
            try:
                # Generate the response; pass the attention mask along with the
                # input IDs, and enable sampling so temperature/top_p take effect
                outputs = model.generate(
                    **inputs,
                    max_new_tokens=200,   # limit generated length, excluding the prompt
                    do_sample=True,       # sampling, not greedy decoding
                    temperature=0.7,      # control randomness
                    top_p=0.9,            # nucleus (top-p) sampling
                    pad_token_id=tokenizer.eos_token_id,
                )
                # Decode and display the text generated after the "Assistant:" marker
                response = tokenizer.decode(outputs[0], skip_special_tokens=True).split("Assistant:")[-1].strip()
                st.write(f"**Model:** {response}")
            except Exception as e:
                st.error(f"Error generating response: {e}")
        else:
            st.warning("Please enter a valid question.")
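# To run locally (a sketch; the package list is inferred from the imports
# above, and accelerate is needed for device_map="auto"):
#
#   pip install streamlit torch transformers accelerate huggingface_hub
#   HF_TOKEN=<your token> streamlit run app.py
#
# On Hugging Face Spaces, save this file as app.py and add HF_TOKEN as a
# Space secret so the gated model can be downloaded.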