Spaces:

junaidbaber
/

demo_lowcode_llm

Sleeping

App Files Files Community

junaidbaber committed on Jan 15

Commit

ef628bc

verified ·

1 Parent(s): df0aaaf

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -49

app.py CHANGED Viewed

@@ -1,69 +1,63 @@
-import streamlit as st
-from transformers import AutoTokenizer, AutoModelForCausalLM
-import torch
-import os
-# Hugging Face repository details
-# MODEL_ID = "meta-llama/CodeLlama-7b-Instruct-hf"
-MODEL_ID = "meta-llama/Llama-3.1-8B"
 from huggingface_hub import login
 token = os.environ.get("hf")
 login(token)
-def load_model():
-    """Load the Hugging Face model and tokenizer."""
     try:
-        st.write("Loading model and tokenizer...")
-        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-        model = AutoModelForCausalLM.from_pretrained(
-            MODEL_ID, device_map="auto", torch_dtype=torch.float16
         )
-        st.write("Model and tokenizer successfully loaded.")
-        return tokenizer, model
     except Exception as e:
-        st.error(f"Error loading model: {e}")
-        return None, None
-# Load the model and tokenizer
-@st.cache_resource
-def get_model():
-    return load_model()
-tokenizer, model = get_model()
 # Streamlit UI
-st.title("LowCode Chatbot")
-st.write("This chatbot provides interaction with LLM. Type your question below!")
-if model is None or tokenizer is None:
-    st.error("Model failed to load. Please check the Hugging Face model path or environment configuration.")
 else:
-    user_input = st.text_input("You:", placeholder="Enter your medical question here...", key="input_box")
     if st.button("Send"):
         if user_input.strip():
-            # Construct the prompt
-            SYSTEM_PROMPT = "You are a helpful assistant. Provide accurate and concise answers."
-            full_prompt = f"{SYSTEM_PROMPT}\nUser: {user_input}\nAssistant:"
-            # Tokenize the input
-            inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True).to("cuda")
             try:
-                # Generate the response
-                outputs = model.generate(
-                    inputs["input_ids"],
-                    max_length=200,  # Limit response length
-                    temperature=0.7,  # Control randomness
-                    top_p=0.9,  # Top-p sampling
-                    pad_token_id=tokenizer.eos_token_id
                 )
-                # Decode and display the response
-                response = tokenizer.decode(outputs[0], skip_special_tokens=True).split("Assistant:")[-1].strip()
-                st.write(f"**Model:** {response}")
             except Exception as e:
                 st.error(f"Error generating response: {e}")
         else:
-            st.warning("Please enter a valid question.")

 from huggingface_hub import login
+import os
+# Authenticate with the Hugging Face Hub using the value of the "hf"
+# environment variable (presumably a Space secret holding an access token --
+# confirm). os.environ.get returns None when the variable is not set.
 token = os.environ.get("hf")
 login(token)
+import streamlit as st
+from transformers import pipeline
+import torch
+# Model ID
+MODEL_ID = "meta-llama/Meta-Llama-3.1-8B-Instruct"
+@st.cache_resource
+def load_pipeline():
+    """Build the text-generation pipeline for MODEL_ID.
+
+    The pipeline is created with bfloat16 weights and device_map="auto".
+    Progress messages are written to the Streamlit page.
+
+    Returns:
+        The transformers pipeline on success, or None when construction
+        raised; in that case the exception text is shown with st.error.
+
+    NOTE(review): the function is wrapped in st.cache_resource, so a None
+    returned after a failure is presumably cached as well and the load is
+    not retried on the next rerun -- confirm against the Streamlit docs.
+    """
     try:
+        st.write("Loading the instruct pipeline...")
+        instruct_pipeline = pipeline(
+            "text-generation",
+            model=MODEL_ID,
+            model_kwargs={"torch_dtype": torch.bfloat16},
+            device_map="auto",
         )
+        st.write("Pipeline successfully loaded.")
+        return instruct_pipeline
     except Exception as e:
+        st.error(f"Error loading pipeline: {e}")
+        return None
+# Load the pipeline (None signals that loading failed)
+instruct_pipeline = load_pipeline()
 # Streamlit UI
+st.title("Instruction Chatbot")
+st.write("Chat with the instruction-tuned model!")
+if instruct_pipeline is None:
+    st.error("Pipeline failed to load. Please check the configuration.")
 else:
+    # Message-based interaction: an editable system prompt plus one user turn
+    system_message = st.text_area("System Message", value="You are a helpful assistant.", height=100)
+    user_input = st.text_input("User:", placeholder="Ask a question or provide an instruction...")
     if st.button("Send"):
         if user_input.strip():
             try:
+                messages = [
+                    {"role": "system", "content": system_message},
+                    {"role": "user", "content": user_input},
+                ]
+                # Generate response
+                outputs = instruct_pipeline(
+                    messages,
+                    max_new_tokens=150,  # Limit response length
                 )
+                # With chat-style input (a list of role/content dicts) the
+                # pipeline returns the whole conversation in "generated_text";
+                # the assistant reply is the last message. A plain string is
+                # passed through unchanged.
+                generated = outputs[0]["generated_text"]
+                if isinstance(generated, list):
+                    response = generated[-1]["content"]
+                else:
+                    response = generated
+                st.write(f"**Assistant:** {response}")
             except Exception as e:
                 st.error(f"Error generating response: {e}")
         else:
+            st.warning("Please enter a valid message.")