Spaces:

junaidbaber
/

demo_lowcode_llm

Sleeping

App Files Files Community

junaidbaber committed on Jan 15

Commit

7077c22

1 Parent(s): 0c6a823

Uploading the demo

Browse files

Files changed (4) hide show

README.md +4 -4
app.py +64 -0
requirements.txt +4 -0
run_model.py +36 -0

README.md CHANGED Viewed

@@ -1,8 +1,8 @@
 ---
-title: Demo Lowcode Llm
-emoji: 🦀
-colorFrom: red
-colorTo: blue
 sdk: streamlit
 sdk_version: 1.41.1
 app_file: app.py

 ---
+title: Llm Medical
+emoji: 🏆
+colorFrom: gray
+colorTo: green
 sdk: streamlit
 sdk_version: 1.41.1
 app_file: app.py

app.py ADDED Viewed

	@@ -0,0 +1,64 @@

+import streamlit as st
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+# Hugging Face repository details
+MODEL_ID = "meta-llama/CodeLlama-7b-Instruct-hf"
+def load_model():
+    """Load the Hugging Face model and tokenizer."""
+    try:
+        st.write("Loading model and tokenizer...")
+        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+        model = AutoModelForCausalLM.from_pretrained(
+            MODEL_ID, device_map="auto", torch_dtype=torch.float16
+        )
+        st.write("Model and tokenizer successfully loaded.")
+        return tokenizer, model
+    except Exception as e:
+        st.error(f"Error loading model: {e}")
+        return None, None
+# Load the model and tokenizer
+@st.cache_resource
+def get_model():
+    return load_model()
+tokenizer, model = get_model()
+# Streamlit UI
+st.title("Medical Chatbot")
+st.write("This chatbot provides medical assistance. Type your question below!")
+if model is None or tokenizer is None:
+    st.error("Model failed to load. Please check the Hugging Face model path or environment configuration.")
+else:
+    user_input = st.text_input("You:", placeholder="Enter your medical question here...", key="input_box")
+    if st.button("Send"):
+        if user_input.strip():
+            # Construct the prompt
+            SYSTEM_PROMPT = "You are a helpful medical assistant. Provide accurate and concise answers."
+            full_prompt = f"{SYSTEM_PROMPT}\nUser: {user_input}\nAssistant:"
+            # Tokenize the input
+            inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True).to("cuda")
+            try:
+                # Generate the response
+                outputs = model.generate(
+                    inputs["input_ids"],
+                    max_length=200,  # Limit response length
+                    temperature=0.7,  # Control randomness
+                    top_p=0.9,  # Top-p sampling
+                    pad_token_id=tokenizer.eos_token_id
+                )
+                # Decode and display the response
+                response = tokenizer.decode(outputs[0], skip_special_tokens=True).split("Assistant:")[-1].strip()
+                st.write(f"**Model:** {response}")
+            except Exception as e:
+                st.error(f"Error generating response: {e}")
+        else:
+            st.warning("Please enter a valid question.")

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+streamlit
+llama-cpp-python
+huggingface-hub

run_model.py ADDED Viewed

	@@ -0,0 +1,36 @@

+from llama_cpp import Llama
+# Path to the GGUF model file
+MODEL_PATH = "llama-3.1-8B.gguf"
+# Load the model
+print("Loading the model...")
+try:
+    llama = Llama(model_path=MODEL_PATH, n_ctx=1024, n_threads=4)
+    print("Model loaded successfully!")
+except Exception as e:
+    print(f"Failed to load the model: {e}")
+    exit(1)
+# Chat loop
+print("Chat with the model! Type 'exit' to end the conversation.")
+while True:
+    user_input = input("You: ").strip()
+    if user_input.lower() == "exit":
+        print("Exiting chat. Goodbye!")
+        break
+    # Query the model
+    print("Thinking...")
+    response = llama(
+        user_input,
+        max_tokens=50,       # Limit response length
+        temperature=0.7,     # Control randomness
+        top_p=0.9,           # Top-p sampling
+        stop=["You:"]        # Stop at the next user prompt
+    )
+    # Extract and clean response text
+    response_text = response['choices'][0]['text'].strip()
+    print(f"Model: {response_text}")