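"""Streamlit chat interface that runs a small Hugging Face text-generation
model on CPU.

Assumed dependencies (not pinned by the source): streamlit, transformers,
torch, and accelerate (required by the pipeline's device_map argument).
"""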
import streamlit as st
from transformers import pipeline, AutoTokenizer
def initialize_model():
    """Initialize a small and fast model for CPU."""
    # Use a tiny model optimized for CPU (125M parameters); this matches
    # the "Using OPT-125M model" note in the sidebar below
    model_id = "facebook/opt-125m"
    # model_id = "GEB-AGI/geb-1.3b"  # larger alternative, much slower on CPU
    try:
        # Initialize the pipeline directly - more efficient than loading
        # the model and tokenizer separately
        pipe = pipeline(
            "text-generation",
            model=model_id,
            device_map="cpu",
            model_kwargs={"low_cpu_mem_usage": True},
        )
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        return pipe, tokenizer
    except Exception as e:
        print(f"Error loading model: {e}")
        raise
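# Design note: the app loads the pipeline once and stashes it in
# st.session_state (see main below); Streamlit's st.cache_resource decorator
# is a common alternative that would also share the model across sessions.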
def generate_response(pipe, tokenizer, prompt, conversation_history):
    """Generate a model response for the given prompt."""
    try:
        # Format conversation context from the most recent turns
        context = ""
        for turn in conversation_history[-3:]:  # only last 3 turns for efficiency
            context += f"Human: {turn['user']}\nAssistant: {turn['assistant']}\n"

        # Create the full prompt
        full_prompt = f"{context}Human: {prompt}\nAssistant:"

        # Generate a response with conservative sampling parameters
        response = pipe(
            full_prompt,
            max_new_tokens=50,  # limit response length
            temperature=0.7,
            top_p=0.9,
            num_return_sequences=1,
            do_sample=True,
            # fall back to EOS when the tokenizer has no pad token
            # ("is not None" matters: a pad token id of 0 is falsy)
            pad_token_id=(
                tokenizer.pad_token_id
                if tokenizer.pad_token_id is not None
                else tokenizer.eos_token_id
            ),
        )[0]["generated_text"]

        # Extract only the assistant's reply after the last "Assistant:" marker
        # (str.split cannot raise here, so no inner try/except is needed)
        assistant_response = response.split("Assistant:")[-1].strip()
        if not assistant_response:
            return "I apologize, but I couldn't generate a proper response."
        return assistant_response
    except Exception as e:
        return f"An error occurred: {e}"
def main():
    st.set_page_config(page_title="LLM Chat Interface", page_icon="🤖")
    st.title("💬 Quick Chat Assistant")

    # Initialize session state
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []
    if "model_loaded" not in st.session_state:
        st.session_state.model_loaded = False

    # Initialize the model (only once per session)
    if not st.session_state.model_loaded:
        with st.spinner("Loading the model... (this should take just a few seconds)"):
            try:
                pipe, tokenizer = initialize_model()
                st.session_state.pipe = pipe
                st.session_state.tokenizer = tokenizer
                st.session_state.model_loaded = True
            except Exception as e:
                st.error(f"Error loading model: {e}")
                return
    # Display prior chat messages
    for message in st.session_state.chat_history:
        with st.chat_message("user"):
            st.write(message["user"])
        with st.chat_message("assistant"):
            st.write(message["assistant"])

    # Chat input
    if prompt := st.chat_input("Ask me anything!"):
        # Display the user message
        with st.chat_message("user"):
            st.write(prompt)
        # Generate and display the assistant response
        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                response = generate_response(
                    st.session_state.pipe,
                    st.session_state.tokenizer,
                    prompt,
                    st.session_state.chat_history,
                )
                st.write(response)
        # Append the completed turn only after generating, so the
        # in-progress turn doesn't leak into the model's context
        st.session_state.chat_history.append({"user": prompt, "assistant": response})
        # Keep only the last 5 turns
        if len(st.session_state.chat_history) > 5:
            st.session_state.chat_history = st.session_state.chat_history[-5:]
    # Sidebar
    with st.sidebar:
        if st.button("Clear Chat"):
            st.session_state.chat_history = []
            st.rerun()
        st.markdown("---")
        # Build the info text without leading indentation so markdown
        # doesn't render it as a code block
        st.markdown(
            "### Chat Info\n"
            "- Using OPT-125M model\n"
            "- Optimized for quick responses\n"
            "- Best for short conversations"
        )


if __name__ == "__main__":
    main()
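# To try the app locally (assuming the file is saved as app.py and the
# dependencies listed in the module docstring are installed):
#
#     streamlit run app.py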