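The script below uses llama-cpp-python (installed with `pip install llama-cpp-python`) to load a local GGUF model and run a simple interactive chat loop in the terminal: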
```python
import sys

from llama_cpp import Llama

# Path to the GGUF model file
MODEL_PATH = "llama-3.1-8B.gguf"

# Load the model
print("Loading the model...")
try:
    llama = Llama(model_path=MODEL_PATH, n_ctx=1024, n_threads=4)
    print("Model loaded successfully!")
except Exception as e:
    print(f"Failed to load the model: {e}")
    sys.exit(1)

# Chat loop
print("Chat with the model! Type 'exit' to end the conversation.")
while True:
    user_input = input("You: ").strip()
    if user_input.lower() == "exit":
        print("Exiting chat. Goodbye!")
        break

    # Query the model
    print("Thinking...")
    response = llama(
        user_input,
        max_tokens=50,    # Limit response length
        temperature=0.7,  # Control randomness
        top_p=0.9,        # Top-p sampling
        stop=["You:"]     # Stop at the next user prompt
    )

    # Extract and clean the response text
    response_text = response['choices'][0]['text'].strip()
    print(f"Model: {response_text}")
```