Spaces:
Sleeping
Sleeping
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
| def load_model() -> Llama: | |
| """Downlaod model from Huggingface Hub and load it.""" | |
| try: | |
| model = Llama( | |
| model_path=hf_hub_download( | |
| repo_id="microsoft/Phi-3-mini-4k-instruct-gguf", | |
| filename="Phi-3-mini-4k-instruct-q4.gguf", | |
| ), | |
| n_ctx=4096, | |
| n_threads=8, | |
| n_gpu_layers=0, | |
| stop=["\n", " Q:"], | |
| ) | |
| return model | |
| except Exception as e: | |
| raise Exception(f"Failed to load model: {e}") | |