from typing import Dict, List

from llama_cpp import Llama

# Shared llama.cpp settings: offload up to 100 layers to the first GPU, quiet logs.
llama_args = {"n_gpu_layers": 100, "main_gpu": 0, "verbose": False}


class Model:
    # Common interface every backend below implements.
    def __init__(self):
        pass

    def __call__(self, msg: str, stop: List[str], max_tokens: int):
        # Run a raw completion on an already-formatted prompt.
        raise NotImplementedError

    def conv(self, msgs: List[Dict[str, str]]) -> str:
        # Render a chat history ({"role", "content"} dicts) into the model's prompt template.
        raise NotImplementedError

    def starttok(self, user: str) -> str:
        # Opening tokens of the next turn, so generation continues as `user`.
        raise NotImplementedError

    def start(self) -> str:
        # Optional prompt prefix (e.g. a BOS marker); empty by default.
        return ""

    def close(self):
        pass
class Phi35RPMax(Model):
    def __init__(self):
        self.llm = Llama.from_pretrained(
            repo_id="ArliAI/Phi-3.5-mini-3.8B-ArliAI-RPMax-v1.1-GGUF",
            filename="ArliAI-RPMax-3.8B-v1.1-fp16.gguf",
            **llama_args,
        )

    def __call__(self, msg: str, stop: List[str], max_tokens: int):
        print("Autocomplete: ", msg)
        return self.llm(msg, stop=stop, max_tokens=max_tokens)

    def conv(self, msgs: List[Dict[str, str]]):
        # Phi-3.5 chat template: <|role|>\ncontent<|end|>
        return "\n".join([f"<|{msg['role']}|>\n{msg['content']}<|end|>" for msg in msgs])

    def starttok(self, user: str):
        return f"<|{user}|>\n"

    def close(self):
        self.llm.close()
Phi35RPMax.modelname = "Phi35RPMax-fp16"

class Phi35(Model):
    def __init__(self):
        self.llm = Llama.from_pretrained(
            repo_id="bartowski/Phi-3.5-mini-instruct-GGUF",
            filename="Phi-3.5-mini-instruct-f32.gguf",
            **llama_args,
        )

    def __call__(self, msg: str, stop: List[str], max_tokens: int):
        print("Autocomplete: ", msg)
        return self.llm(msg, stop=stop, max_tokens=max_tokens)

    def conv(self, msgs: List[Dict[str, str]]):
        # Same Phi-3.5 template as above.
        return "\n".join([f"<|{msg['role']}|>\n{msg['content']}<|end|>" for msg in msgs])

    def starttok(self, user: str):
        return f"<|{user}|>\n"

    def close(self):
        self.llm.close()
Phi35.modelname = "Phi35-f32"  # label matches the f32 file loaded above

# TODO: Gemma2 requires a license agreement; maybe try it in the future, but it doesn't seem worth it.
# class Gemma2(Model):
#     def __init__(self):
#         self.llm = Llama.from_pretrained(
#             repo_id="google/gemma-2-2b-it-GGUF",
#             filename="2b_it_v2.gguf",
#         )
#     def __call__(self, msg: str, stop: List[str], max_tokens: int):
#         return self.llm(msg, stop=stop, max_tokens=max_tokens)
#     def conv(self, msgs: List[Dict[str, str]]):  # https://ai.google.dev/gemma/docs/formatting?hl=de
#         # Still the Phi template; should be rebuilt from formatmessage() below before use.
#         return "\n".join([f"<|{msg['role']}|>\n{msg['content']}<|end|>" for msg in msgs])
#     def formatmessage(self, msg: str, role: str):  # https://ai.google.dev/gemma/docs/formatting?hl=de
#         if role == "system":
#             # Gemma2 does not support system messages / isn't trained for them.
#             # TODO: Turn them into assistant messages and test whether that improves results.
#             return ""
#         if role == "assistant":
#             role = "model"
#         return f"<start_of_turn>{role}\n{msg}<end_of_turn>"
#     def starttok(self, user: str):
#         return f"<start_of_turn>{user}\n"
#     def close(self):
#         self.llm.close()
# Gemma2.modelname = "Gemma2-2b-it-GGUF"

class Llama31uncensored(Model):
    def __init__(self):
        self.llm = Llama.from_pretrained(
            repo_id="Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2-GGUF",
            filename="Llama-3.1-8B-Lexi-Uncensored_V2_F16.gguf",
            **llama_args,
        )

    def __call__(self, msg: str, stop: List[str], max_tokens: int):
        print("Autocomplete: ", msg)
        return self.llm(msg, stop=stop, max_tokens=max_tokens)

    def start(self):
        # Llama 3.1 prompts begin with a single BOS marker.
        return "<|begin_of_text|>"

    def conv(self, msgs: List[Dict[str, str]]):
        # Llama 3.1 chat template: <|start_header_id|>role<|end_header_id|>\n\ncontent<|eot_id|>
        return "\n".join(
            f"<|start_header_id|>{msg['role']}<|end_header_id|>\n\n{msg['content']}<|eot_id|>"
            for msg in msgs
        )

    def starttok(self, user: str):
        return f"<|start_header_id|>{user}<|end_header_id|>\n\n"

    def close(self):
        self.llm.close()
Llama31uncensored.modelname = "Llama31-uncensored-fp16"

class Llama31(Model):
    def __init__(self):
        self.llm = Llama.from_pretrained(
            repo_id="lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF",
            filename="Meta-Llama-3.1-8B-Instruct-IQ4_XS.gguf",
            **llama_args,
        )

    def __call__(self, msg: str, stop: List[str], max_tokens: int):
        print("Autocomplete: ", msg)
        return self.llm(msg, stop=stop, max_tokens=max_tokens)

    def start(self):
        # Emit <|begin_of_text|> once at the start of the prompt rather than
        # once per message, matching Llama31uncensored above.
        return "<|begin_of_text|>"

    def conv(self, msgs: List[Dict[str, str]]):
        return "\n".join(
            f"<|start_header_id|>{msg['role']}<|end_header_id|>\n\n{msg['content']}<|eot_id|>"
            for msg in msgs
        )

    def starttok(self, user: str):
        # The double newline after <|end_header_id|> is part of the template.
        return f"<|start_header_id|>{user}<|end_header_id|>\n\n"

    def close(self):
        self.llm.close()
Llama31.modelname = "Llama31-IQ4_XS"
models = [Phi35RPMax, Phi35, Llama31uncensored, Llama31]
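
# Usage sketch (not part of the original file): a minimal example of how a
# caller might combine start()/conv()/starttok() into a prompt and run a
# completion. The message list and the stop token are illustrative
# assumptions, not values taken from the original code.
if __name__ == "__main__":
    model = Phi35RPMax()
    history = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Say hi."},
    ]
    # Prompt = optional BOS prefix + rendered history + opening tokens of the next turn.
    prompt = model.start() + model.conv(history) + model.starttok("assistant")
    out = model(prompt, stop=["<|end|>"], max_tokens=64)
    print(out["choices"][0]["text"])
    model.close()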