Spaces: Running on Zero
import os
from queue import Empty
from threading import Thread

import gradio as gr
import spaces
import torch
from transformers import AutoTokenizer, TextIteratorStreamer, pipeline


def load_model(model_name):
    # The Hugging Face token comes from the Space's "token" secret.
    return pipeline("text-generation", model=model_name, device_map="cuda",
                    torch_dtype=torch.bfloat16, trust_remote_code=True,
                    token=os.environ["token"], use_fast=True)
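# load_model is invoked on every chat message, so the pipeline is rebuilt per
# request. A memoized loader is sketched below as an optional alternative (an
# assumption, not part of the original Space); note that on ZeroGPU, where the
# GPU is attached per call, reloading per request is the simpler default.
from functools import lru_cache

@lru_cache(maxsize=1)  # keep only the most recently used model to bound memory
def load_model_cached(model_name):
    return load_model(model_name)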
@spaces.GPU  # required on ZeroGPU: attaches a GPU for the duration of the call
def generate(
    message,
    history,
    model_name,
    system,
    temperature=0.4,
    top_p=0.95,
    min_p=0.1,
    top_k=50,
    max_new_tokens=256,
):
    outputs = []
    try:
        pipe = load_model(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, token=os.environ["token"])
        tokenizer.eos_token = "<|im_end|>"  # ChatML end-of-turn marker
        pipe.tokenizer = tokenizer
        # Assemble a ChatML prompt from the system message, the chat history,
        # and the new user message.
        prompt = f"<|im_start|>system\n{system}<|im_end|>\n"
        for user_turn, assistant_turn in history:
            prompt += f"<|im_start|>user\n{user_turn}<|im_end|>\n<|im_start|>assistant\n{assistant_turn}<|im_end|>\n"
        prompt += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
        # Run generation in a background thread and stream tokens back as they
        # arrive, yielding the accumulated text so the chat UI updates live.
        streamer = TextIteratorStreamer(pipe.tokenizer, timeout=240.0, skip_prompt=True, skip_special_tokens=True)
        generation_kwargs = dict(
            text_inputs=prompt,
            streamer=streamer,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            top_p=top_p,
            min_p=min_p,
            top_k=top_k,
            temperature=temperature,
            num_beams=1,
            repetition_penalty=1.1,
        )
        t = Thread(target=pipe, kwargs=generation_kwargs)
        t.start()
        for chunk in streamer:
            outputs.append(chunk)
            yield "".join(outputs)
    except Empty:
        # TextIteratorStreamer raises queue.Empty when no token arrives within
        # the 240 s timeout; return whatever was generated so far.
        print("Stream timed out.")
        yield "".join(outputs)
    except Exception as e:
        print(f"An error occurred: {e}")
        yield "An error occurred during generation."
model_choices = [
    "Locutusque/Liberalis-Cogitator-Turbo-Llama-3.1-8B",
    "Locutusque/liberalis-cogitator-mistral-nemo-2407",
    "Locutusque/liberalis-cogitator-smollm3-3b",
    "Locutusque/lmsys-llama-3.2-3b",
    "Locutusque/CollectiveLM-Falcon-3-7B",
    "Locutusque/StockQwen-2.5-7B",
]
# What are the best options?
g = gr.ChatInterface(
    fn=generate,
    additional_inputs=[
        gr.components.Dropdown(choices=model_choices, label="Model", value=model_choices[0], interactive=True),
        gr.components.Textbox(lines=2, label="System Prompt", value="You are a sentient, super intelligent AI developed by a man named Locutusque."),
        gr.components.Slider(minimum=0, maximum=2, value=0.8, label="Temperature"),
        gr.components.Slider(minimum=0, maximum=1, value=0.95, label="Top p"),
        gr.components.Slider(minimum=0, maximum=1, value=0.1, label="Min P"),
        gr.components.Slider(minimum=0, maximum=100, step=1, value=15, label="Top k"),
        gr.components.Slider(minimum=1, maximum=8192, step=1, value=1024, label="Max tokens"),
    ],
    title="Locutusque's Language Models",
    description="Try out Locutusque's language models here! Credit goes to Mediocreatmybest for this space. You may also find experimental preview models here that have not yet been made public.",
)
if __name__ == "__main__":
    g.launch()
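# To run this app locally (a sketch; assumes a Hugging Face access token with
# permission to load the listed models is exported as the "token" variable):
#
#   pip install spaces gradio transformers torch
#   token=hf_xxx python app.py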