import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "lamapi/next-1b"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)
model.eval()


def chat(message, history):
    # Rebuild the conversation in the model's plain-text turn format.
    prompt = ""
    for user, bot in history or []:
        prompt += f"user\n{user}\nmodel\n{bot}\n"
    prompt += f"user\n{message}\nmodel\n"

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=2048,
            do_sample=True,
            temperature=0.9,
            top_k=140,
            top_p=0.9,
        )

    # Decode only the newly generated tokens instead of splitting the full
    # decoded text on "\nmodel", which would break if the reply itself
    # contained that substring.
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    reply = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    return reply  # we return just a string!


iface = gr.ChatInterface(
    fn=chat,
    title="Next-1B Chatbot ⚡",
)

iface.launch(share=True, server_name="0.0.0.0", server_port=7860)
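
# Note: the script above assembles the prompt by hand with bare
# "user"/"model" markers. If the lamapi/next-1b tokenizer ships a chat
# template (an assumption; not confirmed by this snippet), the manual
# string assembly in chat() could be replaced with the standard
# transformers helper, which also inserts any special turn tokens the
# model was trained with. A minimal sketch:
#
#     messages = [{"role": "user", "content": message}]
#     prompt = tokenizer.apply_chat_template(
#         messages,
#         tokenize=False,
#         add_generation_prompt=True,
#     )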