import os
import gradio as gr
from ctransformers import LlamaForCausalLM, LlamaTokenizer

# Read the Hugging Face secret key from the environment (None when unset).
# NOTE(review): secret_key is not referenced anywhere else in the visible code.
secret_key = os.environ.get("HUGGING_FACE_SECRET_KEY")

# The model and the tokenizer are loaded from the same repository/revision pair.
# NOTE(review): the GGUF file name is passed as `revision`, and the loader classes
# are imported from `ctransformers` — confirm both against that library's API.
_MODEL_REPO = "Arrcttacsrks/Llama-3.2-3B-Instruct_gguf"
_MODEL_REVISION = "Llama-3.2-3B-Instruct-Q8_0.gguf"

# Initialise the model first, then the tokenizer.
model = LlamaForCausalLM.from_pretrained(_MODEL_REPO, revision=_MODEL_REVISION)
tokenizer = LlamaTokenizer.from_pretrained(_MODEL_REPO, revision=_MODEL_REVISION)

def chat_with_model(user_input: str) -> str:
    """Generate the model's reply to a single user message.

    The text is encoded with the module-level ``tokenizer``, a sampled
    continuation is produced by the module-level ``model``, and only the
    newly generated tokens are decoded and returned.

    Args:
        user_input: Text typed by the user.

    Returns:
        The decoded reply with special tokens removed, or an empty string
        when ``user_input`` is empty or contains only whitespace.
    """
    # Nothing to answer: skip the expensive generation call entirely.
    if not user_input or not user_input.strip():
        return ""

    # Encode the input.
    input_ids = tokenizer.encode(user_input, return_tensors="pt")
    prompt_length = input_ids.shape[-1]

    # Generate the reply.
    # Assumes the transformers-style generate() API (TODO confirm against the
    # class actually loaded): there `max_length` also counts the prompt tokens,
    # so a long question would leave little or no room for an answer.
    # `max_new_tokens` budgets the reply independently of the prompt length.
    output = model.generate(
        input_ids,
        max_new_tokens=200,
        num_return_sequences=1,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        num_beams=1,
    )

    # Decode the reply. The returned sequence is presumed to start with the
    # prompt tokens (decoder-only generation), so drop them to avoid echoing
    # the user's question back in the answer box.
    response = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)
    return response

# Build the Gradio UI: one text input routed through chat_with_model and one
# text output showing its return value.
question_box = gr.Textbox(placeholder="Nhập câu hỏi của bạn...")
answer_box = gr.Textbox()
interface = gr.Interface(
    fn=chat_with_model,
    inputs=question_box,
    outputs=answer_box,
    title="Chat với LLaMA 3.2",
    description="Nhập câu hỏi để nhận phản hồi từ mô hình LLaMA 3.2 3B Instruct.",
)

# Start the application.
interface.launch()