#app.py.chatbot
#app.py Modif04
#https:
import gradio as gr
from llama_cpp import Llama

# Load the local GGUF model with llama-cpp-python
llm = Llama(
    model_path="/home/user/app/h2o-danube3-500m-chat-Q4_K_M.gguf",
    verbose=True
)
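
# Optional (not in the original call, shown only as a sketch): llama-cpp-python's
# Llama also accepts context and threading parameters, which can matter on a small
# CPU Space, e.g.:
#
#     llm = Llama(
#         model_path="/home/user/app/h2o-danube3-500m-chat-Q4_K_M.gguf",
#         n_ctx=2048,      # context window in tokens
#         n_threads=2,     # number of CPU threads to use
#         verbose=True,
#     )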
					
					
						

def predict(message, history):
#    messages = [{"role": "system", "content": "You are a helpful assistant."}]
#    messages = [{"role": "assistant", "content": "You are a helpful assistant."}]
#    messages = [{"role": "assistant", "content": "Bonjour, comment puis-je vous aider?"}]
    # Rebuild the conversation history as chat-completion style messages
    messages = []
    for user_message, bot_message in history:
        if user_message:
            messages.append({"role": "user", "content": user_message})
        if bot_message:
            messages.append({"role": "assistant", "content": bot_message})
    messages.append({"role": "user", "content": message})
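    # For illustration only (not in the original file): after one prior exchange,
    # the list built above looks roughly like
    #   [{"role": "user", "content": "Bonjour"},
    #    {"role": "assistant", "content": "Bonjour, comment puis-je vous aider?"},
    #    {"role": "user", "content": message}]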
					
					
						
    response = ""
    # Stream the completion and yield the accumulated text so the UI updates live
    for chunk in llm.create_chat_completion(
        stream=True,
        messages=messages,
    ):
        part = chunk["choices"][0]["delta"].get("content", None)
        if part:
            response += part
        yield response

demo = gr.ChatInterface(predict)

demo.launch()
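
## Quick manual check of predict() outside the Gradio UI (illustrative sketch, not
## part of the original app; if used, run it before demo.launch(), which blocks):
##
##     final = ""
##     for partial in predict("Bonjour !", []):
##         final = partial
##     print(final)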
					
					
						


##app.py Modif03
#import gradio as gr
#from huggingface_hub import create_inference_endpoint, InferenceClient
#from transformers import AutoModelForCausalLM, AutoTokenizer
#
##model_name = "MisterAI/H20GPT_h2o-danube3-500m-chat-Q4_K_M_gguf"
##model = AutoModelForCausalLM.from_pretrained(model_name)
##tokenizer = AutoTokenizer.from_pretrained(model_name)
#
##client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
##client = InferenceClient("MisterAI/H20GPT_h2o-danube3-500m-chat-Q4_K_M_gguf")
##client = InferenceClient("/home/user/app/H20GPT_h2o-danube3-500m-chat-Q4_K_M.gguf")
#
## Create a local Inference instance
#endpoint = create_inference_endpoint(
#    "Local-Endpoint-MisterAI-H2O",
#    repository="MisterAI/H20GPT_h2o-danube3-500m-chat-Q4_K_M_gguf",
##    model_path="/home/user/app/H20GPT_h2o-danube3-500m-chat-Q4_K_M.gguf",
#    framework="pytorch",
#    task="text-generation",
#    accelerator="cpu",
#    vendor="local",
#    region="local",
#    type="unprotected",
#    instance_size="small",
#    instance_type="local",
#    URL="http://0.0.0.0:6789"
#)
#
#print(f"Endpoint created at URL: {endpoint.url}")
#
#client = endpoint.client
#
#
#
#def respond(
#    message,
#    history: list[tuple[str, str]],
#    system_message,
#    max_tokens,
#    temperature,
#    top_p,
#):
#    messages = [{"role": "system", "content": system_message}]
#
#    for val in history:
#        if val[0]:
#            messages.append({"role": "user", "content": val[0]})
#        if val[1]:
#            messages.append({"role": "assistant", "content": val[1]})
#
#    messages.append({"role": "user", "content": message})
#
#    response = ""
#
#    for message in client.chat_completion(
#        messages,
#        max_tokens=max_tokens,
#        stream=True,
#        temperature=temperature,
#        top_p=top_p,
#    ):
#        token = message.choices[0].delta.content
#
#        response += token
#        yield response
#
#demo = gr.ChatInterface(
#    respond,
#    additional_inputs=[
#        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
#        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
#        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
#        gr.Slider(
#            minimum=0.1,
#            maximum=1.0,
#            value=0.95,
#            step=0.05,
#            label="Top-p (nucleus sampling)",
#        ),
#    ],
#)
#
#
#if __name__ == "__main__":
#    demo.launch()
#
#
#
#
##app.py Modif01
#import gradio as gr
#from huggingface_hub import Inference, InferenceClient
#from transformers import AutoModelForCausalLM, AutoTokenizer
#
##model_name = "MisterAI/H20GPT_h2o-danube3-500m-chat-Q4_K_M_gguf"
##model = AutoModelForCausalLM.from_pretrained(model_name)
##tokenizer = AutoTokenizer.from_pretrained(model_name)
#
##client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
##client = InferenceClient("MisterAI/H20GPT_h2o-danube3-500m-chat-Q4_K_M_gguf")
##client = InferenceClient("/home/user/app/H20GPT_h2o-danube3-500m-chat-Q4_K_M.gguf")
#
## Create a local Inference instance
#inference = Inference(
#    model_path="/home/user/app/H20GPT_h2o-danube3-500m-chat-Q4_K_M.gguf",
#    device="cpu",  # Use the CPU for inference
#    token=None,  # No token needed for this instance
#)
#
#client = inference
#
#
#
#def respond(
#    message,
#    history: list[tuple[str, str]],
#    system_message,
#    max_tokens,
#    temperature,
#    top_p,
#):
#    messages = [{"role": "system", "content": system_message}]
#
#    for val in history:
#        if val[0]:
#            messages.append({"role": "user", "content": val[0]})
#        if val[1]:
#            messages.append({"role": "assistant", "content": val[1]})
#
#    messages.append({"role": "user", "content": message})
#
#    response = ""
#
#    for message in client.chat_completion(
#        messages,
#        max_tokens=max_tokens,
#        stream=True,
#        temperature=temperature,
#        top_p=top_p,
#    ):
#        token = message.choices[0].delta.content
#
#        response += token
#        yield response
#
#demo = gr.ChatInterface(
#    respond,
#    additional_inputs=[
#        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
#        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
#        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
#        gr.Slider(
#            minimum=0.1,
#            maximum=1.0,
#            value=0.95,
#            step=0.05,
#            label="Top-p (nucleus sampling)",
#        ),
#    ],
#)
#
#
#if __name__ == "__main__":
#    demo.launch()
#
#
#
#
#
##app.py ORIGINAL
#import gradio as gr
#from huggingface_hub import InferenceClient
#
#"""
#For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
#"""
#client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
#
#
#def respond(
#    message,
#    history: list[tuple[str, str]],
#    system_message,
#    max_tokens,
#    temperature,
#    top_p,
#):
#    messages = [{"role": "system", "content": system_message}]
#
#    for val in history:
#        if val[0]:
#            messages.append({"role": "user", "content": val[0]})
#        if val[1]:
#            messages.append({"role": "assistant", "content": val[1]})
#
#    messages.append({"role": "user", "content": message})
#
#    response = ""
#
#    for message in client.chat_completion(
#        messages,
#        max_tokens=max_tokens,
#        stream=True,
#        temperature=temperature,
#        top_p=top_p,
#    ):
#        token = message.choices[0].delta.content
#
#        response += token
#        yield response
#
#"""
#For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
#"""
#demo = gr.ChatInterface(
#    respond,
#    additional_inputs=[
#        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
#        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
#        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
#        gr.Slider(
#            minimum=0.1,
#            maximum=1.0,
#            value=0.95,
#            step=0.05,
#            label="Top-p (nucleus sampling)",
#        ),
#    ],
#)
#
#
#if __name__ == "__main__":
#    demo.launch()