Spaces:

Intel
/

powered_by_intel_llm_leaderboard

Runtime error

App Files Files Community

eduardo-alvarez committed on Mar 9, 2024

Commit

6af4a5e

verified ·

1 Parent(s): fee1876

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -32

app.py CHANGED Viewed

@@ -52,39 +52,49 @@ with demo:
         #chat_model_selection = chat_model_dropdown.value
         chat_model_selection = 'Intel/neural-chat-7b-v1-1'
-       #def slow_echo(message, history):
-       #    for i in range(len(message)):
-       #        time.sleep(0.3)
-       #        yield "You typed: " + message[: i+1]
-#
-       #gr.ChatInterface(slow_echo).launch()
-        #def call_api_and_stream_response(query, chat_model):
-        #    """
-        #    Call the API endpoint and yield characters as they are received.
-        #    This function simulates streaming by yielding characters one by one.
-        #    """
-        #    url = inference_endpoint_url
-        #    params = {"query": query,"selected_model":chat_model}
-        #    with requests.get(url, json=params, stream=True) as r:
-        #        for chunk in r.iter_content(chunk_size=1):
-        #            if chunk:
-        #                yield chunk.decode()
-        #def get_response(query, history):
-        #    """
-        #    Wrapper function to call the streaming API and compile the response.
-        #    """
-        #    response = ''
-        #
-        #    global chat_model_selection
-        #
-        #    for char in call_api_and_stream_response(query, chat_model=chat_model_selection):
-        #        if char == '<':
-        #            break
-        #        response += char
-        #        yield response
-        #
-        #gr.ChatInterface(get_response, retry_btn = None, undo_btn=None, concurrency_limit=inference_concurrency_limit).launch()
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏆 LLM Leadeboard", elem_id="llm-benchmark-table", id=0):

         #chat_model_selection = chat_model_dropdown.value
         chat_model_selection = 'Intel/neural-chat-7b-v1-1'
+        def call_api_and_stream_response(query, chat_model):
+            """
+            Call the API endpoint and yield characters as they are received.
+            This function simulates streaming by yielding characters one by one.
+            """
+            url = inference_endpoint_url
+            params = {"query": query,"selected_model":chat_model}
+            with requests.get(url, json=params, stream=True) as r:
+                for chunk in r.iter_content(chunk_size=1):
+                    if chunk:
+                        yield chunk.decode()
+        def get_response(query, history):
+            """
+            Wrapper function to call the streaming API and compile the response.
+            """
+            response = ''
+            global chat_model_selection
+            for char in call_api_and_stream_response(query, chat_model=chat_model_selection):
+                if char == '<':
+                    break
+                response += char
+                yield response
+        with gr.Blocks():
+            with gr.Row():
+                message_input = gr.Textbox(label="Your message")
+                submit_button = gr.Button("Submit")
+                clear_button = gr.Button("Clear")
+            chatbox = gr.Chatbot()
+            submit_button.click(
+                fn=get_response,
+                inputs=message_input,
+                outputs=chatbox
+            )
+            clear_button.click(
+                fn=clear_chat,
+                inputs=[],
+                outputs=chatbox
+            )
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏆 LLM Leadeboard", elem_id="llm-benchmark-table", id=0):