Spaces:

Writer
/

palmyra-sec

Running

App Files Files Community

wassemgtk committed on Oct 3

Commit

43c6ebf

verified ·

1 Parent(s): 966dd1e

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -29

app.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import os
 import json
-import time
 import requests
 import gradio as gr
@@ -22,8 +21,7 @@ def _fireworks_stream(payload):
         "Content-Type": "application/json",
         "Authorization": f"Bearer {FIREWORKS_API_KEY}",
     }
-    # Ensure we stream
-    payload = dict(payload)  # shallow copy
     payload["stream"] = True
     with requests.post(FIREWORKS_URL, headers=headers, json=payload, stream=True) as r:
         r.raise_for_status()
@@ -38,33 +36,41 @@ def _fireworks_stream(payload):
                 try:
                     obj = json.loads(data)
                 except json.JSONDecodeError:
-                    # In case of partial line; accumulate
                     buffer += data
                     try:
                         obj = json.loads(buffer)
                         buffer = ""
                     except Exception:
                         continue
-                # Fireworks streams OpenAI-style deltas
                 try:
                     delta = obj["choices"][0]["delta"]
                     if "content" in delta and delta["content"]:
                         yield delta["content"]
                 except Exception:
-                    # Some events may be role changes or tool calls; ignore silently
                     continue
 def _build_messages(history, user_message):
     messages = []
-    # Insert a hidden system message from server-side secret
     if SYSTEM_PROMPT:
         messages.append({"role": "system", "content": SYSTEM_PROMPT})
-    # History from Gradio ChatInterface comes as list of (user, assistant) tuples
-    for u, a in history:
-        if u:
-            messages.append({"role": "user", "content": u})
-        if a:
-            messages.append({"role": "assistant", "content": a})
     if user_message:
         messages.append({"role": "user", "content": user_message})
     return messages
@@ -80,7 +86,6 @@ def chat_fn(user_message, history, max_tokens, temperature, top_p, top_k, presen
         "frequency_penalty": float(frequency_penalty),
         "messages": _build_messages(history, user_message),
     }
-    # Stream tokens back to the UI
     for token in _fireworks_stream(payload):
         yield token
@@ -96,14 +101,15 @@ div.controls { gap: 10px !important; }
     <div style="display:flex; align-items:center; gap:12px; margin: 6px 0 16px;">
       <svg width="28" height="28" viewBox="0 0 24 24" fill="none"><path d="M12 3l7 4v6c0 5-7 8-7 8s-7-3-7-8V7l7-4z" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/></svg>
       <div>
-        <div id="title" style="font-size:1.25rem;">Palmyra-sec Chat Playground</div>
         <div style="opacity:0.7; font-size:0.95rem;">Secure, streamed chat to <code>inference/v1/chat/completions</code></div>
       </div>
     </div>
     """)
     with gr.Row():
         with gr.Column(scale=3):
-            chatbot = gr.Chatbot(height=480, avatar_images=(None, None), bubble_full_width=False)
             with gr.Row(elem_classes=["controls"]):
                 max_tokens = gr.Slider(32, 8192, value=4000, step=16, label="Max tokens")
                 temperature = gr.Slider(0.0, 2.0, value=0.6, step=0.05, label="Temperature")
@@ -115,8 +121,7 @@ div.controls { gap: 10px !important; }
                 frequency_penalty = gr.Slider(-2.0, 2.0, value=0.0, step=0.05, label="frequency_penalty")
             gr.Markdown("""
 **Security notes**
-- Your API key and system prompt are kept on the server as environment variables.
-- They are never shown in the UI or sent to the browser.
 - Change the model id with `FIREWORKS_MODEL_ID` (env var).
             """)
             clear_btn = gr.Button("Clear", variant="secondary")
@@ -125,22 +130,11 @@ div.controls { gap: 10px !important; }
         chatbot=chatbot,
         additional_inputs=[max_tokens, temperature, top_p, top_k, presence_penalty, frequency_penalty],
         title=None,
-        retry_btn=None,
-        undo_btn="Undo last",
-        clear_btn=None,
         submit_btn="Send",
-        autofocus=True,
-        fill_height=False,
-        cache_examples=False,
-        concurrency_limit=10,
-        multimodal=False,
-        analytics_enabled=False,
-        enable_queue=True,
         examples=["Hello!", "Summarize: Why is retrieval-augmented generation useful for insurers?", "Write a 3-bullet status update for the Palmyra team."],
         description="Start chatting below. Streaming is enabled."
     )
     clear_btn.click(fn=clear_history, outputs=chatbot)
 if __name__ == "__main__":
-    # Use 0.0.0.0 for container friendliness; set GRADIO_SERVER_PORT externally if needed
     demo.queue().launch(server_name="0.0.0.0")

 import os
 import json
 import requests
 import gradio as gr
         "Content-Type": "application/json",
         "Authorization": f"Bearer {FIREWORKS_API_KEY}",
     }
+    payload = dict(payload)
     payload["stream"] = True
     with requests.post(FIREWORKS_URL, headers=headers, json=payload, stream=True) as r:
         r.raise_for_status()
                 try:
                     obj = json.loads(data)
                 except json.JSONDecodeError:
                     buffer += data
                     try:
                         obj = json.loads(buffer)
                         buffer = ""
                     except Exception:
                         continue
                 try:
                     delta = obj["choices"][0]["delta"]
                     if "content" in delta and delta["content"]:
                         yield delta["content"]
                 except Exception:
                     continue
+def _normalize_history_to_messages(history):
+    """Normalize history from Gradio into OpenAI-style messages without system prompt."""
+    # Chatbot(type='messages') already gives a list of dicts: [{'role': 'user'|'assistant', 'content': '...'}, ...]
+    if not history:
+        return []
+    if isinstance(history, list) and len(history) > 0 and isinstance(history[0], dict) and "role" in history[0]:
+        # Already messages format; pass through (filter any roles other than user/assistant)
+        return [m for m in history if m.get("role") in ("user", "assistant")]
+    # Back-compat: history may be list of (user, assistant) tuples
+    msgs = []
+    for u, a in history:
+        if u:
+            msgs.append({"role": "user", "content": u})
+        if a:
+            msgs.append({"role": "assistant", "content": a})
+    return msgs
 def _build_messages(history, user_message):
     """Assemble the full message list sent to the chat-completions endpoint.

     Order is: optional system prompt (when ``SYSTEM_PROMPT`` is truthy), the
     normalized prior turns from ``history``, then the new ``user_message``
     when it is non-empty.
     """
     system_part = [{"role": "system", "content": SYSTEM_PROMPT}] if SYSTEM_PROMPT else []
     prior_turns = _normalize_history_to_messages(history)
     user_part = [{"role": "user", "content": user_message}] if user_message else []
     return [*system_part, *prior_turns, *user_part]
         "frequency_penalty": float(frequency_penalty),
         "messages": _build_messages(history, user_message),
     }
     for token in _fireworks_stream(payload):
         yield token
     <div style="display:flex; align-items:center; gap:12px; margin: 6px 0 16px;">
       <svg width="28" height="28" viewBox="0 0 24 24" fill="none"><path d="M12 3l7 4v6c0 5-7 8-7 8s-7-3-7-8V7l7-4z" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/></svg>
       <div>
+        <div id="title" style="font-size:1.25rem;">Fireworks Chat Playground</div>
         <div style="opacity:0.7; font-size:0.95rem;">Secure, streamed chat to <code>inference/v1/chat/completions</code></div>
       </div>
     </div>
     """)
     with gr.Row():
         with gr.Column(scale=3):
+            # Use messages format to avoid deprecation
+            chatbot = gr.Chatbot(height=480, type="messages", avatar_images=(None, None))
             with gr.Row(elem_classes=["controls"]):
                 max_tokens = gr.Slider(32, 8192, value=4000, step=16, label="Max tokens")
                 temperature = gr.Slider(0.0, 2.0, value=0.6, step=0.05, label="Temperature")
                 frequency_penalty = gr.Slider(-2.0, 2.0, value=0.0, step=0.05, label="frequency_penalty")
             gr.Markdown("""
 **Security notes**
+- API key and system prompt are server-side environment variables.
 - Change the model id with `FIREWORKS_MODEL_ID` (env var).
             """)
             clear_btn = gr.Button("Clear", variant="secondary")
         chatbot=chatbot,
         additional_inputs=[max_tokens, temperature, top_p, top_k, presence_penalty, frequency_penalty],
         title=None,
         submit_btn="Send",
         examples=["Hello!", "Summarize: Why is retrieval-augmented generation useful for insurers?", "Write a 3-bullet status update for the Palmyra team."],
         description="Start chatting below. Streaming is enabled."
     )
     clear_btn.click(fn=clear_history, outputs=chatbot)
 if __name__ == "__main__":
     demo.queue().launch(server_name="0.0.0.0")