store-user-feedback

Paused

App Files Files Community

WillHeld committed on May 19

Commit

77129be

verified ·

1 Parent(s): b3a18de

Update app.py

Browse files

Files changed (1) hide show

app.py +195 -155

app.py CHANGED Viewed

@@ -1,180 +1,220 @@
-from __future__ import annotations
-import json
 import os
-import time
 import uuid
-from threading import Thread
-from typing import List, Dict
-import gradio as gr
 from gradio_modal import Modal
-from transformers import (
-    AutoModelForCausalLM,
-    AutoTokenizer,
-    TextIteratorStreamer,
-)
-from datasets import (
-    Dataset,
-    load_dataset,
-    concatenate_datasets,
-    DownloadMode,
-)
-from huggingface_hub import HfApi, login
-import spaces
-# ─────────────────────────── model & constants ────────────────────────────
-checkpoint = "marin-community/marin-8b-instruct"
 device = "cuda"
 tokenizer = AutoTokenizer.from_pretrained(checkpoint)
 model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
-DATASET_REPO = "WillHeld/model-feedback"  # change if forking
-DATA_DIR = "./feedback_data"
-DATA_FILE = "feedback.jsonl"
-os.makedirs(DATA_DIR, exist_ok=True)
-# ─────────────────────────── helper functions ─────────────────────────────
-def save_feedback_locally(conversation: List[Dict[str, str]],
-                          satisfaction: str,
-                          feedback_text: str) -> None:
-    record = {
-        "id": str(uuid.uuid4()),
-        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
         "conversation": conversation,
         "satisfaction": satisfaction,
-        "feedback": feedback_text,
     }
-    with open(os.path.join(DATA_DIR, DATA_FILE), "a", encoding="utf-8") as fp:
-        fp.write(json.dumps(record, ensure_ascii=False) + "\n")
-def push_feedback_to_hub(hf_token: str | None = None) -> bool:
-    hf_token = hf_token or os.getenv("HF_TOKEN")
-    if not hf_token:
-        print("❌  No HF token — skipping Hub push.")
-        return False
-    login(token=hf_token)
-    local_path = os.path.join(DATA_DIR, DATA_FILE)
-    if not os.path.exists(local_path):
-        print("❌  No local feedback to push.")
-        return False
-    with open(local_path, encoding="utf-8") as fp:
-        local_ds = Dataset.from_list([json.loads(l) for l in fp])
     try:
-        remote_ds = load_dataset(
             DATASET_REPO,
-            split="train",
-            token=hf_token,
-            download_mode=DownloadMode.FORCE_REDOWNLOAD,
-        )
-        merged = concatenate_datasets([remote_ds, local_ds]).unique("id")
-    except FileNotFoundError:
-        merged = local_ds
-    except Exception:
-        HfApi(token=hf_token).create_repo(
-            repo_id=DATASET_REPO, repo_type="dataset", private=True
         )
-        merged = local_ds
-    merged.push_to_hub(
-        DATASET_REPO,
-        private=True,
-        commit_message=f"Add {len(local_ds)} new feedback entries",
-    )
-    print(f"✅  Pushed {len(local_ds)} rows; dataset now has {len(merged)} total.")
-    return True
-# ─────────────────────────── chat backend ────────────────────────────────
 @spaces.GPU(duration=120)
-def generate_response(message: str,
-                      history: List[Dict[str, str]],
-                      conversation_state: List[Dict[str, str]],
-                      temperature: float,
-                      top_p: float):
-    """Yields assistant text only; conversation_state is updated in‑place."""
-    # sync state
     history.append({"role": "user", "content": message})
-    conversation_state[:] = history  # keep external state in sync
-    prompt = tokenizer.apply_chat_template(history, tokenize=False,
-                                           add_generation_prompt=True)
-    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
-    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True,
-                                    skip_special_tokens=True)
-    gen_kwargs = dict(
-        input_ids=input_ids,
-        max_new_tokens=1024,
-        temperature=float(temperature),
-        top_p=float(top_p),
-        do_sample=True,
-        streamer=streamer,
-    )
-    Thread(target=model.generate, kwargs=gen_kwargs).start()
-    partial = ""
-    for token in streamer:
-        partial += token
-        yield partial  # only the assistant text gets streamed
-    history.append({"role": "assistant", "content": partial})
-    conversation_state[:] = history
-    # (no final yield; generator simply ends)
-# ─────────────────────────── feedback handler ────────────────────────────
-def submit_feedback(conversation_state: List[Dict[str, str]],
-                    satisfaction: str,
-                    feedback_text: str):
-    save_feedback_locally(conversation_state, satisfaction, feedback_text)
-    if push_feedback_to_hub():
-        return "✅  Thanks! Your feedback is safely stored."
-    return "⚠️  Saved locally; Hub push failed. Check server logs."
-# ─────────────────────────── UI layout ───────────────────────────────────
-with gr.Blocks(title="Marin‑8B Research Preview") as demo:
     conversation_state = gr.State([])
     with gr.Row():
         with gr.Column(scale=3):
             chatbot = gr.ChatInterface(
-                fn=generate_response,
-                additional_inputs=[conversation_state,
-                                   gr.Slider(0.1, 2.0, value=0.7, step=0.1,
-                                             label="Temperature"),
-                                   gr.Slider(0.1, 1.0, value=0.9, step=0.05,
-                                             label="Top‑P")],
-                type="messages",
             )
         with gr.Column(scale=1):
-            report_btn = gr.Button("Share Feedback", variant="primary")
-    with Modal(visible=False) as fb_modal:
-        gr.Markdown("## Research Preview Feedback")
-        gr.Markdown("We appreciate your help improving Marin‑8B! ✨")
-        sat_radio = gr.Radio([
-            "Very satisfied", "Satisfied", "Neutral",
-            "Unsatisfied", "Very unsatisfied"],
-            label="Overall experience", value="Neutral")
-        fb_text = gr.Textbox(lines=6, label="Comments / suggestions")
-        send_btn = gr.Button("Submit", variant="primary")
-        status_box = gr.Textbox(label="Status", interactive=False)
-    report_btn.click(lambda: Modal.update(visible=True), None, fb_modal)
-    send_btn.click(submit_feedback,
-                   inputs=[conversation_state, sat_radio, fb_text],
-                   outputs=status_box)
-if __name__ == "__main__":
-    demo.launch()

+import spaces
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+import gradio as gr
+from threading import Thread
 import os
+import json
 import uuid
+from datasets import Dataset
+from huggingface_hub import HfApi, login
+import time
+# Third-party dependencies (these imports assume the packages are already installed)
 from gradio_modal import Modal
+import huggingface_hub
+import datasets
+# Model setup
+checkpoint = "WillHeld/soft-raccoon"
 device = "cuda"
 tokenizer = AutoTokenizer.from_pretrained(checkpoint)
 model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
+# Constants for dataset
+DATASET_REPO = "WillHeld/model-feedback"  # Replace with your username
+DATASET_PATH = "./feedback_data"  # Local path to store feedback
+DATASET_FILENAME = "feedback.jsonl"  # Filename for feedback data
+# Ensure feedback directory exists
+os.makedirs(DATASET_PATH, exist_ok=True)
+# Feedback storage functions
+def save_feedback_locally(conversation, satisfaction, feedback_text):
+    """Append one feedback record to the local JSONL file.
+
+    The record carries a freshly generated UUID, a local-time timestamp and
+    the three arguments as given. Returns the record's id.
+    """
+    entry_id = str(uuid.uuid4())
+    record = {
+        "id": entry_id,
+        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
         "conversation": conversation,
         "satisfaction": satisfaction,
+        "feedback": feedback_text,
     }
+    # One JSON document per line, appended so earlier entries are kept.
+    target = os.path.join(DATASET_PATH, DATASET_FILENAME)
+    with open(target, "a") as out:
+        out.write(f"{json.dumps(record)}\n")
+    return entry_id
+def push_feedback_to_hub(hf_token=None):
+    """Push the local feedback JSONL file to the HuggingFace Hub as a dataset.
+
+    Args:
+        hf_token: HuggingFace token. When None, the HF_TOKEN environment
+            variable is used instead.
+
+    Returns:
+        True if the dataset was pushed. False if no token is available, there
+        is no feedback to push, or any step raised (the error is printed).
+    """
+    # Check if we have a token
+    if hf_token is None:
+        # Try to get token from environment variable
+        hf_token = os.environ.get("HF_TOKEN")
+        if hf_token is None:
+            print("No HuggingFace token provided. Cannot push to Hub.")
+            return False
     try:
+        # Login to HuggingFace
+        login(token=hf_token)
+        # Check if we have data to push
+        feedback_file = os.path.join(DATASET_PATH, DATASET_FILENAME)
+        if not os.path.exists(feedback_file):
+            print("No feedback data to push.")
+            return False
+        # Load data from the JSONL file. Blank lines are skipped: a single
+        # empty line would otherwise make json.loads raise and block every
+        # later push.
+        with open(feedback_file, "r", encoding="utf-8") as f:
+            feedback_data = [json.loads(line) for line in f if line.strip()]
+        # An existing but empty file means there is nothing to upload.
+        if not feedback_data:
+            print("No feedback data to push.")
+            return False
+        # Create a dataset from the feedback data
+        dataset = Dataset.from_list(feedback_data)
+        # NOTE(review): only the rows in the local file are uploaded; this
+        # presumably replaces what the repo already holds. The previous
+        # revision merged with the remote dataset first -- confirm intent.
+        dataset.push_to_hub(
             DATASET_REPO,
+            private=True  # Set to False if you want the dataset to be public
         )
+        print(f"Feedback data pushed to {DATASET_REPO} successfully.")
+        return True
+    except Exception as e:
+        # Best-effort boundary: report the failure and let the caller decide.
+        print(f"Error pushing feedback data to Hub: {e}")
+        return False
+# Modified predict function to update conversation state
 @spaces.GPU(duration=120)
+def predict(message, history, temperature, top_p):
+    """Generate a reply to ``message`` and stream it back incrementally.
+
+    ``history`` (a list of ``{"role", "content"}`` dicts) is mutated in place:
+    the user message is appended before generation and the assistant's full
+    reply is appended once streaming has finished.
+
+    Yields:
+        ``(partial_text, conversation)`` pairs, where ``partial_text`` is the
+        reply accumulated so far and ``conversation`` is a new list holding
+        the history plus that partial assistant reply.
+    """
+    # Update history with user message
     history.append({"role": "user", "content": message})
+    input_text = tokenizer.apply_chat_template(history, tokenize=False, add_generation_prompt=True)
+    inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
+    # Create a streamer
+    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+    # Set up generation parameters
+    generation_kwargs = {
+        "input_ids": inputs,
+        "max_new_tokens": 1024,
+        "temperature": float(temperature),
+        "top_p": float(top_p),
+        "do_sample": True,
+        "streamer": streamer,
+    }
+    # Run generation in a separate thread so the streamer can be consumed here
+    thread = Thread(target=model.generate, kwargs=generation_kwargs)
+    thread.start()
+    # Yield from the streamer as tokens are generated
+    partial_text = ""
+    for new_text in streamer:
+        partial_text += new_text
+        # The second element used to be an undefined name `state`, which
+        # raised NameError on the first token. Yield a conversation snapshot.
+        yield partial_text, history + [{"role": "assistant", "content": partial_text}]
+    # After full generation, record the assistant's response in the history.
+    # (No `return value`: this is a generator, so callers would never see it.)
+    history.append({"role": "assistant", "content": partial_text})
+# Function to handle the research feedback submission
+def submit_research_feedback(conversation_state, satisfaction, feedback_text):
+    """Store the feedback locally, then try to push it to the HuggingFace Hub.
+
+    Returns the status message shown to the user, which depends on whether
+    the Hub push reported success.
+    """
+    # Local save always happens first, so a failed push does not drop the entry.
+    save_feedback_locally(conversation_state, satisfaction, feedback_text)
+    # The token is taken from the HF_TOKEN environment variable.
+    if push_feedback_to_hub(os.environ.get("HF_TOKEN")):
+        return "Thank you for your valuable feedback! Your insights have been saved to the dataset."
+    return "Thank you for your feedback! It has been saved locally, but couldn't be pushed to the dataset. Please check server logs."
+# Create the Gradio blocks interface
+# Page layout: chat interface on the left, a "Share Feedback" button on the
+# right, and a modal feedback form whose submission is passed the tracked
+# conversation state.
+with gr.Blocks() as demo:
+    # State to track conversation history
     conversation_state = gr.State([])
     with gr.Row():
         with gr.Column(scale=3):
+            # Custom chat function wrapper to update state
+            def chat_with_state(message, history, state, temperature, top_p):
+                """Stream predict()'s reply and mirror the conversation into the state.
+
+                Yields ``(partial_response, state)`` pairs; the second element
+                feeds ``conversation_state`` through ``additional_outputs``.
+                """
+                # Default keeps the final yield valid if predict() streams nothing.
+                partial_response = ""
+                for partial_response, _ in predict(message, history, temperature, top_p):
+                    # Update our state with each yield
+                    yield partial_response, history.copy()
+                # predict() appends the finished assistant reply to `history`
+                # after its last yield. This is a generator, so a `return`
+                # value would be dropped; yield once more so the stored state
+                # includes that reply.
+                yield partial_response, history.copy()
+            # Create ChatInterface
             chatbot = gr.ChatInterface(
+                chat_with_state,
+                additional_inputs=[
+                    conversation_state,
+                    gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
+                    gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-P")
+                ],
+                additional_outputs=[conversation_state],
+                type="messages"
             )
         with gr.Column(scale=1):
+            report_button = gr.Button("Share Feedback", variant="primary")
+    # Create the modal with feedback form components
+    with Modal(visible=False) as feedback_modal:
+        with gr.Column():
+            gr.Markdown("## Research Preview Feedback")
+            gr.Markdown("Thank you for testing our research model. Your feedback (positive or negative) helps us improve!")
+            satisfaction = gr.Radio(
+                ["Very satisfied", "Satisfied", "Neutral", "Unsatisfied", "Very unsatisfied"],
+                label="How would you rate your experience with this research model?",
+                value="Neutral"
+            )
+            feedback_text = gr.Textbox(
+                lines=5,
+                label="Share your observations (strengths, weaknesses, suggestions):",
+                placeholder="We welcome both positive feedback and constructive criticism to help improve this research prototype..."
+            )
+            submit_button = gr.Button("Submit Research Feedback", variant="primary")
+            response_text = gr.Textbox(label="Status", interactive=False)
+    # Connect the "Share Feedback" button to show the modal
+    report_button.click(
+        lambda: Modal(visible=True),
+        None,
+        feedback_modal
+    )
+    # Connect the submit button to the submit_research_feedback function with the current conversation state
+    submit_button.click(
+        submit_research_feedback,
+        inputs=[conversation_state, satisfaction, feedback_text],
+        outputs=response_text
+    )
+# Launch the demo
+demo.launch()