Spaces:

gaeunseo
/

Interface2

Sleeping

File size: 4,775 Bytes

8570037
b63078f
 
e3a3cf7
43769dc
b63078f
e3a3cf7
 
 
 
 
 
 
 
 
 
 
 
b63078f
e3a3cf7
 
b63078f
e3a3cf7
 
b63078f
e3a3cf7
 
b63078f
e3a3cf7
 
 
 
 
 
 
 
 
 
 
b63078f
e3a3cf7
 
 
 
 
 
 
b63078f
e3a3cf7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8570037
 
e3a3cf7
0d9db4d
e3a3cf7
 
 
 
 
 
b63078f
e3a3cf7
8570037
43769dc
e3a3cf7
 
 
0d9db4d
e3a3cf7
0d9db4d
 
e3a3cf7
0d9db4d
 
 
 
8570037

import gradio as gr
from datasets import load_dataset
import random
import re

def load_random_conversations():
    """
    Pick two random, fully unused conversations from the dataset.

    Loads the train split of 'gaeunseo/Taskmaster_sample_data', buckets its
    rows by conversation_id, and keeps only the buckets in which no row has a
    truthy `used` value. Two of those buckets are drawn at random and each is
    flattened into a single string by joining its utterances with newlines.

    Returns:
        A pair of strings (conversation A, conversation B). When fewer than
        two unused conversations are available, both elements are the message
        "Not enough unused conversations" instead.
    """
    train_rows = load_dataset("gaeunseo/Taskmaster_sample_data")["train"]

    # Bucket rows by conversation id, keeping dataset order inside each bucket.
    by_conversation = {}
    for record in train_rows:
        key = record["conversation_id"]
        if key not in by_conversation:
            by_conversation[key] = []
        by_conversation[key].append(record)

    # A conversation qualifies only when none of its rows is marked as used.
    unused = []
    for rows in by_conversation.values():
        if not any(r["used"] for r in rows):
            unused.append(rows)

    if len(unused) < 2:
        message = "Not enough unused conversations"
        return message, message

    # Draw two distinct conversations.
    first, second = random.sample(unused, 2)

    def as_text(rows):
        # One utterance per line.
        return "\n".join(r["utterance"] for r in rows)

    return as_text(first), as_text(second)

def format_chat(conv_text):
    """
    Convert a raw conversation string into gr.Chatbot message pairs.

    The text is cut at every [turn] or [BC] token; each piece is stripped and
    blank pieces are dropped. The remaining utterances are assumed to
    alternate speakers, starting with the human: 1st human, 2nd AI, 3rd human,
    and so on.

    - Human messages get a trailing " 🧑" marker.
    - AI messages get a leading "🤖 " marker.

    Returns:
        A list of (user_message, ai_message) tuples. If the conversation ends
        on a human utterance, the final ai_message is an empty string.
    """
    pieces = (piece.strip() for piece in re.split(r'\[turn\]|\[BC\]', conv_text))
    turns = [piece for piece in pieces if piece]

    # Even positions belong to the human, odd positions to the AI.
    human_turns = turns[0::2]
    ai_turns = turns[1::2]

    pairs = []
    for idx, human_text in enumerate(human_turns):
        # The last human turn may have no reply when the turn count is odd.
        ai_message = ("🤖 " + ai_turns[idx]) if idx < len(ai_turns) else ""
        pairs.append((human_text + " 🧑", ai_message))
    return pairs

def load_and_format_conversations():
    """
    Fetch two random conversations and shape them for the chat components.

    Calls load_random_conversations() and runs each returned string through
    format_chat().

    Returns:
        A pair (chat_A, chat_B), each a list of (user_message, ai_message)
        tuples. When load_random_conversations() reports that there are not
        enough unused conversations, each list holds a single pair whose
        user side is None and whose AI side is the error message.
    """
    conv_A, conv_B = load_random_conversations()
    if conv_A.startswith("Not enough"):
        # Both outputs are wired to gr.Chatbot components, which take a list
        # of message pairs rather than a bare string. Wrap the message as a
        # bot-only pair (None on the user side is not rendered).
        return [(None, conv_A)], [(None, conv_B)]
    return format_chat(conv_A), format_chat(conv_B)

# Most recently selected rating code; overwritten on every rating-button click.
# NOTE: this is a module-level variable, so it is not kept per user/session.
statement = ""

def update_statement(val):
    """
    Store `val` as the current rating code and hand it back.

    Args:
        val: The rating code to remember.

    Returns:
        The value that was just stored.
    """
    global statement
    statement = val
    return val

with gr.Blocks() as demo:
    # Top row: the two conversations shown side by side.
    with gr.Row():
        chat_A = gr.Chatbot(label="Conversation A")
        chat_B = gr.Chatbot(label="Conversation B")

    # Pull two random conversations from the dataset into the chat panels.
    load_btn = gr.Button("Load Random Conversations")
    load_btn.click(fn=load_and_format_conversations, inputs=[], outputs=[chat_A, chat_B])

    # Bottom row: four rating buttons, each mapped to its own rating code.
    # NOTE(review): "Both good" and "B is better" used to emit the same code
    # "BG", so the two verdicts could not be told apart. "Both good" now emits
    # "GG"; confirm the code with whatever consumes the statement value.
    with gr.Row():
        btn_both_good = gr.Button("Both good")      # both good     -> "GG"
        btn_a_better   = gr.Button("A is better")     # A is better   -> "AG"
        btn_b_better   = gr.Button("B is better")     # B is better   -> "BG"
        btn_both_bad   = gr.Button("Both not good")   # both not good -> "BB"

    # Read-only textbox that displays the selected rating code.
    statement_output = gr.Textbox(label="Selected Statement", value="", interactive=False)

    # Each rating button stores its code and shows it in the textbox.
    btn_both_good.click(fn=lambda: update_statement("GG"), inputs=[], outputs=statement_output)
    btn_a_better.click(fn=lambda: update_statement("AG"), inputs=[], outputs=statement_output)
    btn_b_better.click(fn=lambda: update_statement("BG"), inputs=[], outputs=statement_output)
    btn_both_bad.click(fn=lambda: update_statement("BB"), inputs=[], outputs=statement_output)

demo.launch()