Spaces:

gaeunseo
/

Interface2

Sleeping

App Files Files Community

gaeunseo committed on Feb 10

Commit

7b078cc

verified ·

1 Parent(s): a4decd8

Update app.py

Browse files

Files changed (1) hide show

app.py +191 -82

app.py CHANGED Viewed

@@ -1,113 +1,222 @@
-import gradio as gr
-from datasets import load_dataset
 import random
-import re
-def load_random_conversations():
     """
-    데이터셋 'gaeunseo/Taskmaster_sample_data'의 train split에서
-    conversation_id별로 그룹화한 후, 모든 행의 used 컬럼이 False인 그룹만 필터링합니다.
-    이 중 랜덤하게 2개 그룹을 선택하여 각 그룹의 utterance를 줄바꿈으로 연결한 문자열 두 개를 반환합니다.
     """
-    ds = load_dataset("gaeunseo/Taskmaster_sample_data")["train"]
-    # conversation_id별로 그룹화
-    groups = {}
-    for row in ds:
-        cid = row["conversation_id"]
-        groups.setdefault(cid, []).append(row)
-    # 모든 행의 used가 False인 그룹만 선택
-    valid_groups = [grp for grp in groups.values() if all(not row["used"] for row in grp)]
-    if len(valid_groups) < 2:
-        return "Not enough unused conversations", "Not enough unused conversations"
-    # 유효한 그룹 중 랜덤하게 2개 선택
-    selected_groups = random.sample(valid_groups, 2)
-    # 각 그룹의 모든 utterance를 줄바꿈(\n)으로 이어붙임
-    conv_A = "\n".join(row["utterance"] for row in selected_groups[0])
-    conv_B = "\n".join(row["utterance"] for row in selected_groups[1])
-    return conv_A, conv_B
-def format_chat_html(conv_text):
     """
-    conv_text 문자열을 [turn]와 [BC] 토큰을 기준으로 분할한 후,
-    각 발화를 HTML 말풍선 형태로 변환합니다.
-    - 첫 번째 발화(인간): 오른쪽 정렬, 말풍선 오른쪽에 🧑 이모티콘 표시
-    - 두 번째 발화(AI): 왼쪽 정렬, 말풍선 왼쪽에 🤖 이모티콘 표시
     """
-    # [turn]와 [BC] 토큰을 구분자로 발화 분할
-    utterances = re.split(r'\[turn\]|\[BC\]', conv_text)
-    utterances = [utt.strip() for utt in utterances if utt.strip()]
-    # HTML 컨테이너 (채팅창처럼 보이도록)
-    html = '<div style="display: flex; flex-direction: column; gap: 10px; background-color: #f0f0f0; padding: 10px;">'
-    for i, utterance in enumerate(utterances):
-        if i % 2 == 0:
-            # 첫 번째, 세 번째 등 (인간 발화): 오른쪽 정렬
-            bubble = f'''
-            <div style="align-self: flex-end; background-color: #dcf8c6; padding: 10px; border-radius: 10px; max-width: 70%; position: relative;">
-                <div style="text-align: right;">{utterance} <span>🧑</span></div>
-            </div>
-            '''
         else:
-            # 두 번째, 네 번째 등 (AI 발화): 왼쪽 정렬
-            bubble = f'''
-            <div style="align-self: flex-start; background-color: #fff; padding: 10px; border-radius: 10px; max-width: 70%; position: relative; border: 1px solid #ccc;">
-                <div style="text-align: left;"><span>🤖</span> {utterance}</div>
-            </div>
-            '''
-        html += bubble
-    html += '</div>'
-    return html
-def load_and_format_conversations():
     """
-    데이터셋에서 랜덤하게 두 대화 문자열을 가져온 후, 각각 format_chat_html()을 통해
-    HTML 형식의 채팅창처럼 보이게 변환하여 반환합니다.
     """
-    conv_A, conv_B = load_random_conversations()
-    # 에러 메시지인 경우 그대로 HTML 태그로 감싸서 반환
-    if conv_A.startswith("Not enough"):
-        return f"<p>{conv_A}</p>", f"<p>{conv_B}</p>"
-    html_A = format_chat_html(conv_A)
-    html_B = format_chat_html(conv_B)
-    return html_A, html_B
-# 평가 버튼 클릭 시 업데이트할 전역 변수
-statement = ""
 def update_statement(val):
     global statement
     statement = val
     return statement
 with gr.Blocks() as demo:
-    # 상단: 좌우에 각각 대화 내용을 HTML로 표시 (Conversation A와 Conversation B)
     with gr.Row():
-        conv_A_html = gr.HTML(label="Conversation A")
-        conv_B_html = gr.HTML(label="Conversation B")
-    # "Load Random Conversations" 버튼을 눌러 데이터셋에서 대화 데이터를 불러오고 HTML로 변환
     load_btn = gr.Button("Load Random Conversations")
-    load_btn.click(fn=load_and_format_conversations, inputs=[], outputs=[conv_A_html, conv_B_html])
-    # 하단: 평가 버튼 4개 배치
     with gr.Row():
         btn_both_good = gr.Button("Both good")      # "둘 다 좋음" → "BG"
         btn_a_better   = gr.Button("A is better")     # "A가 더 좋음" → "AG"
         btn_b_better   = gr.Button("B is better")     # "B가 더 좋음" → "BG"
         btn_both_bad   = gr.Button("Both not good")   # "둘 다 별로임" → "BB"
-    # 선택한 평가값을 표시하는 텍스트박스
-    statement_output = gr.Textbox(label="Selected Statement", value="", interactive=False)
-    # 각 버튼 클릭 시 전역 변수 statement를 업데이트
     btn_both_good.click(fn=lambda: update_statement("BG"), inputs=[], outputs=statement_output)
     btn_a_better.click(fn=lambda: update_statement("AG"), inputs=[], outputs=statement_output)
     btn_b_better.click(fn=lambda: update_statement("BG"), inputs=[], outputs=statement_output)

+import os
+import threading
 import random
+import pandas as pd
+from datasets import load_dataset
+import gradio as gr
+#############################################
+# CSV 파일 관련 함수 및 전역 데이터 초기화
+#############################################
+DATA_FILE = "Interface1.csv"
+data_lock = threading.Lock()
+def initialize_global_data():
     """
+    CSV 파일(DATA_FILE)이 없으면, Hugging Face 데이터셋(gaeunseo/Taskmaster_sample_data)의 train split을
+    DataFrame으로 변환한 후 필요한 컬럼(used, overlapping, text)을 추가하고 CSV로 저장합니다.
+    이미 파일이 있으면 파일에서 데이터를 읽어 DataFrame을 반환합니다.
     """
+    if not os.path.exists(DATA_FILE):
+        ds = load_dataset("gaeunseo/Taskmaster_sample_data", split="train")
+        data = ds.to_pandas()
+        # 필요한 컬럼이 없으면 추가
+        if "used" not in data.columns:
+            data["used"] = False
+        if "overlapping" not in data.columns:
+            data["overlapping"] = ""
+        if "text" not in data.columns:
+            data["text"] = ""
+        data.to_csv(DATA_FILE, index=False)
+        return data
+    else:
+        with data_lock:
+            df = pd.read_csv(DATA_FILE)
+        return df
+def load_global_data():
+    """CSV 파일에서 global_data DataFrame을 읽어옵니다."""
+    with data_lock:
+        df = pd.read_csv(DATA_FILE)
+    return df
+def save_global_data(df):
+    """DataFrame을 CSV 파일에 저장합니다."""
+    with data_lock:
+        df.to_csv(DATA_FILE, index=False)
+# CSV 파일에 저장된 global_data 초기화
+global_data = initialize_global_data()
+#############################################
+# 데이터셋에서 랜덤 대화 행 선택 함수
+#############################################
+def get_random_row_from_dataset():
     """
+    CSV 파일에 저장된 global_data에서,
+      1. conversation_id별로 그룹화하고,
+      2. 각 그룹에서 모든 행의 used 컬럼이 False이며,
+         그룹 내에 overlapping 컬럼이 "TT"인 행이 존재하는 그룹만 valid로 간주합니다.
+    valid한 그룹들 중 랜덤하게 하나의 그룹을 선택한 후,
+      - 해당 그룹의 모든 행의 used 값을 True로 업데이트하고 CSV 파일에 저장합니다.
+      - 선택된 그룹 내에서 overlapping 컬럼이 "TT"인 행(여러 개라면 첫 번째)을 dict로 반환합니다.
     """
+    global global_data
+    global_data = load_global_data()  # 최신 데이터 로드
+    groups = global_data.groupby('conversation_id')
+    valid_groups = []
+    for cid, group in groups:
+        # 모든 행의 used가 False이고, 그룹 내에 overlapping이 "TT"인 행이 존재하는 그룹 선택
+        if group['used'].apply(lambda x: bool(x) == False).all() and (group['overlapping'] == "TT").any():
+            valid_groups.append((cid, group))
+    if not valid_groups:
+        return None
+    chosen_cid, chosen_group = random.choice(valid_groups)
+    # 선택된 그룹의 모든 행의 used 값을 True로 업데이트
+    global_data.loc[global_data['conversation_id'] == chosen_cid, 'used'] = True
+    save_global_data(global_data)
+    # 선택된 그룹 내에서 overlapping이 "TT"인 행(여러 개일 경우 첫 번째) 선택
+    chosen_rows = chosen_group[chosen_group['overlapping'] == "TT"]
+    if chosen_rows.empty:
+        return None
+    chosen_row = chosen_rows.iloc[0]
+    return chosen_row.to_dict()
+#############################################
+# 대화 HTML 생성 함수
+#############################################
+def format_conversation_html(row):
+    """
+    전달받은 row(dict)를 기반으로 대화 내용을 HTML로 포맷합니다.
+    text 컬럼은 "[turn]"을 기준으로 발화가 구분되어 있으며,
+      - 첫 번째 발화(인간)는 오른쪽 정렬과 말풍선 오른쪽의 🧑 아이콘으로 표시,
+      - 두 번째 발화(AI)는 왼쪽 정렬과 말풍선 왼쪽의 🤖 아이콘으로 표시합니다.
+    """
+    if row is None:
+        human_message = "No valid conversation available."
+        ai_message = "No valid conversation available."
+    else:
+        raw_text = row.get('text', '')
+        parts = raw_text.split("[turn]")
+        if len(parts) >= 2:
+            human_message = parts[0].strip()
+            ai_message = parts[1].strip()
         else:
+            human_message = raw_text.strip()
+            ai_message = ""
+    # 인간 말풍선 (오른쪽 정렬, 🧑 아이콘)
+    human_html = f"""
+    <div class="human-wrapper" style="display: flex; align-items: flex-end; justify-content: flex-end; gap: 5px; width: 100%;">
+        <div class="speech-bubble human" style="background: #d0f0d0; padding: 10px 15px; border-radius: 15px; max-width: 70%; text-align: right;">
+            {human_message}
+        </div>
+        <div class="emoji" style="font-size: 24px; line-height: 1;">🧑</div>
+    </div>
     """
+    # AI 말풍선 (왼쪽 정렬, 🤖 아이콘)
+    ai_html = f"""
+    <div class="ai-wrapper" style="display: flex; align-items: flex-end; justify-content: flex-start; gap: 5px; width: 100%;">
+        <div class="emoji" style="font-size: 24px; line-height: 1;">🤖</div>
+        <div class="speech-bubble ai" style="background: #e0e0e0; padding: 10px 15px; border-radius: 15px; max-width: 70%; text-align: left;">
+            {ai_message}
+        </div>
+    </div>
     """
+    conversation_html = f"""
+    <div class="chat-container" style="display: flex; flex-direction: column; gap: 10px;">
+        {human_html}
+        {ai_html}
+    </div>
+    """
+    return conversation_html
+def load_two_conversations_html():
+    """
+    get_random_row_from_dataset() 함수를 두 번 호출하여
+    Conversation A와 Conversation B 각각의 row를 가져온 후,
+    format_conversation_html()로 HTML을 생성하여 반환합니다.
+    """
+    row_A = get_random_row_from_dataset()
+    row_B = get_random_row_from_dataset()
+    conv_A_html = format_conversation_html(row_A)
+    conv_B_html = format_conversation_html(row_B)
+    return conv_A_html, conv_B_html
+#############################################
+# 평가 버튼 관련 함수
+#############################################
+# 전역 변수 statement (버튼 클릭 시 저장할 값)
+statement = ""
 def update_statement(val):
     global statement
     statement = val
     return statement
+#############################################
+# Gradio 인터페이스 구성
+#############################################
 with gr.Blocks() as demo:
+    # (A) CSS 스타일 (채팅 말풍선 관련)
+    gr.HTML(
+        """
+        <style>
+        .chat-container {
+          display: flex;
+          flex-direction: column;
+          gap: 10px;
+          width: 100%;
+        }
+        .speech-bubble {
+          position: relative;
+          padding: 10px 15px;
+          border-radius: 15px;
+          max-width: 70%;
+          font-family: sans-serif;
+          font-size: 16px;
+          line-height: 1.4;
+        }
+        .human {
+          background: #d0f0d0;
+        }
+        .ai {
+          background: #e0e0e0;
+        }
+        .emoji {
+          font-size: 24px;
+          line-height: 1;
+        }
+        </style>
+        """
+    )
+    gr.Markdown("## Conversation Comparison")
+    # 좌측: Conversation A, 우측: Conversation B
     with gr.Row():
+        conv_A = gr.HTML(label="Conversation A")
+        conv_B = gr.HTML(label="Conversation B")
+    # "Load Random Conversations" 버튼 클릭 시 두 대화를 불러옴
     load_btn = gr.Button("Load Random Conversations")
+    load_btn.click(fn=load_two_conversations_html, inputs=[], outputs=[conv_A, conv_B])
+    # 평가 버튼 영역 (하단)
     with gr.Row():
         btn_both_good = gr.Button("Both good")      # "둘 다 좋음" → "BG"
         btn_a_better   = gr.Button("A is better")     # "A가 더 좋음" → "AG"
         btn_b_better   = gr.Button("B is better")     # "B가 더 좋음" → "BG"
         btn_both_bad   = gr.Button("Both not good")   # "둘 다 별로임" → "BB"
+    # 선택된 평가값을 보여주는 텍스트박스
+    statement_output = gr.Textbox(label="Selected Statement", interactive=False)
+    # 각 버튼 클릭 시 전역 변수 statement 업데이트
     btn_both_good.click(fn=lambda: update_statement("BG"), inputs=[], outputs=statement_output)
     btn_a_better.click(fn=lambda: update_statement("AG"), inputs=[], outputs=statement_output)
     btn_b_better.click(fn=lambda: update_statement("BG"), inputs=[], outputs=statement_output)