Spaces:

gaeunseo
/

Interface2

Sleeping

App Files Files Community

Interface2 / app.py

gaeunseo

Update app.py

7b078cc verified 9 months ago

raw

history blame contribute delete

8.72 kB

	import os
	import threading
	import random
	import pandas as pd
	from datasets import load_dataset
	import gradio as gr

	#############################################
	# CSV 파일 관련 함수 및 전역 데이터 초기화
	#############################################

	# Path of the CSV file that persists the conversation table between calls.
	DATA_FILE = "Interface1.csv"
	# Lock taken by the CSV helper functions while they read or write DATA_FILE.
	data_lock = threading.Lock()

	def initialize_global_data():
	    """Return the conversation table, creating the CSV cache on first use.

	    If DATA_FILE does not exist, the train split of the Hugging Face dataset
	    "gaeunseo/Taskmaster_sample_data" is converted to a DataFrame, the
	    bookkeeping columns ``used`` (False), ``overlapping`` ("") and ``text``
	    ("") are added when absent, and the result is written to DATA_FILE.
	    If the file already exists, the table is read back from it instead.

	    Returns:
	        pandas.DataFrame: the freshly built table, or the one read from
	        the existing CSV file.
	    """
	    # The existence check, the first write and the read all happen under the
	    # same lock the other CSV helpers use, so a concurrent reader can never
	    # observe a partially written file.
	    with data_lock:
	        if os.path.exists(DATA_FILE):
	            return pd.read_csv(DATA_FILE)

	        data = load_dataset("gaeunseo/Taskmaster_sample_data", split="train").to_pandas()
	        # Add the bookkeeping columns only when the dataset lacks them.
	        for column, default in (("used", False), ("overlapping", ""), ("text", "")):
	            if column not in data.columns:
	                data[column] = default
	        data.to_csv(DATA_FILE, index=False)
	        return data

	def load_global_data():
	    """Read DATA_FILE while holding data_lock and return it as a DataFrame."""
	    with data_lock:
	        return pd.read_csv(DATA_FILE)

	def save_global_data(df):
	    """Write ``df`` to DATA_FILE (without the index) while holding data_lock."""
	    data_lock.acquire()
	    try:
	        df.to_csv(DATA_FILE, index=False)
	    finally:
	        data_lock.release()

	# Load the conversation table at import time (the CSV is created on first run).
	global_data = initialize_global_data()

	#############################################
	# 데이터셋에서 랜덤 대화 행 선택 함수
	#############################################

	def get_random_row_from_dataset():
	    """Pick a random unused conversation, mark it used, and return its "TT" row.

	    The table stored in DATA_FILE is grouped by ``conversation_id``. A group
	    is eligible when none of its rows has a truthy ``used`` value and at
	    least one row has ``overlapping == "TT"``. One eligible group is chosen
	    at random, every row of that group is marked ``used = True`` in the CSV,
	    and the group's first "TT" row is returned.

	    Returns:
	        dict | None: the chosen row as a dict (with ``used`` as it was
	        before marking), or None when no eligible conversation remains.
	    """
	    global global_data
	    # The whole read-select-mark-write cycle runs under one lock acquisition;
	    # otherwise two concurrent requests could both read the table before
	    # either writes it back and end up serving the same conversation.
	    # The CSV is accessed directly here because data_lock is not reentrant.
	    with data_lock:
	        frame = pd.read_csv(DATA_FILE)
	        global_data = frame  # keep the module-level copy in sync with disk

	        conv_ids = frame['conversation_id']
	        is_used = frame['used'].astype(bool)
	        is_tt = frame['overlapping'] == "TT"

	        # Per-conversation flags: "any row used" and "any row is TT".
	        any_used = is_used.groupby(conv_ids).any()
	        any_tt = is_tt.groupby(conv_ids).any()
	        eligible_ids = list(any_tt.index[any_tt & ~any_used])
	        if not eligible_ids:
	            return None

	        chosen_cid = random.choice(eligible_ids)
	        in_chosen = conv_ids == chosen_cid
	        # Capture the first "TT" row before flipping the flag; eligibility
	        # guarantees that at least one such row exists.
	        chosen_row = frame.loc[in_chosen & is_tt].iloc[0].to_dict()

	        frame.loc[in_chosen, 'used'] = True
	        frame.to_csv(DATA_FILE, index=False)
	    return chosen_row

	#############################################
	# 대화 HTML 생성 함수
	#############################################

	def format_conversation_html(row):
	    """Render one conversation row as a two-bubble HTML chat snippet.

	    The row's ``text`` value is split on the literal marker "[turn]": the
	    first piece is shown as the human utterance (right-aligned, 🧑 icon) and
	    the second as the AI utterance (left-aligned, 🤖 icon). Pieces after the
	    second are ignored. Without a marker, the whole text is the human
	    utterance and the AI bubble is empty. Both utterances are HTML-escaped
	    before being placed in the markup.

	    Args:
	        row: dict-like object with a ``text`` entry, or None when no
	            conversation is available (both bubbles then show a
	            placeholder message).

	    Returns:
	        str: HTML for the chat container holding both bubbles.
	    """
	    from html import escape

	    if row is None:
	        human_message = "No valid conversation available."
	        ai_message = "No valid conversation available."
	    else:
	        raw_text = row.get('text', '')
	        # pandas reads an empty CSV cell back as NaN (a float), which has no
	        # .split(); treat any non-string value as empty text.
	        if not isinstance(raw_text, str):
	            raw_text = ""
	        parts = raw_text.split("[turn]")
	        human_message = parts[0].strip()
	        ai_message = parts[1].strip() if len(parts) >= 2 else ""

	    # Dataset text is data, not markup: escape it so characters such as
	    # "<" or "&" are displayed literally instead of altering the page.
	    human_message = escape(human_message)
	    ai_message = escape(ai_message)

	    # Human bubble (right-aligned, 🧑 icon)
	    human_html = f"""
	<div class="human-wrapper" style="display: flex; align-items: flex-end; justify-content: flex-end; gap: 5px; width: 100%;">
	<div class="speech-bubble human" style="background: #d0f0d0; padding: 10px 15px; border-radius: 15px; max-width: 70%; text-align: right;">
	{human_message}
	</div>
	<div class="emoji" style="font-size: 24px; line-height: 1;">🧑</div>
	</div>
	"""
	    # AI bubble (left-aligned, 🤖 icon)
	    ai_html = f"""
	<div class="ai-wrapper" style="display: flex; align-items: flex-end; justify-content: flex-start; gap: 5px; width: 100%;">
	<div class="emoji" style="font-size: 24px; line-height: 1;">🤖</div>
	<div class="speech-bubble ai" style="background: #e0e0e0; padding: 10px 15px; border-radius: 15px; max-width: 70%; text-align: left;">
	{ai_message}
	</div>
	</div>
	"""
	    conversation_html = f"""
	<div class="chat-container" style="display: flex; flex-direction: column; gap: 10px;">
	{human_html}
	{ai_html}
	</div>
	"""
	    return conversation_html

	def load_two_conversations_html():
	    """Fetch two random conversations and return their HTML renderings.

	    get_random_row_from_dataset() is called twice (first for Conversation A,
	    then for Conversation B) and each resulting row is passed through
	    format_conversation_html().

	    Returns:
	        tuple[str, str]: HTML for Conversation A and Conversation B.
	    """
	    picked_rows = [get_random_row_from_dataset() for _ in range(2)]
	    html_a, html_b = (format_conversation_html(picked) for picked in picked_rows)
	    return html_a, html_b

	#############################################
	# 평가 버튼 관련 함수
	#############################################

	# Module-level rating code; overwritten each time a rating button is clicked.
	statement = ""


	def update_statement(val):
	    """Store ``val`` in the module-level ``statement`` and return it."""
	    global statement
	    statement = val
	    return val

	#############################################
	# Gradio 인터페이스 구성
	#############################################

	with gr.Blocks() as demo:
	    # (A) CSS for the chat speech bubbles.
	    gr.HTML(
	        """
	<style>
	.chat-container {
	display: flex;
	flex-direction: column;
	gap: 10px;
	width: 100%;
	}
	.speech-bubble {
	position: relative;
	padding: 10px 15px;
	border-radius: 15px;
	max-width: 70%;
	font-family: sans-serif;
	font-size: 16px;
	line-height: 1.4;
	}
	.human {
	background: #d0f0d0;
	}
	.ai {
	background: #e0e0e0;
	}
	.emoji {
	font-size: 24px;
	line-height: 1;
	}
	</style>
	"""
	    )

	    gr.Markdown("## Conversation Comparison")

	    # Left: Conversation A, right: Conversation B.
	    with gr.Row():
	        conv_A = gr.HTML(label="Conversation A")
	        conv_B = gr.HTML(label="Conversation B")

	    # Clicking "Load Random Conversations" fetches and renders two conversations.
	    load_btn = gr.Button("Load Random Conversations")
	    load_btn.click(fn=load_two_conversations_html, inputs=[], outputs=[conv_A, conv_B])

	    # Rating buttons (bottom row) and the code each one stores.
	    with gr.Row():
	        btn_both_good = gr.Button("Both good")  # -> "GG"
	        btn_a_better = gr.Button("A is better")  # -> "AG"
	        btn_b_better = gr.Button("B is better")  # -> "BG"
	        btn_both_bad = gr.Button("Both not good")  # -> "BB"

	    # Read-only textbox showing the most recently selected rating code.
	    statement_output = gr.Textbox(label="Selected Statement", interactive=False)

	    # Each button stores its own code in the module-level `statement`.
	    # "Both good" used to emit "BG" as well, which made it indistinguishable
	    # from "B is better"; it now emits the distinct code "GG".
	    # NOTE(review): confirm "GG" with whatever consumes `statement`.
	    btn_both_good.click(fn=lambda: update_statement("GG"), inputs=[], outputs=statement_output)
	    btn_a_better.click(fn=lambda: update_statement("AG"), inputs=[], outputs=statement_output)
	    btn_b_better.click(fn=lambda: update_statement("BG"), inputs=[], outputs=statement_output)
	    btn_both_bad.click(fn=lambda: update_statement("BB"), inputs=[], outputs=statement_output)

	demo.launch()