gaeunseo committed on
Commit
3db5366
·
verified ·
1 Parent(s): 95e3f5f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -38
app.py CHANGED
@@ -12,8 +12,8 @@ data_lock = threading.Lock()
12
 
13
  def initialize_global_data():
14
  """
15
- DATA_FILE์ด ์กด์žฌํ•˜์ง€ ์•Š์œผ๋ฉด, Dataset์„ ๋กœ๋“œํ•˜์—ฌ ํŒŒ์ผ์— ์ €์žฅํ•ฉ๋‹ˆ๋‹ค.
16
- 이미 파일이 있으면 파일에서 데이터를 읽어 반환합니다.
17
  """
18
  if not os.path.exists(DATA_FILE):
19
  ds = load_dataset("gaeunseo/Taskmaster_sample_data", split="train")
@@ -33,49 +33,51 @@ def initialize_global_data():
33
  return df
34
 
35
  def load_global_data():
 
36
  with data_lock:
37
  df = pd.read_csv(DATA_FILE)
38
  return df
39
 
40
  def save_global_data(df):
 
41
  with data_lock:
42
  df.to_csv(DATA_FILE, index=False)
43
 
 
44
  global_data = initialize_global_data()
45
 
46
  def get_random_row_from_dataset():
47
  """
48
- DATA_FILE์— ์ €์žฅ๋œ global_data์—์„œ,
49
- conversation_id๋ณ„๋กœ ๊ทธ๋ฃนํ™”ํ•œ ํ›„,
50
- - ๋ชจ๋“  ํ–‰์˜ used ์ปฌ๋Ÿผ์ด False์ธ ๊ทธ๋ฃน์ด๊ณ ,
51
- - 그룹 내에 overlapping 컬럼이 "TT"인 행이 존재하는 그룹들 중에서
52
- ๋žœ๋คํ•˜๊ฒŒ ํ•˜๋‚˜์˜ ๊ทธ๋ฃน์„ ์„ ํƒํ•˜๊ณ , ํ•ด๋‹น ๊ทธ๋ฃน ๋‚ด์—์„œ overlapping ์ปฌ๋Ÿผ์ด "TT"์ธ ํ–‰์„ ์„ ํƒํ•˜์—ฌ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
53
- ๋ฐ˜ํ™˜ ์ „์— ํ•ด๋‹น ํ–‰์˜ used ๊ฐ’์„ True๋กœ ์—…๋ฐ์ดํŠธํ•˜๊ณ  ํŒŒ์ผ์— ์ €์žฅํ•ฉ๋‹ˆ๋‹ค.
54
  """
55
  global global_data
56
- global_data = load_global_data()
57
- # conversation_id๋ณ„ ๊ทธ๋ฃนํ™”
58
- conversation_groups = {}
59
- for row in global_data:
60
- cid = row["conversation_id"]
61
- conversation_groups.setdefault(cid, []).append(row)
62
- # 조건에 맞는 그룹 필터링
63
- valid_groups = [
64
- group for group in conversation_groups.values()
65
- if all(not r["used"] for r in group) and any(r["overlapping"] == "TT" for r in group)
66
- ]
67
  if not valid_groups:
68
  return None
69
- chosen_group = random.choice(valid_groups)
70
- chosen_row = None
71
- for row in chosen_group:
72
- if row["overlapping"] == "TT":
73
- row["used"] = True # 업데이트
74
- chosen_row = row
75
- break
76
  save_global_data(global_data)
77
- return chosen_row
 
 
 
 
 
78
 
 
 
79
  row = get_random_row_from_dataset()
80
  if row is None:
81
  human_message = "No valid conversation available."
@@ -86,7 +88,7 @@ else:
86
  ai_message = raw_text.split("[turn]")[1].strip()
87
 
88
  #############################################
89
- # 채팅 인터페이스 관련 함수
90
  #############################################
91
 
92
  def get_initial_human_html():
@@ -104,7 +106,7 @@ def get_initial_human_html():
104
 
105
  def stream_human_message():
106
  """
107
- Start Typing ๋ฒ„ํŠผ ํด๋ฆญ ์‹œ, ์ „์—ญ ๋ณ€์ˆ˜ human_message์˜ ๋‚ด์šฉ์„ ํ•œ ๊ธ€์ž์”ฉ ํƒ€์ดํ•‘ ํšจ๊ณผ๋กœ ์ถœ๋ ฅ.
108
  이전 상태(✂️ 아이콘, 회색 처리 등)는 리셋됩니다.
109
  """
110
  bubble_content = ""
@@ -119,7 +121,7 @@ def stream_human_message():
119
  # 초기 상태: 빈 말풍선과 이모티콘
120
  yield wrapper_start + bubble_start + bubble_end + emoji_html + wrapper_end
121
 
122
- # 한 글자씩 추가 (타이핑 효과)
123
  for i, ch in enumerate(human_message):
124
  bubble_content += f"<span data-index='{i}'>{ch}</span>"
125
  current_html = wrapper_start + bubble_start + bubble_content + bubble_end + emoji_html + wrapper_end
@@ -131,21 +133,21 @@ def submit_edit(edited_text):
131
  Submit 버튼 클릭 시 호출되는 함수.
132
  1. ํŽธ์ง‘๋œ human ๋ฉ”์‹œ์ง€(โœ‚๏ธ ์•ž๋ถ€๋ถ„)๋ฅผ ์ƒˆ ํ–‰์œผ๋กœ global_data์— ์ถ”๊ฐ€ํ•ฉ๋‹ˆ๋‹ค.
133
  2. get_random_row_from_dataset()์„ ํ†ตํ•ด ์ƒˆ๋กœ์šด ๋Œ€ํ™”๋ฅผ ๊ฐ€์ ธ์˜ค๊ณ , ์ „์—ญ ๋ณ€์ˆ˜ human_message์™€ ai_message๋ฅผ ์—…๋ฐ์ดํŠธํ•ฉ๋‹ˆ๋‹ค.
134
- 3. ์ดˆ๊ธฐ ์ƒํƒœ์˜ human ๋งํ’์„ ์™€ ai ๋งํ’์„  HTML์„ ๋ฐ˜ํ™˜ํ•˜์—ฌ ์ธํ„ฐํŽ˜์ด์Šค๋ฅผ ๋ฆฌ์…‹ํ•ฉ๋‹ˆ๋‹ค.
135
  """
136
- global global_data, human_message, ai_message
137
- # ์ƒˆ ํ–‰ ์ƒ์„ฑ (์ƒˆ conversation_id๋Š” ์ž„์˜๋กœ ์ƒ์„ฑ)
138
  new_row = {
139
- "conversation_id": "edited_" + str(random.randint(1000,9999)),
140
  "used": False,
141
  "overlapping": "",
142
  "text": edited_text,
143
  "human_message": edited_text,
144
  "ai_message": ""
145
  }
146
- global_data = load_global_data()
147
- global_data.append(new_row)
148
- save_global_data(global_data)
149
 
150
  new_row_data = get_random_row_from_dataset()
151
  if new_row_data is None:
@@ -170,7 +172,8 @@ def submit_edit(edited_text):
170
  #############################################
171
 
172
  with gr.Blocks() as demo:
173
- # (A) 텍스트 클릭 시 ✂️ 아이콘 추가 및 회색 처리, 그리고 바로 hidden input 업데이트
 
174
  gr.HTML(
175
  """
176
  <script>
@@ -275,7 +278,9 @@ with gr.Blocks() as demo:
275
  gr.Markdown("## Chat Interface")
276
 
277
  with gr.Column(elem_classes="chat-container"):
 
278
  human_bubble = gr.HTML(get_initial_human_html())
 
279
  ai_html = f"""
280
  <div class="ai-wrapper" style="display: flex; align-items: flex-end; justify-content: flex-start; gap: 5px; width: 100%;">
281
  <div class="emoji">🤖</div>
@@ -287,10 +292,12 @@ with gr.Blocks() as demo:
287
  # 숨김 텍스트박스 (편집된 텍스트 저장용)
288
  edited_text_input = gr.Textbox(visible=False, elem_id="edited_text_input")
289
 
 
290
  with gr.Row():
291
  start_button = gr.Button("Start Typing")
292
  submit_button = gr.Button("Submit", elem_id="submit_btn")
293
 
 
294
  start_button.click(fn=stream_human_message, outputs=human_bubble)
295
  submit_button.click(fn=submit_edit, inputs=edited_text_input, outputs=[human_bubble, ai_bubble])
296
 
 
12
 
13
  def initialize_global_data():
14
  """
15
+ DATA_FILE์ด ์กด์žฌํ•˜์ง€ ์•Š์œผ๋ฉด, gaeunseo/Taskmaster_sample_data ๋ฐ์ดํ„ฐ์…‹์„ ๋กœ๋“œํ•˜์—ฌ DataFrame์œผ๋กœ ๋ณ€ํ™˜ํ•œ ํ›„ CSV ํŒŒ์ผ๋กœ ์ €์žฅํ•ฉ๋‹ˆ๋‹ค.
16
+ ์ด๋ฏธ ํŒŒ์ผ์ด ์žˆ์œผ๋ฉด ํŒŒ์ผ์—์„œ ๋ฐ์ดํ„ฐ๋ฅผ ์ฝ์–ด DataFrame์„ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
17
  """
18
  if not os.path.exists(DATA_FILE):
19
  ds = load_dataset("gaeunseo/Taskmaster_sample_data", split="train")
 
33
  return df
34
 
35
  def load_global_data():
36
+ """CSV ํŒŒ์ผ์—์„œ global_data DataFrame์„ ์ฝ์–ด์˜ต๋‹ˆ๋‹ค."""
37
  with data_lock:
38
  df = pd.read_csv(DATA_FILE)
39
  return df
40
 
41
  def save_global_data(df):
42
+ """DataFrame์„ CSV ํŒŒ์ผ์— ์ €์žฅํ•ฉ๋‹ˆ๋‹ค."""
43
  with data_lock:
44
  df.to_csv(DATA_FILE, index=False)
45
 
46
+ # CSV 파일에 저장된 global_data 초기화
47
  global_data = initialize_global_data()
48
 
49
  def get_random_row_from_dataset():
50
  """
51
+ CSV ํŒŒ์ผ์— ์ €์žฅ๋œ global_data์—์„œ,
52
+ 1. conversation_id๋ณ„๋กœ ๊ทธ๋ฃนํ™”ํ•˜๊ณ ,
53
+ 2. ๊ฐ ๊ทธ๋ฃน์—์„œ ๋ชจ๋“  ํ–‰์˜ used ์ปฌ๋Ÿผ์ด False์ด๋ฉฐ, ๊ทธ๋ฃน ๋‚ด์— overlapping ์ปฌ๋Ÿผ์ด "TT"์ธ ํ–‰์ด ์กด์žฌํ•˜๋Š” ๊ทธ๋ฃน๋งŒ valid๋กœ ๊ฐ„์ฃผํ•ฉ๋‹ˆ๋‹ค.
54
+ validํ•œ ๊ทธ๋ฃน๋“ค ์ค‘ ๋žœ๋คํ•˜๊ฒŒ ํ•˜๋‚˜์˜ ๊ทธ๋ฃน์„ ์„ ํƒํ•œ ํ›„,
55
+ - ํ•ด๋‹น ๊ทธ๋ฃน์˜ ๋ชจ๋“  ํ–‰์˜ used ๊ฐ’์„ True๋กœ ์—…๋ฐ์ดํŠธ(์ฆ‰, ์ „์ฒด ๊ทธ๋ฃน์„ ํ• ๋‹น)ํ•˜๊ณ  CSV ํŒŒ์ผ์— ์ €์žฅํ•ฉ๋‹ˆ๋‹ค.
56
+ - 선택된 그룹 내에서 overlapping 컬럼이 "TT"인 행(여러 개라면 첫 번째)을 반환합니다.
57
  """
58
  global global_data
59
+ global_data = load_global_data() # 최신 데이터 로드
60
+ groups = global_data.groupby('conversation_id')
61
+ valid_groups = []
62
+ for cid, group in groups:
63
+ # ๋ชจ๋“  ํ–‰์˜ used ๊ฐ’์ด False์ด๊ณ , ๊ทธ๋ฃน ๋‚ด์— overlapping ๊ฐ’์ด "TT"์ธ ํ–‰์ด ์žˆ๋Š” ๊ทธ๋ฃน ํ•„ํ„ฐ๋ง
64
+ if group['used'].apply(lambda x: bool(x) == False).all() and (group['overlapping'] == "TT").any():
65
+ valid_groups.append((cid, group))
 
 
 
 
66
  if not valid_groups:
67
  return None
68
+ chosen_cid, chosen_group = random.choice(valid_groups)
69
+ # ์ „์ฒด ๊ทธ๋ฃน์˜ used ๊ฐ’์„ True๋กœ ์—…๋ฐ์ดํŠธ
70
+ global_data.loc[global_data['conversation_id'] == chosen_cid, 'used'] = True
 
 
 
 
71
  save_global_data(global_data)
72
+ # 선택된 그룹 내에서 overlapping 컬럼이 "TT"인 행을 반환 (여러 개라면 첫 번째)
73
+ chosen_rows = chosen_group[chosen_group['overlapping'] == "TT"]
74
+ if chosen_rows.empty:
75
+ return None
76
+ chosen_row = chosen_rows.iloc[0]
77
+ return chosen_row.to_dict()
78
 
79
+ # --- 초기 대화 불러오기 ---
80
+ # ๋ฐ์ดํ„ฐ์…‹์˜ text ์ปฌ๋Ÿผ์€ "[turn]"์„ ๊ธฐ์ค€์œผ๋กœ ๋Œ€ํ™”๊ฐ€ ๊ตฌ๋ถ„๋˜์–ด ์žˆ๋‹ค๊ณ  ๊ฐ€์ •ํ•ฉ๋‹ˆ๋‹ค.
81
  row = get_random_row_from_dataset()
82
  if row is None:
83
  human_message = "No valid conversation available."
 
88
  ai_message = raw_text.split("[turn]")[1].strip()
89
 
90
  #############################################
91
+ # 채팅 인터페이스 관련 함수 (말풍선, 타이핑 효과, 편집 기능)
92
  #############################################
93
 
94
  def get_initial_human_html():
 
106
 
107
  def stream_human_message():
108
  """
109
+ Start Typing ๋ฒ„ํŠผ ํด๋ฆญ ์‹œ, ์ „์—ญ ๋ณ€์ˆ˜ human_message์˜ ๋‚ด์šฉ์„ ํ•œ ๊ธ€์ž์”ฉ ํƒ€์ดํ•‘ ํšจ๊ณผ๋กœ ์ถœ๋ ฅํ•ฉ๋‹ˆ๋‹ค.
110
  이전 상태(✂️ 아이콘, 회색 처리 등)는 리셋됩니다.
111
  """
112
  bubble_content = ""
 
121
  # 초기 상태: 빈 말풍선과 이모티콘
122
  yield wrapper_start + bubble_start + bubble_end + emoji_html + wrapper_end
123
 
124
+ # human_message๋ฅผ ํ•œ ๊ธ€์ž์”ฉ ์ถ”๊ฐ€ (ํƒ€์ดํ•‘ ํšจ๊ณผ)
125
  for i, ch in enumerate(human_message):
126
  bubble_content += f"<span data-index='{i}'>{ch}</span>"
127
  current_html = wrapper_start + bubble_start + bubble_content + bubble_end + emoji_html + wrapper_end
 
133
  Submit 버튼 클릭 시 호출되는 함수.
134
  1. ํŽธ์ง‘๋œ human ๋ฉ”์‹œ์ง€(โœ‚๏ธ ์•ž๋ถ€๋ถ„)๋ฅผ ์ƒˆ ํ–‰์œผ๋กœ global_data์— ์ถ”๊ฐ€ํ•ฉ๋‹ˆ๋‹ค.
135
  2. get_random_row_from_dataset()์„ ํ†ตํ•ด ์ƒˆ๋กœ์šด ๋Œ€ํ™”๋ฅผ ๊ฐ€์ ธ์˜ค๊ณ , ์ „์—ญ ๋ณ€์ˆ˜ human_message์™€ ai_message๋ฅผ ์—…๋ฐ์ดํŠธํ•ฉ๋‹ˆ๋‹ค.
136
+ 3. ์ดˆ๊ธฐ ์ƒํƒœ์˜ human ๋งํ’์„ ๊ณผ ai ๋งํ’์„  HTML์„ ๋ฐ˜ํ™˜ํ•˜์—ฌ ์ธํ„ฐํŽ˜์ด์Šค๋ฅผ ๋ฆฌ์…‹ํ•ฉ๋‹ˆ๋‹ค.
137
  """
138
+ global human_message, ai_message
139
+ data = load_global_data()
140
  new_row = {
141
+ "conversation_id": "edited_" + str(random.randint(1000, 9999)),
142
  "used": False,
143
  "overlapping": "",
144
  "text": edited_text,
145
  "human_message": edited_text,
146
  "ai_message": ""
147
  }
148
+ new_df = pd.DataFrame([new_row])
149
+ data = pd.concat([data, new_df], ignore_index=True)
150
+ save_global_data(data)
151
 
152
  new_row_data = get_random_row_from_dataset()
153
  if new_row_data is None:
 
172
  #############################################
173
 
174
  with gr.Blocks() as demo:
175
+ # (A) 페이지 상단 스크립트: Human 말풍선 내의 각 <span data-index="...">를 클릭하면,
176
+ # 해당 위치에 ✂️ 아이콘이 삽입되고, 그 이후 텍스트가 회색으로 변경됩니다.
177
  gr.HTML(
178
  """
179
  <script>
 
278
  gr.Markdown("## Chat Interface")
279
 
280
  with gr.Column(elem_classes="chat-container"):
281
+ # Human 말풍선 (초기: 빈 메시지 + 🧑 이모티콘)
282
  human_bubble = gr.HTML(get_initial_human_html())
283
+ # AI 말풍선 (왼쪽: 🤖 이모티콘 + 메시지)
284
  ai_html = f"""
285
  <div class="ai-wrapper" style="display: flex; align-items: flex-end; justify-content: flex-start; gap: 5px; width: 100%;">
286
  <div class="emoji">🤖</div>
 
292
  # 숨김 텍스트박스 (편집된 텍스트 저장용)
293
  edited_text_input = gr.Textbox(visible=False, elem_id="edited_text_input")
294
 
295
+ # ๋ฒ„ํŠผ ์˜์—ญ: Start Typing๊ณผ Submit ๋ฒ„ํŠผ์„ ๊ฐ™์€ ํ–‰์— ๋ฐฐ์น˜
296
  with gr.Row():
297
  start_button = gr.Button("Start Typing")
298
  submit_button = gr.Button("Submit", elem_id="submit_btn")
299
 
300
+ # 버튼 이벤트 연결
301
  start_button.click(fn=stream_human_message, outputs=human_bubble)
302
  submit_button.click(fn=submit_edit, inputs=edited_text_input, outputs=[human_bubble, ai_bubble])
303