gaeunseo committed on
Commit
a414f84
·
verified ·
1 Parent(s): f07adf0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -26
app.py CHANGED
@@ -1,27 +1,21 @@
1
  import time
2
  import gradio as gr
3
  import random
 
4
  import threading
5
  import pandas as pd
6
- from datasets import load_dataset, Dataset
7
 
8
- # Huggingface Datasets repository ID (본인의 사용자명과 저장소명을 입력하세요)
9
- HF_DATASET_REPO = "gaeunseo/Interface1"
10
  data_lock = threading.Lock()
11
 
12
  def initialize_global_data():
13
  """
14
- Huggingface Datasets repository์— global_data๊ฐ€ ์กด์žฌํ•˜๋ฉด ์ด๋ฅผ ๋ถˆ๋Ÿฌ์™€ DataFrame์œผ๋กœ ๋ณ€ํ™˜ํ•˜์—ฌ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
15
- ๋งŒ์•ฝ ์กด์žฌํ•˜์ง€ ์•Š์œผ๋ฉด, gaeunseo/Taskmaster_sample_data ๋ฐ์ดํ„ฐ์…‹์„ ๋กœ๋“œํ•˜์—ฌ DataFrame์œผ๋กœ ๋ณ€ํ™˜ํ•œ ํ›„,
16
- ํ•„์š”ํ•œ ์ปฌ๋Ÿผ(used, overlapping, text)์ด ์—†์œผ๋ฉด ์ถ”๊ฐ€ํ•˜๊ณ , repository์— ์—…๋กœ๋“œํ•œ ํ›„ DataFrame์„ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
17
  """
18
- try:
19
- # repository์— ์ €์žฅ๋œ ๋ฐ์ดํ„ฐ์…‹ ๋ถˆ๋Ÿฌ์˜ค๊ธฐ
20
- ds = load_dataset(HF_DATASET_REPO, split="train")
21
- data = ds.to_pandas()
22
- return data
23
- except Exception as e:
24
- # repository์— ๋ฐ์ดํ„ฐ์…‹์ด ์—†๊ฑฐ๋‚˜ ๋ถˆ๋Ÿฌ์˜ค์ง€ ๋ชปํ•˜๋ฉด ์ดˆ๊ธฐ ๋ฐ์ดํ„ฐ์…‹์œผ๋กœ ์ƒ์„ฑ
25
  ds = load_dataset("gaeunseo/Taskmaster_sample_data", split="train")
26
  data = ds.to_pandas()
27
  # 필요한 컬럼이 없으면 추가합니다.
@@ -31,34 +25,34 @@ def initialize_global_data():
31
  data["overlapping"] = ""
32
  if "text" not in data.columns:
33
  data["text"] = ""
34
- # Huggingface Datasets repository์— ์—…๋กœ๋“œ
35
- dataset_to_push = Dataset.from_pandas(data)
36
- dataset_to_push.push_to_hub(HF_DATASET_REPO, commit_message="Initialize global_data")
37
  return data
 
 
 
 
38
 
39
  def load_global_data():
40
- """Huggingface Datasets repository์—์„œ global_data DataFrame์„ ๋ถˆ๋Ÿฌ์˜ต๋‹ˆ๋‹ค."""
41
  with data_lock:
42
- ds = load_dataset(HF_DATASET_REPO, split="train")
43
- df = ds.to_pandas()
44
  return df
45
 
46
  def save_global_data(df):
47
- """DataFrame์„ Huggingface Datasets repository์— ์ €์žฅํ•ฉ๋‹ˆ๋‹ค."""
48
  with data_lock:
49
- dataset_to_push = Dataset.from_pandas(df)
50
- dataset_to_push.push_to_hub(HF_DATASET_REPO, commit_message="Update global_data")
51
 
52
- # Huggingface Datasets repository์— ์ €์žฅ๋œ global_data ์ดˆ๊ธฐํ™”
53
  global_data = initialize_global_data()
54
 
55
  def get_random_row_from_dataset():
56
  """
57
- Huggingface Datasets repository์— ์ €์žฅ๋œ global_data์—์„œ,
58
  1. conversation_id๋ณ„๋กœ ๊ทธ๋ฃนํ™”ํ•˜๊ณ ,
59
  2. ๊ฐ ๊ทธ๋ฃน์—์„œ ๋ชจ๋“  ํ–‰์˜ used ์ปฌ๋Ÿผ์ด False์ด๋ฉฐ, ๊ทธ๋ฃน ๋‚ด์— overlapping ์ปฌ๋Ÿผ์ด "TT"์ธ ํ–‰์ด ์กด์žฌํ•˜๋Š” ๊ทธ๋ฃน๋งŒ valid๋กœ ๊ฐ„์ฃผํ•ฉ๋‹ˆ๋‹ค.
60
  validํ•œ ๊ทธ๋ฃน๋“ค ์ค‘ ๋žœ๋คํ•˜๊ฒŒ ํ•˜๋‚˜์˜ ๊ทธ๋ฃน์„ ์„ ํƒํ•œ ํ›„,
61
- - ํ•ด๋‹น ๊ทธ๋ฃน์˜ ๋ชจ๋“  ํ–‰์˜ used ๊ฐ’์„ True๋กœ ์—…๋ฐ์ดํŠธ(์ฆ‰, ์ „์ฒด ๊ทธ๋ฃน์„ ํ• ๋‹น)ํ•˜๊ณ  repository์— ์ €์žฅํ•ฉ๋‹ˆ๋‹ค.
62
  - ์„ ํƒ๋œ ๊ทธ๋ฃน ๋‚ด์—์„œ overlapping ์ปฌ๋Ÿผ์ด "TT"์ธ ํ–‰(์—ฌ๋Ÿฌ ๊ฐœ๋ผ๋ฉด ์ฒซ ๋ฒˆ์งธ)์„ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
63
  """
64
  global global_data
@@ -307,4 +301,4 @@ with gr.Blocks() as demo:
307
  start_button.click(fn=stream_human_message, outputs=human_bubble)
308
  submit_button.click(fn=submit_edit, inputs=edited_text_input, outputs=[human_bubble, ai_bubble])
309
 
310
- demo.launch()
 
1
  import time
2
  import gradio as gr
3
  import random
4
+ import os
5
  import threading
6
  import pandas as pd
7
+ from datasets import load_dataset
8
 
9
+ # CSV 파일 경로와 동시 접근을 위한 Lock 선언
10
+ DATA_FILE = "global_data.csv"
11
  data_lock = threading.Lock()
12
 
13
  def initialize_global_data():
14
  """
15
+ DATA_FILE์ด ์กด์žฌํ•˜์ง€ ์•Š์œผ๋ฉด, gaeunseo/Taskmaster_sample_data ๋ฐ์ดํ„ฐ์…‹์„ ๋กœ๋“œํ•˜์—ฌ DataFrame์œผ๋กœ ๋ณ€ํ™˜ํ•œ ํ›„ CSV ํŒŒ์ผ๋กœ ์ €์žฅํ•ฉ๋‹ˆ๋‹ค.
16
+ ์ด๋ฏธ ํŒŒ์ผ์ด ์žˆ์œผ๋ฉด ํŒŒ์ผ์—์„œ ๋ฐ์ดํ„ฐ๋ฅผ ์ฝ์–ด DataFrame์„ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
 
17
  """
18
+ if not os.path.exists(DATA_FILE):
 
 
 
 
 
 
19
  ds = load_dataset("gaeunseo/Taskmaster_sample_data", split="train")
20
  data = ds.to_pandas()
21
  # 필요한 컬럼이 없으면 추가합니다.
 
25
  data["overlapping"] = ""
26
  if "text" not in data.columns:
27
  data["text"] = ""
28
+ data.to_csv(DATA_FILE, index=False)
 
 
29
  return data
30
+ else:
31
+ with data_lock:
32
+ df = pd.read_csv(DATA_FILE)
33
+ return df
34
 
35
  def load_global_data():
36
+ """CSV ํŒŒ์ผ์—์„œ global_data DataFrame์„ ์ฝ์–ด์˜ต๋‹ˆ๋‹ค."""
37
  with data_lock:
38
+ df = pd.read_csv(DATA_FILE)
 
39
  return df
40
 
41
  def save_global_data(df):
42
+ """DataFrame์„ CSV ํŒŒ์ผ์— ์ €์žฅํ•ฉ๋‹ˆ๋‹ค."""
43
  with data_lock:
44
+ df.to_csv(DATA_FILE, index=False)
 
45
 
46
+ # CSV 파일에 저장된 global_data 초기화
47
  global_data = initialize_global_data()
48
 
49
  def get_random_row_from_dataset():
50
  """
51
+ CSV ํŒŒ์ผ์— ์ €์žฅ๋œ global_data์—์„œ,
52
  1. conversation_id๋ณ„๋กœ ๊ทธ๋ฃนํ™”ํ•˜๊ณ ,
53
  2. ๊ฐ ๊ทธ๋ฃน์—์„œ ๋ชจ๋“  ํ–‰์˜ used ์ปฌ๋Ÿผ์ด False์ด๋ฉฐ, ๊ทธ๋ฃน ๋‚ด์— overlapping ์ปฌ๋Ÿผ์ด "TT"์ธ ํ–‰์ด ์กด์žฌํ•˜๋Š” ๊ทธ๋ฃน๋งŒ valid๋กœ ๊ฐ„์ฃผํ•ฉ๋‹ˆ๋‹ค.
54
  validํ•œ ๊ทธ๋ฃน๋“ค ์ค‘ ๋žœ๋คํ•˜๊ฒŒ ํ•˜๋‚˜์˜ ๊ทธ๋ฃน์„ ์„ ํƒํ•œ ํ›„,
55
+ - ํ•ด๋‹น ๊ทธ๋ฃน์˜ ๋ชจ๋“  ํ–‰์˜ used ๊ฐ’์„ True๋กœ ์—…๋ฐ์ดํŠธ(์ฆ‰, ์ „์ฒด ๊ทธ๋ฃน์„ ํ• ๋‹น)ํ•˜๊ณ  CSV ํŒŒ์ผ์— ์ €์žฅํ•ฉ๋‹ˆ๋‹ค.
56
  - ์„ ํƒ๋œ ๊ทธ๋ฃน ๋‚ด์—์„œ overlapping ์ปฌ๋Ÿผ์ด "TT"์ธ ํ–‰(์—ฌ๋Ÿฌ ๊ฐœ๋ผ๋ฉด ์ฒซ ๋ฒˆ์งธ)์„ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
57
  """
58
  global global_data
 
301
  start_button.click(fn=stream_human_message, outputs=human_bubble)
302
  submit_button.click(fn=submit_edit, inputs=edited_text_input, outputs=[human_bubble, ai_bubble])
303
 
304
+ demo.launch()