Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,6 +7,7 @@ import matplotlib.pyplot as plt
|
|
| 7 |
import csv
|
| 8 |
import io
|
| 9 |
import matplotlib.font_manager as fm
|
|
|
|
| 10 |
|
| 11 |
# 한국어 처리를 위한 KoSentence-BERT 모델 로드
|
| 12 |
model = SentenceTransformer('jhgan/ko-sbert-sts')
|
|
@@ -14,15 +15,16 @@ model = SentenceTransformer('jhgan/ko-sbert-sts')
|
|
| 14 |
# 나눔바른고딕 폰트 설정 (허깅페이스 환경에 맞게 수정)
|
| 15 |
plt.rc('font', family='NanumBarunGothic')
|
| 16 |
|
| 17 |
-
# 전역
|
| 18 |
global_recommendations = None
|
| 19 |
global_csv_string = None
|
|
|
|
| 20 |
|
| 21 |
# CSV 문자열 생성 함수
|
| 22 |
def create_csv_string(recommendations):
    """Serialize recommendation rows into CSV text.

    Args:
        recommendations: Iterable of rows, each an iterable of cell values
            in the order (employee ID, employee name, recommended programs).
            ``None`` is treated as "no rows".

    Returns:
        str: The CSV document, a header line followed by one line per row,
        using the ``csv`` module's default dialect.
    """
    buffer = io.StringIO()
    writer = csv.writer(buffer)
    writer.writerow(["Employee ID", "Employee Name", "Recommended Programs"])
    # Emit a header-only document rather than failing when nothing was passed.
    if recommendations is not None:
        writer.writerows(recommendations)
    return buffer.getvalue()
|
|
@@ -71,12 +73,40 @@ def validate_and_get_columns(employee_df, program_df):
|
|
| 71 |
|
| 72 |
return None, employee_cols, program_cols
|
| 73 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
# 직원 데이터를 분석하여 교육 프로그램을 추천하고, 테이블과 그래프를 생성하는 함수
|
| 75 |
-
def hybrid_rag(employee_file, program_file):
|
| 76 |
global global_recommendations
|
| 77 |
global global_csv_string
|
| 78 |
|
| 79 |
-
#
|
| 80 |
employee_df = pd.read_csv(employee_file.name)
|
| 81 |
program_df = pd.read_csv(program_file.name)
|
| 82 |
|
|
@@ -91,22 +121,38 @@ def hybrid_rag(employee_file, program_file):
|
|
| 91 |
|
| 92 |
similarities = cosine_similarity(employee_embeddings, program_embeddings)
|
| 93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
recommendations = []
|
| 95 |
-
recommendation_rows = []
|
| 96 |
for i, employee in employee_df.iterrows():
|
| 97 |
recommended_programs = []
|
|
|
|
| 98 |
for j, program in program_df.iterrows():
|
| 99 |
if similarities[i][j] > 0.5:
|
| 100 |
recommended_programs.append(f"{program[program_cols['program_name']]} ({program[program_cols['duration']]})")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
|
| 102 |
if recommended_programs:
|
| 103 |
recommendation = f"์ง์ {employee[employee_cols['employee_name']]}์ ์ถ์ฒ ํ๋ก๊ทธ๋จ: {', '.join(recommended_programs)}"
|
| 104 |
-
|
|
|
|
|
|
|
| 105 |
else:
|
| 106 |
recommendation = f"์ง์ {employee[employee_cols['employee_name']]}์๊ฒ ์ ํฉํ ํ๋ก๊ทธ๋จ์ด ์์ต๋๋ค."
|
| 107 |
-
|
|
|
|
|
|
|
| 108 |
|
| 109 |
-
recommendations.append(recommendation)
|
| 110 |
|
| 111 |
global_recommendations = recommendation_rows
|
| 112 |
|
|
@@ -129,7 +175,7 @@ def hybrid_rag(employee_file, program_file):
|
|
| 129 |
global_csv_string = create_csv_string(recommendation_rows)
|
| 130 |
|
| 131 |
# 결과 테이블 데이터프레임 생성
|
| 132 |
-
result_df = pd.DataFrame(recommendation_rows, columns=["Employee ID", "Employee Name", "Recommended Programs"])
|
| 133 |
|
| 134 |
return result_df, chart_buffer, gr.File.update(visible=True)
|
| 135 |
|
|
@@ -141,7 +187,7 @@ def chat_response(message, history):
|
|
| 141 |
|
| 142 |
for employee in global_recommendations:
|
| 143 |
if employee[1].lower() in message.lower():
|
| 144 |
-
return f"{employee[1]}๋์๊ฒ ์ถ์ฒ๋ ํ๋ก๊ทธ๋จ์ ๋ค์๊ณผ ๊ฐ์ต๋๋ค: {employee[2]}"
|
| 145 |
|
| 146 |
return "์ฃ์กํฉ๋๋ค. ํด๋น ์ง์์ ์ ๋ณด๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค. ๋ค๋ฅธ ์ง์ ์ด๋ฆ์ ์
๋ ฅํด์ฃผ์ธ์."
|
| 147 |
|
|
@@ -154,13 +200,23 @@ def download_csv():
|
|
| 154 |
|
| 155 |
# Gradio 블록
|
| 156 |
with gr.Blocks(css=".gradio-button {background-color: #007bff; color: white;} .gradio-textbox {border-color: #6c757d;}") as demo:
|
| 157 |
-
gr.Markdown("<h1 style='text-align: center; color: #2c3e50;'>๐ผ HybridRAG
|
| 158 |
|
| 159 |
with gr.Row():
|
| 160 |
with gr.Column(scale=1, min_width=300):
|
| 161 |
-
gr.Markdown("<h3 style='color: #34495e;'>1.
|
| 162 |
employee_file = gr.File(label="์ง์ ๋ฐ์ดํฐ ์
๋ก๋", interactive=True)
|
| 163 |
program_file = gr.File(label="๊ต์ก ํ๋ก๊ทธ๋จ ๋ฐ์ดํฐ ์
๋ก๋", interactive=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
analyze_button = gr.Button("๋ถ์ ์์", elem_classes="gradio-button")
|
| 165 |
output_table = gr.DataFrame(label="๋ถ์ ๊ฒฐ๊ณผ (ํ
์ด๋ธ)")
|
| 166 |
csv_download = gr.File(label="์ถ์ฒ ๊ฒฐ๊ณผ ๋ค์ด๋ก๋", visible=False)
|
|
@@ -169,13 +225,15 @@ with gr.Blocks(css=".gradio-button {background-color: #007bff; color: white;} .g
|
|
| 169 |
gr.Markdown("<h3 style='color: #34495e;'>2. ๋ถ์ ๊ฒฐ๊ณผ ๋ฐ ์๊ฐํ</h3>")
|
| 170 |
chart_output = gr.Image(label="์๊ฐํ ์ฐจํธ")
|
| 171 |
|
| 172 |
-
gr.Markdown("<h3 style='color: #34495e;'>3. ์ง์๋ณ ์ถ์ฒ ํ๋ก๊ทธ๋จ ํ์ธ</h3>")
|
| 173 |
chatbot = gr.Chatbot()
|
| 174 |
msg = gr.Textbox(label="์ง์ ์ด๋ฆ์ ์
๋ ฅํ์ธ์")
|
| 175 |
clear = gr.Button("๋ํ ๋ด์ญ ์ง์ฐ๊ธฐ")
|
| 176 |
|
| 177 |
# 분석 버튼 클릭 시 테이블, 차트, 파일 다운로드를 업데이트
|
| 178 |
-
analyze_button.click(hybrid_rag,
|
|
|
|
|
|
|
| 179 |
|
| 180 |
# CSV 다운로드 버튼
|
| 181 |
csv_download.click(download_csv, inputs=[], outputs=[csv_download])
|
|
|
|
| 7 |
import csv
|
| 8 |
import io
|
| 9 |
import matplotlib.font_manager as fm
|
| 10 |
+
from datetime import datetime, timedelta
|
| 11 |
|
| 12 |
# 한국어 처리를 위한 KoSentence-BERT 모델 로드
|
| 13 |
model = SentenceTransformer('jhgan/ko-sbert-sts')
|
|
|
|
| 15 |
# 나눔바른고딕 폰트 설정 (허깅페이스 환경에 맞게 수정)
|
| 16 |
plt.rc('font', family='NanumBarunGothic')
|
| 17 |
|
| 18 |
+
# 전역 변수
|
| 19 |
global_recommendations = None
|
| 20 |
global_csv_string = None
|
| 21 |
+
youtube_columns = None
|
| 22 |
|
| 23 |
# CSV 문자열 생성 함수
|
| 24 |
def create_csv_string(recommendations):
    """Serialize recommendation rows into CSV text.

    Args:
        recommendations: Iterable of rows, each an iterable of cell values
            in the order (employee ID, employee name, recommended programs,
            recommended YouTube content). ``None`` is treated as "no rows".

    Returns:
        str: The CSV document, a header line followed by one line per row,
        using the ``csv`` module's default dialect.
    """
    buffer = io.StringIO()
    writer = csv.writer(buffer)
    writer.writerow(
        ["Employee ID", "Employee Name", "Recommended Programs", "Recommended YouTube Content"]
    )
    # Emit a header-only document rather than failing when nothing was passed.
    if recommendations is not None:
        writer.writerows(recommendations)
    return buffer.getvalue()
|
|
|
|
| 73 |
|
| 74 |
return None, employee_cols, program_cols
|
| 75 |
|
| 76 |
+
# 유튜브 데이터 열 선택 함수
|
| 77 |
+
def select_youtube_columns(youtube_file):
    """Fill the four YouTube column dropdowns from an uploaded CSV.

    Reads the uploaded file, asks ``auto_match_columns`` (defined elsewhere
    in this file) for a best-guess mapping of the required roles to the CSV's
    columns, stores that mapping in the module-level ``youtube_columns`` and
    returns one dropdown update per role.

    Args:
        youtube_file: Uploaded-file object exposing the path as ``.name``,
            or ``None`` when no file is present (e.g. the upload was cleared).

    Returns:
        tuple: Four ``gr.Dropdown.update(...)`` values, in the order
        title, description, url, upload_date. Each offers every CSV column
        as a choice and preselects the auto-matched column (``None`` if the
        mapping has no entry for that role).
    """
    global youtube_columns
    required_youtube_cols = ["title", "description", "url", "upload_date"]

    # Without a file there is nothing to read: reset the dropdowns instead
    # of failing on ``youtube_file.name``.
    if youtube_file is None:
        youtube_columns = None
        return tuple(
            gr.Dropdown.update(choices=[], value=None)
            for _ in required_youtube_cols
        )

    youtube_df = pd.read_csv(youtube_file.name)
    # The result is used as a mapping (role -> column name) via ``.get``.
    youtube_columns = auto_match_columns(youtube_df, required_youtube_cols)

    available_columns = youtube_df.columns.tolist()
    return tuple(
        gr.Dropdown.update(choices=available_columns, value=youtube_columns.get(role))
        for role in required_youtube_cols
    )
|
| 88 |
+
|
| 89 |
+
# 유튜브 콘텐츠 데이터 로드 및 처리 함수
|
| 90 |
+
def load_youtube_content(file_path, title_col, description_col, url_col, upload_date_col):
    """Load a YouTube-content CSV and normalise it to four fixed columns.

    Args:
        file_path: Path (or anything ``pandas.read_csv`` accepts) of the CSV.
        title_col: Name of the source column holding the video title.
        description_col: Name of the source column holding the description.
        url_col: Name of the source column holding the video URL.
        upload_date_col: Name of the source column holding the upload date.

    Returns:
        pandas.DataFrame: One row per CSV row with columns ``title``,
        ``description``, ``url`` and ``upload_date``; ``upload_date`` is
        converted with ``pandas.to_datetime``.

    Raises:
        KeyError: If any of the given column names is not in the CSV.
    """
    source_df = pd.read_csv(file_path)
    # Build a fresh frame rather than renaming/assigning into a column
    # selection of the source, so later writes never touch a derived slice.
    youtube_df = pd.DataFrame({
        'title': source_df[title_col],
        'description': source_df[description_col],
        'url': source_df[url_col],
        'upload_date': source_df[upload_date_col],
    })
    youtube_df['upload_date'] = pd.to_datetime(youtube_df['upload_date'])
    return youtube_df
|
| 96 |
+
|
| 97 |
+
# 유튜브 콘텐츠와 교육 프로그램 매칭 함수
|
| 98 |
+
def match_youtube_content(program_skills, youtube_df, model):
    """Score every program's skills text against every YouTube description.

    Args:
        program_skills: Sequence of skill descriptions, one per program.
            Missing entries (``None`` / NaN) are treated as empty text.
        youtube_df: DataFrame with a ``description`` column; missing
            descriptions are treated as empty text.
        model: Object exposing ``encode(list_of_str)`` that returns one
            embedding per input (the file loads a SentenceTransformer at
            module level; presumably that is what gets passed in).

    Returns:
        The result of ``cosine_similarity(program_embeddings,
        youtube_embeddings)``. The caller indexes it as
        ``result[program_index]`` to rank YouTube rows for one program.
    """
    def _as_text(value):
        # CSV-loaded cells can be NaN (a float, and the only value that is
        # not equal to itself) or None; the encoder needs plain strings.
        if isinstance(value, str):
            return value
        if value is None or value != value:
            return ""
        return str(value)

    descriptions = [_as_text(text) for text in youtube_df['description'].tolist()]
    skills = [_as_text(text) for text in program_skills]

    youtube_embeddings = model.encode(descriptions)
    program_embeddings = model.encode(skills)
    return cosine_similarity(program_embeddings, youtube_embeddings)
|
| 103 |
+
|
| 104 |
# 직원 데이터를 분석하여 교육 프로그램을 추천하고, 테이블과 그래프를 생성하는 함수
|
| 105 |
+
def hybrid_rag(employee_file, program_file, youtube_file, title_col, description_col, url_col, upload_date_col):
|
| 106 |
global global_recommendations
|
| 107 |
global global_csv_string
|
| 108 |
|
| 109 |
+
# 직원 및 프로그램 데이터 로드
|
| 110 |
employee_df = pd.read_csv(employee_file.name)
|
| 111 |
program_df = pd.read_csv(program_file.name)
|
| 112 |
|
|
|
|
| 121 |
|
| 122 |
similarities = cosine_similarity(employee_embeddings, program_embeddings)
|
| 123 |
|
| 124 |
+
# 유튜브 콘텐츠 로드 및 처리
|
| 125 |
+
youtube_df = load_youtube_content(youtube_file.name, title_col, description_col, url_col, upload_date_col)
|
| 126 |
+
|
| 127 |
+
# 유튜브 콘텐츠와 교육 프로그램 매칭
|
| 128 |
+
youtube_similarities = match_youtube_content(program_df[program_cols['skills_acquired']].tolist(), youtube_df, model)
|
| 129 |
+
|
| 130 |
recommendations = []
|
| 131 |
+
recommendation_rows = []
|
| 132 |
for i, employee in employee_df.iterrows():
|
| 133 |
recommended_programs = []
|
| 134 |
+
recommended_youtube = []
|
| 135 |
for j, program in program_df.iterrows():
|
| 136 |
if similarities[i][j] > 0.5:
|
| 137 |
recommended_programs.append(f"{program[program_cols['program_name']]} ({program[program_cols['duration']]})")
|
| 138 |
+
|
| 139 |
+
# 해당 프로그램과 가장 유사한 유튜브 콘텐츠 찾기
|
| 140 |
+
top_youtube_indices = youtube_similarities[j].argsort()[-3:][::-1] # ์์ 3๊ฐ
|
| 141 |
+
for idx in top_youtube_indices:
|
| 142 |
+
recommended_youtube.append(f"{youtube_df.iloc[idx]['title']} (URL: {youtube_df.iloc[idx]['url']})")
|
| 143 |
|
| 144 |
if recommended_programs:
|
| 145 |
recommendation = f"์ง์ {employee[employee_cols['employee_name']]}์ ์ถ์ฒ ํ๋ก๊ทธ๋จ: {', '.join(recommended_programs)}"
|
| 146 |
+
youtube_recommendation = f"์ถ์ฒ ์ ํ๋ธ ์ฝํ
์ธ : {', '.join(recommended_youtube)}"
|
| 147 |
+
recommendation_rows.append([employee[employee_cols['employee_id']], employee[employee_cols['employee_name']],
|
| 148 |
+
", ".join(recommended_programs), ", ".join(recommended_youtube)])
|
| 149 |
else:
|
| 150 |
recommendation = f"์ง์ {employee[employee_cols['employee_name']]}์๊ฒ ์ ํฉํ ํ๋ก๊ทธ๋จ์ด ์์ต๋๋ค."
|
| 151 |
+
youtube_recommendation = "์ถ์ฒํ ์ ํ๋ธ ์ฝํ
์ธ ๊ฐ ์์ต๋๋ค."
|
| 152 |
+
recommendation_rows.append([employee[employee_cols['employee_id']], employee[employee_cols['employee_name']],
|
| 153 |
+
"์ ํฉํ ํ๋ก๊ทธ๋จ ์์", "์ถ์ฒ ์ฝํ
์ธ ์์"])
|
| 154 |
|
| 155 |
+
recommendations.append(recommendation + "\n" + youtube_recommendation)
|
| 156 |
|
| 157 |
global_recommendations = recommendation_rows
|
| 158 |
|
|
|
|
| 175 |
global_csv_string = create_csv_string(recommendation_rows)
|
| 176 |
|
| 177 |
# 결과 테이블 데이터프레임 생성
|
| 178 |
+
result_df = pd.DataFrame(recommendation_rows, columns=["Employee ID", "Employee Name", "Recommended Programs", "Recommended YouTube Content"])
|
| 179 |
|
| 180 |
return result_df, chart_buffer, gr.File.update(visible=True)
|
| 181 |
|
|
|
|
| 187 |
|
| 188 |
for employee in global_recommendations:
|
| 189 |
if employee[1].lower() in message.lower():
|
| 190 |
+
return f"{employee[1]}๋์๊ฒ ์ถ์ฒ๋ ํ๋ก๊ทธ๋จ์ ๋ค์๊ณผ ๊ฐ์ต๋๋ค: {employee[2]}\n\n์ถ์ฒ ์ ํ๋ธ ์ฝํ
์ธ : {employee[3]}"
|
| 191 |
|
| 192 |
return "์ฃ์กํฉ๋๋ค. ํด๋น ์ง์์ ์ ๋ณด๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค. ๋ค๋ฅธ ์ง์ ์ด๋ฆ์ ์
๋ ฅํด์ฃผ์ธ์."
|
| 193 |
|
|
|
|
| 200 |
|
| 201 |
# Gradio 블록
|
| 202 |
with gr.Blocks(css=".gradio-button {background-color: #007bff; color: white;} .gradio-textbox {border-color: #6c757d;}") as demo:
|
| 203 |
+
gr.Markdown("<h1 style='text-align: center; color: #2c3e50;'>๐ผ HybridRAG ์์คํ
(์ ํ๋ธ ์ฝํ
์ธ ํฌํจ)</h1>")
|
| 204 |
|
| 205 |
with gr.Row():
|
| 206 |
with gr.Column(scale=1, min_width=300):
|
| 207 |
+
gr.Markdown("<h3 style='color: #34495e;'>1. ๋ฐ์ดํฐ๋ฅผ ์
๋ก๋ํ์ธ์</h3>")
|
| 208 |
employee_file = gr.File(label="์ง์ ๋ฐ์ดํฐ ์
๋ก๋", interactive=True)
|
| 209 |
program_file = gr.File(label="๊ต์ก ํ๋ก๊ทธ๋จ ๋ฐ์ดํฐ ์
๋ก๋", interactive=True)
|
| 210 |
+
youtube_file = gr.File(label="์ ํ๋ธ ์ฝํ
์ธ ๋ฐ์ดํฐ ์
๋ก๋", interactive=True)
|
| 211 |
+
|
| 212 |
+
gr.Markdown("<h4 style='color: #34495e;'>์ ํ๋ธ ๋ฐ์ดํฐ ์ด ์ ํ</h4>")
|
| 213 |
+
title_col = gr.Dropdown(label="์ ๋ชฉ ์ด")
|
| 214 |
+
description_col = gr.Dropdown(label="์ค๋ช
์ด")
|
| 215 |
+
url_col = gr.Dropdown(label="URL ์ด")
|
| 216 |
+
upload_date_col = gr.Dropdown(label="์
๋ก๋ ๋ ์ง ์ด")
|
| 217 |
+
|
| 218 |
+
youtube_file.change(select_youtube_columns, inputs=[youtube_file], outputs=[title_col, description_col, url_col, upload_date_col])
|
| 219 |
+
|
| 220 |
analyze_button = gr.Button("๋ถ์ ์์", elem_classes="gradio-button")
|
| 221 |
output_table = gr.DataFrame(label="๋ถ์ ๊ฒฐ๊ณผ (ํ
์ด๋ธ)")
|
| 222 |
csv_download = gr.File(label="์ถ์ฒ ๊ฒฐ๊ณผ ๋ค์ด๋ก๋", visible=False)
|
|
|
|
| 225 |
gr.Markdown("<h3 style='color: #34495e;'>2. ๋ถ์ ๊ฒฐ๊ณผ ๋ฐ ์๊ฐํ</h3>")
|
| 226 |
chart_output = gr.Image(label="์๊ฐํ ์ฐจํธ")
|
| 227 |
|
| 228 |
+
gr.Markdown("<h3 style='color: #34495e;'>3. ์ง์๋ณ ์ถ์ฒ ํ๋ก๊ทธ๋จ ๋ฐ ์ ํ๋ธ ์ฝํ
์ธ ํ์ธ</h3>")
|
| 229 |
chatbot = gr.Chatbot()
|
| 230 |
msg = gr.Textbox(label="์ง์ ์ด๋ฆ์ ์
๋ ฅํ์ธ์")
|
| 231 |
clear = gr.Button("๋ํ ๋ด์ญ ์ง์ฐ๊ธฐ")
|
| 232 |
|
| 233 |
# 분석 버튼 클릭 시 테이블, 차트, 파일 다운로드를 업데이트
|
| 234 |
+
analyze_button.click(hybrid_rag,
|
| 235 |
+
inputs=[employee_file, program_file, youtube_file, title_col, description_col, url_col, upload_date_col],
|
| 236 |
+
outputs=[output_table, chart_output, csv_download])
|
| 237 |
|
| 238 |
# CSV 다운로드 버튼
|
| 239 |
csv_download.click(download_csv, inputs=[], outputs=[csv_download])
|