Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,22 +2,12 @@ import gradio as gr
|
|
| 2 |
import pandas as pd
|
| 3 |
from sentence_transformers import SentenceTransformer
|
| 4 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 5 |
-
import networkx as nx
|
| 6 |
-
import matplotlib.pyplot as plt
|
| 7 |
import csv
|
| 8 |
import io
|
| 9 |
-
import matplotlib.font_manager as fm
|
| 10 |
-
from datetime import datetime, timedelta
|
| 11 |
|
| 12 |
# 한국어 처리를 위한 KoSentence-BERT 모델 로드
|
| 13 |
model = SentenceTransformer('jhgan/ko-sbert-sts')
|
| 14 |
|
| 15 |
-
font_path = "./NanumBarunGothic.ttf"
|
| 16 |
-
font_prop = fm.FontProperties(fname=font_path)
|
| 17 |
-
plt.rcParams['font.family'] = 'NanumBarunGothic'
|
| 18 |
-
plt.rcParams['font.sans-serif'] = ['NanumBarunGothic']
|
| 19 |
-
fm.fontManager.addfont(font_path)
|
| 20 |
-
|
| 21 |
# 전역 변수
|
| 22 |
global_recommendations = None
|
| 23 |
global_csv_string = None
|
|
@@ -32,19 +22,6 @@ def create_csv_string(recommendations):
|
|
| 32 |
writer.writerow(rec)
|
| 33 |
return output.getvalue()
|
| 34 |
|
| 35 |
-
def create_chart(G):
|
| 36 |
-
plt.figure(figsize=(10, 8))
|
| 37 |
-
pos = nx.spring_layout(G)
|
| 38 |
-
nx.draw(G, pos, with_labels=True, node_color='lightblue', node_size=3000, font_size=10, font_weight='bold', edge_color='gray')
|
| 39 |
-
plt.title("직원과 프로그램 간의 관계", fontsize=14, fontweight='bold')
|
| 40 |
-
plt.tight_layout(pad=1.0)
|
| 41 |
-
|
| 42 |
-
buf = io.BytesIO()
|
| 43 |
-
plt.savefig(buf, format='png', bbox_inches='tight')
|
| 44 |
-
buf.seek(0)
|
| 45 |
-
plt.close()
|
| 46 |
-
return buf
|
| 47 |
-
|
| 48 |
# 열 매칭 함수
|
| 49 |
def auto_match_columns(df, required_cols):
|
| 50 |
matched_cols = {}
|
|
@@ -76,7 +53,6 @@ def validate_and_get_columns(employee_df, program_df):
|
|
| 76 |
return None, employee_cols, program_cols
|
| 77 |
|
| 78 |
# 유튜브 데이터 열 선택 함수
|
| 79 |
-
|
| 80 |
def select_youtube_columns(youtube_file):
|
| 81 |
global youtube_columns
|
| 82 |
if youtube_file is None:
|
|
@@ -121,7 +97,7 @@ def match_youtube_content(program_skills, youtube_df, model):
|
|
| 121 |
similarities = cosine_similarity(program_embeddings, youtube_embeddings)
|
| 122 |
return similarities
|
| 123 |
|
| 124 |
-
# 직원 데이터를 분석하여 교육 프로그램을 추천하고,
|
| 125 |
def hybrid_rag(employee_file, program_file, youtube_file, title_col, description_col, url_col, upload_date_col):
|
| 126 |
global global_recommendations
|
| 127 |
global global_csv_string
|
|
@@ -132,7 +108,7 @@ def hybrid_rag(employee_file, program_file, youtube_file, title_col, description
|
|
| 132 |
|
| 133 |
error_msg, employee_cols, program_cols = validate_and_get_columns(employee_df, program_df)
|
| 134 |
if error_msg:
|
| 135 |
-
return error_msg, None, None
|
| 136 |
|
| 137 |
employee_skills = employee_df[employee_cols["current_skills"]].tolist()
|
| 138 |
program_skills = program_df[program_cols["skills_acquired"]].tolist()
|
|
@@ -176,21 +152,6 @@ def hybrid_rag(employee_file, program_file, youtube_file, title_col, description
|
|
| 176 |
recommendations.append(recommendation + "\n" + youtube_recommendation)
|
| 177 |
|
| 178 |
global_recommendations = recommendation_rows
|
| 179 |
-
|
| 180 |
-
G = nx.Graph()
|
| 181 |
-
for employee in employee_df[employee_cols['employee_name']]:
|
| 182 |
-
G.add_node(employee, type='employee')
|
| 183 |
-
|
| 184 |
-
for program in program_df[program_cols['program_name']]:
|
| 185 |
-
G.add_node(program, type='program')
|
| 186 |
-
|
| 187 |
-
for i, employee in employee_df.iterrows():
|
| 188 |
-
for j, program in program_df.iterrows():
|
| 189 |
-
if similarities[i][j] > 0.5:
|
| 190 |
-
G.add_edge(employee[employee_cols['employee_name']], program[program_cols['program_name']])
|
| 191 |
-
|
| 192 |
-
# 차트 생성
|
| 193 |
-
chart_buffer = create_chart(G)
|
| 194 |
|
| 195 |
# CSV 문자열 생성
|
| 196 |
global_csv_string = create_csv_string(recommendation_rows)
|
|
@@ -198,7 +159,7 @@ def hybrid_rag(employee_file, program_file, youtube_file, title_col, description
|
|
| 198 |
# 결과 테이블 데이터프레임 생성
|
| 199 |
result_df = pd.DataFrame(recommendation_rows, columns=["Employee ID", "Employee Name", "Recommended Programs", "Recommended YouTube Content"])
|
| 200 |
|
| 201 |
-
return result_df,
|
| 202 |
|
| 203 |
# 채팅 응답 함수
|
| 204 |
def chat_response(message, history):
|
|
@@ -241,20 +202,16 @@ with gr.Blocks(css=".gradio-button {background-color: #007bff; color: white;} .g
|
|
| 241 |
csv_download = gr.File(label="추천 결과 다운로드", visible=False)
|
| 242 |
download_button = gr.Button("CSV 다운로드", visible=False)
|
| 243 |
|
| 244 |
-
|
| 245 |
-
gr.Markdown("<h3 style='color: #34495e;'>2. 분석 결과 및 시각화</h3>")
|
| 246 |
-
chart_output = gr.Image(label="시각화 차트")
|
| 247 |
-
|
| 248 |
-
gr.Markdown("<h3 style='color: #34495e;'>3. 직원별 추천 프로그램 및 유튜브 콘텐츠 확인</h3>")
|
| 249 |
chatbot = gr.Chatbot()
|
| 250 |
msg = gr.Textbox(label="직원 이름을 입력하세요")
|
| 251 |
clear = gr.Button("대화 내역 지우기")
|
| 252 |
|
| 253 |
-
# 분석 버튼 클릭 시 테이블,
|
| 254 |
analyze_button.click(
|
| 255 |
hybrid_rag,
|
| 256 |
inputs=[employee_file, program_file, youtube_file, title_col, description_col, url_col, upload_date_col],
|
| 257 |
-
outputs=[output_table,
|
| 258 |
)
|
| 259 |
|
| 260 |
# CSV 다운로드 버튼
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
from sentence_transformers import SentenceTransformer
|
| 4 |
from sklearn.metrics.pairwise import cosine_similarity
|
|
|
|
|
|
|
| 5 |
import csv
|
| 6 |
import io
|
|
|
|
|
|
|
| 7 |
|
| 8 |
# 한국어 처리를 위한 KoSentence-BERT 모델 로드
|
| 9 |
model = SentenceTransformer('jhgan/ko-sbert-sts')
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
# 전역 변수
|
| 12 |
global_recommendations = None
|
| 13 |
global_csv_string = None
|
|
|
|
| 22 |
writer.writerow(rec)
|
| 23 |
return output.getvalue()
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
# 열 매칭 함수
|
| 26 |
def auto_match_columns(df, required_cols):
|
| 27 |
matched_cols = {}
|
|
|
|
| 53 |
return None, employee_cols, program_cols
|
| 54 |
|
| 55 |
# 유튜브 데이터 열 선택 함수
|
|
|
|
| 56 |
def select_youtube_columns(youtube_file):
|
| 57 |
global youtube_columns
|
| 58 |
if youtube_file is None:
|
|
|
|
| 97 |
similarities = cosine_similarity(program_embeddings, youtube_embeddings)
|
| 98 |
return similarities
|
| 99 |
|
| 100 |
+
# 직원 데이터를 분석하여 교육 프로그램을 추천하고, 테이블을 생성하는 함수
|
| 101 |
def hybrid_rag(employee_file, program_file, youtube_file, title_col, description_col, url_col, upload_date_col):
|
| 102 |
global global_recommendations
|
| 103 |
global global_csv_string
|
|
|
|
| 108 |
|
| 109 |
error_msg, employee_cols, program_cols = validate_and_get_columns(employee_df, program_df)
|
| 110 |
if error_msg:
|
| 111 |
+
return error_msg, None, None
|
| 112 |
|
| 113 |
employee_skills = employee_df[employee_cols["current_skills"]].tolist()
|
| 114 |
program_skills = program_df[program_cols["skills_acquired"]].tolist()
|
|
|
|
| 152 |
recommendations.append(recommendation + "\n" + youtube_recommendation)
|
| 153 |
|
| 154 |
global_recommendations = recommendation_rows
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
|
| 156 |
# CSV 문자열 생성
|
| 157 |
global_csv_string = create_csv_string(recommendation_rows)
|
|
|
|
| 159 |
# 결과 테이블 데이터프레임 생성
|
| 160 |
result_df = pd.DataFrame(recommendation_rows, columns=["Employee ID", "Employee Name", "Recommended Programs", "Recommended YouTube Content"])
|
| 161 |
|
| 162 |
+
return result_df, gr.File(value=global_csv_string, visible=True), gr.Button(value="CSV 다운로드", visible=True)
|
| 163 |
|
| 164 |
# 채팅 응답 함수
|
| 165 |
def chat_response(message, history):
|
|
|
|
| 202 |
csv_download = gr.File(label="추천 결과 다운로드", visible=False)
|
| 203 |
download_button = gr.Button("CSV 다운로드", visible=False)
|
| 204 |
|
| 205 |
+
gr.Markdown("<h3 style='color: #34495e;'>2. 직원별 추천 프로그램 및 유튜브 콘텐츠 확인</h3>")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
chatbot = gr.Chatbot()
|
| 207 |
msg = gr.Textbox(label="직원 이름을 입력하세요")
|
| 208 |
clear = gr.Button("대화 내역 지우기")
|
| 209 |
|
| 210 |
+
# 분석 버튼 클릭 시 테이블, 파일 다운로드를 업데이트
|
| 211 |
analyze_button.click(
|
| 212 |
hybrid_rag,
|
| 213 |
inputs=[employee_file, program_file, youtube_file, title_col, description_col, url_col, upload_date_col],
|
| 214 |
+
outputs=[output_table, csv_download, download_button]
|
| 215 |
)
|
| 216 |
|
| 217 |
# CSV 다운로드 버튼
|