Spaces:

JERNGOC
/

20241009_ML

Sleeping

App Files Community

JERNGOC committed on Oct 9, 2024

Commit

7d9573f

verified ·

1 Parent(s): e03dfb5

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -32

app.py CHANGED Viewed

@@ -11,28 +11,28 @@ from sklearn.metrics import accuracy_score, confusion_matrix, roc_curve, auc, cl
 import matplotlib.pyplot as plt
 import seaborn as sns
-# 設定 Streamlit 介面標題
-st.title('分類模型比較：堆疊與投票分類器')
-# 讓使用者上傳資料
-uploaded_file = st.file_uploader("請上傳 CSV 檔案", type=["csv"])
 if uploaded_file is not None:
     df = pd.read_csv(uploaded_file)
-    # 定義特徵與目標變數
     X = df.drop(columns=['Target_goal'])
     y = df['Target_goal']
-    # 分割數據集
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-    # 標準化數據
     scaler = StandardScaler()
     X_train = scaler.fit_transform(X_train)
     X_test = scaler.transform(X_test)
-    # 定義基礎模型
     estimators = [
         ('lr', LogisticRegression()),
         ('dt', DecisionTreeClassifier()),
@@ -41,7 +41,7 @@ if uploaded_file is not None:
         ('svc', SVC(probability=True))
     ]
-    # 堆疊分類器
     stacking_clf = StackingClassifier(
         estimators=estimators,
         final_estimator=LogisticRegression()
@@ -50,15 +50,15 @@ if uploaded_file is not None:
     y_pred_stack = stacking_clf.predict(X_test)
     y_pred_stack_proba = stacking_clf.predict_proba(X_test)[:, 1]
-    # 堆疊分類器準確性
     accuracy_stack = accuracy_score(y_test, y_pred_stack)
-    st.write(f'堆疊分類器的準確性: {accuracy_stack:.2f}')
-    # 堆疊分類器的分類報告
-    st.write("堆疊分類器的分類報告:")
     st.text(classification_report(y_test, y_pred_stack))
-    # 投票分類器
     voting_clf = VotingClassifier(
         estimators=estimators,
         voting='soft'
@@ -67,34 +67,34 @@ if uploaded_file is not None:
     y_pred_vote = voting_clf.predict(X_test)
     y_pred_vote_proba = voting_clf.predict_proba(X_test)[:, 1]
-    # 投票分類器準確性
     accuracy_vote = accuracy_score(y_test, y_pred_vote)
-    st.write(f'投票分類器的準確性: {accuracy_vote:.2f}')
-    # 投票分類器的分類報告
-    st.write("投票分類器的分類報告:")
     st.text(classification_report(y_test, y_pred_vote))
-    # 混淆矩陣可視化
-    st.write("堆疊分類器的混淆矩陣：")
     conf_matrix_stack = confusion_matrix(y_test, y_pred_stack)
     fig, ax = plt.subplots()
     sns.heatmap(conf_matrix_stack, annot=True, fmt='d', cmap='Blues', ax=ax)
-    ax.set_title('堆疊分類器的混淆矩陣')
     st.pyplot(fig)
-    st.write("投票分類器的混淆矩陣：")
     conf_matrix_vote = confusion_matrix(y_test, y_pred_vote)
     fig, ax = plt.subplots()
     sns.heatmap(conf_matrix_vote, annot=True, fmt='d', cmap='Blues', ax=ax)
-    ax.set_title('投票分類器的混淆矩陣')
     st.pyplot(fig)
-    # ROC 曲線
-    # 將 y_test 標籤轉換為 0 和 1
-    y_test_binary = (y_test == 2).astype(int)  # 假設 2 是正標籤
-    # 計算 ROC 曲線
     fpr_stack, tpr_stack, _ = roc_curve(y_test_binary, y_pred_stack_proba)
     roc_auc_stack = auc(fpr_stack, tpr_stack)
@@ -102,13 +102,13 @@ if uploaded_file is not None:
     roc_auc_vote = auc(fpr_vote, tpr_vote)
     fig, ax = plt.subplots()
-    ax.plot(fpr_stack, tpr_stack, color='blue', lw=2, label='堆疊分類器 (AUC = %0.2f)' % roc_auc_stack)
-    ax.plot(fpr_vote, tpr_vote, color='red', lw=2, label='投票分類器 (AUC = %0.2f)' % roc_auc_vote)
     ax.plot([0, 1], [0, 1], color='gray', lw=1, linestyle='--')
     ax.set_xlim([0.0, 1.0])
     ax.set_ylim([0.0, 1.05])
-    ax.set_xlabel('假陽性率（False Positive Rate）')
-    ax.set_ylabel('真陽性率（True Positive Rate）')
-    ax.set_title('ROC 曲線')
     ax.legend(loc="lower right")
     st.pyplot(fig)

 import matplotlib.pyplot as plt
 import seaborn as sns
+# Set Streamlit interface title
+st.title('Classification Model Comparison: Stacking and Voting Classifiers')
+# Allow user to upload data
+uploaded_file = st.file_uploader("Please upload a CSV file", type=["csv"])
 if uploaded_file is not None:
     df = pd.read_csv(uploaded_file)
+    # Define features and target variable
     X = df.drop(columns=['Target_goal'])
     y = df['Target_goal']
+    # Split dataset
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+    # Standardize data
     scaler = StandardScaler()
     X_train = scaler.fit_transform(X_train)
     X_test = scaler.transform(X_test)
+    # Define base models
     estimators = [
         ('lr', LogisticRegression()),
         ('dt', DecisionTreeClassifier()),
         ('svc', SVC(probability=True))
     ]
+    # Stacking classifier
     stacking_clf = StackingClassifier(
         estimators=estimators,
         final_estimator=LogisticRegression()
     y_pred_stack = stacking_clf.predict(X_test)
     y_pred_stack_proba = stacking_clf.predict_proba(X_test)[:, 1]
+    # Stacking classifier accuracy
     accuracy_stack = accuracy_score(y_test, y_pred_stack)
+    st.write(f'Stacking Classifier Accuracy: {accuracy_stack:.2f}')
+    # Stacking classifier classification report
+    st.write("Stacking Classifier Classification Report:")
     st.text(classification_report(y_test, y_pred_stack))
+    # Voting classifier
     voting_clf = VotingClassifier(
         estimators=estimators,
         voting='soft'
     y_pred_vote = voting_clf.predict(X_test)
     y_pred_vote_proba = voting_clf.predict_proba(X_test)[:, 1]
+    # Voting classifier accuracy
     accuracy_vote = accuracy_score(y_test, y_pred_vote)
+    st.write(f'Voting Classifier Accuracy: {accuracy_vote:.2f}')
+    # Voting classifier classification report
+    st.write("Voting Classifier Classification Report:")
     st.text(classification_report(y_test, y_pred_vote))
+    # Confusion matrix visualization
+    st.write("Stacking Classifier Confusion Matrix:")
     conf_matrix_stack = confusion_matrix(y_test, y_pred_stack)
     fig, ax = plt.subplots()
     sns.heatmap(conf_matrix_stack, annot=True, fmt='d', cmap='Blues', ax=ax)
+    ax.set_title('Stacking Classifier Confusion Matrix')
     st.pyplot(fig)
+    st.write("Voting Classifier Confusion Matrix:")
     conf_matrix_vote = confusion_matrix(y_test, y_pred_vote)
     fig, ax = plt.subplots()
     sns.heatmap(conf_matrix_vote, annot=True, fmt='d', cmap='Blues', ax=ax)
+    ax.set_title('Voting Classifier Confusion Matrix')
     st.pyplot(fig)
+    # ROC curve
+    # Convert y_test labels to 0 and 1
+    y_test_binary = (y_test == 2).astype(int)  # Assume 2 is the positive label
+    # Calculate ROC curve
     fpr_stack, tpr_stack, _ = roc_curve(y_test_binary, y_pred_stack_proba)
     roc_auc_stack = auc(fpr_stack, tpr_stack)
     roc_auc_vote = auc(fpr_vote, tpr_vote)
     fig, ax = plt.subplots()
+    ax.plot(fpr_stack, tpr_stack, color='blue', lw=2, label='Stacking Classifier (AUC = %0.2f)' % roc_auc_stack)
+    ax.plot(fpr_vote, tpr_vote, color='red', lw=2, label='Voting Classifier (AUC = %0.2f)' % roc_auc_vote)
     ax.plot([0, 1], [0, 1], color='gray', lw=1, linestyle='--')
     ax.set_xlim([0.0, 1.0])
     ax.set_ylim([0.0, 1.05])
+    ax.set_xlabel('False Positive Rate')
+    ax.set_ylabel('True Positive Rate')
+    ax.set_title('ROC Curve')
     ax.legend(loc="lower right")
     st.pyplot(fig)