Upload 15 files
Browse files
- app.py +35 -1
- data/iris.csv +151 -0
- data/model.pkl +3 -0
- images/feature01.png +0 -0
- images/setosa.jpg +0 -0
- images/setosa.webp +0 -0
- images/versicolor.jpg +0 -0
- images/versicolor.webp +0 -0
- images/virginica.jpg +0 -0
- images/virginica.webp +0 -0
- images/下载 (1).jpeg +0 -0
- pages/01data_intro.py +35 -0
- pages/02data_feature.py +57 -0
- pages/03model_train.py +125 -0
- pages/04model_sample.py +64 -0
app.py
CHANGED
|
@@ -1,3 +1,37 @@
|
|
|
|
|
| 1 |
import simplestart as ss
|
| 2 |
|
| 3 |
-
ss.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### 简介
|
| 2 |
import simplestart as ss
|
| 3 |
|
| 4 |
+
ss.md('''
|
| 5 |
+
## Iris 数据集
|
| 6 |
+
Iris数据集是常用的分类实验数据集,由Fisher, 1936收集整理。Iris也称鸢尾花卉数据集,是一类多重变量分析的数据集。数据集包含150个数据样本,分为3类,每类50个数据,每个数据包含4个属性。可通过花萼长度,花萼宽度,花瓣长度,花瓣宽度4个属性预测鸢尾花卉属于(Setosa,Versicolour,Virginica)三个种类中的哪一类。
|
| 7 |
+
|
| 8 |
+
### 鸢尾花
|
| 9 |
+
鸢尾的文化底蕴深厚,鸢尾花因花瓣形如鸢鸟尾巴而称之,其拉丁属名“iris” 为希腊语“彩虹”之意,喻指花色丰富。
|
| 10 |
+
''')
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
with ss.row(style="margin:10px 0"):
|
| 14 |
+
with ss.col():
|
| 15 |
+
ss.image("./images/setosa.webp", title = "丝质鸢尾 setosa", elevation = 10, width=250)
|
| 16 |
+
|
| 17 |
+
with ss.col():
|
| 18 |
+
ss.image("./images/versicolor.webp", title = "鸢尾花 versicolor", elevation = 10, width=250)
|
| 19 |
+
|
| 20 |
+
with ss.col():
|
| 21 |
+
ss.image("./images/virginica.webp", title = "弗吉尼亚鸢尾 virginica", elevation = 10, width=250)
|
| 22 |
+
|
| 23 |
+
ss.md('''
|
| 24 |
+
### 机器学习
|
| 25 |
+
|
| 26 |
+
这个教程将使用 scikit-learn 库构建一个机器学习分类模型,以预测鸢尾花的品种,具体说就是利用鸢尾花的测量数据——包括花瓣和花萼的长度及宽度训练和测试模型。我们的目标是通过应用几个经典的机器学习算法,教会模型如何从这些已标记的数据中学习,以便能对新的鸢尾花进行准确的品种预测。
|
| 27 |
+
|
| 28 |
+
''')
|
| 29 |
+
|
| 30 |
+
ss.md('''
|
| 31 |
+
###
|
| 32 |
+
本例的网络参考资料有:
|
| 33 |
+
---
|
| 34 |
+
[1.【机器学习基础】1.7 鸢尾花分类](https://blog.csdn.net/qq_47809408/article/details/124632290)
|
| 35 |
+
[2. KNN分类算法介绍,用KNN分类鸢尾花数据集(iris)](https://blog.csdn.net/weixin_51756038/article/details/130096706)
|
| 36 |
+
[3. Interactive Web App with Streamlit and Scikit-learn](https://github.com/patrickloeber/streamlit-demo)
|
| 37 |
+
''')
|
data/iris.csv
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
sepal_length,sepal_width,petal_length,petal_width,species
|
| 2 |
+
5.1,3.5,1.4,0.2,setosa
|
| 3 |
+
4.9,3.0,1.4,0.2,setosa
|
| 4 |
+
4.7,3.2,1.3,0.2,setosa
|
| 5 |
+
4.6,3.1,1.5,0.2,setosa
|
| 6 |
+
5.0,3.6,1.4,0.2,setosa
|
| 7 |
+
5.4,3.9,1.7,0.4,setosa
|
| 8 |
+
4.6,3.4,1.4,0.3,setosa
|
| 9 |
+
5.0,3.4,1.5,0.2,setosa
|
| 10 |
+
4.4,2.9,1.4,0.2,setosa
|
| 11 |
+
4.9,3.1,1.5,0.1,setosa
|
| 12 |
+
5.4,3.7,1.5,0.2,setosa
|
| 13 |
+
4.8,3.4,1.6,0.2,setosa
|
| 14 |
+
4.8,3.0,1.4,0.1,setosa
|
| 15 |
+
4.3,3.0,1.1,0.1,setosa
|
| 16 |
+
5.8,4.0,1.2,0.2,setosa
|
| 17 |
+
5.7,4.4,1.5,0.4,setosa
|
| 18 |
+
5.4,3.9,1.3,0.4,setosa
|
| 19 |
+
5.1,3.5,1.4,0.3,setosa
|
| 20 |
+
5.7,3.8,1.7,0.3,setosa
|
| 21 |
+
5.1,3.8,1.5,0.3,setosa
|
| 22 |
+
5.4,3.4,1.7,0.2,setosa
|
| 23 |
+
5.1,3.7,1.5,0.4,setosa
|
| 24 |
+
4.6,3.6,1.0,0.2,setosa
|
| 25 |
+
5.1,3.3,1.7,0.5,setosa
|
| 26 |
+
4.8,3.4,1.9,0.2,setosa
|
| 27 |
+
5.0,3.0,1.6,0.2,setosa
|
| 28 |
+
5.0,3.4,1.6,0.4,setosa
|
| 29 |
+
5.2,3.5,1.5,0.2,setosa
|
| 30 |
+
5.2,3.4,1.4,0.2,setosa
|
| 31 |
+
4.7,3.2,1.6,0.2,setosa
|
| 32 |
+
4.8,3.1,1.6,0.2,setosa
|
| 33 |
+
5.4,3.4,1.5,0.4,setosa
|
| 34 |
+
5.2,4.1,1.5,0.1,setosa
|
| 35 |
+
5.5,4.2,1.4,0.2,setosa
|
| 36 |
+
4.9,3.1,1.5,0.2,setosa
|
| 37 |
+
5.0,3.2,1.2,0.2,setosa
|
| 38 |
+
5.5,3.5,1.3,0.2,setosa
|
| 39 |
+
4.9,3.6,1.4,0.1,setosa
|
| 40 |
+
4.4,3.0,1.3,0.2,setosa
|
| 41 |
+
5.1,3.4,1.5,0.2,setosa
|
| 42 |
+
5.0,3.5,1.3,0.3,setosa
|
| 43 |
+
4.5,2.3,1.3,0.3,setosa
|
| 44 |
+
4.4,3.2,1.3,0.2,setosa
|
| 45 |
+
5.0,3.5,1.6,0.6,setosa
|
| 46 |
+
5.1,3.8,1.9,0.4,setosa
|
| 47 |
+
4.8,3.0,1.4,0.3,setosa
|
| 48 |
+
5.1,3.8,1.6,0.2,setosa
|
| 49 |
+
4.6,3.2,1.4,0.2,setosa
|
| 50 |
+
5.3,3.7,1.5,0.2,setosa
|
| 51 |
+
5.0,3.3,1.4,0.2,setosa
|
| 52 |
+
7.0,3.2,4.7,1.4,versicolor
|
| 53 |
+
6.4,3.2,4.5,1.5,versicolor
|
| 54 |
+
6.9,3.1,4.9,1.5,versicolor
|
| 55 |
+
5.5,2.3,4.0,1.3,versicolor
|
| 56 |
+
6.5,2.8,4.6,1.5,versicolor
|
| 57 |
+
5.7,2.8,4.5,1.3,versicolor
|
| 58 |
+
6.3,3.3,4.7,1.6,versicolor
|
| 59 |
+
4.9,2.4,3.3,1.0,versicolor
|
| 60 |
+
6.6,2.9,4.6,1.3,versicolor
|
| 61 |
+
5.2,2.7,3.9,1.4,versicolor
|
| 62 |
+
5.0,2.0,3.5,1.0,versicolor
|
| 63 |
+
5.9,3.0,4.2,1.5,versicolor
|
| 64 |
+
6.0,2.2,4.0,1.0,versicolor
|
| 65 |
+
6.1,2.9,4.7,1.4,versicolor
|
| 66 |
+
5.6,2.9,3.6,1.3,versicolor
|
| 67 |
+
6.7,3.1,4.4,1.4,versicolor
|
| 68 |
+
5.6,3.0,4.5,1.5,versicolor
|
| 69 |
+
5.8,2.7,4.1,1.0,versicolor
|
| 70 |
+
6.2,2.2,4.5,1.5,versicolor
|
| 71 |
+
5.6,2.5,3.9,1.1,versicolor
|
| 72 |
+
5.9,3.2,4.8,1.8,versicolor
|
| 73 |
+
6.1,2.8,4.0,1.3,versicolor
|
| 74 |
+
6.3,2.5,4.9,1.5,versicolor
|
| 75 |
+
6.1,2.8,4.7,1.2,versicolor
|
| 76 |
+
6.4,2.9,4.3,1.3,versicolor
|
| 77 |
+
6.6,3.0,4.4,1.4,versicolor
|
| 78 |
+
6.8,2.8,4.8,1.4,versicolor
|
| 79 |
+
6.7,3.0,5.0,1.7,versicolor
|
| 80 |
+
6.0,2.9,4.5,1.5,versicolor
|
| 81 |
+
5.7,2.6,3.5,1.0,versicolor
|
| 82 |
+
5.5,2.4,3.8,1.1,versicolor
|
| 83 |
+
5.5,2.4,3.7,1.0,versicolor
|
| 84 |
+
5.8,2.7,3.9,1.2,versicolor
|
| 85 |
+
6.0,2.7,5.1,1.6,versicolor
|
| 86 |
+
5.4,3.0,4.5,1.5,versicolor
|
| 87 |
+
6.0,3.4,4.5,1.6,versicolor
|
| 88 |
+
6.7,3.1,4.7,1.5,versicolor
|
| 89 |
+
6.3,2.3,4.4,1.3,versicolor
|
| 90 |
+
5.6,3.0,4.1,1.3,versicolor
|
| 91 |
+
5.5,2.5,4.0,1.3,versicolor
|
| 92 |
+
5.5,2.6,4.4,1.2,versicolor
|
| 93 |
+
6.1,3.0,4.6,1.4,versicolor
|
| 94 |
+
5.8,2.6,4.0,1.2,versicolor
|
| 95 |
+
5.0,2.3,3.3,1.0,versicolor
|
| 96 |
+
5.6,2.7,4.2,1.3,versicolor
|
| 97 |
+
5.7,3.0,4.2,1.2,versicolor
|
| 98 |
+
5.7,2.9,4.2,1.3,versicolor
|
| 99 |
+
6.2,2.9,4.3,1.3,versicolor
|
| 100 |
+
5.1,2.5,3.0,1.1,versicolor
|
| 101 |
+
5.7,2.8,4.1,1.3,versicolor
|
| 102 |
+
6.3,3.3,6.0,2.5,virginica
|
| 103 |
+
5.8,2.7,5.1,1.9,virginica
|
| 104 |
+
7.1,3.0,5.9,2.1,virginica
|
| 105 |
+
6.3,2.9,5.6,1.8,virginica
|
| 106 |
+
6.5,3.0,5.8,2.2,virginica
|
| 107 |
+
7.6,3.0,6.6,2.1,virginica
|
| 108 |
+
4.9,2.5,4.5,1.7,virginica
|
| 109 |
+
7.3,2.9,6.3,1.8,virginica
|
| 110 |
+
6.7,2.5,5.8,1.8,virginica
|
| 111 |
+
7.2,3.6,6.1,2.5,virginica
|
| 112 |
+
6.5,3.2,5.1,2.0,virginica
|
| 113 |
+
6.4,2.7,5.3,1.9,virginica
|
| 114 |
+
6.8,3.0,5.5,2.1,virginica
|
| 115 |
+
5.7,2.5,5.0,2.0,virginica
|
| 116 |
+
5.8,2.8,5.1,2.4,virginica
|
| 117 |
+
6.4,3.2,5.3,2.3,virginica
|
| 118 |
+
6.5,3.0,5.5,1.8,virginica
|
| 119 |
+
7.7,3.8,6.7,2.2,virginica
|
| 120 |
+
7.7,2.6,6.9,2.3,virginica
|
| 121 |
+
6.0,2.2,5.0,1.5,virginica
|
| 122 |
+
6.9,3.2,5.7,2.3,virginica
|
| 123 |
+
5.6,2.8,4.9,2.0,virginica
|
| 124 |
+
7.7,2.8,6.7,2.0,virginica
|
| 125 |
+
6.3,2.7,4.9,1.8,virginica
|
| 126 |
+
6.7,3.3,5.7,2.1,virginica
|
| 127 |
+
7.2,3.2,6.0,1.8,virginica
|
| 128 |
+
6.2,2.8,4.8,1.8,virginica
|
| 129 |
+
6.1,3.0,4.9,1.8,virginica
|
| 130 |
+
6.4,2.8,5.6,2.1,virginica
|
| 131 |
+
7.2,3.0,5.8,1.6,virginica
|
| 132 |
+
7.4,2.8,6.1,1.9,virginica
|
| 133 |
+
7.9,3.8,6.4,2.0,virginica
|
| 134 |
+
6.4,2.8,5.6,2.2,virginica
|
| 135 |
+
6.3,2.8,5.1,1.5,virginica
|
| 136 |
+
6.1,2.6,5.6,1.4,virginica
|
| 137 |
+
7.7,3.0,6.1,2.3,virginica
|
| 138 |
+
6.3,3.4,5.6,2.4,virginica
|
| 139 |
+
6.4,3.1,5.5,1.8,virginica
|
| 140 |
+
6.0,3.0,4.8,1.8,virginica
|
| 141 |
+
6.9,3.1,5.4,2.1,virginica
|
| 142 |
+
6.7,3.1,5.6,2.4,virginica
|
| 143 |
+
6.9,3.1,5.1,2.3,virginica
|
| 144 |
+
5.8,2.7,5.1,1.9,virginica
|
| 145 |
+
6.8,3.2,5.9,2.3,virginica
|
| 146 |
+
6.7,3.3,5.7,2.5,virginica
|
| 147 |
+
6.7,3.0,5.2,2.3,virginica
|
| 148 |
+
6.3,2.5,5.0,1.9,virginica
|
| 149 |
+
6.5,3.0,5.2,2.0,virginica
|
| 150 |
+
6.2,3.4,5.4,2.3,virginica
|
| 151 |
+
5.9,3.0,5.1,1.8,virginica
|
data/model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e37d94112cf51c382e9639ba7d9aab490170f2aa684455aa589c0c097dbdb929
|
| 3 |
+
size 912
|
images/feature01.png
ADDED
|
images/setosa.jpg
ADDED
|
images/setosa.webp
ADDED
|
images/versicolor.jpg
ADDED
|
images/versicolor.webp
ADDED
|
images/virginica.jpg
ADDED
|
images/virginica.webp
ADDED
|
images/下载 (1).jpeg
ADDED
|
pages/01data_intro.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### 数据浏览
|
| 2 |
+
import simplestart as ss
|
| 3 |
+
import pandas as pd
|
| 4 |
+
|
| 5 |
+
ss.md('''
|
| 6 |
+
## Iris数据集
|
| 7 |
+
数据集包含150个数据样本,分为3类,即Setosa,Versicolour,Virginica。每类50个数据,每个数据包含4个属性。
|
| 8 |
+
''')
|
| 9 |
+
|
| 10 |
+
ss.space()
|
| 11 |
+
|
| 12 |
+
title = "表1. Iris数据集"
|
| 13 |
+
# Table caption mapping each CSV column name to its Chinese meaning; the names
# must match the header of data/iris.csv (the original misspelled petal_length).
subtitle = "sepal_length 花萼长度, sepal_width 花萼宽度, petal_length 花瓣长度, petal_width 花瓣宽度"
|
| 14 |
+
# 设置全局浮点数显示精度
|
| 15 |
+
pd.options.display.float_format = '{:.2f}'.format
|
| 16 |
+
df = pd.read_csv("./data/iris.csv")
|
| 17 |
+
|
| 18 |
+
ss.table(df, index=True, title = title, subtitle = subtitle, width=400)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
ss.table(df.describe(), index=True)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
ss.md("---")
|
| 25 |
+
# 模拟数据
|
| 26 |
+
import numpy as np
|
| 27 |
+
# 设置随机种子以确保结果可重复
|
| 28 |
+
np.random.seed(0)
|
| 29 |
+
|
| 30 |
+
num_rows = 10000
|
| 31 |
+
data = {
|
| 32 |
+
'Column1': np.random.randint(0, 100, size=num_rows), # 随机整数
|
| 33 |
+
'Column2': np.random.random(size=num_rows), # 随机浮点数
|
| 34 |
+
'Column3': np.random.choice(['A', 'B', 'C', 'D'], size=num_rows), # 随机选择的类别
|
| 35 |
+
}
|
pages/02data_feature.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### 特征分析
|
| 2 |
+
|
| 3 |
+
import simplestart as ss
|
| 4 |
+
import pandas as pd
|
| 5 |
+
|
| 6 |
+
ss.md('''
|
| 7 |
+
## 特征分析
|
| 8 |
+
''')
|
| 9 |
+
|
| 10 |
+
ss.space()
|
| 11 |
+
|
| 12 |
+
ss.md("#### 1. 特征的散点矩阵")
|
| 13 |
+
ss.space()
|
| 14 |
+
ss.image("./images/feature01.png", width=600, height = 500)
|
| 15 |
+
|
| 16 |
+
ss.space()
|
| 17 |
+
|
| 18 |
+
ss.md('''
|
| 19 |
+
本图来自:
|
| 20 |
+
[VuNus 【机器学习基础】1.7 鸢尾花分类](https://blog.csdn.net/qq_47809408/article/details/124632290)
|
| 21 |
+
''')
|
| 22 |
+
|
| 23 |
+
ss.space()
|
| 24 |
+
ss.md("#### 2. 特征浏览")
|
| 25 |
+
import pandas as pd
|
| 26 |
+
from bokeh.plotting import figure, show
|
| 27 |
+
from bokeh.models import ColumnDataSource
|
| 28 |
+
from bokeh.transform import factor_cmap
|
| 29 |
+
from bokeh.embed import file_html
|
| 30 |
+
from bokeh.resources import CDN
|
| 31 |
+
from bokeh.palettes import Category10
|
| 32 |
+
|
| 33 |
+
# 加载数据集
|
| 34 |
+
data = pd.read_csv("./data/iris.csv")
|
| 35 |
+
|
| 36 |
+
# 创建 Bokeh 图表
|
| 37 |
+
p = figure(title="Iris 数据集散点图", x_axis_label='花萼长度 (cm)', y_axis_label='花萼宽度 (cm)',
|
| 38 |
+
tools="pan,wheel_zoom,box_zoom,reset,hover,save", width=800, height=600)
|
| 39 |
+
|
| 40 |
+
# 创建数据源
|
| 41 |
+
source = ColumnDataSource(data)
|
| 42 |
+
|
| 43 |
+
# 为 species 列设置颜色映射
|
| 44 |
+
species_list = data['species'].unique().tolist()
|
| 45 |
+
# Draw one marker per sample, coloured by species. scatter() is used instead of
# circle(size=...), which Bokeh deprecated in 3.4; scatter's default marker is
# a circle, so the rendered plot is unchanged.
p.scatter(
    x='sepal_length',
    y='sepal_width',
    source=source,
    size=10,
    color=factor_cmap('species', palette=Category10[3], factors=species_list),
    legend_field='species',
)
|
| 47 |
+
|
| 48 |
+
# 配置图例
|
| 49 |
+
p.legend.title = "Species"
|
| 50 |
+
p.legend.location = "top_left"
|
| 51 |
+
|
| 52 |
+
# 将 Bokeh 图表转换为 HTML 并显示
|
| 53 |
+
html_output = file_html(p, CDN, "Iris 数据集散点图")
|
| 54 |
+
#show(p)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
ss.htmlview(html_output)
|
pages/03model_train.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### 模型训练
|
| 2 |
+
|
| 3 |
+
import simplestart as ss
|
| 4 |
+
|
| 5 |
+
from sklearn import datasets
|
| 6 |
+
from sklearn.neighbors import KNeighborsClassifier
|
| 7 |
+
from sklearn.model_selection import train_test_split
|
| 8 |
+
from sklearn.metrics import accuracy_score
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
ss.md('''
|
| 12 |
+
## 模型训练
|
| 13 |
+
''')
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
#加载数据,并划分样本数据
|
| 17 |
+
data = datasets.load_iris()
|
| 18 |
+
X = data.data
|
| 19 |
+
y = data.target
|
| 20 |
+
|
| 21 |
+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
|
| 22 |
+
|
| 23 |
+
#页面会话变量
|
| 24 |
+
ss.session["acc"] = ""
|
| 25 |
+
ss.session["code"] = 0
|
| 26 |
+
|
| 27 |
+
#训练函数
|
| 28 |
+
def train(event):
    """Fit a 3-nearest-neighbours classifier and publish its test accuracy.

    Registered as the ``onclick`` handler of the "Train" button. ``event`` is
    supplied by the UI and is not used here.
    """
    knn = KNeighborsClassifier(n_neighbors=3)
    knn.fit(X_train, y_train)

    # Score the held-out split and keep two decimals for display.
    predicted = knn.predict(X_test)
    score = round(accuracy_score(y_test, predicted), 2)

    # Store the result in the page session variable; the page display bound
    # to it is expected to update automatically.
    ss.session["acc"] = score
|
| 38 |
+
|
| 39 |
+
ss.md('''
|
| 40 |
+
#### 模型训练的主要步骤:
|
| 41 |
+
首先,从数据集中加载 Iris 数据(包括特征和标签),并将这些数据划分为训练集和测试集,其中 80% 用于训练,20% 用于测试。接着,定义了一个训练函数,该函数使用 K-Nearest Neighbors(KNN)分类器进行训练,评估模型的预测精度,并将结果保存在一个页面会话变量中,以便在网页上显示。
|
| 42 |
+
###
|
| 43 |
+
在网页上,显示了一个训练按钮。当用户点击这个按钮时,训练函数会被触发,模型会在后台进行训练并计算测试集的预测精度。训练完成后,精度结果会更新到页面中,并以“Accuracy = @acc”格式展示给用户,其中 @acc 是训练过程中计算得到的预测精度值。
|
| 44 |
+
###
|
| 45 |
+
训练和测试速度特别快可能是因为 Iris 数据集非常小,只有 150 个样本和 4 个特征。此外,K-Nearest Neighbors(KNN)是一种简单且高效的算法,特别是在小数据集上表现较好,因此训练和测试过程迅速完成。
|
| 46 |
+
###
|
| 47 |
+
---
|
| 48 |
+
''')
|
| 49 |
+
# Label followed by the "@acc" placeholder (presumably resolved by simplestart
# from ss.session["acc"] - confirm against the simplestart docs). The label has
# no fields to interpolate, so it is a plain string rather than an f-string.
ss.write('测试集的预测精度 Accuracy =', "@acc")
|
| 50 |
+
|
| 51 |
+
ss.button("Train", onclick = train)
|
| 52 |
+
#ui
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
ss.md("---")
|
| 56 |
+
|
| 57 |
+
def conditioner(event):
    """Tell ``ss.when`` whether the code listing is currently switched on."""
    code_flag = ss.session["code"]
    return code_flag == 1


def checkcode(event):
    """Handler for the "查看代码" button: set the session "code" flag to 1."""
    ss.session["code"] = 1


def hidecode(event):
    """Handler for the "隐藏代码" button: reset the session "code" flag to 0."""
    ss.session["code"] = 0
|
| 65 |
+
|
| 66 |
+
ss.button("查看代码", onclick = checkcode)
|
| 67 |
+
ss.button("隐藏代码", onclick = hidecode)
|
| 68 |
+
|
| 69 |
+
with ss.when(conditioner):
|
| 70 |
+
ss.md('''
|
| 71 |
+
```python
|
| 72 |
+
import simplestart as ss
|
| 73 |
+
|
| 74 |
+
from sklearn import datasets
|
| 75 |
+
from sklearn.neighbors import KNeighborsClassifier
|
| 76 |
+
from sklearn.model_selection import train_test_split
|
| 77 |
+
from sklearn.metrics import accuracy_score
|
| 78 |
+
|
| 79 |
+
#加载数据,并划分样本数据
|
| 80 |
+
data = datasets.load_iris()
|
| 81 |
+
X = data.data
|
| 82 |
+
y = data.target
|
| 83 |
+
ss.write(X.shape, y.shape)
|
| 84 |
+
|
| 85 |
+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
|
| 86 |
+
|
| 87 |
+
#页面会话变量
|
| 88 |
+
ss.session["acc"] = ""
|
| 89 |
+
|
| 90 |
+
#训练函数
|
| 91 |
+
def train(event):
|
| 92 |
+
clf = KNeighborsClassifier(n_neighbors=3)
|
| 93 |
+
clf.fit(X_train,y_train)
|
| 94 |
+
|
| 95 |
+
y_pred = clf.predict(X_test)
|
| 96 |
+
|
| 97 |
+
acc = accuracy_score(y_test, y_pred)
|
| 98 |
+
acc = round(acc, 2)
|
| 99 |
+
|
| 100 |
+
ss.session["acc"] = acc #将结果赋值给页面会话变量,相应页面显示值会自动响应
|
| 101 |
+
|
| 102 |
+
#显示在测试集上模型的准确率
|
| 103 |
+
ss.write(f'测试集的预测精度 Accuracy =', "\@acc")
|
| 104 |
+
|
| 105 |
+
ss.button("Train", onclick = train)
|
| 106 |
+
```
|
| 107 |
+
''')
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
ss.md("---")
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
ss.md('''
|
| 114 |
+
::: tip
|
| 115 |
+
### KNN的优点:
|
| 116 |
+
简洁、易于理解、易于实现、无须估计参数,无须训练;
|
| 117 |
+
适合对稀有事件进行分类;
|
| 118 |
+
特别适用于多分类问题(Multi-label,对象具有多个类别标签)
|
| 119 |
+
:::
|
| 120 |
+
''')
|
| 121 |
+
|
| 122 |
+
ss.md('''
|
| 123 |
+
更多KNN介绍,请参考
|
| 124 |
+
[KNN分类算法介绍,用KNN分类鸢尾花数据集(iris)](https://blog.csdn.net/weixin_51756038/article/details/130096706)
|
| 125 |
+
''')
|
pages/04model_sample.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### 预测实例
|
| 2 |
+
#the original source code:
|
| 3 |
+
#https://github.com/AzeemWaqarRao/Streamlit-Iris-Classification-App
|
| 4 |
+
import simplestart as ss
|
| 5 |
+
|
| 6 |
+
from sklearn.datasets import load_iris
|
| 7 |
+
import pandas as pd
|
| 8 |
+
import pickle
|
| 9 |
+
import numpy as np
|
| 10 |
+
|
| 11 |
+
#data and api
|
| 12 |
+
species = ['setosa', 'versicolor', 'virginica']
|
| 13 |
+
image = ['./images/setosa.jpg', './images/versicolor.jpg', './images/virginica.jpg']
|
| 14 |
+
with open('./data/model.pkl', 'rb') as f:
|
| 15 |
+
model = pickle.load(f)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def slidechange(event):
    """Slider ``onchange`` handler: recompute the prediction.

    ``event`` is supplied by the UI and is not used.
    """
    predict()
|
| 20 |
+
|
| 21 |
+
def predict():
    """Classify the current slider values and refresh the result widgets.

    Reads the four slider values, asks the loaded model for class
    probabilities, then updates the probability table, the "result" session
    variable and the flower image.
    """
    # Build a single-sample 2-D input (one row, four values), equivalent to
    # expanding the 1-D vector along axis 0.
    features = np.array([[
        sepal_length.value,
        sepal_width.value,
        petal_length.value,
        petal_width.value,
    ]])
    prediction = model.predict_proba(features)

    # Index of the most probable class; computed once and reused for both the
    # label and the image lookup.
    best = int(np.argmax(prediction))

    table_result.data = pd.DataFrame(
        prediction, index=['result'], columns=species
    ).round(4)
    ss.session["result"] = species[best]
    image_flower.image = image[best]
|
| 35 |
+
|
| 36 |
+
#ui
|
| 37 |
+
with ss.sidebar():
|
| 38 |
+
ss.write("### Inputs")
|
| 39 |
+
|
| 40 |
+
sepal_length = ss.slider("sepal length (cm)",4.3, 7.9, 5.0, onchange=slidechange)
|
| 41 |
+
|
| 42 |
+
sepal_width = ss.slider("sepal width (cm)",2.0,4.4,3.6, onchange=slidechange)
|
| 43 |
+
petal_length = ss.slider("petal length (cm)",1.0,6.9,1.4, onchange=slidechange)
|
| 44 |
+
petal_width = ss.slider("petal width (cm)",0.1,2.5,0.2, onchange=slidechange)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
ss.write("## 鸢尾花分类预测")
|
| 48 |
+
ss.write("改变花萼花瓣的长度宽度,在3种可能的类别中预测")
|
| 49 |
+
|
| 50 |
+
ss.write('''
|
| 51 |
+
# Results
|
| 52 |
+
Following is the probability of each class
|
| 53 |
+
''')
|
| 54 |
+
|
| 55 |
+
ss.space()
|
| 56 |
+
|
| 57 |
+
table_result = ss.table(show_border = True)
|
| 58 |
+
ss.write("**This flower belongs to @result" + " class**")
|
| 59 |
+
|
| 60 |
+
ss.space()
|
| 61 |
+
|
| 62 |
+
image_flower = ss.image(image[0])
|
| 63 |
+
|
| 64 |
+
predict()
|