Spaces:

scymz2
/

MNISTFormer

Sleeping

App Files Files Community

mochuan zhan committed on Nov 11, 2024

Commit

360f3af

1 Parent(s): a2ce9c9

fix +++

Browse files

Files changed (2) hide show

app.py +15 -15
vit_model.pth +1 -1

app.py CHANGED Viewed

@@ -69,13 +69,14 @@ class ViT(nn.Module):
 model = ViT(num_classes=10)  # 确保num_classes与你的MNIST任务一致
 model_path = "vit_model.pth"  # 模型权重文件名
 model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu'), weights_only=True))
 # 定义图像预处理
 transform = transforms.Compose([
-    transforms.Resize((224, 224)),          # 适应ViT的输入大小
     transforms.ToTensor(),
-    transforms.Normalize((0.5,), (0.5,))    # 根据训练时的归一化参数调整
 ])
 # 定义预测函数
@@ -87,18 +88,15 @@ def classify_image(image):
     # 确保 image 是一个 PIL 图像
     if not isinstance(image, Image.Image):
         raise TypeError(f"Expected image to be PIL Image, but got {type(image)}")
-    # 将图像转换为灰度模式
-    image = image.convert("L")
-    # 反转颜色
-    image = ImageOps.invert(image)
-    # 调整图像大小
-    image = image.resize((224, 224))
     # 图像预处理
     img = transform(image).unsqueeze(0)  # 添加批次维度
     # 模型预测
     with torch.no_grad():
@@ -106,10 +104,11 @@ def classify_image(image):
         probabilities = F.softmax(outputs, dim=1)
     # 获取预测结果
-    _, predicted = torch.max(outputs, 1)
     confidence = probabilities[0][predicted].item()
     # 返回结果字典，包含预测类别和置信度
     return {str(predicted.item()): confidence}
 # # 创建Gradio界面
@@ -123,11 +122,12 @@ def classify_image(image):
 iface = gr.Interface(
     fn=classify_image,
-    inputs=gr.Sketchpad(crop_size=(256,256), type='pil', image_mode='L', brush=gr.Brush()),
     outputs=gr.Label(num_top_classes=1),
     title="MNIST Digit Classification with ViT",
     description="Use the mouse to hand draw a number and the model will predict the category it belongs to."
 )
-iface.launch()

 model = ViT(num_classes=10)  # 确保num_classes与你的MNIST任务一致
 model_path = "vit_model.pth"  # 模型权重文件名
 model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu'), weights_only=True))
+model.eval()
 # 定义图像预处理
 transform = transforms.Compose([
+    transforms.Grayscale(num_output_channels=1),  # 转换为单通道
+    transforms.Resize((28, 28)),
     transforms.ToTensor(),
+    transforms.Normalize((0.1307,), (0.3081,))
 ])
 # 定义预测函数
     # 确保 image 是一个 PIL 图像
     if not isinstance(image, Image.Image):
         raise TypeError(f"Expected image to be PIL Image, but got {type(image)}")
+    # 打印image的数组
+    print(image)
     # 图像预处理
     img = transform(image).unsqueeze(0)  # 添加批次维度
+    image_pil = Image.fromarray(img.numpy().squeeze() * 255).convert('L')
+    image_pil.show()
     # 模型预测
     with torch.no_grad():
         probabilities = F.softmax(outputs, dim=1)
     # 获取预测结果
+    _, predicted = torch.max(probabilities, 1)
     confidence = probabilities[0][predicted].item()
     # 返回结果字典，包含预测类别和置信度
+    print(predicted, confidence)
     return {str(predicted.item()): confidence}
 # # 创建Gradio界面
 iface = gr.Interface(
     fn=classify_image,
+    inputs=gr.Sketchpad(type='pil', image_mode='L', brush=gr.Brush(default_size=18), crop_size=(600, 600)),
     outputs=gr.Label(num_top_classes=1),
     title="MNIST Digit Classification with ViT",
     description="Use the mouse to hand draw a number and the model will predict the category it belongs to."
 )
+if __name__ == "__main__":
+    iface.launch()

vit_model.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:223c6c32c2a9d4c274b09c35ef089b358ee7cf1729b9d939fca898db5765dcdb
 size 3248655

 version https://git-lfs.github.com/spec/v1
+oid sha256:a4c96b402b7457e05bba3fbf9589f8ee20aaf1bb86a482d4b605ac289cafde68
 size 3248655