# taken from: https://huggingface.co/spaces/frgfm/torch-cam/blob/main/app.py
# streamlit run app.py
from io import BytesIO
import os
import sys

import matplotlib.pyplot as plt
import requests
import streamlit as st
import torch
from PIL import Image
from torchvision import models
from torchvision.transforms.functional import normalize, resize, to_pil_image, to_tensor
from torchvision import transforms
from torchcam.methods import CAM
from torchcam import methods as torchcam_methods
from torchcam.utils import overlay_mask
import os.path as osp

root_path = osp.abspath(osp.join(__file__, osp.pardir))
sys.path.append(root_path)

from utils import get_model
from registry_utils import import_registered_modules

import_registered_modules()
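
# Note: utils.get_model and registry_utils.import_registered_modules are
# project-local modules made importable by the sys.path.append(root_path) call
# above; import_registered_modules() presumably registers the model classes
# (e.g. "ResNet18") so that get_model can resolve them by registered_model_name.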

# from torchcam.methods._utils import locate_candidate_layer

CAM_METHODS = [
    "CAM",
    # "GradCAM",
    # "GradCAMpp",
    # "SmoothGradCAMpp",
    # "ScoreCAM",
    # "SSCAM",
    # "ISCAM",
    # "XGradCAM",
    # "LayerCAM",
]
TV_MODELS = [
    "resnet18",
    # "resnet50",
]
| SR_METHODS = ["GFPGAN", "RealESRGAN", "SRResNet", "CodeFormer", "HAT"] | |
| UPSCALE = ["2", "3", "4"] | |
LABEL_MAP = [
    "left_eye",
    "right_eye",
]


def _load_model(model_configs, device="cpu"):
    """Build the registered model described by model_configs and load its weights.

    Note: pops "model_path" from the caller's dict before forwarding it to get_model.
    """
    model_path = os.path.join(root_path, model_configs["model_path"])
    model_configs.pop("model_path")
    model_dict = torch.load(model_path, map_location=device)
    model = get_model(model_configs=model_configs)
    model.load_state_dict(model_dict)
    model = model.to(device)
    model = model.eval()
    return model
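
# Minimal usage sketch for _load_model (mirrors the ResNet18 config built in
# main() below; treat the exact path and keys as assumptions of this example):
#
#     model = _load_model(
#         {
#             "model_path": "pre_trained_models/ResNet18/left_eye.pt",
#             "registered_model_name": "ResNet18",
#             "num_classes": 1,
#         },
#         device="cpu",
#     )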


def main():
    # Wide mode
    st.set_page_config(page_title="Pupil Diameter Estimator", layout="wide")

    # Designing the interface
    st.title("EyeDentify Playground")
    # For newline
    st.write("\n")

    # Set the columns
    cols = st.columns((1, 1))
    # cols = st.columns((1, 1, 1))
    cols[0].header("Input image")
    # cols[1].header("Raw CAM")
    cols[-1].header("Prediction")

    # Sidebar
    # File selection
    st.sidebar.title("Input selection")
    # Disabling warning (this config option may no longer exist on newer Streamlit releases)
    st.set_option("deprecation.showfileUploaderEncoding", False)
    # Choose your own image
    uploaded_file = st.sidebar.file_uploader("Upload files", type=["png", "jpeg", "jpg"])
    if uploaded_file is not None:
        img = Image.open(BytesIO(uploaded_file.read()), mode="r").convert("RGB")
        cols[0].image(img, use_column_width=True)

    # Model selection
    st.sidebar.title("Setup")
    tv_model = st.sidebar.selectbox(
        "Classification model",
        TV_MODELS,
        help="Supported models from Torchvision",
    )
    # class_choices = [
    #     f"{idx + 1} - {class_name}" for idx, class_name in enumerate(LABEL_MAP)
    # ]
    # class_selection = st.sidebar.selectbox(
    #     "Class selection", ["Predicted class (argmax)", *class_choices]
    # )
    img_configs = {"img_size": [32, 64], "means": None, "stds": None}

    # For newline
    st.sidebar.write("\n")
| if st.sidebar.button("Compute CAM"): | |
| if uploaded_file is None: | |
| st.sidebar.error("Please upload an image first") | |
| else: | |
| with st.spinner("Analyzing..."): | |
| preprocess_steps = [transforms.ToTensor()] | |
| image_size = img_configs["img_size"] | |
| if image_size is not None: | |
| preprocess_steps.append( | |
| transforms.Resize( | |
| [image_size[0], image_size[-1]], | |
| interpolation=transforms.InterpolationMode.BICUBIC, | |
| antialias=True, | |
| ) | |
| ) | |
| means = img_configs["means"] | |
| stds = img_configs["stds"] | |
| if means is not None and stds is not None: | |
| preprocess_steps.append(transforms.Normalize(means, stds)) | |
| preprocess_function = transforms.Compose(preprocess_steps) | |
| input_img = preprocess_function(img) | |
| input_img = input_img.unsqueeze(0).to(device="cpu") | |
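                # At this point input_img should be a float tensor of shape
                # (1, 3, 32, 64), matching the input_shape passed to the CAM
                # extractor further down.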

                model_configs = {
                    "model_path": root_path + "/pre_trained_models/ResNet18/left_eye.pt",
                    "registered_model_name": "ResNet18",
                    "num_classes": 1,
                }
                registered_model_name = model_configs["registered_model_name"]
                # default_layer = ""
                if tv_model is not None:
                    with st.spinner("Loading model..."):
                        model = _load_model(model_configs)
                        if torch.cuda.is_available():
                            model = model.cuda()
                            # keep the input on the same device as the model
                            input_img = input_img.cuda()

                    if registered_model_name == "ResNet18":
                        target_layer = model.resnet.layer4[-1].conv2
                    elif registered_model_name == "ResNet50":
                        target_layer = model.resnet.layer4[-1].conv3
                    else:
                        raise Exception(
                            f"No target layer available for selected model: {registered_model_name}"
                        )
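
                    # The last convolution in layer4 is the usual CAM target for
                    # ResNets: conv2 for ResNet-18's BasicBlock, conv3 for
                    # ResNet-50's Bottleneck (assuming model.resnet wraps a
                    # torchvision-style ResNet).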

                # target_layer = st.sidebar.text_input(
                #     "Target layer",
                #     default_layer,
                #     help='If you want to target several layers, add a "+" separator (e.g. "layer3+layer4")',
                # )
                cam_method = "CAM"
                # cam_method = st.sidebar.selectbox(
                #     "CAM method",
                #     CAM_METHODS,
                #     help="The way your class activation map will be computed",
                # )
                if cam_method is not None:
                    # cam_extractor = methods.__dict__[cam_method](
                    #     model,
                    #     target_layer=(
                    #         [s.strip() for s in target_layer.split("+")]
                    #         if len(target_layer) > 0
                    #         else None
                    #     ),
                    # )
                    cam_extractor = torchcam_methods.__dict__[cam_method](
                        model,
                        target_layer=target_layer,
                        fc_layer=model.resnet.fc,
                        input_shape=(3, 32, 64),
                    )
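
                    # torchcam's CAM weights the activations captured at target_layer
                    # with the corresponding rows of fc_layer (here the model's final
                    # fully connected layer), following the original CAM formulation;
                    # input_shape is the expected (C, H, W) of the preprocessed input,
                    # (3, 32, 64) in this app.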

                # with torch.no_grad():
                #     if input_mask is not None:
                #         out = self.model(input_img, input_mask)
                #     else:
                #         out = self.model(input_img)
                # activation_map = cam_extractor(class_idx=target_class)

                # Forward the image to the model
                out = model(input_img)
                print("out = ", out)

                # Select the target class
                # if class_selection == "Predicted class (argmax)":
                #     class_idx = out.squeeze(0).argmax().item()
                # else:
                #     class_idx = LABEL_MAP.index(class_selection.rpartition(" - ")[-1])

                # Retrieve the CAM
                # act_maps = cam_extractor(class_idx=target_class)
                act_maps = cam_extractor(0, out)
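                # cam_extractor(class_idx, scores) returns one CAM per hooked layer;
                # class index 0 is the only output here since the model is built with
                # num_classes=1 (presumably the pupil-diameter regression value).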

                # Fuse the CAMs if there are several
                activation_map = (
                    act_maps[0] if len(act_maps) == 1 else cam_extractor.fuse_cams(act_maps)
                )

                # Overlayed CAM (move the CAM back to CPU so to_pil_image can convert it)
                fig, ax = plt.subplots()
                result = overlay_mask(
                    img, to_pil_image(activation_map.cpu(), mode="F"), alpha=0.5
                )
                ax.imshow(result)
                ax.axis("off")
                cols[-1].pyplot(fig)
| if __name__ == "__main__": | |
| main() | |