Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	Upload 6 files
Browse files
- .gitattributes +2 -0
- face_grab.py +67 -0
- gradcam.py +138 -0
- mmod_human_face_detector.dat +0 -0
- requirements.txt +9 -0
- shape_predictor_68_face_landmarks.dat +3 -0
- shape_predictor_68_face_landmarks_GTX.dat +3 -0
    	
        .gitattributes
    CHANGED
    
    | @@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text | |
| 33 | 
             
            *.zip filter=lfs diff=lfs merge=lfs -text
         | 
| 34 | 
             
            *.zst filter=lfs diff=lfs merge=lfs -text
         | 
| 35 | 
             
            *tfevents* filter=lfs diff=lfs merge=lfs -text
         | 
|  | |
|  | 
|  | |
| 33 | 
             
            *.zip filter=lfs diff=lfs merge=lfs -text
         | 
| 34 | 
             
            *.zst filter=lfs diff=lfs merge=lfs -text
         | 
| 35 | 
             
            *tfevents* filter=lfs diff=lfs merge=lfs -text
         | 
| 36 | 
            +
            shape_predictor_68_face_landmarks_GTX.dat filter=lfs diff=lfs merge=lfs -text
         | 
| 37 | 
            +
            shape_predictor_68_face_landmarks.dat filter=lfs diff=lfs merge=lfs -text
         | 
    	
        face_grab.py
    ADDED
    
    | @@ -0,0 +1,67 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import logging
         | 
| 2 | 
            +
            import cv2 as cv
         | 
| 3 | 
            +
            import numpy as np
         | 
| 4 | 
            +
            import dlib
         | 
| 5 | 
            +
            from typing import Optional
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            logging.basicConfig(level=logging.INFO)
         | 
| 8 | 
            +
             | 
| 9 | 
            +
             | 
| 10 | 
            +
            class FaceGrabber:
                """Find the first face in an image and return a padded crop of it.

                Detectors are tried in order until one reports a face: dlib's frontal
                face detector, dlib's CNN (mmod) detector, then each OpenCV Haar
                cascade listed in ``self.cascades``.
                """

                def __init__(self):
                    # Haar cascade file names, resolved against cv.data.haarcascades
                    # and used only when both dlib detectors find nothing.
                    self.cascades = [
                        "haarcascade_frontalface_default.xml",
                        "haarcascade_frontalface_alt.xml",
                        "haarcascade_frontalface_alt2.xml",
                        "haarcascade_frontalface_alt_tree.xml",
                    ]
                    self.detector = dlib.get_frontal_face_detector()  # load face detector
                    # Loaded here but not used by grab_faces.
                    self.predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks_GTX.dat")
                    self.mmod = dlib.cnn_face_detection_model_v1("mmod_human_face_detector.dat")
                    self.paddingBy = 0.1  # padding by 10% of the face box on every side

                @staticmethod
                def _padded_box(x, y, w, h, imgW, imgH, paddingBy):
                    """Return ``(x0, y0, x1, y1)``: the box grown by ``paddingBy`` and clipped.

                    The same padding is added on both sides of each axis so the face
                    stays centred; the result is clamped to the image bounds.
                    """
                    padW = int(paddingBy * w)
                    padH = int(paddingBy * h)
                    x0 = max(0, x - padW)
                    y0 = max(0, y - padH)
                    x1 = min(imgW, x + w + padW)
                    y1 = min(imgH, y + h + padH)
                    return x0, y0, x1, y1

                def _detect(self, img):
                    """Return the first detected face as ``(x, y, w, h)``, or ``None``."""
                    faces = self.detector(img)
                    if len(faces) > 0:
                        rect = faces[0]
                        logging.info("Face detected by dlib")
                        return rect.left(), rect.top(), rect.width(), rect.height()

                    faces = self.mmod(img)
                    if len(faces) > 0:
                        # mmod detections wrap the rectangle in a ``.rect`` attribute.
                        rect = faces[0].rect
                        logging.info("Face detected by mmod")
                        return rect.left(), rect.top(), rect.width(), rect.height()

                    for cascade in self.cascades:
                        cascadeClassifier = cv.CascadeClassifier(cv.data.haarcascades + cascade)
                        faces = cascadeClassifier.detectMultiScale(img, scaleFactor=1.5, minNeighbors=5)
                        if len(faces) > 0:
                            logging.info("Face detected by %s", cascade)
                            return tuple(faces[0])

                    return None

                def grab_faces(self, img: np.ndarray, bGray: bool = False) -> Optional[np.ndarray]:
                    """Crop the first detected face out of ``img``.

                    Args:
                        img: Image array; the first two axes are treated as height and
                            width.
                        bGray: When true, the image is converted with
                            ``cv.COLOR_BGR2GRAY`` before detection and the crop is
                            taken from the converted image.

                    Returns:
                        The padded face crop (a view into the image array), or ``None``
                        when no detector finds a face or the clipped box is empty.
                    """
                    if bGray:
                        img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)  # convert to grayscale

                    detected = self._detect(img)
                    if detected is None:
                        return None

                    x, y, w, h = (int(v) for v in detected)
                    # shape[:2] works for both colour (H, W, C) and grayscale (H, W)
                    # arrays; unpacking three values crashed on the grayscale path.
                    imgH, imgW = img.shape[:2]
                    x0, y0, x1, y1 = self._padded_box(x, y, w, h, imgW, imgH, self.paddingBy)
                    if x1 <= x0 or y1 <= y0:
                        # Detection lies entirely outside the image; nothing to crop.
                        return None
                    return img[y0:y1, x0:x1]
         | 
    	
        gradcam.py
    ADDED
    
    | @@ -0,0 +1,138 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            from transformers import ViTFeatureExtractor, ViTForImageClassification
         | 
| 2 | 
            +
            import warnings
         | 
| 3 | 
            +
            from torchvision import transforms
         | 
| 4 | 
            +
            from datasets import load_dataset
         | 
| 5 | 
            +
            from pytorch_grad_cam import run_dff_on_image, GradCAM
         | 
| 6 | 
            +
            from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
         | 
| 7 | 
            +
            from pytorch_grad_cam.utils.image import show_cam_on_image
         | 
| 8 | 
            +
            from PIL import Image
         | 
| 9 | 
            +
            import numpy as np
         | 
| 10 | 
            +
            import cv2 as cv
         | 
| 11 | 
            +
            import torch
         | 
| 12 | 
            +
            from typing import List, Callable, Optional
         | 
| 13 | 
            +
            import logging
         | 
| 14 | 
            +
            from face_grab import FaceGrabber
         | 
| 15 | 
            +
             | 
| 16 | 
            +
            # original borrowed from https://github.com/jacobgil/pytorch-grad-cam/blob/master/tutorials/HuggingFace.ipynb
         | 
| 17 | 
            +
            # thanks @jacobgil
         | 
| 18 | 
            +
            # further mods beyond this commit by @simonSlamka
         | 
| 19 | 
            +
             | 
| 20 | 
            +
            warnings.filterwarnings("ignore")
         | 
| 21 | 
            +
             | 
| 22 | 
            +
            logging.basicConfig(level=logging.INFO)
         | 
| 23 | 
            +
             | 
| 24 | 
            +
             | 
| 25 | 
            +
             | 
| 26 | 
            +
            class HuggingfaceToTensorModelWrapper(torch.nn.Module):
                """Wrap a model whose output exposes ``.logits`` so it returns the logits.

                Calling the wrapped module forwards the input to ``model`` and returns
                only the ``logits`` attribute of the result, giving callers a plain
                tensor instead of the model's output object.
                """

                def __init__(self, model):
                    super().__init__()
                    self.model = model

                def forward(self, x):
                    """Return ``self.model(x).logits``."""
                    return self.model(x).logits
         | 
| 33 | 
            +
             | 
| 34 | 
            +
             | 
| 35 | 
            +
             | 
| 36 | 
            +
            class GradCam():
                """Helpers for running Grad-CAM style visualisations on a ViT classifier."""

                def __init__(self):
                    pass

                def category_name_to_index(self, model, category_name):
                    """Return the class index whose label in ``model.config.id2label`` is ``category_name``.

                    Raises:
                        KeyError: If no class carries that label.
                    """
                    name_to_index = {label: idx for idx, label in model.config.id2label.items()}
                    return name_to_index[category_name]

                def run_grad_cam_on_image(self, model: torch.nn.Module,
                                          target_layer: torch.nn.Module,
                                          targets_for_gradcam: List[Callable],
                                          reshape_transform: Optional[Callable],
                                          input_tensor: torch.Tensor,
                                          input_image: "Image.Image",
                                          method: Callable = GradCAM,
                                          threshold: float = 0.5):
                    """Render one CAM overlay per target and stack them side by side.

                    Args:
                        model: Classifier whose output exposes ``.logits``.
                        target_layer: Layer whose activations the CAM method inspects.
                        targets_for_gradcam: One target callable per desired overlay.
                        reshape_transform: Optional transform passed through to ``method``.
                        input_tensor: Single image tensor; a batch axis is added here.
                        input_image: Image the heatmap is drawn over; it is divided by
                            255 before overlaying, so 0-255 pixel values are assumed.
                        method: CAM class to instantiate (used as a context manager).
                        threshold: CAM values below this are zeroed before overlaying.

                    Returns:
                        The half-size overlays concatenated horizontally with ``np.hstack``.
                    """
                    with method(model=HuggingfaceToTensorModelWrapper(model),
                                target_layers=[target_layer],
                                reshape_transform=reshape_transform) as cam:

                        # Replicate the tensor for each of the categories we want to create Grad-CAM for:
                        repeated_tensor = input_tensor[None, :].repeat(len(targets_for_gradcam), 1, 1, 1)

                        batch_results = cam(input_tensor=repeated_tensor,
                                            targets=targets_for_gradcam)
                        results = []
                        for grayscale_cam in batch_results:
                            grayscale_cam[grayscale_cam < threshold] = 0
                            visualization = show_cam_on_image(np.float32(input_image) / 255,
                                                              grayscale_cam,
                                                              use_rgb=True)
                            # Halve the overlay so the stacked result stays small.
                            visualization = cv.resize(visualization,
                                                      (visualization.shape[1] // 2, visualization.shape[0] // 2))
                            results.append(visualization)
                        return np.hstack(results)

                def get_top_category(self, model, img_tensor, top_k=5):
                    """Return the highest-scoring class as ``[{"label": ..., "score": ...}]``.

                    ``top_k`` is accepted for interface compatibility but is not used:
                    only the single best class is returned. ``score`` is the softmax
                    probability of that class.
                    """
                    # Inference only, so skip building the autograd graph.
                    with torch.no_grad():
                        logits = model(img_tensor.unsqueeze(0)).logits
                    probabilities = torch.nn.functional.softmax(logits, dim=1)
                    topIdx = int(torch.argmax(logits[0]))
                    topClass = model.config.id2label[topIdx]
                    topScore = probabilities[0][topIdx].item()
                    return [{"label": topClass, "score": topScore}]

                def print_top_categories(self, model, img_tensor, top_k=5):
                    """Log the result of :meth:`get_top_category` and return it."""
                    categories = self.get_top_category(model, img_tensor, top_k=top_k)
                    for category in categories:
                        logging.info("%s: %.4f", category["label"], category["score"])
                    return categories

                def reshape_transform_vit_huggingface(self, x):
                    """Turn a ``(batch, tokens, channels)`` tensor into ``(batch, channels, side, side)``.

                    The first token is dropped (presumably the [CLS] token; confirm
                    against the model) and the rest are laid out on a square grid.
                    The grid side is inferred from the token count, so 196 tokens
                    give the 14x14 grid that was previously hard-coded.

                    Raises:
                        ValueError: If the remaining token count is not a perfect square.
                    """
                    activations = x[:, 1:, :]
                    batch, num_patches, channels = activations.shape
                    side = int(round(num_patches ** 0.5))
                    if side * side != num_patches:
                        raise ValueError(
                            f"cannot arrange {num_patches} patch tokens on a square grid"
                        )
                    activations = activations.reshape(batch, side, side, channels)
                    # (batch, side, side, channels) -> (batch, channels, side, side)
                    return activations.permute(0, 3, 1, 2)
         | 
| 88 | 
            +
             | 
| 89 | 
            +
             | 
| 90 | 
            +
             | 
| 91 | 
            +
            if __name__ == "__main__":
                # Demo: crop the face from a sample image, then show DFF and Grad-CAM
                # visualisations for the classifier and log its top prediction.
                # NOTE(review): requirements.txt lists opencv-python-headless, which
                # is built without GUI support, so cv.imshow may not be available there - confirm.

                faceGrabber = FaceGrabber()
                gradCam = GradCam()

                image = Image.open("Feature-Image-74.jpg").convert("RGB")
                face = faceGrabber.grab_faces(np.array(image))
                if face is not None:
                    # Fall back to the full image when no face is found.
                    image = Image.fromarray(face)

                model = ViTForImageClassification.from_pretrained("ongkn/attraction-classifier")
                targets_for_gradcam = [ClassifierOutputTarget(gradCam.category_name_to_index(model, "pos")),
                                       ClassifierOutputTarget(gradCam.category_name_to_index(model, "neg"))]
                target_layer_dff = model.vit.layernorm
                target_layer_gradcam = model.vit.encoder.layer[-2].output
                image_resized = image.resize((224, 224))
                tensor_resized = transforms.ToTensor()(image_resized)

                dff_image = run_dff_on_image(model=model,
                                             target_layer=target_layer_dff,
                                             classifier=model.classifier,
                                             img_pil=image_resized,
                                             img_tensor=tensor_resized,
                                             reshape_transform=gradCam.reshape_transform_vit_huggingface,
                                             n_components=5,
                                             top_k=10,
                                             threshold=0,
                                             output_size=None)
                cv.namedWindow("DFF Image", cv.WINDOW_KEEPRATIO)
                # cv.imshow expects BGR channel order, so swap the channels first.
                cv.imshow("DFF Image", cv.cvtColor(dff_image, cv.COLOR_RGB2BGR))
                cv.resizeWindow("DFF Image", 2500, 700)

                grad_cam_image = gradCam.run_grad_cam_on_image(model=model,
                                                               target_layer=target_layer_gradcam,
                                                               targets_for_gradcam=targets_for_gradcam,
                                                               input_tensor=tensor_resized,
                                                               input_image=image_resized,
                                                               reshape_transform=gradCam.reshape_transform_vit_huggingface,
                                                               threshold=0)
                cv.namedWindow("Grad-CAM Image", cv.WINDOW_KEEPRATIO)
                # The overlay is built with use_rgb=True, so convert it for cv.imshow
                # just like the DFF image; otherwise red and blue are swapped.
                cv.imshow("Grad-CAM Image", cv.cvtColor(grad_cam_image, cv.COLOR_RGB2BGR))
                cv.resizeWindow("Grad-CAM Image", 2000, 1250)
                cv.waitKey(0)
                cv.destroyAllWindows()

                # GradCam defines get_top_category, not print_top_categories, so the
                # original call raised AttributeError; log the result instead.
                for category in gradCam.get_top_category(model, tensor_resized):
                    logging.info("Top category: %s (score %.4f)", category["label"], category["score"])
         | 
    	
        mmod_human_face_detector.dat
    ADDED
    
    | Binary file (730 kB). View file | 
|  | 
    	
        requirements.txt
    ADDED
    
    | @@ -0,0 +1,9 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            gradio
         | 
| 2 | 
            +
            transformers
         | 
| 3 | 
            +
            numpy
         | 
| 4 | 
            +
            Pillow
         | 
| 5 | 
            +
            opencv-python-headless
         | 
| 6 | 
            +
            dlib
         | 
| 7 | 
            +
            torch
         | 
| 8 | 
            +
            grad-cam
         | 
| 9 | 
            +
            torchvision
         | 
    	
        shape_predictor_68_face_landmarks.dat
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:fbdc2cb80eb9aa7a758672cbfdda32ba6300efe9b6e6c7a299ff7e736b11b92f
         | 
| 3 | 
            +
            size 99693937
         | 
    	
        shape_predictor_68_face_landmarks_GTX.dat
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:249a69a1d5f2d7c714a92934d35367d46eb52dc308d46717e82d49e8386b3b80
         | 
| 3 | 
            +
            size 66435981
         | 
