Spaces:

Gradio-Blocks
/

CBNetV2

Running

App Files Files Community

hysts HF Staff commited on May 29, 2022

Commit

3fe1151

1 Parent(s): 4e0c895

Add files

Browse files

Files changed (19) hide show

.gitattributes +2 -0
.gitignore +1 -0
.gitmodules +3 -0
.pre-commit-config.yaml +46 -0
.style.yapf +5 -0
CBNetV2 +1 -0
README.md +1 -1
app.py +240 -0
images/README.md +9 -0
images/pexels-element-digital-1370295.jpg +3 -0
images/pexels-elle-hughes-1549196.jpg +3 -0
images/pexels-jean-van-der-meulen-1599791.jpg +3 -0
images/pexels-mark-stebnicki-2255935.jpg +3 -0
images/pexels-oleksandr-pidvalnyi-1031698.jpg +3 -0
images/pexels-pixabay-45170.jpg +3 -0
images/pexels-trang-doan-1132047.jpg +3 -0
palette.py +273 -0
patch +834 -0
requirements.txt +8 -0

.gitattributes CHANGED Viewed

@@ -1,3 +1,5 @@
 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text

+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ weights/

.gitmodules ADDED Viewed

	@@ -0,0 +1,3 @@

+[submodule "CBNetV2"]
+	path = CBNetV2
+	url = https://github.com/VDIGPKU/CBNetV2

.pre-commit-config.yaml ADDED Viewed

	@@ -0,0 +1,46 @@

+exclude: ^(CBNetV2/|patch)
+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v4.2.0
+  hooks:
+  - id: check-executables-have-shebangs
+  - id: check-json
+  - id: check-merge-conflict
+  - id: check-shebang-scripts-are-executable
+  - id: check-toml
+  - id: check-yaml
+  - id: double-quote-string-fixer
+  - id: end-of-file-fixer
+  - id: mixed-line-ending
+    args: ['--fix=lf']
+  - id: requirements-txt-fixer
+  - id: trailing-whitespace
+- repo: https://github.com/myint/docformatter
+  rev: v1.4
+  hooks:
+  - id: docformatter
+    args: ['--in-place']
+- repo: https://github.com/pycqa/isort
+  rev: 5.10.1
+  hooks:
+    - id: isort
+- repo: https://github.com/pre-commit/mirrors-mypy
+  rev: v0.812
+  hooks:
+    - id: mypy
+      args: ['--ignore-missing-imports']
+- repo: https://github.com/google/yapf
+  rev: v0.32.0
+  hooks:
+  - id: yapf
+    args: ['--parallel', '--in-place']
+- repo: https://github.com/kynan/nbstripout
+  rev: 0.5.0
+  hooks:
+    - id: nbstripout
+      args: ['--extra-keys', 'metadata.interpreter metadata.kernelspec cell.metadata.pycharm']
+- repo: https://github.com/nbQA-dev/nbQA
+  rev: 1.3.1
+  hooks:
+    - id: nbqa-isort
+    - id: nbqa-yapf

.style.yapf ADDED Viewed

	@@ -0,0 +1,5 @@

+[style]
+based_on_style = pep8
+blank_line_before_nested_class_or_def = false
+spaces_before_comment = 2
+split_before_logical_operator = true

CBNetV2 ADDED Viewed

	@@ -0,0 +1 @@


1	+ Subproject commit a546be507af55a17c96dc18a85f86b17656ff814

README.md CHANGED Viewed

@@ -4,7 +4,7 @@ emoji: 📉
 colorFrom: gray
 colorTo: green
 sdk: gradio
-sdk_version: 3.0.6
 app_file: app.py
 pinned: false
 ---

 colorFrom: gray
 colorTo: green
 sdk: gradio
+sdk_version: 3.0.5
 app_file: app.py
 pinned: false
 ---

app.py ADDED Viewed

	@@ -0,0 +1,240 @@

+#!/usr/bin/env python
+from __future__ import annotations
+import argparse
+import os
+import pathlib
+import subprocess
+import sys
+if os.getenv('SYSTEM') == 'spaces':
+    import mim
+    mim.uninstall('mmcv-full', confirm_yes=True)
+    mim.install('mmcv-full==1.5.0', is_yes=True)
+    subprocess.call('pip uninstall -y opencv-python'.split())
+    subprocess.call('pip uninstall -y opencv-python-headless'.split())
+    subprocess.call('pip install opencv-python-headless==4.5.5.64'.split())
+    subprocess.call('git apply ../patch'.split(), cwd='CBNetV2')
+    subprocess.call('mv palette.py CBNetV2/mmdet/core/visualization/'.split())
+import gradio as gr
+import numpy as np
+import torch
+import torch.nn as nn
+sys.path.insert(0, 'CBNetV2/')
+from mmdet.apis import inference_detector, init_detector
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--device', type=str, default='cpu')
+    parser.add_argument('--theme', type=str)
+    parser.add_argument('--share', action='store_true')
+    parser.add_argument('--port', type=int)
+    parser.add_argument('--disable-queue',
+                        dest='enable_queue',
+                        action='store_false')
+    return parser.parse_args()
+class Model:
+    def __init__(self, device: str | torch.device):
+        self.device = torch.device(device)
+        self.models = self._load_models()
+        self.model_name = 'Improved HTC (DB-Swin-B)'
+    def _load_models(self) -> dict[str, nn.Module]:
+        model_dict = {
+            'Faster R-CNN (DB-ResNet50)': {
+                'config':
+                'CBNetV2/configs/cbnet/faster_rcnn_cbv2d1_r50_fpn_1x_coco.py',
+                'model':
+                'https://github.com/CBNetwork/storage/releases/download/v1.0.0/faster_rcnn_cbv2d1_r50_fpn_1x_coco.pth.zip',
+            },
+            'Mask R-CNN (DB-Swin-T)': {
+                'config':
+                'CBNetV2/configs/cbnet/mask_rcnn_cbv2_swin_tiny_patch4_window7_mstrain_480-800_adamw_3x_coco.py',
+                'model':
+                'https://github.com/CBNetwork/storage/releases/download/v1.0.0/mask_rcnn_cbv2_swin_tiny_patch4_window7_mstrain_480-800_adamw_3x_coco.pth.zip',
+            },
+            #        'Cascade Mask R-CNN (DB-Swin-S)': {
+            #            'config':
+            #                'CBNetV2/configs/cbnet/cascade_mask_rcnn_cbv2_swin_small_patch4_window7_mstrain_400-1400_adamw_3x_coco.py',
+            #            'model':
+            #                'https://github.com/CBNetwork/storage/releases/download/v1.0.0/cascade_mask_rcnn_cbv2_swin_small_patch4_window7_mstrain_400-1400_adamw_3x_coco.pth.zip',
+            #        },
+            'Improved HTC (DB-Swin-B)': {
+                'config':
+                'CBNetV2/configs/cbnet/htc_cbv2_swin_base_patch4_window7_mstrain_400-1400_giou_4conv1f_adamw_20e_coco.py',
+                'model':
+                'https://github.com/CBNetwork/storage/releases/download/v1.0.0/htc_cbv2_swin_base22k_patch4_window7_mstrain_400-1400_giou_4conv1f_adamw_20e_coco.pth.zip',
+            },
+            'Improved HTC (DB-Swin-L)': {
+                'config':
+                'CBNetV2/configs/cbnet/htc_cbv2_swin_large_patch4_window7_mstrain_400-1400_giou_4conv1f_adamw_1x_coco.py',
+                'model':
+                'https://github.com/CBNetwork/storage/releases/download/v1.0.0/htc_cbv2_swin_large22k_patch4_window7_mstrain_400-1400_giou_4conv1f_adamw_1x_coco.pth.zip',
+            },
+            'Improved HTC (DB-Swin-L (TTA))': {
+                'config':
+                'CBNetV2/configs/cbnet/htc_cbv2_swin_large_patch4_window7_mstrain_400-1400_giou_4conv1f_adamw_1x_coco.py',
+                'model':
+                'https://github.com/CBNetwork/storage/releases/download/v1.0.0/htc_cbv2_swin_large22k_patch4_window7_mstrain_400-1400_giou_4conv1f_adamw_1x_coco.pth.zip',
+            },
+        }
+        weight_dir = pathlib.Path('weights')
+        weight_dir.mkdir(exist_ok=True)
+        def _download(model_name: str, out_dir: pathlib.Path) -> None:
+            import zipfile
+            model_url = model_dict[model_name]['model']
+            zip_name = model_url.split('/')[-1]
+            out_path = out_dir / zip_name
+            if out_path.exists():
+                return
+            torch.hub.download_url_to_file(model_url, out_path)
+            with zipfile.ZipFile(out_path) as f:
+                f.extractall(out_dir)
+        def _get_model_path(model_name: str) -> str:
+            model_url = model_dict[model_name]['model']
+            model_name = model_url.split('/')[-1][:-4]
+            return (weight_dir / model_name).as_posix()
+        for model_name in model_dict:
+            _download(model_name, weight_dir)
+        models = {
+            key: init_detector(dic['config'],
+                               _get_model_path(key),
+                               device=self.device)
+            for key, dic in model_dict.items()
+        }
+        return models
+    def set_model_name(self, name: str) -> None:
+        self.model_name = name
+    def detect_and_visualize(
+            self, image: np.ndarray,
+            score_threshold: float) -> tuple[list[np.ndarray], np.ndarray]:
+        out = self.detect(image)
+        vis = self.visualize_detection_results(image, out, score_threshold)
+        return out, vis
+    def detect(self, image: np.ndarray) -> list[np.ndarray]:
+        image = image[:, :, ::-1]  # RGB -> BGR
+        model = self.models[self.model_name]
+        out = inference_detector(model, image)
+        return out
+    def visualize_detection_results(
+            self,
+            image: np.ndarray,
+            detection_results: list[np.ndarray],
+            score_threshold: float = 0.3) -> np.ndarray:
+        image = image[:, :, ::-1]  # RGB -> BGR
+        model = self.models[self.model_name]
+        vis = model.show_result(image,
+                                detection_results,
+                                score_thr=score_threshold,
+                                bbox_color=None,
+                                text_color=(200, 200, 200),
+                                mask_color=None)
+        return vis[:, :, ::-1]  # BGR -> RGB
+def set_example_image(example: list) -> dict:
+    return gr.Image.update(value=example[0])
+def main():
+    args = parse_args()
+    model = Model(args.device)
+    css = '''
+h1#title {
+  text-align: center;
+}
+'''
+    with gr.Blocks(theme=args.theme, css=css) as demo:
+        gr.Markdown('''<h1 id="title">VDIGPKU/CBNetV2</h1>
+This is an unofficial demo for [https://github.com/VDIGPKU/CBNetV2](https://github.com/VDIGPKU/CBNetV2).'''
+                    )
+        with gr.Row():
+            with gr.Column():
+                with gr.Row():
+                    input_image = gr.Image(label='Input Image', type='numpy')
+                with gr.Row():
+                    detector_name = gr.Dropdown(list(model.models.keys()),
+                                                value=model.model_name,
+                                                label='Detector')
+                with gr.Row():
+                    detect_button = gr.Button(value='Detect')
+                    detection_results = gr.Variable()
+            with gr.Column():
+                detection_visualization = gr.Image(label='Detection Result',
+                                                   type='numpy')
+                visualization_score_threshold = gr.Slider(
+                    0,
+                    1,
+                    step=0.05,
+                    value=0.3,
+                    label='Visualization Score Threshold')
+                redraw_button = gr.Button(value='Redraw')
+        with gr.Row():
+            paths = sorted(pathlib.Path('images').rglob('*.jpg'))
+            example_images = gr.Dataset(components=[input_image],
+                                        samples=[[path.as_posix()]
+                                                 for path in paths])
+        gr.Markdown(
+            '<center><img src="https://visitor-badge.glitch.me/badge?page_id=hysts.cbnetv2" alt="visitor badge"/></center>'
+        )
+        detector_name.change(fn=model.set_model_name,
+                             inputs=[detector_name],
+                             outputs=None)
+        detect_button.click(fn=model.detect_and_visualize,
+                            inputs=[
+                                input_image,
+                                visualization_score_threshold,
+                            ],
+                            outputs=[
+                                detection_results,
+                                detection_visualization,
+                            ])
+        redraw_button.click(fn=model.visualize_detection_results,
+                            inputs=[
+                                input_image,
+                                detection_results,
+                                visualization_score_threshold,
+                            ],
+                            outputs=[detection_visualization])
+        example_images.click(fn=set_example_image,
+                             inputs=[example_images],
+                             outputs=[input_image])
+    demo.launch(
+        enable_queue=args.enable_queue,
+        server_port=args.port,
+        share=args.share,
+    )
+if __name__ == '__main__':
+    main()

images/README.md ADDED Viewed

	@@ -0,0 +1,9 @@

+These images are freely-usable ones from https://www.pexels.com/.
+- https://www.pexels.com/photo/assorted-color-kittens-45170/
+- https://www.pexels.com/photo/white-wooden-kitchen-cabinet-1599791/
+- https://www.pexels.com/photo/assorted-books-on-book-shelves-1370295/
+- https://www.pexels.com/photo/pile-of-assorted-varieties-of-vegetables-2255935/
+- https://www.pexels.com/photo/sliced-fruits-on-tray-1132047/
+- https://www.pexels.com/photo/group-of-people-carrying-surfboards-1549196/
+- https://www.pexels.com/photo/aerial-photo-of-vehicles-in-the-city-1031698/

images/pexels-element-digital-1370295.jpg ADDED Viewed

Git LFS Details

SHA256: 5b2885e968ebf6b8e84ecab708d953a5feede203f488038ce3c9cb5bcfc52562
Pointer size: 131 Bytes
Size of remote file: 738 kB

images/pexels-elle-hughes-1549196.jpg ADDED Viewed

Git LFS Details

SHA256: 6d3854839113cc0570044ac87be9d7116b1d1ffe0ba6c81eff60923fa9030b25
Pointer size: 131 Bytes
Size of remote file: 609 kB

images/pexels-jean-van-der-meulen-1599791.jpg ADDED Viewed

Git LFS Details

SHA256: 0f34538e583afda98666158a40154b09316757d89b465b318e182a1758859a88
Pointer size: 131 Bytes
Size of remote file: 333 kB

images/pexels-mark-stebnicki-2255935.jpg ADDED Viewed

Git LFS Details

SHA256: 778b11664deb3c332345a327444898d776b50b4038e39c8ef79e416dc183f752
Pointer size: 131 Bytes
Size of remote file: 417 kB

images/pexels-oleksandr-pidvalnyi-1031698.jpg ADDED Viewed

Git LFS Details

SHA256: 2de03892c6f6624a45fc67a283aeb33d4534fecc76e51d83e43fe40b58ce2351
Pointer size: 131 Bytes
Size of remote file: 670 kB

images/pexels-pixabay-45170.jpg ADDED Viewed

Git LFS Details

SHA256: 114e83cc896e9df6476fd682c8d0dc8fafb76e8cf1da5f6396280040081ee1ca
Pointer size: 131 Bytes
Size of remote file: 681 kB

images/pexels-trang-doan-1132047.jpg ADDED Viewed

Git LFS Details

SHA256: e0eaec57002e3fbb1199d5ddc09df277fd168fc88b36359c2a9d8ef20ea18c28
Pointer size: 131 Bytes
Size of remote file: 519 kB

palette.py ADDED Viewed

	@@ -0,0 +1,273 @@

+"""
+This file is copied from https://github.com/open-mmlab/mmdetection/blob/v2.24.1/mmdet/core/visualization/palette.py
+The LICENSE of mmdetection is the following:
+```
+Copyright 2018-2023 OpenMMLab. All rights reserved.
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright 2018-2023 OpenMMLab.
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+```
+"""
+# Copyright (c) OpenMMLab. All rights reserved.
+import mmcv
+import numpy as np
+def palette_val(palette):
+    """Convert palette to matplotlib palette.
+    Args:
+        palette List[tuple]: A list of color tuples.
+    Returns:
+        List[tuple[float]]: A list of RGB matplotlib color tuples.
+    """
+    new_palette = []
+    for color in palette:
+        color = [c / 255 for c in color]
+        new_palette.append(tuple(color))
+    return new_palette
+def get_palette(palette, num_classes):
+    """Get palette from various inputs.
+    Args:
+        palette (list[tuple] | str | tuple | :obj:`Color`): palette inputs.
+        num_classes (int): the number of classes.
+    Returns:
+        list[tuple[int]]: A list of color tuples.
+    """
+    assert isinstance(num_classes, int)
+    if isinstance(palette, list):
+        dataset_palette = palette
+    elif isinstance(palette, tuple):
+        dataset_palette = [palette] * num_classes
+    elif palette == 'random' or palette is None:
+        state = np.random.get_state()
+        # random color
+        np.random.seed(42)
+        palette = np.random.randint(0, 256, size=(num_classes, 3))
+        np.random.set_state(state)
+        dataset_palette = [tuple(c) for c in palette]
+    elif palette == 'coco':
+        from mmdet.datasets import CocoDataset, CocoPanopticDataset
+        dataset_palette = CocoDataset.PALETTE
+        if len(dataset_palette) < num_classes:
+            dataset_palette = CocoPanopticDataset.PALETTE
+    elif palette == 'citys':
+        from mmdet.datasets import CityscapesDataset
+        dataset_palette = CityscapesDataset.PALETTE
+    elif palette == 'voc':
+        from mmdet.datasets import VOCDataset
+        dataset_palette = VOCDataset.PALETTE
+    elif mmcv.is_str(palette):
+        dataset_palette = [mmcv.color_val(palette)[::-1]] * num_classes
+    else:
+        raise TypeError(f'Invalid type for palette: {type(palette)}')
+    assert len(dataset_palette) >= num_classes, \
+        'The length of palette should not be less than `num_classes`.'
+    return dataset_palette

patch ADDED Viewed

	@@ -0,0 +1,834 @@

+diff --git a/configs/cbnet/cascade_rcnn_cbv2d1_r2_101_mdconv_fpn_20e_fp16_ms400-1400_giou_4conv1f_coco.py b/configs/cbnet/cascade_rcnn_cbv2d1_r2_101_mdconv_fpn_20e_fp16_ms400-1400_giou_4conv1f_coco.py
+index 167d4379..7c0bd239 100644
+--- a/configs/cbnet/cascade_rcnn_cbv2d1_r2_101_mdconv_fpn_20e_fp16_ms400-1400_giou_4conv1f_coco.py
++++ b/configs/cbnet/cascade_rcnn_cbv2d1_r2_101_mdconv_fpn_20e_fp16_ms400-1400_giou_4conv1f_coco.py
+@@ -2,9 +2,9 @@ _base_ = '../res2net/cascade_rcnn_r2_101_fpn_20e_coco.py'
+ model = dict(
+     backbone=dict(
+-        type='CBRes2Net',
++        type='CBRes2Net',
+         cb_del_stages=1,
+-        cb_inplanes=[64, 256, 512, 1024, 2048],
++        cb_inplanes=[64, 256, 512, 1024, 2048],
+         dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
+         stage_with_dcn=(False, True, True, True)
+     ),
+@@ -28,7 +28,7 @@ model = dict(
+                     target_stds=[0.1, 0.1, 0.2, 0.2]),
+                 reg_class_agnostic=False,
+                 reg_decoded_bbox=True,
+-                norm_cfg=dict(type='SyncBN', requires_grad=True),
++                norm_cfg=dict(type='BN', requires_grad=True),
+                 loss_cls=dict(
+                     type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+                 loss_bbox=dict(type='GIoULoss', loss_weight=10.0)),
+@@ -47,7 +47,7 @@ model = dict(
+                     target_stds=[0.05, 0.05, 0.1, 0.1]),
+                 reg_class_agnostic=False,
+                 reg_decoded_bbox=True,
+-                norm_cfg=dict(type='SyncBN', requires_grad=True),
++                norm_cfg=dict(type='BN', requires_grad=True),
+                 loss_cls=dict(
+                     type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+                 loss_bbox=dict(type='GIoULoss', loss_weight=10.0)),
+@@ -66,7 +66,7 @@ model = dict(
+                     target_stds=[0.033, 0.033, 0.067, 0.067]),
+                 reg_class_agnostic=False,
+                 reg_decoded_bbox=True,
+-                norm_cfg=dict(type='SyncBN', requires_grad=True),
++                norm_cfg=dict(type='BN', requires_grad=True),
+                 loss_cls=dict(
+                     type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+                 loss_bbox=dict(type='GIoULoss', loss_weight=10.0))
+diff --git a/configs/cbnet/htc_cbv2_swin_base_patch4_window7_mstrain_400-1400_giou_4conv1f_adamw_20e_coco.py b/configs/cbnet/htc_cbv2_swin_base_patch4_window7_mstrain_400-1400_giou_4conv1f_adamw_20e_coco.py
+index 51edfd62..a7434c5d 100644
+--- a/configs/cbnet/htc_cbv2_swin_base_patch4_window7_mstrain_400-1400_giou_4conv1f_adamw_20e_coco.py
++++ b/configs/cbnet/htc_cbv2_swin_base_patch4_window7_mstrain_400-1400_giou_4conv1f_adamw_20e_coco.py
+@@ -18,7 +18,7 @@ model = dict(
+                     target_stds=[0.1, 0.1, 0.2, 0.2]),
+                 reg_class_agnostic=True,
+                 reg_decoded_bbox=True,
+-                norm_cfg=dict(type='SyncBN', requires_grad=True),
++                norm_cfg=dict(type='BN', requires_grad=True),
+                 loss_cls=dict(
+                     type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+                 loss_bbox=dict(type='GIoULoss', loss_weight=10.0)),
+@@ -37,7 +37,7 @@ model = dict(
+                     target_stds=[0.05, 0.05, 0.1, 0.1]),
+                 reg_class_agnostic=True,
+                 reg_decoded_bbox=True,
+-                norm_cfg=dict(type='SyncBN', requires_grad=True),
++                norm_cfg=dict(type='BN', requires_grad=True),
+                 loss_cls=dict(
+                     type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+                 loss_bbox=dict(type='GIoULoss', loss_weight=10.0)),
+@@ -56,7 +56,7 @@ model = dict(
+                     target_stds=[0.033, 0.033, 0.067, 0.067]),
+                 reg_class_agnostic=True,
+                 reg_decoded_bbox=True,
+-                norm_cfg=dict(type='SyncBN', requires_grad=True),
++                norm_cfg=dict(type='BN', requires_grad=True),
+                 loss_cls=dict(
+                     type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+                 loss_bbox=dict(type='GIoULoss', loss_weight=10.0))
+diff --git a/mmdet/__init__.py b/mmdet/__init__.py
+index 646ee84e..9e846286 100644
+--- a/mmdet/__init__.py
++++ b/mmdet/__init__.py
+@@ -20,9 +20,9 @@ mmcv_maximum_version = '1.4.0'
+ mmcv_version = digit_version(mmcv.__version__)
+-assert (mmcv_version >= digit_version(mmcv_minimum_version)
+-        and mmcv_version <= digit_version(mmcv_maximum_version)), \
+-    f'MMCV=={mmcv.__version__} is used but incompatible. ' \
+-    f'Please install mmcv>={mmcv_minimum_version}, <={mmcv_maximum_version}.'
++#assert (mmcv_version >= digit_version(mmcv_minimum_version)
++#        and mmcv_version <= digit_version(mmcv_maximum_version)), \
++#    f'MMCV=={mmcv.__version__} is used but incompatible. ' \
++#    f'Please install mmcv>={mmcv_minimum_version}, <={mmcv_maximum_version}.'
+ __all__ = ['__version__', 'short_version']
+diff --git a/mmdet/core/mask/structures.py b/mmdet/core/mask/structures.py
+index 6f5a62ae..a9d0ebb4 100644
+--- a/mmdet/core/mask/structures.py
++++ b/mmdet/core/mask/structures.py
+@@ -1,3 +1,4 @@
++# Copyright (c) OpenMMLab. All rights reserved.
+ from abc import ABCMeta, abstractmethod
+ import cv2
+@@ -528,6 +529,21 @@ class BitmapMasks(BaseInstanceMasks):
+         self = cls(masks, height=height, width=width)
+         return self
++    def get_bboxes(self):
++        num_masks = len(self)
++        boxes = np.zeros((num_masks, 4), dtype=np.float32)
++        x_any = self.masks.any(axis=1)
++        y_any = self.masks.any(axis=2)
++        for idx in range(num_masks):
++            x = np.where(x_any[idx, :])[0]
++            y = np.where(y_any[idx, :])[0]
++            if len(x) > 0 and len(y) > 0:
++                # use +1 for x_max and y_max so that the right and bottom
++                # boundary of instance masks are fully included by the box
++                boxes[idx, :] = np.array([x[0], y[0], x[-1] + 1, y[-1] + 1],
++                                         dtype=np.float32)
++        return boxes
++
+ class PolygonMasks(BaseInstanceMasks):
+     """This class represents masks in the form of polygons.
+@@ -637,8 +653,8 @@ class PolygonMasks(BaseInstanceMasks):
+                 resized_poly = []
+                 for p in poly_per_obj:
+                     p = p.copy()
+-                    p[0::2] *= w_scale
+-                    p[1::2] *= h_scale
++                    p[0::2] = p[0::2] * w_scale
++                    p[1::2] = p[1::2] * h_scale
+                     resized_poly.append(p)
+                 resized_masks.append(resized_poly)
+             resized_masks = PolygonMasks(resized_masks, *out_shape)
+@@ -690,8 +706,8 @@ class PolygonMasks(BaseInstanceMasks):
+                 for p in poly_per_obj:
+                     # pycocotools will clip the boundary
+                     p = p.copy()
+-                    p[0::2] -= bbox[0]
+-                    p[1::2] -= bbox[1]
++                    p[0::2] = p[0::2] - bbox[0]
++                    p[1::2] = p[1::2] - bbox[1]
+                     cropped_poly_per_obj.append(p)
+                 cropped_masks.append(cropped_poly_per_obj)
+             cropped_masks = PolygonMasks(cropped_masks, h, w)
+@@ -736,12 +752,12 @@ class PolygonMasks(BaseInstanceMasks):
+                 p = p.copy()
+                 # crop
+                 # pycocotools will clip the boundary
+-                p[0::2] -= bbox[0]
+-                p[1::2] -= bbox[1]
++                p[0::2] = p[0::2] - bbox[0]
++                p[1::2] = p[1::2] - bbox[1]
+                 # resize
+-                p[0::2] *= w_scale
+-                p[1::2] *= h_scale
++                p[0::2] = p[0::2] * w_scale
++                p[1::2] = p[1::2] * h_scale
+                 resized_mask.append(p)
+             resized_masks.append(resized_mask)
+         return PolygonMasks(resized_masks, *out_shape)
+@@ -944,6 +960,7 @@ class PolygonMasks(BaseInstanceMasks):
+                 a list of vertices, in CCW order.
+             """
+             from scipy.stats import truncnorm
++
+             # Generate around the unit circle
+             cx, cy = (0.0, 0.0)
+             radius = 1
+@@ -1019,6 +1036,24 @@ class PolygonMasks(BaseInstanceMasks):
+         self = cls(masks, height, width)
+         return self
++    def get_bboxes(self):
++        num_masks = len(self)
++        boxes = np.zeros((num_masks, 4), dtype=np.float32)
++        for idx, poly_per_obj in enumerate(self.masks):
++            # simply use a number that is big enough for comparison with
++            # coordinates
++            xy_min = np.array([self.width * 2, self.height * 2],
++                              dtype=np.float32)
++            xy_max = np.zeros(2, dtype=np.float32)
++            for p in poly_per_obj:
++                xy = np.array(p).reshape(-1, 2).astype(np.float32)
++                xy_min = np.minimum(xy_min, np.min(xy, axis=0))
++                xy_max = np.maximum(xy_max, np.max(xy, axis=0))
++            boxes[idx, :2] = xy_min
++            boxes[idx, 2:] = xy_max
++
++        return boxes
++
+ def polygon_to_bitmap(polygons, height, width):
+     """Convert masks from the form of polygons to bitmaps.
+@@ -1035,3 +1070,33 @@ def polygon_to_bitmap(polygons, height, width):
+     rle = maskUtils.merge(rles)
+     bitmap_mask = maskUtils.decode(rle).astype(np.bool)
+     return bitmap_mask
++
++
++def bitmap_to_polygon(bitmap):
++    """Convert masks from the form of bitmaps to polygons.
++
++    Args:
++        bitmap (ndarray): masks in bitmap representation.
++
++    Return:
++        list[ndarray]: the converted mask in polygon representation.
++        bool: whether the mask has holes.
++    """
++    bitmap = np.ascontiguousarray(bitmap).astype(np.uint8)
++    # cv2.RETR_CCOMP: retrieves all of the contours and organizes them
++    #   into a two-level hierarchy. At the top level, there are external
++    #   boundaries of the components. At the second level, there are
++    #   boundaries of the holes. If there is another contour inside a hole
++    #   of a connected component, it is still put at the top level.
++    # cv2.CHAIN_APPROX_NONE: stores absolutely all the contour points.
++    outs = cv2.findContours(bitmap, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)
++    contours = outs[-2]
++    hierarchy = outs[-1]
++    if hierarchy is None:
++        return [], False
++    # hierarchy[i]: 4 elements, for the indexes of next, previous,
++    # parent, or nested contours. If there is no corresponding contour,
++    # it will be -1.
++    with_hole = (hierarchy.reshape(-1, 4)[:, 3] >= 0).any()
++    contours = [c.reshape(-1, 2) for c in contours]
++    return contours, with_hole
+diff --git a/mmdet/core/visualization/image.py b/mmdet/core/visualization/image.py
+index 5a148384..66f82a38 100644
+--- a/mmdet/core/visualization/image.py
++++ b/mmdet/core/visualization/image.py
+@@ -1,3 +1,5 @@
++# Copyright (c) OpenMMLab. All rights reserved.
++import cv2
+ import matplotlib.pyplot as plt
+ import mmcv
+ import numpy as np
+@@ -5,17 +7,25 @@ import pycocotools.mask as mask_util
+ from matplotlib.collections import PatchCollection
+ from matplotlib.patches import Polygon
++#from mmdet.core.evaluation.panoptic_utils import INSTANCE_OFFSET
++from ..mask.structures import bitmap_to_polygon
+ from ..utils import mask2ndarray
++from .palette import get_palette, palette_val
++
++__all__ = [
++    'color_val_matplotlib', 'draw_masks', 'draw_bboxes', 'draw_labels',
++    'imshow_det_bboxes', 'imshow_gt_det_bboxes'
++]
+ EPS = 1e-2
+ def color_val_matplotlib(color):
+     """Convert various input in BGR order to normalized RGB matplotlib color
+-    tuples,
++    tuples.
+     Args:
+-        color (:obj:`Color`/str/tuple/int/ndarray): Color inputs
++        color (:obj`Color` | str | tuple | int | ndarray): Color inputs.
+     Returns:
+         tuple[float]: A tuple of 3 normalized floats indicating RGB channels.
+@@ -25,9 +35,177 @@ def color_val_matplotlib(color):
+     return tuple(color)
++def _get_adaptive_scales(areas, min_area=800, max_area=30000):
++    """Get adaptive scales according to areas.
++
++    The scale range is [0.5, 1.0]. When the area is less than
++    ``'min_area'``, the scale is 0.5 while the area is larger than
++    ``'max_area'``, the scale is 1.0.
++
++    Args:
++        areas (ndarray): The areas of bboxes or masks with the
++            shape of (n, ).
++        min_area (int): Lower bound areas for adaptive scales.
++            Default: 800.
++        max_area (int): Upper bound areas for adaptive scales.
++            Default: 30000.
++
++    Returns:
++        ndarray: The adaotive scales with the shape of (n, ).
++    """
++    scales = 0.5 + (areas - min_area) / (max_area - min_area)
++    scales = np.clip(scales, 0.5, 1.0)
++    return scales
++
++
++def _get_bias_color(base, max_dist=30):
++    """Get different colors for each masks.
++
++    Get different colors for each masks by adding a bias
++    color to the base category color.
++    Args:
++        base (ndarray): The base category color with the shape
++            of (3, ).
++        max_dist (int): The max distance of bias. Default: 30.
++
++    Returns:
++        ndarray: The new color for a mask with the shape of (3, ).
++    """
++    new_color = base + np.random.randint(
++        low=-max_dist, high=max_dist + 1, size=3)
++    return np.clip(new_color, 0, 255, new_color)
++
++
++def draw_bboxes(ax, bboxes, color='g', alpha=0.8, thickness=2):
++    """Draw bounding boxes on the axes.
++
++    Args:
++        ax (matplotlib.Axes): The input axes.
++        bboxes (ndarray): The input bounding boxes with the shape
++            of (n, 4).
++        color (list[tuple] | matplotlib.color): the colors for each
++            bounding boxes.
++        alpha (float): Transparency of bounding boxes. Default: 0.8.
++        thickness (int): Thickness of lines. Default: 2.
++
++    Returns:
++        matplotlib.Axes: The result axes.
++    """
++    polygons = []
++    for i, bbox in enumerate(bboxes):
++        bbox_int = bbox.astype(np.int32)
++        poly = [[bbox_int[0], bbox_int[1]], [bbox_int[0], bbox_int[3]],
++                [bbox_int[2], bbox_int[3]], [bbox_int[2], bbox_int[1]]]
++        np_poly = np.array(poly).reshape((4, 2))
++        polygons.append(Polygon(np_poly))
++    p = PatchCollection(
++        polygons,
++        facecolor='none',
++        edgecolors=color,
++        linewidths=thickness,
++        alpha=alpha)
++    ax.add_collection(p)
++
++    return ax
++
++
++def draw_labels(ax,
++                labels,
++                positions,
++                scores=None,
++                class_names=None,
++                color='w',
++                font_size=8,
++                scales=None,
++                horizontal_alignment='left'):
++    """Draw labels on the axes.
++
++    Args:
++        ax (matplotlib.Axes): The input axes.
++        labels (ndarray): The labels with the shape of (n, ).
++        positions (ndarray): The positions to draw each labels.
++        scores (ndarray): The scores for each labels.
++        class_names (list[str]): The class names.
++        color (list[tuple] | matplotlib.color): The colors for labels.
++        font_size (int): Font size of texts. Default: 8.
++        scales (list[float]): Scales of texts. Default: None.
++        horizontal_alignment (str): The horizontal alignment method of
++            texts. Default: 'left'.
++
++    Returns:
++        matplotlib.Axes: The result axes.
++    """
++    for i, (pos, label) in enumerate(zip(positions, labels)):
++        label_text = class_names[
++            label] if class_names is not None else f'class {label}'
++        if scores is not None:
++            label_text += f'|{scores[i]:.02f}'
++        text_color = color[i] if isinstance(color, list) else color
++
++        font_size_mask = font_size if scales is None else font_size * scales[i]
++        ax.text(
++            pos[0],
++            pos[1],
++            f'{label_text}',
++            bbox={
++                'facecolor': 'black',
++                'alpha': 0.8,
++                'pad': 0.7,
++                'edgecolor': 'none'
++            },
++            color=text_color,
++            fontsize=font_size_mask,
++            verticalalignment='top',
++            horizontalalignment=horizontal_alignment)
++
++    return ax
++
++
++def draw_masks(ax, img, masks, color=None, with_edge=True, alpha=0.8):
++    """Draw masks on the image and their edges on the axes.
++
++    Args:
++        ax (matplotlib.Axes): The input axes.
++        img (ndarray): The image with the shape of (3, h, w).
++        masks (ndarray): The masks with the shape of (n, h, w).
++        color (ndarray): The colors for each masks with the shape
++            of (n, 3).
++        with_edge (bool): Whether to draw edges. Default: True.
++        alpha (float): Transparency of bounding boxes. Default: 0.8.
++
++    Returns:
++        matplotlib.Axes: The result axes.
++        ndarray: The result image.
++    """
++    taken_colors = set([0, 0, 0])
++    if color is None:
++        random_colors = np.random.randint(0, 255, (masks.size(0), 3))
++        color = [tuple(c) for c in random_colors]
++        color = np.array(color, dtype=np.uint8)
++    polygons = []
++    for i, mask in enumerate(masks):
++        if with_edge:
++            contours, _ = bitmap_to_polygon(mask)
++            polygons += [Polygon(c) for c in contours]
++
++        color_mask = color[i]
++        while tuple(color_mask) in taken_colors:
++            color_mask = _get_bias_color(color_mask)
++        taken_colors.add(tuple(color_mask))
++
++        mask = mask.astype(bool)
++        img[mask] = img[mask] * (1 - alpha) + color_mask * alpha
++
++    p = PatchCollection(
++        polygons, facecolor='none', edgecolors='w', linewidths=1, alpha=0.8)
++    ax.add_collection(p)
++
++    return ax, img
++
++
+ def imshow_det_bboxes(img,
+-                      bboxes,
+-                      labels,
++                      bboxes=None,
++                      labels=None,
+                       segms=None,
+                       class_names=None,
+                       score_thr=0,
+@@ -35,7 +213,7 @@ def imshow_det_bboxes(img,
+                       text_color='green',
+                       mask_color=None,
+                       thickness=2,
+-                      font_size=13,
++                      font_size=8,
+                       win_name='',
+                       show=True,
+                       wait_time=0,
+@@ -43,43 +221,51 @@ def imshow_det_bboxes(img,
+     """Draw bboxes and class labels (with scores) on an image.
+     Args:
+-        img (str or ndarray): The image to be displayed.
++        img (str | ndarray): The image to be displayed.
+         bboxes (ndarray): Bounding boxes (with scores), shaped (n, 4) or
+             (n, 5).
+         labels (ndarray): Labels of bboxes.
+-        segms (ndarray or None): Masks, shaped (n,h,w) or None
++        segms (ndarray | None): Masks, shaped (n,h,w) or None.
+         class_names (list[str]): Names of each classes.
+-        score_thr (float): Minimum score of bboxes to be shown.  Default: 0
+-        bbox_color (str or tuple(int) or :obj:`Color`):Color of bbox lines.
+-           The tuple of color should be in BGR order. Default: 'green'
+-        text_color (str or tuple(int) or :obj:`Color`):Color of texts.
+-           The tuple of color should be in BGR order. Default: 'green'
+-        mask_color (str or tuple(int) or :obj:`Color`, optional):
+-           Color of masks. The tuple of color should be in BGR order.
+-           Default: None
+-        thickness (int): Thickness of lines. Default: 2
+-        font_size (int): Font size of texts. Default: 13
+-        show (bool): Whether to show the image. Default: True
+-        win_name (str): The window name. Default: ''
++        score_thr (float): Minimum score of bboxes to be shown. Default: 0.
++        bbox_color (list[tuple] | tuple | str | None): Colors of bbox lines.
++           If a single color is given, it will be applied to all classes.
++           The tuple of color should be in RGB order. Default: 'green'.
++        text_color (list[tuple] | tuple | str | None): Colors of texts.
++           If a single color is given, it will be applied to all classes.
++           The tuple of color should be in RGB order. Default: 'green'.
++        mask_color (list[tuple] | tuple | str | None, optional): Colors of
++           masks. If a single color is given, it will be applied to all
++           classes. The tuple of color should be in RGB order.
++           Default: None.
++        thickness (int): Thickness of lines. Default: 2.
++        font_size (int): Font size of texts. Default: 13.
++        show (bool): Whether to show the image. Default: True.
++        win_name (str): The window name. Default: ''.
+         wait_time (float): Value of waitKey param. Default: 0.
+         out_file (str, optional): The filename to write the image.
+-            Default: None
++            Default: None.
+     Returns:
+         ndarray: The image with bboxes drawn on it.
+     """
+-    assert bboxes.ndim == 2, \
++    assert bboxes is None or bboxes.ndim == 2, \
+         f' bboxes ndim should be 2, but its ndim is {bboxes.ndim}.'
+     assert labels.ndim == 1, \
+         f' labels ndim should be 1, but its ndim is {labels.ndim}.'
+-    assert bboxes.shape[0] == labels.shape[0], \
+-        'bboxes.shape[0] and labels.shape[0] should have the same length.'
+-    assert bboxes.shape[1] == 4 or bboxes.shape[1] == 5, \
++    assert bboxes is None or bboxes.shape[1] == 4 or bboxes.shape[1] == 5, \
+         f' bboxes.shape[1] should be 4 or 5, but its {bboxes.shape[1]}.'
++    assert bboxes is None or bboxes.shape[0] <= labels.shape[0], \
++        'labels.shape[0] should not be less than bboxes.shape[0].'
++    assert segms is None or segms.shape[0] == labels.shape[0], \
++        'segms.shape[0] and labels.shape[0] should have the same length.'
++    assert segms is not None or bboxes is not None, \
++        'segms and bboxes should not be None at the same time.'
++
+     img = mmcv.imread(img).astype(np.uint8)
+     if score_thr > 0:
+-        assert bboxes.shape[1] == 5
++        assert bboxes is not None and bboxes.shape[1] == 5
+         scores = bboxes[:, -1]
+         inds = scores > score_thr
+         bboxes = bboxes[inds, :]
+@@ -87,25 +273,6 @@ def imshow_det_bboxes(img,
+         if segms is not None:
+             segms = segms[inds, ...]
+-    mask_colors = []
+-    if labels.shape[0] > 0:
+-        if mask_color is None:
+-            # random color
+-            np.random.seed(42)
+-            mask_colors = [
+-                np.random.randint(0, 256, (1, 3), dtype=np.uint8)
+-                for _ in range(max(labels) + 1)
+-            ]
+-        else:
+-            # specify  color
+-            mask_colors = [
+-                np.array(mmcv.color_val(mask_color)[::-1], dtype=np.uint8)
+-            ] * (
+-                max(labels) + 1)
+-
+-    bbox_color = color_val_matplotlib(bbox_color)
+-    text_color = color_val_matplotlib(text_color)
+-
+     img = mmcv.bgr2rgb(img)
+     width, height = img.shape[1], img.shape[0]
+     img = np.ascontiguousarray(img)
+@@ -123,44 +290,64 @@ def imshow_det_bboxes(img,
+     ax = plt.gca()
+     ax.axis('off')
+-    polygons = []
+-    color = []
+-    for i, (bbox, label) in enumerate(zip(bboxes, labels)):
+-        bbox_int = bbox.astype(np.int32)
+-        poly = [[bbox_int[0], bbox_int[1]], [bbox_int[0], bbox_int[3]],
+-                [bbox_int[2], bbox_int[3]], [bbox_int[2], bbox_int[1]]]
+-        np_poly = np.array(poly).reshape((4, 2))
+-        polygons.append(Polygon(np_poly))
+-        color.append(bbox_color)
+-        label_text = class_names[
+-            label] if class_names is not None else f'class {label}'
+-        if len(bbox) > 4:
+-            label_text += f'|{bbox[-1]:.02f}'
+-        ax.text(
+-            bbox_int[0],
+-            bbox_int[1],
+-            f'{label_text}',
+-            bbox={
+-                'facecolor': 'black',
+-                'alpha': 0.8,
+-                'pad': 0.7,
+-                'edgecolor': 'none'
+-            },
+-            color=text_color,
+-            fontsize=font_size,
+-            verticalalignment='top',
+-            horizontalalignment='left')
+-        if segms is not None:
+-            color_mask = mask_colors[labels[i]]
+-            mask = segms[i].astype(bool)
+-            img[mask] = img[mask] * 0.5 + color_mask * 0.5
++    max_label = int(max(labels) if len(labels) > 0 else 0)
++    text_palette = palette_val(get_palette(text_color, max_label + 1))
++    text_colors = [text_palette[label] for label in labels]
++
++    num_bboxes = 0
++    if bboxes is not None:
++        num_bboxes = bboxes.shape[0]
++        bbox_palette = palette_val(get_palette(bbox_color, max_label + 1))
++        colors = [bbox_palette[label] for label in labels[:num_bboxes]]
++        draw_bboxes(ax, bboxes, colors, alpha=0.8, thickness=thickness)
++
++        horizontal_alignment = 'left'
++        positions = bboxes[:, :2].astype(np.int32) + thickness
++        areas = (bboxes[:, 3] - bboxes[:, 1]) * (bboxes[:, 2] - bboxes[:, 0])
++        scales = _get_adaptive_scales(areas)
++        scores = bboxes[:, 4] if bboxes.shape[1] == 5 else None
++        draw_labels(
++            ax,
++            labels[:num_bboxes],
++            positions,
++            scores=scores,
++            class_names=class_names,
++            color=text_colors,
++            font_size=font_size,
++            scales=scales,
++            horizontal_alignment=horizontal_alignment)
++
++    if segms is not None:
++        mask_palette = get_palette(mask_color, max_label + 1)
++        colors = [mask_palette[label] for label in labels]
++        colors = np.array(colors, dtype=np.uint8)
++        draw_masks(ax, img, segms, colors, with_edge=True)
++
++        if num_bboxes < segms.shape[0]:
++            segms = segms[num_bboxes:]
++            horizontal_alignment = 'center'
++            areas = []
++            positions = []
++            for mask in segms:
++                _, _, stats, centroids = cv2.connectedComponentsWithStats(
++                    mask.astype(np.uint8), connectivity=8)
++                largest_id = np.argmax(stats[1:, -1]) + 1
++                positions.append(centroids[largest_id])
++                areas.append(stats[largest_id, -1])
++            areas = np.stack(areas, axis=0)
++            scales = _get_adaptive_scales(areas)
++            draw_labels(
++                ax,
++                labels[num_bboxes:],
++                positions,
++                class_names=class_names,
++                color=text_colors,
++                font_size=font_size,
++                scales=scales,
++                horizontal_alignment=horizontal_alignment)
+     plt.imshow(img)
+-    p = PatchCollection(
+-        polygons, facecolor='none', edgecolors=color, linewidths=thickness)
+-    ax.add_collection(p)
+-
+     stream, _ = canvas.print_to_buffer()
+     buffer = np.frombuffer(stream, dtype='uint8')
+     img_rgba = buffer.reshape(height, width, 4)
+@@ -191,12 +378,12 @@ def imshow_gt_det_bboxes(img,
+                          result,
+                          class_names=None,
+                          score_thr=0,
+-                         gt_bbox_color=(255, 102, 61),
+-                         gt_text_color=(255, 102, 61),
+-                         gt_mask_color=(255, 102, 61),
+-                         det_bbox_color=(72, 101, 241),
+-                         det_text_color=(72, 101, 241),
+-                         det_mask_color=(72, 101, 241),
++                         gt_bbox_color=(61, 102, 255),
++                         gt_text_color=(200, 200, 200),
++                         gt_mask_color=(61, 102, 255),
++                         det_bbox_color=(241, 101, 72),
++                         det_text_color=(200, 200, 200),
++                         det_mask_color=(241, 101, 72),
+                          thickness=2,
+                          font_size=13,
+                          win_name='',
+@@ -206,54 +393,75 @@ def imshow_gt_det_bboxes(img,
+     """General visualization GT and result function.
+     Args:
+-      img (str or ndarray): The image to be displayed.)
++      img (str | ndarray): The image to be displayed.
+       annotation (dict): Ground truth annotations where contain keys of
+-          'gt_bboxes' and 'gt_labels' or 'gt_masks'
+-      result (tuple[list] or list): The detection result, can be either
++          'gt_bboxes' and 'gt_labels' or 'gt_masks'.
++      result (tuple[list] | list): The detection result, can be either
+           (bbox, segm) or just bbox.
+       class_names (list[str]): Names of each classes.
+-      score_thr (float): Minimum score of bboxes to be shown.  Default: 0
+-      gt_bbox_color (str or tuple(int) or :obj:`Color`):Color of bbox lines.
+-           The tuple of color should be in BGR order. Default: (255, 102, 61)
+-      gt_text_color (str or tuple(int) or :obj:`Color`):Color of texts.
+-           The tuple of color should be in BGR order. Default: (255, 102, 61)
+-      gt_mask_color (str or tuple(int) or :obj:`Color`, optional):
+-           Color of masks. The tuple of color should be in BGR order.
+-           Default: (255, 102, 61)
+-      det_bbox_color (str or tuple(int) or :obj:`Color`):Color of bbox lines.
+-           The tuple of color should be in BGR order. Default: (72, 101, 241)
+-      det_text_color (str or tuple(int) or :obj:`Color`):Color of texts.
+-           The tuple of color should be in BGR order. Default: (72, 101, 241)
+-      det_mask_color (str or tuple(int) or :obj:`Color`, optional):
+-           Color of masks. The tuple of color should be in BGR order.
+-           Default: (72, 101, 241)
+-      thickness (int): Thickness of lines. Default: 2
+-      font_size (int): Font size of texts. Default: 13
+-      win_name (str): The window name. Default: ''
+-      show (bool): Whether to show the image. Default: True
++      score_thr (float): Minimum score of bboxes to be shown. Default: 0.
++      gt_bbox_color (list[tuple] | tuple | str | None): Colors of bbox lines.
++          If a single color is given, it will be applied to all classes.
++          The tuple of color should be in RGB order. Default: (61, 102, 255).
++      gt_text_color (list[tuple] | tuple | str | None): Colors of texts.
++          If a single color is given, it will be applied to all classes.
++          The tuple of color should be in RGB order. Default: (200, 200, 200).
++      gt_mask_color (list[tuple] | tuple | str | None, optional): Colors of
++          masks. If a single color is given, it will be applied to all classes.
++          The tuple of color should be in RGB order. Default: (61, 102, 255).
++      det_bbox_color (list[tuple] | tuple | str | None):Colors of bbox lines.
++          If a single color is given, it will be applied to all classes.
++          The tuple of color should be in RGB order. Default: (241, 101, 72).
++      det_text_color (list[tuple] | tuple | str | None):Colors of texts.
++          If a single color is given, it will be applied to all classes.
++          The tuple of color should be in RGB order. Default: (200, 200, 200).
++      det_mask_color (list[tuple] | tuple | str | None, optional): Color of
++          masks. If a single color is given, it will be applied to all classes.
++          The tuple of color should be in RGB order. Default: (241, 101, 72).
++      thickness (int): Thickness of lines. Default: 2.
++      font_size (int): Font size of texts. Default: 13.
++      win_name (str): The window name. Default: ''.
++      show (bool): Whether to show the image. Default: True.
+       wait_time (float): Value of waitKey param. Default: 0.
+       out_file (str, optional): The filename to write the image.
+-         Default: None
++          Default: None.
+     Returns:
+         ndarray: The image with bboxes or masks drawn on it.
+     """
+     assert 'gt_bboxes' in annotation
+     assert 'gt_labels' in annotation
+-    assert isinstance(
+-        result,
+-        (tuple, list)), f'Expected tuple or list, but get {type(result)}'
++    assert isinstance(result, (tuple, list, dict)), 'Expected ' \
++        f'tuple or list or dict, but get {type(result)}'
++    gt_bboxes = annotation['gt_bboxes']
++    gt_labels = annotation['gt_labels']
+     gt_masks = annotation.get('gt_masks', None)
+     if gt_masks is not None:
+         gt_masks = mask2ndarray(gt_masks)
++    gt_seg = annotation.get('gt_semantic_seg', None)
++    if gt_seg is not None:
++        pad_value = 255  # the padding value of gt_seg
++        sem_labels = np.unique(gt_seg)
++        all_labels = np.concatenate((gt_labels, sem_labels), axis=0)
++        all_labels, counts = np.unique(all_labels, return_counts=True)
++        stuff_labels = all_labels[np.logical_and(counts < 2,
++                                                 all_labels != pad_value)]
++        stuff_masks = gt_seg[None] == stuff_labels[:, None, None]
++        gt_labels = np.concatenate((gt_labels, stuff_labels), axis=0)
++        gt_masks = np.concatenate((gt_masks, stuff_masks.astype(np.uint8)),
++                                  axis=0)
++        # If you need to show the bounding boxes,
++        # please comment the following line
++        # gt_bboxes = None
++
+     img = mmcv.imread(img)
+     img = imshow_det_bboxes(
+         img,
+-        annotation['gt_bboxes'],
+-        annotation['gt_labels'],
++        gt_bboxes,
++        gt_labels,
+         gt_masks,
+         class_names=class_names,
+         bbox_color=gt_bbox_color,
+@@ -264,25 +472,38 @@ def imshow_gt_det_bboxes(img,
+         win_name=win_name,
+         show=False)
+-    if isinstance(result, tuple):
+-        bbox_result, segm_result = result
+-        if isinstance(segm_result, tuple):
+-            segm_result = segm_result[0]  # ms rcnn
++    if not isinstance(result, dict):
++        if isinstance(result, tuple):
++            bbox_result, segm_result = result
++            if isinstance(segm_result, tuple):
++                segm_result = segm_result[0]  # ms rcnn
++        else:
++            bbox_result, segm_result = result, None
++
++        bboxes = np.vstack(bbox_result)
++        labels = [
++            np.full(bbox.shape[0], i, dtype=np.int32)
++            for i, bbox in enumerate(bbox_result)
++        ]
++        labels = np.concatenate(labels)
++
++        segms = None
++        if segm_result is not None and len(labels) > 0:  # non empty
++            segms = mmcv.concat_list(segm_result)
++            segms = mask_util.decode(segms)
++            segms = segms.transpose(2, 0, 1)
+     else:
+-        bbox_result, segm_result = result, None
+-
+-    bboxes = np.vstack(bbox_result)
+-    labels = [
+-        np.full(bbox.shape[0], i, dtype=np.int32)
+-        for i, bbox in enumerate(bbox_result)
+-    ]
+-    labels = np.concatenate(labels)
+-
+-    segms = None
+-    if segm_result is not None and len(labels) > 0:  # non empty
+-        segms = mmcv.concat_list(segm_result)
+-        segms = mask_util.decode(segms)
+-        segms = segms.transpose(2, 0, 1)
++        assert class_names is not None, 'We need to know the number ' \
++                                        'of classes.'
++        VOID = len(class_names)
++        bboxes = None
++        pan_results = result['pan_results']
++        # keep objects ahead
++        ids = np.unique(pan_results)[::-1]
++        legal_indices = ids != VOID
++        ids = ids[legal_indices]
++        labels = np.array([id % INSTANCE_OFFSET for id in ids], dtype=np.int64)
++        segms = (pan_results[None] == ids[:, None, None])
+     img = imshow_det_bboxes(
+         img,

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+mmcv-full==1.5.0
+mmdet==2.24.1
+numpy==1.22.4
+opencv-python-headless==4.5.5.64
+openmim==0.1.5
+timm==0.5.4
+torch==1.11.0
+torchvision==0.12.0