Spaces:

henry000
/

YOLO

Running

App Files Files Community

henry000 commited on Jun 5, 2024

Commit

a7cf768

1 Parent(s): 8942278

♻️ [Refactor] the code in deploy model

Browse files

Files changed (1) hide show

yolo/utils/deploy_utils.py +24 -23

yolo/utils/deploy_utils.py CHANGED Viewed

@@ -1,3 +1,5 @@
 import torch
 from loguru import logger
 from torch import Tensor
@@ -9,24 +11,24 @@ from yolo.model.yolo import create_model
 class FastModelLoader:
     def __init__(self, cfg: Config):
         self.cfg = cfg
-        self.compiler = self.cfg.task.fast_inference
         if self.compiler not in ["onnx", "trt"]:
-            logger.warning(f"⚠️ {self.compiler} is not supported, if it is spelled wrong? Select origin model")
             self.compiler = None
         if self.cfg.device == "mps" and self.compiler == "trt":
-            logger.warning("🍎 TensorRT does not support MPS devices, select origin model")
             self.compiler = None
-        self.weight = cfg.weight.split(".")[0] + "." + self.compiler
     def load_model(self):
         if self.compiler == "onnx":
-            logger.info("🚀 Try to use ONNX")
             return self._load_onnx_model()
         elif self.compiler == "trt":
-            logger.info("🚀 Try to use TensorRT")
             return self._load_trt_model()
-        else:
-            return create_model(self.cfg)
     def _load_onnx_model(self):
         from onnxruntime import InferenceSession
@@ -37,17 +39,17 @@ class FastModelLoader:
             return [x]
         InferenceSession.__call__ = onnx_forward
         try:
-            ort_session = InferenceSession(self.weight, providers=["CPUExecutionProvider"])
         except Exception as e:
             logger.warning(f"🈳 Error loading ONNX model: {e}")
-            ort_session = self._create_onnx_weight()
         # TODO: Update if GPU onnx unavailable change to cpu
         self.cfg.device = "cpu"
         return ort_session
-    def _create_onnx_weight(self):
         from onnxruntime import InferenceSession
         from torch.onnx import export
@@ -56,34 +58,33 @@ class FastModelLoader:
         export(
             model,
             dummy_input,
-            self.weight,
             input_names=["input"],
             output_names=["output"],
             dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}},
         )
-        logger.info(f"📥 ONNX model saved to {self.weight} ")
-        return InferenceSession(self.weight, providers=["CPUExecutionProvider"])
     def _load_trt_model(self):
         from torch2trt import TRTModule
-        model_trt = TRTModule()
         try:
             model_trt = TRTModule()
-            model_trt.load_state_dict(torch.load(self.weight))
         except FileNotFoundError:
-            logger.warning(f"🈳 No found model weight at {self.weight}")
-            model_trt = self._create_trt_weight()
         return model_trt
-    def _create_trt_weight(self):
         from torch2trt import torch2trt
         model = create_model(self.cfg).eval()
         dummy_input = torch.ones((1, 3, *self.cfg.image_size))
         logger.info(f"♻️ Creating TensorRT model")
         model_trt = torch2trt(model, [dummy_input])
-        torch.save(model_trt.state_dict(), self.weight)
-        logger.info(f"📥 TensorRT model saved to {self.weight}")
         return model_trt

+import os
 import torch
 from loguru import logger
 from torch import Tensor
 class FastModelLoader:
     def __init__(self, cfg: Config):
         self.cfg = cfg
+        self.compiler = cfg.task.fast_inference
+        self._validate_compiler()
+        self.model_path = f"{os.path.splitext(cfg.weight)[0]}.{self.compiler}"
+    def _validate_compiler(self):
         if self.compiler not in ["onnx", "trt"]:
+            logger.warning(f"⚠️ Compiler '{self.compiler}' is not supported. Using original model.")
             self.compiler = None
         if self.cfg.device == "mps" and self.compiler == "trt":
+            logger.warning("🍎 TensorRT does not support MPS devices. Using original model.")
             self.compiler = None
     def load_model(self):
         if self.compiler == "onnx":
             return self._load_onnx_model()
         elif self.compiler == "trt":
             return self._load_trt_model()
+        return create_model(self.cfg)
     def _load_onnx_model(self):
         from onnxruntime import InferenceSession
             return [x]
         InferenceSession.__call__ = onnx_forward
         try:
+            ort_session = InferenceSession(self.model_path)
+            logger.info("🚀 Using ONNX as MODEL frameworks!")
         except Exception as e:
             logger.warning(f"🈳 Error loading ONNX model: {e}")
+            ort_session = self._create_onnx_model()
         # TODO: Update if GPU onnx unavailable change to cpu
         self.cfg.device = "cpu"
         return ort_session
+    def _create_onnx_model(self):
         from onnxruntime import InferenceSession
         from torch.onnx import export
         export(
             model,
             dummy_input,
+            self.model_path,
             input_names=["input"],
             output_names=["output"],
             dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}},
         )
+        logger.info(f"📥 ONNX model saved to {self.model_path}")
+        return InferenceSession(self.model_path)
     def _load_trt_model(self):
         from torch2trt import TRTModule
         try:
             model_trt = TRTModule()
+            model_trt.load_state_dict(torch.load(self.model_path))
+            logger.info("🚀 Using TensorRT as MODEL frameworks!")
         except FileNotFoundError:
+            logger.warning(f"🈳 No found model weight at {self.model_path}")
+            model_trt = self._create_trt_model()
         return model_trt
+    def _create_trt_model(self):
         from torch2trt import torch2trt
         model = create_model(self.cfg).eval()
         dummy_input = torch.ones((1, 3, *self.cfg.image_size))
         logger.info(f"♻️ Creating TensorRT model")
         model_trt = torch2trt(model, [dummy_input])
+        torch.save(model_trt.state_dict(), self.model_path)
+        logger.info(f"📥 TensorRT model saved to {self.model_path}")
         return model_trt