Megvii-BaseDetection · weiji14 · Apr 13, 2022 · Apr 14, 2022 · Apr 14, 2022
diff --git a/exps/default/yolox_nano.py b/exps/default/yolox_nano.py
@@ -14,6 +14,7 @@ def __init__(self):
  super(Exp, self).__init__()
  self.depth = 0.33
  self.width = 0.25
+ self.backbone_in_channels = 3
  self.input_size = (416, 416)
  self.random_size = (10, 20)
  self.mosaic_scale = (0.5, 1.5)
@@ -34,8 +35,12 @@ def init_yolo(M):
  in_channels = [256, 512, 1024]
  # NANO model use depthwise = True, which is main difference.
  backbone = YOLOPAFPN(
- self.depth, self.width, in_channels=in_channels,
- act=self.act, depthwise=True,
+ self.depth,
+ self.width,
+ backbone_in_channels=self.backbone_in_channels,
+ in_channels=in_channels,
+ act=self.act,
+ depthwise=True,
  )
  head = YOLOXHead(
  self.num_classes, self.width, in_channels=in_channels,

diff --git a/yolox/exp/yolox_base.py b/yolox/exp/yolox_base.py
@@ -17,6 +17,8 @@ def __init__(self):
  super().__init__()
 
  # ---------------- model config ---------------- #
+ # number of input channels, e.g. 3 for RGB input
+ self.backbone_in_channels = 3
  # detect classes number of model
  self.num_classes = 80
  # factor of model depth
@@ -118,8 +120,16 @@ def init_yolo(M):
 
  if getattr(self, "model", None) is None:
  in_channels = [256, 512, 1024]
- backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, act=self.act)
- head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, act=self.act)
+ backbone = YOLOPAFPN(
+ self.depth,
+ self.width,
+ backbone_in_channels=self.backbone_in_channels,
+ in_channels=in_channels,
+ act=self.act,
+ )
+ head = YOLOXHead(
+ self.num_classes, self.width, in_channels=in_channels, act=self.act
+ )
  self.model = YOLOX(backbone, head)
 
  self.model.apply(init_yolo)

diff --git a/yolox/models/build.py b/yolox/models/build.py
@@ -29,7 +29,11 @@
 
 
 def create_yolox_model(
- name: str, pretrained: bool = True, num_classes: int = 80, device=None
+ name: str,
+ pretrained: bool = True,
+ backbone_in_channels: int = 3,
+ num_classes: int = 80,
+ device=None,
 ) -> nn.Module:
  """creates and loads a YOLOX model
 
@@ -48,11 +52,20 @@ def create_yolox_model(
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
  device = torch.device(device)
 
- assert name in _CKPT_FULL_PATH, f"user should use one of value in {_CKPT_FULL_PATH.keys()}"
+ assert (
+ name in _CKPT_FULL_PATH
+ ), f"user should use one of value in {_CKPT_FULL_PATH.keys()}"
  exp: Exp = get_exp(exp_name=name)
+ exp.backbone_in_channels = backbone_in_channels
  exp.num_classes = num_classes
  yolox_model = exp.get_model()
- if pretrained and num_classes == 80:
+ if pretrained:
+ assert (
+ backbone_in_channels == 3
+ ), f"There are no pretrained weights for the model whose number of input channels are {backbone_in_channels}"
+ assert (
+ num_classes == 80
+ ), f"There are no pretrained weights for the model whose number of output classes are {num_classes}"
  weights_url = _CKPT_FULL_PATH[name]
  ckpt = load_state_dict_from_url(weights_url, map_location="cpu")
  if "model" in ckpt:
@@ -63,29 +76,43 @@ def create_yolox_model(
  return yolox_model
 
 
-def yolox_nano(pretrained=True, num_classes=80, device=None):
-    return create_yolox_model("yolox-nano", pretrained, num_classes, device)
+def yolox_nano(pretrained=True, backbone_in_channels=3, num_classes=80, device=None):
+    """Build the "yolox-nano" model by delegating to ``create_yolox_model``.
+
+    Every argument is forwarded unchanged, by keyword. With ``pretrained=True``
+    the callee asserts ``backbone_in_channels == 3`` and ``num_classes == 80``.
+    """
+    return create_yolox_model(
+        name="yolox-nano",
+        pretrained=pretrained,
+        backbone_in_channels=backbone_in_channels,
+        num_classes=num_classes,
+        device=device,
+    )
 
 
-def yolox_tiny(pretrained=True, num_classes=80, device=None):
-    return create_yolox_model("yolox-tiny", pretrained, num_classes, device)
+def yolox_tiny(pretrained=True, backbone_in_channels=3, num_classes=80, device=None):
+    """Build the "yolox-tiny" model by delegating to ``create_yolox_model``.
+
+    Every argument is forwarded unchanged, by keyword. With ``pretrained=True``
+    the callee asserts ``backbone_in_channels == 3`` and ``num_classes == 80``.
+    """
+    return create_yolox_model(
+        name="yolox-tiny",
+        pretrained=pretrained,
+        backbone_in_channels=backbone_in_channels,
+        num_classes=num_classes,
+        device=device,
+    )
 
 
-def yolox_s(pretrained=True, num_classes=80, device=None):
-    return create_yolox_model("yolox-s", pretrained, num_classes, device)
+def yolox_s(pretrained=True, backbone_in_channels=3, num_classes=80, device=None):
+    """Build the "yolox-s" model by delegating to ``create_yolox_model``.
+
+    Every argument is forwarded unchanged, by keyword. With ``pretrained=True``
+    the callee asserts ``backbone_in_channels == 3`` and ``num_classes == 80``.
+    """
+    return create_yolox_model(
+        name="yolox-s",
+        pretrained=pretrained,
+        backbone_in_channels=backbone_in_channels,
+        num_classes=num_classes,
+        device=device,
+    )
 
 
-def yolox_m(pretrained=True, num_classes=80, device=None):
-    return create_yolox_model("yolox-m", pretrained, num_classes, device)
+def yolox_m(pretrained=True, backbone_in_channels=3, num_classes=80, device=None):
+    """Build the "yolox-m" model by delegating to ``create_yolox_model``.
+
+    Every argument is forwarded unchanged, by keyword. With ``pretrained=True``
+    the callee asserts ``backbone_in_channels == 3`` and ``num_classes == 80``.
+    """
+    return create_yolox_model(
+        name="yolox-m",
+        pretrained=pretrained,
+        backbone_in_channels=backbone_in_channels,
+        num_classes=num_classes,
+        device=device,
+    )
 
 
-def yolox_l(pretrained=True, num_classes=80, device=None):
-    return create_yolox_model("yolox-l", pretrained, num_classes, device)
+def yolox_l(pretrained=True, backbone_in_channels=3, num_classes=80, device=None):
+    """Build the "yolox-l" model by delegating to ``create_yolox_model``.
+
+    Every argument is forwarded unchanged, by keyword. With ``pretrained=True``
+    the callee asserts ``backbone_in_channels == 3`` and ``num_classes == 80``.
+    """
+    return create_yolox_model(
+        name="yolox-l",
+        pretrained=pretrained,
+        backbone_in_channels=backbone_in_channels,
+        num_classes=num_classes,
+        device=device,
+    )
 
 
-def yolox_x(pretrained=True, num_classes=80, device=None):
-    return create_yolox_model("yolox-x", pretrained, num_classes, device)
+def yolox_x(pretrained=True, backbone_in_channels=3, num_classes=80, device=None):
+    """Build the "yolox-x" model by delegating to ``create_yolox_model``.
+
+    Every argument is forwarded unchanged, by keyword. With ``pretrained=True``
+    the callee asserts ``backbone_in_channels == 3`` and ``num_classes == 80``.
+    """
+    return create_yolox_model(
+        name="yolox-x",
+        pretrained=pretrained,
+        backbone_in_channels=backbone_in_channels,
+        num_classes=num_classes,
+        device=device,
+    )
 
 
-def yolov3(pretrained=True, num_classes=80, device=None):
-    return create_yolox_model("yolox-tiny", pretrained, num_classes, device)
+def yolov3(pretrained=True, backbone_in_channels=3, num_classes=80, device=None):
+    """Build the "yolov3" model by delegating to ``create_yolox_model``.
+
+    Every argument is forwarded unchanged, by keyword. With ``pretrained=True``
+    the callee asserts ``backbone_in_channels == 3`` and ``num_classes == 80``.
+    """
+    # This entry point previously requested "yolox-tiny" (copy-paste from
+    # yolox_tiny), so it never built a YOLOv3 model.
+    # NOTE(review): assumes "yolov3" is registered in _CKPT_FULL_PATH and is
+    # known to get_exp -- confirm. Also confirm that the yolov3 experiment
+    # actually honours exp.backbone_in_channels.
+    return create_yolox_model(
+        name="yolov3",
+        pretrained=pretrained,
+        backbone_in_channels=backbone_in_channels,
+        num_classes=num_classes,
+        device=device,
+    )
diff --git a/yolox/models/darknet.py b/yolox/models/darknet.py
@@ -99,6 +99,7 @@ def __init__(
  self,
  dep_mul,
  wid_mul,
+ in_channels=3,
  out_features=("dark3", "dark4", "dark5"),
  depthwise=False,
  act="silu",
@@ -112,7 +113,7 @@ def __init__(
  base_depth = max(round(dep_mul * 3), 1) # 3
 
  # stem
- self.stem = Focus(3, base_channels, ksize=3, act=act)
+ self.stem = Focus(in_channels, base_channels, ksize=3, act=act)
 
  # dark2
  self.dark2 = nn.Sequential(

diff --git a/yolox/models/yolo_fpn.py b/yolox/models/yolo_fpn.py
@@ -17,11 +17,12 @@ class YOLOFPN(nn.Module):
  def __init__(
  self,
  depth=53,
+ backbone_in_channels=3,
  in_features=["dark3", "dark4", "dark5"],
  ):
  super().__init__()
 
- self.backbone = Darknet(depth)
+ self.backbone = Darknet(depth, in_channels=backbone_in_channels)
  self.in_features = in_features
 
  # out 1

diff --git a/yolox/models/yolo_pafpn.py b/yolox/models/yolo_pafpn.py
@@ -18,13 +18,16 @@ def __init__(
  self,
  depth=1.0,
  width=1.0,
+ backbone_in_channels=3,
  in_features=("dark3", "dark4", "dark5"),
  in_channels=[256, 512, 1024],
  depthwise=False,
  act="silu",
  ):
  super().__init__()
- self.backbone = CSPDarknet(depth, width, depthwise=depthwise, act=act)
+ self.backbone = CSPDarknet(
+ depth, width, in_channels=backbone_in_channels, depthwise=depthwise, act=act
+ )
  self.in_features = in_features
  self.in_channels = in_channels
  Conv = DWConv if depthwise else BaseConv