From 859c67d23b9f1cb32d815c7c847e62036592b79e Mon Sep 17 00:00:00 2001 From: shijianjian Date: Wed, 4 Sep 2024 14:49:16 +0300 Subject: [PATCH 01/46] update --- kornia/contrib/models/common.py | 14 +++- .../rt_detr/architecture/hybrid_encoder.py | 33 ++++++-- .../models/rt_detr/architecture/resnet_d.py | 79 ++++++++++-------- .../rt_detr/architecture/rtdetr_head.py | 38 ++++----- kornia/contrib/models/rt_detr/model.py | 83 +++++++++++++++++-- 5 files changed, 176 insertions(+), 71 deletions(-) diff --git a/kornia/contrib/models/common.py b/kornia/contrib/models/common.py index 7ad86d54c6..375dec4b43 100644 --- a/kornia/contrib/models/common.py +++ b/kornia/contrib/models/common.py @@ -7,9 +7,11 @@ from kornia.core import Module, Tensor, pad + class ConvNormAct(nn.Sequential): def __init__( - self, in_channels: int, out_channels: int, kernel_size: int, stride: int = 1, act: str = "relu", groups: int = 1 + self, in_channels: int, out_channels: int, kernel_size: int, stride: int = 1, act: str = "relu", groups: int = 1, + conv_naming: str = "conv", norm_naming: str = "norm", act_naming: str = "act" ) -> None: super().__init__() if kernel_size % 2 == 0: @@ -23,9 +25,13 @@ def __init__( padding = 0 else: padding = (kernel_size - 1) // 2 - self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, 1, groups, False) - self.norm = nn.BatchNorm2d(out_channels) - self.act = {"relu": nn.ReLU, "silu": nn.SiLU, "none": nn.Identity}[act](inplace=True) + conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, 1, groups, False) + norm = nn.BatchNorm2d(out_channels) + act = {"relu": nn.ReLU, "silu": nn.SiLU, "none": nn.Identity}[act](inplace=True) + + self.__setattr__(conv_naming, conv) + self.__setattr__(norm_naming, norm) + self.__setattr__(act_naming, act) # Lightly adapted from diff --git a/kornia/contrib/models/rt_detr/architecture/hybrid_encoder.py b/kornia/contrib/models/rt_detr/architecture/hybrid_encoder.py index 6e9573abb9..c319c7cd5e 100644 --- a/kornia/contrib/models/rt_detr/architecture/hybrid_encoder.py +++ b/kornia/contrib/models/rt_detr/architecture/hybrid_encoder.py @@ -5,6 +5,7 @@ from __future__ import annotations from typing import Optional +import copy import torch import torch.nn.functional as F @@ -80,15 +81,16 @@ class AIFI(Module): def __init__(self, embed_dim: int, num_heads: int, dim_feedforward: int, dropout: float = 0.0) -> None: super().__init__() self.self_attn = nn.MultiheadAttention(embed_dim, num_heads, dropout) # NOTE: batch_first = False - self.dropout1 = nn.Dropout(dropout) - self.norm1 = nn.LayerNorm(embed_dim) self.linear1 = nn.Linear(embed_dim, dim_feedforward) - self.act = nn.GELU() self.dropout = nn.Dropout(dropout) self.linear2 = nn.Linear(dim_feedforward, embed_dim) + + self.dropout1 = nn.Dropout(dropout) self.dropout2 = nn.Dropout(dropout) + self.norm1 = nn.LayerNorm(embed_dim) self.norm2 = nn.LayerNorm(embed_dim) + self.act = nn.GELU() def forward(self, x: Tensor) -> Tensor: # using post-norm @@ -149,6 +151,20 @@ def build_2d_sincos_pos_emb( return pos_emb.unsqueeze(1) # (H * W, 1, C) +class TransformerEncoder(nn.Module): + def __init__(self, encoder_layer: nn.Module, num_layers: int) -> None: + super(TransformerEncoder, self).__init__() + self.layers = nn.ModuleList([copy.deepcopy(encoder_layer) for _ in range(num_layers)]) + self.num_layers = num_layers + + def forward(self, src: Tensor) -> Tensor: # NOTE: Missing src_mask: Tensor = None, pos_embed: Tensor = None + output = src + for layer in self.layers: + output = 
layer(output) + + return output + + class CCFM(Module): def __init__(self, num_fmaps: int, hidden_dim: int, expansion: float = 1.0) -> None: super().__init__() @@ -192,12 +208,17 @@ def forward(self, fmaps: list[Tensor]) -> list[Tensor]: class HybridEncoder(Module): def __init__(self, in_channels: list[int], hidden_dim: int, dim_feedforward: int, expansion: float = 1.0) -> None: super().__init__() - self.input_proj = nn.ModuleList([ConvNormAct(in_ch, hidden_dim, 1, act="none") for in_ch in in_channels]) - self.aifi = AIFI(hidden_dim, 8, dim_feedforward) + self.input_proj = nn.ModuleList([ + ConvNormAct( # To align the naming strategy for the official weights + in_ch, hidden_dim, 1, act="none", conv_naming="0", norm_naming="1", act_naming="2" + ) for in_ch in in_channels + ]) + encoder_layer = AIFI(hidden_dim, 8, dim_feedforward) + self.encoder = nn.Sequential(TransformerEncoder(encoder_layer, 1)) self.ccfm = CCFM(len(in_channels), hidden_dim, expansion) def forward(self, fmaps: list[Tensor]) -> list[Tensor]: projected_maps = [proj(fmap) for proj, fmap in zip(self.input_proj, fmaps)] - projected_maps[-1] = self.aifi(projected_maps[-1]) + projected_maps[-1] = self.encoder(projected_maps[-1]) new_fmaps = self.ccfm(projected_maps) return new_fmaps diff --git a/kornia/contrib/models/rt_detr/architecture/resnet_d.py b/kornia/contrib/models/rt_detr/architecture/resnet_d.py index 57fa171c82..65e3a7518b 100644 --- a/kornia/contrib/models/rt_detr/architecture/resnet_d.py +++ b/kornia/contrib/models/rt_detr/architecture/resnet_d.py @@ -5,6 +5,8 @@ """ from __future__ import annotations +from typing import List, Tuple +from collections import OrderedDict from torch import nn @@ -15,7 +17,10 @@ def _make_shortcut(in_channels: int, out_channels: int, stride: int) -> Module: return ( - nn.Sequential(nn.AvgPool2d(2, 2), ConvNormAct(in_channels, out_channels, 1, act="none")) + nn.Sequential(OrderedDict([ + ("pool", nn.AvgPool2d(2, 2)), + ("conv", ConvNormAct(in_channels, out_channels, 1, act="none")) + ])) if stride == 2 else ConvNormAct(in_channels, out_channels, 1, act="none") ) @@ -27,15 +32,15 @@ class BasicBlockD(Module): def __init__(self, in_channels: int, out_channels: int, stride: int, shortcut: bool) -> None: KORNIA_CHECK(stride in {1, 2}) super().__init__() - self.convs = nn.Sequential( - ConvNormAct(in_channels, out_channels, 3, stride=stride), - ConvNormAct(out_channels, out_channels, 3, act="none"), - ) - self.shortcut = nn.Identity() if shortcut else _make_shortcut(in_channels, out_channels, stride) + self.convs = nn.Sequential(OrderedDict([ + ("branch2a", ConvNormAct(in_channels, out_channels, 3, stride=stride)), + ("branch2b", ConvNormAct(out_channels, out_channels, 3, act="none")), + ])) + self.short = nn.Identity() if shortcut else _make_shortcut(in_channels, out_channels, stride) self.relu = nn.ReLU(inplace=True) def forward(self, x: Tensor) -> Tensor: - return self.relu(self.convs(x) + self.shortcut(x)) + return self.relu(self.convs(x) + self.short(x)) class BottleneckD(Module): @@ -45,16 +50,22 @@ def __init__(self, in_channels: int, out_channels: int, stride: int, shortcut: b KORNIA_CHECK(stride in {1, 2}) super().__init__() expanded_out_channels = out_channels * self.expansion - self.convs = nn.Sequential( - ConvNormAct(in_channels, out_channels, 1), - ConvNormAct(out_channels, out_channels, 3, stride=stride), - ConvNormAct(out_channels, expanded_out_channels, 1, act="none"), - ) - self.shortcut = nn.Identity() if shortcut else _make_shortcut(in_channels, expanded_out_channels, 
stride) + self.convs = nn.Sequential(OrderedDict([ + ("branch2a", ConvNormAct(in_channels, out_channels, 1)), + ("branch2b", ConvNormAct(out_channels, out_channels, 3, stride=stride)), + ("branch2c", ConvNormAct(out_channels, expanded_out_channels, 1, act="none")), + ])) + self.short = nn.Identity() if shortcut else _make_shortcut(in_channels, expanded_out_channels, stride) self.relu = nn.ReLU(inplace=True) def forward(self, x: Tensor) -> Tensor: - return self.relu(self.convs(x) + self.shortcut(x)) + return self.relu(self.convs(x) + self.short(x)) + + +class Block(nn.Sequential): + def __init__(self, blocks: Module) -> None: + super().__init__() + self.blocks = blocks class ResNetD(Module): @@ -62,36 +73,38 @@ def __init__(self, n_blocks: list[int], block: type[BasicBlockD | BottleneckD]) KORNIA_CHECK(len(n_blocks) == 4) super().__init__() in_channels = 64 - self.conv1 = nn.Sequential( - ConvNormAct(3, in_channels // 2, 3, stride=2), - ConvNormAct(in_channels // 2, in_channels // 2, 3), - ConvNormAct(in_channels // 2, in_channels, 3), - nn.MaxPool2d(3, stride=2, padding=1), - ) - - self.res2, in_channels = self.make_stage(in_channels, 64, 1, n_blocks[0], block) - self.res3, in_channels = self.make_stage(in_channels, 128, 2, n_blocks[1], block) - self.res4, in_channels = self.make_stage(in_channels, 256, 2, n_blocks[2], block) - self.res5, in_channels = self.make_stage(in_channels, 512, 2, n_blocks[3], block) + self.conv1 = nn.Sequential(OrderedDict([ + ("conv1_1", ConvNormAct(3, in_channels // 2, 3, stride=2)), + ("conv1_2", ConvNormAct(in_channels // 2, in_channels // 2, 3)), + ("conv1_3", ConvNormAct(in_channels // 2, in_channels, 3)), + ("pool", nn.MaxPool2d(3, stride=2, padding=1)), + ])) + + res2, in_channels = self.make_stage(in_channels, 64, 1, n_blocks[0], block) + res3, in_channels = self.make_stage(in_channels, 128, 2, n_blocks[1], block) + res4, in_channels = self.make_stage(in_channels, 256, 2, n_blocks[2], block) + res5, in_channels = self.make_stage(in_channels, 512, 2, n_blocks[3], block) + + self.res_layers = nn.ModuleList([res2, res3, res4, res5]) self.out_channels = [ch * block.expansion for ch in [128, 256, 512]] @staticmethod def make_stage( in_channels: int, out_channels: int, stride: int, n_blocks: int, block: type[BasicBlockD | BottleneckD] - ) -> tuple[Module, int]: - stage = nn.Sequential( + ) -> Tuple[Module, int]: + stage = Block(nn.Sequential( block(in_channels, out_channels, stride, False), *[block(out_channels * block.expansion, out_channels, 1, True) for _ in range(n_blocks - 1)], - ) + )) return stage, out_channels * block.expansion - def forward(self, x: Tensor) -> list[Tensor]: + def forward(self, x: Tensor) -> List[Tensor]: x = self.conv1(x) - res2 = self.res2(x) - res3 = self.res3(res2) - res4 = self.res4(res3) - res5 = self.res5(res4) + res2 = self.res_layers[0](x) + res3 = self.res_layers[1](res2) + res4 = self.res_layers[2](res3) + res5 = self.res_layers[3](res4) return [res3, res4, res5] @staticmethod diff --git a/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py b/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py index b75e56eca6..4b4dca4f5a 100644 --- a/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py +++ b/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py @@ -4,6 +4,7 @@ from __future__ import annotations from typing import Optional +import copy import torch from torch import nn @@ -192,14 +193,14 @@ def forward( return out -class TransformerDecoder: - def __init__(self, hidden_dim: int, decoder_layers: 
nn.ModuleList, num_layers: int, eval_idx: int = -1) -> None: +class TransformerDecoder(Module): + def __init__(self, hidden_dim: int, decoder_layer: nn.Module, num_layers: int, eval_idx: int = -1) -> None: super().__init__() - self.layers = decoder_layers + # self.layers = decoder_layers # TODO: come back to this later - # self.layers = nn.ModuleList([ - # copy.deepcopy(decoder_layer) for _ in range(num_layers) - # ]) + self.layers = nn.ModuleList([ + copy.deepcopy(decoder_layer) for _ in range(num_layers) + ]) self.hidden_dim = hidden_dim self.num_layers = num_layers self.eval_idx = eval_idx if eval_idx >= 0 else num_layers + eval_idx @@ -274,6 +275,7 @@ def __init__( num_decoder_points: int = 4, # num_levels: int = 3, dropout: float = 0.0, + num_denoising: int = 100, ) -> None: super().__init__() self.num_queries = num_queries @@ -288,25 +290,21 @@ def __init__( # https://github.com/lyuwenyu/RT-DETR/blob/main/rtdetr_pytorch/src/zoo/rtdetr/rtdetr_decoder.py#L403-L410 # NOTE: need to be integrated with the TransformerDecoderLayer - self.decoder_layers = nn.ModuleList( - [ - TransformerDecoderLayer( - embed_dim=hidden_dim, - num_heads=num_heads, - dropout=dropout, - num_levels=len(in_channels), - num_points=num_decoder_points, - ) - for _ in range(num_decoder_layers) - ] + decoder_layer = TransformerDecoderLayer( + embed_dim=hidden_dim, + num_heads=num_heads, + dropout=dropout, + num_levels=len(in_channels), + num_points=num_decoder_points, ) self.decoder = TransformerDecoder( - hidden_dim=hidden_dim, decoder_layers=self.decoder_layers, num_layers=num_decoder_layers + hidden_dim=hidden_dim, decoder_layer=decoder_layer, num_layers=num_decoder_layers ) # denoising part - self.denoising_class_embed = nn.Embedding(num_classes, hidden_dim) # not used in evaluation + if num_denoising > 0: + self.denoising_class_embed = nn.Embedding(num_classes + 1, hidden_dim, padding_idx=num_classes) # not used in evaluation # decoder embedding self.query_pos_head = MLP(4, 2 * hidden_dim, hidden_dim, num_layers=2) @@ -334,7 +332,7 @@ def forward(self, feats: Tensor) -> tuple[Tensor, Tensor]: ) # decoder - out_bboxes, out_logits = self.decoder.forward( + out_bboxes, out_logits = self.decoder( target, init_ref_points_unact, memory, diff --git a/kornia/contrib/models/rt_detr/model.py b/kornia/contrib/models/rt_detr/model.py index 1f250c77aa..dc167af218 100644 --- a/kornia/contrib/models/rt_detr/model.py +++ b/kornia/contrib/models/rt_detr/model.py @@ -4,8 +4,10 @@ from dataclasses import dataclass from enum import Enum -from typing import Optional +from typing import Optional, Dict +import re +import torch from kornia.contrib.models.base import ModelBase from kornia.contrib.models.rt_detr.architecture.hgnetv2 import PPHGNetV2 from kornia.contrib.models.rt_detr.architecture.hybrid_encoder import HybridEncoder @@ -14,6 +16,15 @@ from kornia.core import Tensor +URLs = { + 'rtdetr_r18vd': 'https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r18vd_dec3_6x_coco_from_paddle.pth', + 'rtdetr_r34vd': 'https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r34vd_dec4_6x_coco_from_paddle.pth', + 'rtdetr_r50vd_m': 'https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_m_6x_coco_from_paddle.pth', + 'rtdetr_r50vd': 'https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_6x_coco_from_paddle.pth', + 'rtdetr_r101vd': 'https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r101vd_6x_coco_from_paddle.pth', +} + + class RTDETRModelType(Enum): """Enum class that maps 
RT-DETR model type.""" @@ -65,7 +76,7 @@ class RTDETRConfig: class RTDETR(ModelBase[RTDETRConfig]): """RT-DETR Object Detection model, as described in https://arxiv.org/abs/2304.08069.""" - def __init__(self, backbone: ResNetD | PPHGNetV2, neck: HybridEncoder, head: RTDETRHead): + def __init__(self, backbone: ResNetD | PPHGNetV2, encoder: HybridEncoder, decoder: RTDETRHead): """Construct RT-DETR Object Detection model. Args: @@ -75,8 +86,8 @@ def __init__(self, backbone: ResNetD | PPHGNetV2, neck: HybridEncoder, head: RTD """ super().__init__() self.backbone = backbone - self.neck = neck - self.head = head + self.encoder = encoder + self.decoder = decoder @staticmethod def from_config(config: RTDETRConfig) -> RTDETR: @@ -156,6 +167,62 @@ def from_config(config: RTDETRConfig) -> RTDETR: model.load_checkpoint(config.checkpoint) return model + def from_pretrained(model_name: str) -> RTDETR: + """Load model from pretrained weights. + + Args: + model_name: 'rtdetr_r18vd', 'rtdetr_r34vd', 'rtdetr_r50vd_m', 'rtdetr_r50vd', 'rtdetr_r101vd'. + """ + + state_dict = torch.hub.load_state_dict_from_url( + URLs[model_name], + map_location="cuda:0" if torch.cuda.is_available() else "cpu" + ) + + def map_name(old_name: str) -> str: + # Start with the old name + new_name = old_name + + new_name = re.sub('encoder.pan_blocks', 'encoder.ccfm.pan_blocks', new_name) + new_name = re.sub('encoder.downsample_convs', 'encoder.ccfm.downsample_convs', new_name) + new_name = re.sub('encoder.fpn_blocks', 'encoder.ccfm.fpn_blocks', new_name) + new_name = re.sub('encoder.lateral_convs', 'encoder.ccfm.lateral_convs', new_name) + + # Backbone renaming + new_name = re.sub(f'.branch2b.', '.convs.branch2b.', new_name) + new_name = re.sub(f'.branch2a.', '.convs.branch2a.', new_name) + new_name = re.sub(f'.branch2c.', '.convs.branch2c.', new_name) + + return new_name + + def _state_dict_proc(state_dict: Dict[str, Tensor]) -> Dict[str, Tensor]: + + state_dict = state_dict["ema"]["module"] + new_state_dict = {} + + # Apply the regex-based mapping function to each key + for old_name in state_dict.keys(): + new_name = map_name(old_name) + new_state_dict[new_name] = state_dict[old_name] + + return new_state_dict + + if model_name == "rtdetr_r18vd": + model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet18d, 80)) + elif model_name == "rtdetr_r34vd": + model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet34d, 80)) + elif model_name == "rtdetr_r50vd_m": + model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet50d, 80)) + elif model_name == "rtdetr_r50vd": + model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet50d, 80)) + elif model_name == "rtdetr_r101vd": + model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet101d, 80)) + else: + raise ValueError + + model.load_state_dict(_state_dict_proc(state_dict)) + return model + def forward(self, images: Tensor) -> tuple[Tensor, Tensor]: """Detect objects in an image. @@ -167,10 +234,10 @@ def forward(self, images: Tensor) -> tuple[Tensor, Tensor]: :math:`K` is the number of classes. - **boxes** - Tensor of shape :math:`(N, Q, 4)`, where :math:`Q` is the number of queries. """ - if self.training: - raise RuntimeError("Only evaluation mode is supported. Please call model.eval().") + # if self.training: + # raise RuntimeError("Only evaluation mode is supported. 
Please call model.eval().") feats = self.backbone(images) - feats_buf = self.neck(feats) - logits, boxes = self.head(feats_buf) + feats_buf = self.encoder(feats) + logits, boxes = self.decoder(feats_buf) return logits, boxes From 6522333612d9ca4e6dcbe0050cd28174b71a4468 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 4 Sep 2024 11:50:53 +0000 Subject: [PATCH 02/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- kornia/contrib/models/common.py | 13 +++- .../rt_detr/architecture/hybrid_encoder.py | 15 +++-- .../models/rt_detr/architecture/resnet_d.py | 62 ++++++++++++------- .../rt_detr/architecture/rtdetr_head.py | 10 +-- kornia/contrib/models/rt_detr/model.py | 34 +++++----- 5 files changed, 78 insertions(+), 56 deletions(-) diff --git a/kornia/contrib/models/common.py b/kornia/contrib/models/common.py index 375dec4b43..fcb16ae61a 100644 --- a/kornia/contrib/models/common.py +++ b/kornia/contrib/models/common.py @@ -7,11 +7,18 @@ from kornia.core import Module, Tensor, pad - class ConvNormAct(nn.Sequential): def __init__( - self, in_channels: int, out_channels: int, kernel_size: int, stride: int = 1, act: str = "relu", groups: int = 1, - conv_naming: str = "conv", norm_naming: str = "norm", act_naming: str = "act" + self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + act: str = "relu", + groups: int = 1, + conv_naming: str = "conv", + norm_naming: str = "norm", + act_naming: str = "act", ) -> None: super().__init__() if kernel_size % 2 == 0: diff --git a/kornia/contrib/models/rt_detr/architecture/hybrid_encoder.py b/kornia/contrib/models/rt_detr/architecture/hybrid_encoder.py index c319c7cd5e..6d25bcf53f 100644 --- a/kornia/contrib/models/rt_detr/architecture/hybrid_encoder.py +++ b/kornia/contrib/models/rt_detr/architecture/hybrid_encoder.py @@ -4,8 +4,8 @@ from __future__ import annotations -from typing import Optional import copy +from typing import Optional import torch import torch.nn.functional as F @@ -208,11 +208,14 @@ def forward(self, fmaps: list[Tensor]) -> list[Tensor]: class HybridEncoder(Module): def __init__(self, in_channels: list[int], hidden_dim: int, dim_feedforward: int, expansion: float = 1.0) -> None: super().__init__() - self.input_proj = nn.ModuleList([ - ConvNormAct( # To align the naming strategy for the official weights - in_ch, hidden_dim, 1, act="none", conv_naming="0", norm_naming="1", act_naming="2" - ) for in_ch in in_channels - ]) + self.input_proj = nn.ModuleList( + [ + ConvNormAct( # To align the naming strategy for the official weights + in_ch, hidden_dim, 1, act="none", conv_naming="0", norm_naming="1", act_naming="2" + ) + for in_ch in in_channels + ] + ) encoder_layer = AIFI(hidden_dim, 8, dim_feedforward) self.encoder = nn.Sequential(TransformerEncoder(encoder_layer, 1)) self.ccfm = CCFM(len(in_channels), hidden_dim, expansion) diff --git a/kornia/contrib/models/rt_detr/architecture/resnet_d.py b/kornia/contrib/models/rt_detr/architecture/resnet_d.py index 65e3a7518b..3453ae2a7c 100644 --- a/kornia/contrib/models/rt_detr/architecture/resnet_d.py +++ b/kornia/contrib/models/rt_detr/architecture/resnet_d.py @@ -5,8 +5,9 @@ """ from __future__ import annotations -from typing import List, Tuple + from collections import OrderedDict +from typing import List, Tuple from torch import nn @@ -17,10 +18,9 @@ def _make_shortcut(in_channels: int, out_channels: int, stride: int) -> Module: return ( - 
nn.Sequential(OrderedDict([ - ("pool", nn.AvgPool2d(2, 2)), - ("conv", ConvNormAct(in_channels, out_channels, 1, act="none")) - ])) + nn.Sequential( + OrderedDict([("pool", nn.AvgPool2d(2, 2)), ("conv", ConvNormAct(in_channels, out_channels, 1, act="none"))]) + ) if stride == 2 else ConvNormAct(in_channels, out_channels, 1, act="none") ) @@ -32,10 +32,14 @@ class BasicBlockD(Module): def __init__(self, in_channels: int, out_channels: int, stride: int, shortcut: bool) -> None: KORNIA_CHECK(stride in {1, 2}) super().__init__() - self.convs = nn.Sequential(OrderedDict([ - ("branch2a", ConvNormAct(in_channels, out_channels, 3, stride=stride)), - ("branch2b", ConvNormAct(out_channels, out_channels, 3, act="none")), - ])) + self.convs = nn.Sequential( + OrderedDict( + [ + ("branch2a", ConvNormAct(in_channels, out_channels, 3, stride=stride)), + ("branch2b", ConvNormAct(out_channels, out_channels, 3, act="none")), + ] + ) + ) self.short = nn.Identity() if shortcut else _make_shortcut(in_channels, out_channels, stride) self.relu = nn.ReLU(inplace=True) @@ -50,11 +54,15 @@ def __init__(self, in_channels: int, out_channels: int, stride: int, shortcut: b KORNIA_CHECK(stride in {1, 2}) super().__init__() expanded_out_channels = out_channels * self.expansion - self.convs = nn.Sequential(OrderedDict([ - ("branch2a", ConvNormAct(in_channels, out_channels, 1)), - ("branch2b", ConvNormAct(out_channels, out_channels, 3, stride=stride)), - ("branch2c", ConvNormAct(out_channels, expanded_out_channels, 1, act="none")), - ])) + self.convs = nn.Sequential( + OrderedDict( + [ + ("branch2a", ConvNormAct(in_channels, out_channels, 1)), + ("branch2b", ConvNormAct(out_channels, out_channels, 3, stride=stride)), + ("branch2c", ConvNormAct(out_channels, expanded_out_channels, 1, act="none")), + ] + ) + ) self.short = nn.Identity() if shortcut else _make_shortcut(in_channels, expanded_out_channels, stride) self.relu = nn.ReLU(inplace=True) @@ -73,12 +81,16 @@ def __init__(self, n_blocks: list[int], block: type[BasicBlockD | BottleneckD]) KORNIA_CHECK(len(n_blocks) == 4) super().__init__() in_channels = 64 - self.conv1 = nn.Sequential(OrderedDict([ - ("conv1_1", ConvNormAct(3, in_channels // 2, 3, stride=2)), - ("conv1_2", ConvNormAct(in_channels // 2, in_channels // 2, 3)), - ("conv1_3", ConvNormAct(in_channels // 2, in_channels, 3)), - ("pool", nn.MaxPool2d(3, stride=2, padding=1)), - ])) + self.conv1 = nn.Sequential( + OrderedDict( + [ + ("conv1_1", ConvNormAct(3, in_channels // 2, 3, stride=2)), + ("conv1_2", ConvNormAct(in_channels // 2, in_channels // 2, 3)), + ("conv1_3", ConvNormAct(in_channels // 2, in_channels, 3)), + ("pool", nn.MaxPool2d(3, stride=2, padding=1)), + ] + ) + ) res2, in_channels = self.make_stage(in_channels, 64, 1, n_blocks[0], block) res3, in_channels = self.make_stage(in_channels, 128, 2, n_blocks[1], block) @@ -93,10 +105,12 @@ def __init__(self, n_blocks: list[int], block: type[BasicBlockD | BottleneckD]) def make_stage( in_channels: int, out_channels: int, stride: int, n_blocks: int, block: type[BasicBlockD | BottleneckD] ) -> Tuple[Module, int]: - stage = Block(nn.Sequential( - block(in_channels, out_channels, stride, False), - *[block(out_channels * block.expansion, out_channels, 1, True) for _ in range(n_blocks - 1)], - )) + stage = Block( + nn.Sequential( + block(in_channels, out_channels, stride, False), + *[block(out_channels * block.expansion, out_channels, 1, True) for _ in range(n_blocks - 1)], + ) + ) return stage, out_channels * block.expansion def forward(self, x: Tensor) -> 
List[Tensor]: diff --git a/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py b/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py index 4b4dca4f5a..17fec6b7c7 100644 --- a/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py +++ b/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py @@ -3,8 +3,8 @@ from __future__ import annotations -from typing import Optional import copy +from typing import Optional import torch from torch import nn @@ -198,9 +198,7 @@ def __init__(self, hidden_dim: int, decoder_layer: nn.Module, num_layers: int, e super().__init__() # self.layers = decoder_layers # TODO: come back to this later - self.layers = nn.ModuleList([ - copy.deepcopy(decoder_layer) for _ in range(num_layers) - ]) + self.layers = nn.ModuleList([copy.deepcopy(decoder_layer) for _ in range(num_layers)]) self.hidden_dim = hidden_dim self.num_layers = num_layers self.eval_idx = eval_idx if eval_idx >= 0 else num_layers + eval_idx @@ -304,7 +302,9 @@ def __init__( # denoising part if num_denoising > 0: - self.denoising_class_embed = nn.Embedding(num_classes + 1, hidden_dim, padding_idx=num_classes) # not used in evaluation + self.denoising_class_embed = nn.Embedding( + num_classes + 1, hidden_dim, padding_idx=num_classes + ) # not used in evaluation # decoder embedding self.query_pos_head = MLP(4, 2 * hidden_dim, hidden_dim, num_layers=2) diff --git a/kornia/contrib/models/rt_detr/model.py b/kornia/contrib/models/rt_detr/model.py index dc167af218..49a9c510bd 100644 --- a/kornia/contrib/models/rt_detr/model.py +++ b/kornia/contrib/models/rt_detr/model.py @@ -2,12 +2,13 @@ from __future__ import annotations +import re from dataclasses import dataclass from enum import Enum -from typing import Optional, Dict -import re +from typing import Dict, Optional import torch + from kornia.contrib.models.base import ModelBase from kornia.contrib.models.rt_detr.architecture.hgnetv2 import PPHGNetV2 from kornia.contrib.models.rt_detr.architecture.hybrid_encoder import HybridEncoder @@ -15,13 +16,12 @@ from kornia.contrib.models.rt_detr.architecture.rtdetr_head import RTDETRHead from kornia.core import Tensor - URLs = { - 'rtdetr_r18vd': 'https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r18vd_dec3_6x_coco_from_paddle.pth', - 'rtdetr_r34vd': 'https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r34vd_dec4_6x_coco_from_paddle.pth', - 'rtdetr_r50vd_m': 'https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_m_6x_coco_from_paddle.pth', - 'rtdetr_r50vd': 'https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_6x_coco_from_paddle.pth', - 'rtdetr_r101vd': 'https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r101vd_6x_coco_from_paddle.pth', + "rtdetr_r18vd": "https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r18vd_dec3_6x_coco_from_paddle.pth", + "rtdetr_r34vd": "https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r34vd_dec4_6x_coco_from_paddle.pth", + "rtdetr_r50vd_m": "https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_m_6x_coco_from_paddle.pth", + "rtdetr_r50vd": "https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_6x_coco_from_paddle.pth", + "rtdetr_r101vd": "https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r101vd_6x_coco_from_paddle.pth", } @@ -175,28 +175,26 @@ def from_pretrained(model_name: str) -> RTDETR: """ state_dict = torch.hub.load_state_dict_from_url( - URLs[model_name], - map_location="cuda:0" if torch.cuda.is_available() 
else "cpu" + URLs[model_name], map_location="cuda:0" if torch.cuda.is_available() else "cpu" ) def map_name(old_name: str) -> str: # Start with the old name new_name = old_name - new_name = re.sub('encoder.pan_blocks', 'encoder.ccfm.pan_blocks', new_name) - new_name = re.sub('encoder.downsample_convs', 'encoder.ccfm.downsample_convs', new_name) - new_name = re.sub('encoder.fpn_blocks', 'encoder.ccfm.fpn_blocks', new_name) - new_name = re.sub('encoder.lateral_convs', 'encoder.ccfm.lateral_convs', new_name) + new_name = re.sub("encoder.pan_blocks", "encoder.ccfm.pan_blocks", new_name) + new_name = re.sub("encoder.downsample_convs", "encoder.ccfm.downsample_convs", new_name) + new_name = re.sub("encoder.fpn_blocks", "encoder.ccfm.fpn_blocks", new_name) + new_name = re.sub("encoder.lateral_convs", "encoder.ccfm.lateral_convs", new_name) # Backbone renaming - new_name = re.sub(f'.branch2b.', '.convs.branch2b.', new_name) - new_name = re.sub(f'.branch2a.', '.convs.branch2a.', new_name) - new_name = re.sub(f'.branch2c.', '.convs.branch2c.', new_name) + new_name = re.sub(".branch2b.", ".convs.branch2b.", new_name) + new_name = re.sub(".branch2a.", ".convs.branch2a.", new_name) + new_name = re.sub(".branch2c.", ".convs.branch2c.", new_name) return new_name def _state_dict_proc(state_dict: Dict[str, Tensor]) -> Dict[str, Tensor]: - state_dict = state_dict["ema"]["module"] new_state_dict = {} From 095a2685e9b8506925dc5001aa4060e079c86436 Mon Sep 17 00:00:00 2001 From: shijianjian Date: Wed, 4 Sep 2024 15:07:32 +0300 Subject: [PATCH 03/46] update --- kornia/contrib/models/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kornia/contrib/models/common.py b/kornia/contrib/models/common.py index fcb16ae61a..b46049f93f 100644 --- a/kornia/contrib/models/common.py +++ b/kornia/contrib/models/common.py @@ -34,11 +34,11 @@ def __init__( padding = (kernel_size - 1) // 2 conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, 1, groups, False) norm = nn.BatchNorm2d(out_channels) - act = {"relu": nn.ReLU, "silu": nn.SiLU, "none": nn.Identity}[act](inplace=True) + activation = {"relu": nn.ReLU, "silu": nn.SiLU, "none": nn.Identity}[act](inplace=True) self.__setattr__(conv_naming, conv) self.__setattr__(norm_naming, norm) - self.__setattr__(act_naming, act) + self.__setattr__(act_naming, activation) # Lightly adapted from From bdeee27a3fdf60919586384dfa79877b00b5056b Mon Sep 17 00:00:00 2001 From: shijianjian Date: Wed, 4 Sep 2024 15:11:05 +0300 Subject: [PATCH 04/46] update --- kornia/contrib/models/rt_detr/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kornia/contrib/models/rt_detr/model.py b/kornia/contrib/models/rt_detr/model.py index 49a9c510bd..6c6653dd0a 100644 --- a/kornia/contrib/models/rt_detr/model.py +++ b/kornia/contrib/models/rt_detr/model.py @@ -195,7 +195,7 @@ def map_name(old_name: str) -> str: return new_name def _state_dict_proc(state_dict: Dict[str, Tensor]) -> Dict[str, Tensor]: - state_dict = state_dict["ema"]["module"] + state_dict = state_dict["ema"]["module"] # type:ignore new_state_dict = {} # Apply the regex-based mapping function to each key From d7da930738e52eed860d703ce34f06c21c19604b Mon Sep 17 00:00:00 2001 From: shijianjian Date: Wed, 4 Sep 2024 15:14:39 +0300 Subject: [PATCH 05/46] update --- kornia/contrib/models/rt_detr/architecture/hybrid_encoder.py | 2 +- kornia/contrib/models/rt_detr/architecture/resnet_d.py | 5 ++--- kornia/contrib/models/rt_detr/model.py | 4 ++-- 3 files changed, 5 insertions(+), 6 
deletions(-) diff --git a/kornia/contrib/models/rt_detr/architecture/hybrid_encoder.py b/kornia/contrib/models/rt_detr/architecture/hybrid_encoder.py index 6d25bcf53f..5e2a8cc98e 100644 --- a/kornia/contrib/models/rt_detr/architecture/hybrid_encoder.py +++ b/kornia/contrib/models/rt_detr/architecture/hybrid_encoder.py @@ -153,7 +153,7 @@ def build_2d_sincos_pos_emb( class TransformerEncoder(nn.Module): def __init__(self, encoder_layer: nn.Module, num_layers: int) -> None: - super(TransformerEncoder, self).__init__() + super().__init__() self.layers = nn.ModuleList([copy.deepcopy(encoder_layer) for _ in range(num_layers)]) self.num_layers = num_layers diff --git a/kornia/contrib/models/rt_detr/architecture/resnet_d.py b/kornia/contrib/models/rt_detr/architecture/resnet_d.py index 3453ae2a7c..4432b38ac3 100644 --- a/kornia/contrib/models/rt_detr/architecture/resnet_d.py +++ b/kornia/contrib/models/rt_detr/architecture/resnet_d.py @@ -7,7 +7,6 @@ from __future__ import annotations from collections import OrderedDict -from typing import List, Tuple from torch import nn @@ -104,7 +103,7 @@ def __init__(self, n_blocks: list[int], block: type[BasicBlockD | BottleneckD]) @staticmethod def make_stage( in_channels: int, out_channels: int, stride: int, n_blocks: int, block: type[BasicBlockD | BottleneckD] - ) -> Tuple[Module, int]: + ) -> tuple[Module, int]: stage = Block( nn.Sequential( block(in_channels, out_channels, stride, False), @@ -113,7 +112,7 @@ def make_stage( ) return stage, out_channels * block.expansion - def forward(self, x: Tensor) -> List[Tensor]: + def forward(self, x: Tensor) -> list[Tensor]: x = self.conv1(x) res2 = self.res_layers[0](x) res3 = self.res_layers[1](res2) diff --git a/kornia/contrib/models/rt_detr/model.py b/kornia/contrib/models/rt_detr/model.py index 6c6653dd0a..67cc495520 100644 --- a/kornia/contrib/models/rt_detr/model.py +++ b/kornia/contrib/models/rt_detr/model.py @@ -5,7 +5,7 @@ import re from dataclasses import dataclass from enum import Enum -from typing import Dict, Optional +from typing import Optional import torch @@ -194,7 +194,7 @@ def map_name(old_name: str) -> str: return new_name - def _state_dict_proc(state_dict: Dict[str, Tensor]) -> Dict[str, Tensor]: + def _state_dict_proc(state_dict: dict[str, Tensor]) -> dict[str, Tensor]: state_dict = state_dict["ema"]["module"] # type:ignore new_state_dict = {} From 33120ab232ae0772eae3a1303a757113bfdaa117 Mon Sep 17 00:00:00 2001 From: shijianjian Date: Wed, 4 Sep 2024 15:17:17 +0300 Subject: [PATCH 06/46] update --- kornia/contrib/models/rt_detr/model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/kornia/contrib/models/rt_detr/model.py b/kornia/contrib/models/rt_detr/model.py index 67cc495520..c7ee40ec63 100644 --- a/kornia/contrib/models/rt_detr/model.py +++ b/kornia/contrib/models/rt_detr/model.py @@ -167,6 +167,7 @@ def from_config(config: RTDETRConfig) -> RTDETR: model.load_checkpoint(config.checkpoint) return model + @staticmethod def from_pretrained(model_name: str) -> RTDETR: """Load model from pretrained weights. 
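
Note: a minimal usage sketch for the pretrained-loading path introduced in the patch above. The model name "rtdetr_r18vd" and the 640x640 input size are illustrative assumptions (any key of the URLs dict added in PATCH 01 should work the same way); they are not values fixed by these patches.

    import torch
    from kornia.contrib.models.rt_detr.model import RTDETR

    # Load the COCO-pretrained weights referenced in the URLs dict and remap
    # their names to this implementation via from_pretrained (added above).
    model = RTDETR.from_pretrained("rtdetr_r18vd").eval()

    # Dummy batch of RGB images; 640x640 is an assumed input size.
    images = torch.rand(1, 3, 640, 640)
    with torch.no_grad():
        logits, boxes = model(images)  # logits: (N, Q, K), boxes: (N, Q, 4), per the forward docstring
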
From a5ded631c7803b9cfb89f8cc02f61d5644ce9629 Mon Sep 17 00:00:00 2001 From: shijianjian Date: Wed, 4 Sep 2024 15:24:58 +0300 Subject: [PATCH 07/46] update --- kornia/contrib/models/rt_detr/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kornia/contrib/models/rt_detr/model.py b/kornia/contrib/models/rt_detr/model.py index c7ee40ec63..0bc2cfad40 100644 --- a/kornia/contrib/models/rt_detr/model.py +++ b/kornia/contrib/models/rt_detr/model.py @@ -180,9 +180,9 @@ def from_pretrained(model_name: str) -> RTDETR: ) def map_name(old_name: str) -> str: - # Start with the old name new_name = old_name + # Encoder renaming new_name = re.sub("encoder.pan_blocks", "encoder.ccfm.pan_blocks", new_name) new_name = re.sub("encoder.downsample_convs", "encoder.ccfm.downsample_convs", new_name) new_name = re.sub("encoder.fpn_blocks", "encoder.ccfm.fpn_blocks", new_name) From e8019f01e93161ae55a84fed8db7b6cf356775a3 Mon Sep 17 00:00:00 2001 From: shijianjian Date: Wed, 4 Sep 2024 17:50:05 +0300 Subject: [PATCH 08/46] update --- kornia/contrib/models/rt_detr/post_processor.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kornia/contrib/models/rt_detr/post_processor.py b/kornia/contrib/models/rt_detr/post_processor.py index d5fce83870..a189f4ebd0 100644 --- a/kornia/contrib/models/rt_detr/post_processor.py +++ b/kornia/contrib/models/rt_detr/post_processor.py @@ -4,7 +4,6 @@ import torch from kornia.core import Module, Tensor, concatenate -from kornia.image.base import ImageSize class DETRPostProcessor(Module): @@ -12,7 +11,7 @@ def __init__(self, confidence_threshold: float) -> None: super().__init__() self.confidence_threshold = confidence_threshold - def forward(self, logits: Tensor, boxes: Tensor, original_sizes: list[ImageSize]) -> list[Tensor]: + def forward(self, logits: Tensor, boxes: Tensor, original_sizes: Tensor) -> list[Tensor]: """Post-process outputs from DETR. Args: @@ -20,7 +19,8 @@ def forward(self, logits: Tensor, boxes: Tensor, original_sizes: list[ImageSize] queries, :math:`K` is the number of classes. boxes: tensor with shape :math:`(N, Q, 4)`, where :math:`N` is the batch size, :math:`Q` is the number of queries. - original_sizes: list of tuples, each tuple represent (img_height, img_width). + original_sizes: tensor with shape :math:`(N, 2)`, where :math:`N` is the batch size and each element + represents the image size of (img_height, img_width). Returns: Processed detections. 
For each image, the detections have shape (D, 6), where D is the number of detections @@ -38,8 +38,8 @@ def forward(self, logits: Tensor, boxes: Tensor, original_sizes: list[ImageSize] boxes_xy = concatenate([cxcy - wh * 0.5, wh], -1) sizes_wh = torch.empty(1, 1, 2, device=boxes.device, dtype=boxes.dtype) - sizes_wh[..., 0] = original_sizes[0].width - sizes_wh[..., 1] = original_sizes[0].height + sizes_wh[..., 0] = original_sizes[0][0] + sizes_wh[..., 1] = original_sizes[0][0] sizes_wh = sizes_wh.repeat(1, 1, 2) boxes_xy = boxes_xy * sizes_wh From 85715a6eeed187cdac97bd2a1edbf30307de310d Mon Sep 17 00:00:00 2001 From: shijianjian Date: Thu, 5 Sep 2024 12:30:54 +0300 Subject: [PATCH 09/46] update --- kornia/contrib/object_detection.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index ad1849378f..a148a4c210 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -6,9 +6,8 @@ import torch -from kornia.core import Module, Tensor, concatenate +from kornia.core import Module, Tensor, as_tensor, concatenate from kornia.core.check import KORNIA_CHECK_SHAPE -from kornia.image.base import ImageSize __all__ = [ "BoundingBoxDataFormat", @@ -113,18 +112,18 @@ def __init__(self, size: tuple[int, int], interpolation_mode: str = "bilinear") self.size = size self.interpolation_mode = interpolation_mode - def forward(self, imgs: list[Tensor]) -> tuple[Tensor, list[ImageSize]]: + def forward(self, imgs: list[Tensor]) -> tuple[Tensor, Tensor]: # TODO: support other input formats e.g. file path, numpy resized_imgs, original_sizes = [], [] for i in range(len(imgs)): img = imgs[i] # NOTE: assume that image layout is CHW - original_sizes.append(ImageSize(height=img.shape[1], width=img.shape[2])) + original_sizes.append([img.shape[1], img.shape[2]]) resized_imgs.append( # TODO: fix kornia resize to support onnx torch.nn.functional.interpolate(img.unsqueeze(0), size=self.size, mode=self.interpolation_mode) ) - return concatenate(resized_imgs), original_sizes + return concatenate(resized_imgs), as_tensor(original_sizes) # TODO: move this to kornia.models as AlgorithmicModel api From 05106d35c913998abdd3c4d0e0451d2d5a3237ca Mon Sep 17 00:00:00 2001 From: shijianjian Date: Thu, 5 Sep 2024 21:08:49 +0300 Subject: [PATCH 10/46] update --- .../contrib/models/rt_detr/architecture/rtdetr_head.py | 9 ++++----- kornia/contrib/models/rt_detr/model.py | 2 -- kornia/contrib/models/rt_detr/post_processor.py | 2 +- kornia/contrib/object_detection.py | 7 ++++++- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py b/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py index 17fec6b7c7..23a343c18b 100644 --- a/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py +++ b/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py @@ -196,8 +196,6 @@ def forward( class TransformerDecoder(Module): def __init__(self, hidden_dim: int, decoder_layer: nn.Module, num_layers: int, eval_idx: int = -1) -> None: super().__init__() - # self.layers = decoder_layers - # TODO: come back to this later self.layers = nn.ModuleList([copy.deepcopy(decoder_layer) for _ in range(num_layers)]) self.hidden_dim = hidden_dim self.num_layers = num_layers @@ -271,14 +269,15 @@ def __init__( num_decoder_layers: int, num_heads: int = 8, num_decoder_points: int = 4, - # num_levels: int = 3, + num_levels: int = 3, dropout: float = 0.0, num_denoising: int 
= 100, ) -> None: super().__init__() self.num_queries = num_queries # TODO: verify this is correct - self.num_levels = len(in_channels) + assert len(in_channels) <= num_levels + self.num_levels = num_levels # build the input projection layers self.input_proj = nn.ModuleList() @@ -292,7 +291,7 @@ def __init__( embed_dim=hidden_dim, num_heads=num_heads, dropout=dropout, - num_levels=len(in_channels), + num_levels=self.num_levels, num_points=num_decoder_points, ) diff --git a/kornia/contrib/models/rt_detr/model.py b/kornia/contrib/models/rt_detr/model.py index 0bc2cfad40..1239702043 100644 --- a/kornia/contrib/models/rt_detr/model.py +++ b/kornia/contrib/models/rt_detr/model.py @@ -233,8 +233,6 @@ def forward(self, images: Tensor) -> tuple[Tensor, Tensor]: :math:`K` is the number of classes. - **boxes** - Tensor of shape :math:`(N, Q, 4)`, where :math:`Q` is the number of queries. """ - # if self.training: - # raise RuntimeError("Only evaluation mode is supported. Please call model.eval().") feats = self.backbone(images) feats_buf = self.encoder(feats) diff --git a/kornia/contrib/models/rt_detr/post_processor.py b/kornia/contrib/models/rt_detr/post_processor.py index a189f4ebd0..fb73e17e56 100644 --- a/kornia/contrib/models/rt_detr/post_processor.py +++ b/kornia/contrib/models/rt_detr/post_processor.py @@ -39,7 +39,7 @@ def forward(self, logits: Tensor, boxes: Tensor, original_sizes: Tensor) -> list sizes_wh = torch.empty(1, 1, 2, device=boxes.device, dtype=boxes.dtype) sizes_wh[..., 0] = original_sizes[0][0] - sizes_wh[..., 1] = original_sizes[0][0] + sizes_wh[..., 1] = original_sizes[0][1] sizes_wh = sizes_wh.repeat(1, 1, 2) boxes_xy = boxes_xy * sizes_wh diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index a148a4c210..35c9beea64 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -113,12 +113,17 @@ def __init__(self, size: tuple[int, int], interpolation_mode: str = "bilinear") self.interpolation_mode = interpolation_mode def forward(self, imgs: list[Tensor]) -> tuple[Tensor, Tensor]: + """ + Returns: + resized_imgs: resized images in a batch. + original_sizes: the original image sizes of (height, width). + """ # TODO: support other input formats e.g. file path, numpy resized_imgs, original_sizes = [], [] for i in range(len(imgs)): img = imgs[i] # NOTE: assume that image layout is CHW - original_sizes.append([img.shape[1], img.shape[2]]) + original_sizes.append([img.shape[-2], img.shape[-1]]) resized_imgs.append( # TODO: fix kornia resize to support onnx torch.nn.functional.interpolate(img.unsqueeze(0), size=self.size, mode=self.interpolation_mode) From 2dcba2c1e8cd6015afb7b2d3e0ea57362a50b6e5 Mon Sep 17 00:00:00 2001 From: shijianjian Date: Thu, 5 Sep 2024 21:16:01 +0300 Subject: [PATCH 11/46] update --- kornia/contrib/models/rt_detr/architecture/rtdetr_head.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py b/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py index 23a343c18b..6345577e5a 100644 --- a/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py +++ b/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py @@ -276,7 +276,8 @@ def __init__( super().__init__() self.num_queries = num_queries # TODO: verify this is correct - assert len(in_channels) <= num_levels + if len(in_channels) > num_levels: + raise ValueError(f"`num_levels` cannot be greater than {len(in_channels)}. 
Got {num_levels}.") self.num_levels = num_levels # build the input projection layers From 83fa545c5bbe45f49fc71bf4ed8d1f763569352a Mon Sep 17 00:00:00 2001 From: shijianjian Date: Fri, 6 Sep 2024 11:40:32 +0300 Subject: [PATCH 12/46] update --- kornia/contrib/models/rt_detr/model.py | 29 ++++++++++++++----- kornia/models/__init__.py | 0 kornia/models/detector/__init__.py | 0 kornia/models/detector/rtdetr.py | 39 ++++++++++++++++++++++++++ 4 files changed, 61 insertions(+), 7 deletions(-) create mode 100644 kornia/models/__init__.py create mode 100644 kornia/models/detector/__init__.py create mode 100644 kornia/models/detector/rtdetr.py diff --git a/kornia/contrib/models/rt_detr/model.py b/kornia/contrib/models/rt_detr/model.py index 1239702043..3692788ed1 100644 --- a/kornia/contrib/models/rt_detr/model.py +++ b/kornia/contrib/models/rt_detr/model.py @@ -175,6 +175,9 @@ def from_pretrained(model_name: str) -> RTDETR: model_name: 'rtdetr_r18vd', 'rtdetr_r34vd', 'rtdetr_r50vd_m', 'rtdetr_r50vd', 'rtdetr_r101vd'. """ + if model_name not in URLs: + raise ValueError(f"No pretrained model for '{model_name}'. Please select from {list(URLs.keys())}.") + state_dict = torch.hub.load_state_dict_from_url( URLs[model_name], map_location="cuda:0" if torch.cuda.is_available() else "cpu" ) @@ -206,20 +209,32 @@ def _state_dict_proc(state_dict: dict[str, Tensor]) -> dict[str, Tensor]: return new_state_dict + model = RTDETR.from_name(model_name, num_classes=80) + + model.load_state_dict(_state_dict_proc(state_dict)) + return model + + @staticmethod + def from_name(model_name: str, num_classes: int = 80) -> RTDETR: + """Load model without pretrained weights. + + Args: + model_name: 'rtdetr_r18vd', 'rtdetr_r34vd', 'rtdetr_r50vd_m', 'rtdetr_r50vd', 'rtdetr_r101vd'. + """ + if model_name == "rtdetr_r18vd": - model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet18d, 80)) + model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet18d, num_classes)) elif model_name == "rtdetr_r34vd": - model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet34d, 80)) + model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet34d, num_classes)) elif model_name == "rtdetr_r50vd_m": - model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet50d, 80)) + model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet50d, num_classes)) elif model_name == "rtdetr_r50vd": - model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet50d, 80)) + model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet50d, num_classes)) elif model_name == "rtdetr_r101vd": - model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet101d, 80)) + model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet101d, num_classes)) else: raise ValueError - - model.load_state_dict(_state_dict_proc(state_dict)) + return model def forward(self, images: Tensor) -> tuple[Tensor, Tensor]: diff --git a/kornia/models/__init__.py b/kornia/models/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/kornia/models/detector/__init__.py b/kornia/models/detector/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/kornia/models/detector/rtdetr.py b/kornia/models/detector/rtdetr.py new file mode 100644 index 0000000000..6630c4632e --- /dev/null +++ b/kornia/models/detector/rtdetr.py @@ -0,0 +1,39 @@ +from typing import Optional +import warnings + +from kornia.core import Module +from kornia.contrib.models.rt_detr.model import RTDETR, RTDETRConfig +from kornia.contrib.models.rt_detr import 
DETRPostProcessor +from kornia.contrib.object_detection import ResizePreProcessor, ObjectDetector + + +class RTDETRDetectorBuilder: + + @staticmethod + def build( + model_name: Optional[str] = None, + config: Optional[RTDETRConfig] = None, + pretrained: bool = True, + image_size: int = 640, + confidence_threshold: float = 0.5 + ) -> ObjectDetector: + if (model_name is not None and config is not None): + raise ValueError("Either `model_name` or `config` should be `None`.") + + if model_name is None and config is None: + warnings.warn("No `model_name` or `config` found. Will build `rtdetr_r18vd`.") + model_name = "rtdetr_r18vd" + + if config is not None: + model = RTDETR.from_config(config) + else: + if pretrained: + model = RTDETR.from_pretrained(model_name) + else: + model = RTDETR.from_name(model_name) + + return ObjectDetector( + model, + ResizePreProcessor(image_size), + DETRPostProcessor(confidence_threshold) + ) From 023133a1062cda0f8ad1ae6c14666840e44b43ed Mon Sep 17 00:00:00 2001 From: shijianjian Date: Fri, 6 Sep 2024 11:47:21 +0300 Subject: [PATCH 13/46] update --- kornia/contrib/object_detection.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index 35c9beea64..4c90d9c6bb 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -8,6 +8,7 @@ from kornia.core import Module, Tensor, as_tensor, concatenate from kornia.core.check import KORNIA_CHECK_SHAPE +from kornia.utils.draw import draw_rectangle __all__ = [ "BoundingBoxDataFormat", @@ -164,6 +165,21 @@ def forward(self, images: list[Tensor]) -> list[Tensor]: detections = self.post_processor(logits, boxes, images_sizes) return detections + def draw(self, images: list[Tensor]) -> list[Tensor]: + """Very simple drawing. Needs to be more fancy later. 
+ """ + detections = self.forward(images) + output = [] + for image, detection in zip(images, detections): + out_img = image.clone() + for out in detection: + out_img = draw_rectangle( + out_img, + torch.Tensor([[[out[-4], out[-3], out[-4] + out[-2], out[-3] + out[-1]]]]) + ) + output.append(out_img) + return output + def compile( self, *, From b69d535b17191e029f4a98a40846c1e220465146 Mon Sep 17 00:00:00 2001 From: shijianjian Date: Fri, 6 Sep 2024 11:54:56 +0300 Subject: [PATCH 14/46] update --- kornia/contrib/models/rt_detr/post_processor.py | 4 ++-- kornia/contrib/object_detection.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kornia/contrib/models/rt_detr/post_processor.py b/kornia/contrib/models/rt_detr/post_processor.py index fb73e17e56..95ad6109c7 100644 --- a/kornia/contrib/models/rt_detr/post_processor.py +++ b/kornia/contrib/models/rt_detr/post_processor.py @@ -38,8 +38,8 @@ def forward(self, logits: Tensor, boxes: Tensor, original_sizes: Tensor) -> list boxes_xy = concatenate([cxcy - wh * 0.5, wh], -1) sizes_wh = torch.empty(1, 1, 2, device=boxes.device, dtype=boxes.dtype) - sizes_wh[..., 0] = original_sizes[0][0] - sizes_wh[..., 1] = original_sizes[0][1] + sizes_wh[..., 0] = original_sizes[0][1] + sizes_wh[..., 1] = original_sizes[0][0] sizes_wh = sizes_wh.repeat(1, 1, 2) boxes_xy = boxes_xy * sizes_wh diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index 4c90d9c6bb..688eee98d1 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -171,7 +171,7 @@ def draw(self, images: list[Tensor]) -> list[Tensor]: detections = self.forward(images) output = [] for image, detection in zip(images, detections): - out_img = image.clone() + out_img = image[None].clone() for out in detection: out_img = draw_rectangle( out_img, From a9412fc0782890ede16f8385c661f86e293a6fa6 Mon Sep 17 00:00:00 2001 From: shijianjian Date: Fri, 6 Sep 2024 12:01:52 +0300 Subject: [PATCH 15/46] update --- kornia/contrib/object_detection.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index 688eee98d1..121593c6ca 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -8,6 +8,8 @@ from kornia.core import Module, Tensor, as_tensor, concatenate from kornia.core.check import KORNIA_CHECK_SHAPE +from kornia.core.external import PILImage as Image +from kornia.core.external import numpy as np from kornia.utils.draw import draw_rectangle __all__ = [ @@ -165,7 +167,7 @@ def forward(self, images: list[Tensor]) -> list[Tensor]: detections = self.post_processor(logits, boxes, images_sizes) return detections - def draw(self, images: list[Tensor]) -> list[Tensor]: + def draw(self, images: list[Tensor], output_type: str = "torch") -> list[Tensor] | Image.Image: # type: ignore """Very simple drawing. Needs to be more fancy later. 
""" detections = self.forward(images) @@ -177,7 +179,11 @@ def draw(self, images: list[Tensor]) -> list[Tensor]: out_img, torch.Tensor([[[out[-4], out[-3], out[-4] + out[-2], out[-3] + out[-1]]]]) ) - output.append(out_img) + if output_type == "torch": + output.append(out_img) + elif output_type == "pil": + output.append(Image.fromarray( + (out_img[0] * 255).permute(1, 2, 0).numpy().astype(np.uint8))) # type: ignore return output def compile( From 5f2aae546d70960837df5c6f65ffec59335d3397 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 6 Sep 2024 09:03:21 +0000 Subject: [PATCH 16/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- kornia/contrib/models/rt_detr/model.py | 4 ++-- kornia/contrib/object_detection.py | 10 ++++----- kornia/models/detector/rtdetr.py | 31 ++++++++++---------------- 3 files changed, 19 insertions(+), 26 deletions(-) diff --git a/kornia/contrib/models/rt_detr/model.py b/kornia/contrib/models/rt_detr/model.py index 3692788ed1..6871b85260 100644 --- a/kornia/contrib/models/rt_detr/model.py +++ b/kornia/contrib/models/rt_detr/model.py @@ -213,7 +213,7 @@ def _state_dict_proc(state_dict: dict[str, Tensor]) -> dict[str, Tensor]: model.load_state_dict(_state_dict_proc(state_dict)) return model - + @staticmethod def from_name(model_name: str, num_classes: int = 80) -> RTDETR: """Load model without pretrained weights. @@ -234,7 +234,7 @@ def from_name(model_name: str, num_classes: int = 80) -> RTDETR: model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet101d, num_classes)) else: raise ValueError - + return model def forward(self, images: Tensor) -> tuple[Tensor, Tensor]: diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index 121593c6ca..ed75882f63 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -168,7 +168,9 @@ def forward(self, images: list[Tensor]) -> list[Tensor]: return detections def draw(self, images: list[Tensor], output_type: str = "torch") -> list[Tensor] | Image.Image: # type: ignore - """Very simple drawing. Needs to be more fancy later. + """Very simple drawing. + + Needs to be more fancy later. 
""" detections = self.forward(images) output = [] @@ -176,14 +178,12 @@ def draw(self, images: list[Tensor], output_type: str = "torch") -> list[Tensor] out_img = image[None].clone() for out in detection: out_img = draw_rectangle( - out_img, - torch.Tensor([[[out[-4], out[-3], out[-4] + out[-2], out[-3] + out[-1]]]]) + out_img, torch.Tensor([[[out[-4], out[-3], out[-4] + out[-2], out[-3] + out[-1]]]]) ) if output_type == "torch": output.append(out_img) elif output_type == "pil": - output.append(Image.fromarray( - (out_img[0] * 255).permute(1, 2, 0).numpy().astype(np.uint8))) # type: ignore + output.append(Image.fromarray((out_img[0] * 255).permute(1, 2, 0).numpy().astype(np.uint8))) # type: ignore return output def compile( diff --git a/kornia/models/detector/rtdetr.py b/kornia/models/detector/rtdetr.py index 6630c4632e..2d808b3d0f 100644 --- a/kornia/models/detector/rtdetr.py +++ b/kornia/models/detector/rtdetr.py @@ -1,39 +1,32 @@ -from typing import Optional import warnings +from typing import Optional -from kornia.core import Module -from kornia.contrib.models.rt_detr.model import RTDETR, RTDETRConfig from kornia.contrib.models.rt_detr import DETRPostProcessor -from kornia.contrib.object_detection import ResizePreProcessor, ObjectDetector +from kornia.contrib.models.rt_detr.model import RTDETR, RTDETRConfig +from kornia.contrib.object_detection import ObjectDetector, ResizePreProcessor class RTDETRDetectorBuilder: - @staticmethod def build( model_name: Optional[str] = None, config: Optional[RTDETRConfig] = None, pretrained: bool = True, image_size: int = 640, - confidence_threshold: float = 0.5 + confidence_threshold: float = 0.5, ) -> ObjectDetector: - if (model_name is not None and config is not None): + if model_name is not None and config is not None: raise ValueError("Either `model_name` or `config` should be `None`.") - + if model_name is None and config is None: warnings.warn("No `model_name` or `config` found. Will build `rtdetr_r18vd`.") model_name = "rtdetr_r18vd" - + if config is not None: model = RTDETR.from_config(config) + elif pretrained: + model = RTDETR.from_pretrained(model_name) else: - if pretrained: - model = RTDETR.from_pretrained(model_name) - else: - model = RTDETR.from_name(model_name) - - return ObjectDetector( - model, - ResizePreProcessor(image_size), - DETRPostProcessor(confidence_threshold) - ) + model = RTDETR.from_name(model_name) + + return ObjectDetector(model, ResizePreProcessor(image_size), DETRPostProcessor(confidence_threshold)) From 928aec765c5f6d977ac24e951aa10bc73d499694 Mon Sep 17 00:00:00 2001 From: shijianjian Date: Fri, 6 Sep 2024 12:56:54 +0300 Subject: [PATCH 17/46] update --- kornia/contrib/object_detection.py | 10 ++++++---- kornia/models/detector/rtdetr.py | 20 ++++++++++++-------- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index ed75882f63..5ff63566e1 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -167,10 +167,12 @@ def forward(self, images: list[Tensor]) -> list[Tensor]: detections = self.post_processor(logits, boxes, images_sizes) return detections - def draw(self, images: list[Tensor], output_type: str = "torch") -> list[Tensor] | Image.Image: # type: ignore - """Very simple drawing. - - Needs to be more fancy later. + def draw( + self, + images: list[Tensor], + output_type: str = "torch" + ) -> list[Tensor] | list[Image.Image]: # type: ignore + """Very simple drawing. 
Needs to be more fancy later. """ detections = self.forward(images) output = [] diff --git a/kornia/models/detector/rtdetr.py b/kornia/models/detector/rtdetr.py index 2d808b3d0f..a2a53d401a 100644 --- a/kornia/models/detector/rtdetr.py +++ b/kornia/models/detector/rtdetr.py @@ -18,15 +18,19 @@ def build( if model_name is not None and config is not None: raise ValueError("Either `model_name` or `config` should be `None`.") - if model_name is None and config is None: - warnings.warn("No `model_name` or `config` found. Will build `rtdetr_r18vd`.") - model_name = "rtdetr_r18vd" - if config is not None: model = RTDETR.from_config(config) - elif pretrained: - model = RTDETR.from_pretrained(model_name) + elif model_name is not None: + if pretrained: + model = RTDETR.from_pretrained(model_name) + else: + model = RTDETR.from_name(model_name) else: - model = RTDETR.from_name(model_name) + warnings.warn("No `model_name` or `config` found. Will build pretrained `rtdetr_r18vd`.") + model = RTDETR.from_pretrained("rtdetr_r18vd") - return ObjectDetector(model, ResizePreProcessor(image_size), DETRPostProcessor(confidence_threshold)) + return ObjectDetector( + model, + ResizePreProcessor(image_size), + DETRPostProcessor(confidence_threshold) + ) From 3d7ac8419432ebad0d11bb42a76bf6469a53651c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 6 Sep 2024 09:58:14 +0000 Subject: [PATCH 18/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- kornia/contrib/object_detection.py | 10 ++++------ kornia/models/detector/rtdetr.py | 6 +----- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index 5ff63566e1..47aff20a6f 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -167,12 +167,10 @@ def forward(self, images: list[Tensor]) -> list[Tensor]: detections = self.post_processor(logits, boxes, images_sizes) return detections - def draw( - self, - images: list[Tensor], - output_type: str = "torch" - ) -> list[Tensor] | list[Image.Image]: # type: ignore - """Very simple drawing. Needs to be more fancy later. + def draw(self, images: list[Tensor], output_type: str = "torch") -> list[Tensor] | list[Image.Image]: # type: ignore + """Very simple drawing. + + Needs to be more fancy later. """ detections = self.forward(images) output = [] diff --git a/kornia/models/detector/rtdetr.py b/kornia/models/detector/rtdetr.py index a2a53d401a..d5f11b9a8c 100644 --- a/kornia/models/detector/rtdetr.py +++ b/kornia/models/detector/rtdetr.py @@ -29,8 +29,4 @@ def build( warnings.warn("No `model_name` or `config` found. 
Will build pretrained `rtdetr_r18vd`.") model = RTDETR.from_pretrained("rtdetr_r18vd") - return ObjectDetector( - model, - ResizePreProcessor(image_size), - DETRPostProcessor(confidence_threshold) - ) + return ObjectDetector(model, ResizePreProcessor(image_size), DETRPostProcessor(confidence_threshold)) From db87c53c523a0c31bdb7a2cd4dcd899ed1041a61 Mon Sep 17 00:00:00 2001 From: shijianjian Date: Fri, 6 Sep 2024 13:17:28 +0300 Subject: [PATCH 19/46] update --- kornia/contrib/object_detection.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index 47aff20a6f..497cae17a3 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -1,5 +1,7 @@ from __future__ import annotations +import os +import datetime from dataclasses import dataclass from enum import Enum from typing import Optional @@ -11,6 +13,7 @@ from kornia.core.external import PILImage as Image from kornia.core.external import numpy as np from kornia.utils.draw import draw_rectangle +from kornia.io import write_image __all__ = [ "BoundingBoxDataFormat", @@ -181,11 +184,28 @@ def draw(self, images: list[Tensor], output_type: str = "torch") -> list[Tensor] out_img, torch.Tensor([[[out[-4], out[-3], out[-4] + out[-2], out[-3] + out[-1]]]]) ) if output_type == "torch": - output.append(out_img) + output.append(out_img[0]) elif output_type == "pil": output.append(Image.fromarray((out_img[0] * 255).permute(1, 2, 0).numpy().astype(np.uint8))) # type: ignore + else: + raise RuntimeError(f"Unsupported output type `{output_type}`.") return output + def save(self, images: list[Tensor], directory: Optional[str] = None) -> None: + """Saves the output image(s) to a directory. + + Args: + name: Directory to save the images. + n_row: Number of images displayed in each row of the grid. 
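+
+        If `directory` is None, a timestamped folder under `Kornia_outputs/` is created.
+
+        Example:
+            A minimal sketch (the `detector` instance and `images` list are illustrative
+            placeholders for an already-built detector and its inputs):
+
+            >>> detector.save(images, directory="./detections")  # doctest: +SKIP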
+ """ + if directory is None: + name = f"detection-{datetime.datetime.now(tz=datetime.timezone.utc).strftime('%Y%m%d%H%M%S')!s}" + directory = os.path.join("Kornia_outputs", name) + outputs = self.draw(images) + os.makedirs(directory, exist_ok=True) + for i, out_image in enumerate(outputs): + write_image(os.path.join(directory, f"{str(i).zfill(6)}.jpg"), out_image.mul(255.0).byte()) + def compile( self, *, From 70ae085cd693b5d6cb92e8b44aaf0f9c3be6bfe5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 6 Sep 2024 10:19:23 +0000 Subject: [PATCH 20/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- kornia/contrib/object_detection.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index 497cae17a3..b348899a3b 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -1,7 +1,7 @@ from __future__ import annotations -import os import datetime +import os from dataclasses import dataclass from enum import Enum from typing import Optional @@ -12,8 +12,8 @@ from kornia.core.check import KORNIA_CHECK_SHAPE from kornia.core.external import PILImage as Image from kornia.core.external import numpy as np -from kornia.utils.draw import draw_rectangle from kornia.io import write_image +from kornia.utils.draw import draw_rectangle __all__ = [ "BoundingBoxDataFormat", From 82e3240ca8cf1cc3240a519ae3a0decff1db109b Mon Sep 17 00:00:00 2001 From: shijianjian Date: Fri, 6 Sep 2024 14:27:04 +0300 Subject: [PATCH 21/46] update --- kornia/models/detector/rtdetr.py | 114 ++++++++++++++++++++++++++++++- 1 file changed, 112 insertions(+), 2 deletions(-) diff --git a/kornia/models/detector/rtdetr.py b/kornia/models/detector/rtdetr.py index d5f11b9a8c..cabf060027 100644 --- a/kornia/models/detector/rtdetr.py +++ b/kornia/models/detector/rtdetr.py @@ -1,20 +1,54 @@ import warnings from typing import Optional +import torch +import torch.nn as nn + from kornia.contrib.models.rt_detr import DETRPostProcessor from kornia.contrib.models.rt_detr.model import RTDETR, RTDETRConfig from kornia.contrib.object_detection import ObjectDetector, ResizePreProcessor +from kornia.core import rand class RTDETRDetectorBuilder: + """A builder class for constructing RT-DETR object detection models. + + This class provides static methods to: + - Build an object detection model from a model name or configuration. + - Export the model to ONNX format for inference. + """ + @staticmethod def build( model_name: Optional[str] = None, config: Optional[RTDETRConfig] = None, pretrained: bool = True, - image_size: int = 640, + image_size: Optional[int] = 640, confidence_threshold: float = 0.5, ) -> ObjectDetector: + """Builds and returns an RT-DETR object detector model. + + Either `model_name` or `config` must be provided. If neither is provided, + a default pretrained model (`rtdetr_r18vd`) will be built. + + Args: + model_name: + Name of the RT-DETR model to load. Can be one of the available pretrained models. + config: + A custom configuration object for building the RT-DETR model. + pretrained: + Whether to load a pretrained version of the model (applies when `model_name` is provided). + image_size: + The size to which input images will be resized during preprocessing. + If None, no resizing will be performed before passing to the model. 
Recommended scales include + [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]. + confidence_threshold: + The confidence threshold used during post-processing to filter detections. + + Returns: + ObjectDetector + An object detector instance initialized with the specified model, preprocessor, and post-processor. + """ if model_name is not None and config is not None: raise ValueError("Either `model_name` or `config` should be `None`.") @@ -29,4 +63,80 @@ def build( warnings.warn("No `model_name` or `config` found. Will build pretrained `rtdetr_r18vd`.") model = RTDETR.from_pretrained("rtdetr_r18vd") - return ObjectDetector(model, ResizePreProcessor(image_size), DETRPostProcessor(confidence_threshold)) + return ObjectDetector( + model, + ResizePreProcessor(image_size) if image_size is not None else nn.Identity(), + DETRPostProcessor(confidence_threshold) + ) + + @staticmethod + def to_onnx( + onnx_name: Optional[str] = None, + model_name: Optional[str] = None, + config: Optional[RTDETRConfig] = None, + pretrained: bool = True, + image_size: Optional[int] = 640, + confidence_threshold: float = 0.5, + ) -> None: + """Exports an RT-DETR object detection model to ONNX format. + + Either `model_name` or `config` must be provided. If neither is provided, + a default pretrained model (`rtdetr_r18vd`) will be built. + + Args: + model_name: + Name of the RT-DETR model to load. Can be one of the available pretrained models. + config: + A custom configuration object for building the RT-DETR model. + pretrained: + Whether to load a pretrained version of the model (applies when `model_name` is provided). + image_size: + The size to which input images will be resized during preprocessing. + If None, image_size will be dynamic. Recommended scales include + [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]. + confidence_threshold: + The confidence threshold used during post-processing to filter detections. + + Returns: + ObjectDetector + An object detector instance initialized with the specified model, preprocessor, and post-processor. 
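+
+        Example:
+            A minimal export sketch (only documented arguments are used; the output file
+            name is generated by this method when `onnx_name` is not given):
+
+            >>> RTDETRDetectorBuilder.to_onnx(model_name="rtdetr_r18vd", image_size=640)  # doctest: +SKIP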
+ """ + + detector = RTDETRDetectorBuilder.build( + model_name=model_name, + config=config, + pretrained=pretrained, + image_size=image_size, + confidence_threshold=confidence_threshold, + ) + if onnx_name is None: + _model_name = model_name + if model_name is None and config is not None: + _model_name = "rtdetr-customized" + elif model_name is None and config is None: + _model_name = "rtdetr_r18vd" + onnx_name = f"Kornia-RTDETR-{_model_name}-{image_size}.onnx" + + if image_size is None: + val_image = rand(1, 3, 640, 640) + dynamic_axes={ + 'input' : {0 : 'batch_size', 2: 'height', 3: 'width'}, + 'output' : {0 : 'batch_size', 2: 'height', 3: 'width'} + } + else: + val_image = rand(1, 3, image_size, image_size) + dynamic_axes={ + 'input' : {0 : 'batch_size'}, + 'output' : {0 : 'batch_size'} + } + torch.onnx.export( + detector, + val_image, + onnx_name, + export_params=True, + opset_version=17, + do_constant_folding=True, + input_names=['input'], + output_names=['output'], + dynamic_axes=dynamic_axes + ) From 22bb115683d50682038c8b4a769fb82b78ca4c76 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 6 Sep 2024 11:27:28 +0000 Subject: [PATCH 22/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- kornia/models/detector/rtdetr.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/kornia/models/detector/rtdetr.py b/kornia/models/detector/rtdetr.py index cabf060027..d16bb9e372 100644 --- a/kornia/models/detector/rtdetr.py +++ b/kornia/models/detector/rtdetr.py @@ -2,7 +2,7 @@ from typing import Optional import torch -import torch.nn as nn +from torch import nn from kornia.contrib.models.rt_detr import DETRPostProcessor from kornia.contrib.models.rt_detr.model import RTDETR, RTDETRConfig @@ -66,7 +66,7 @@ def build( return ObjectDetector( model, ResizePreProcessor(image_size) if image_size is not None else nn.Identity(), - DETRPostProcessor(confidence_threshold) + DETRPostProcessor(confidence_threshold), ) @staticmethod @@ -119,16 +119,13 @@ def to_onnx( if image_size is None: val_image = rand(1, 3, 640, 640) - dynamic_axes={ - 'input' : {0 : 'batch_size', 2: 'height', 3: 'width'}, - 'output' : {0 : 'batch_size', 2: 'height', 3: 'width'} + dynamic_axes = { + "input": {0: "batch_size", 2: "height", 3: "width"}, + "output": {0: "batch_size", 2: "height", 3: "width"}, } else: val_image = rand(1, 3, image_size, image_size) - dynamic_axes={ - 'input' : {0 : 'batch_size'}, - 'output' : {0 : 'batch_size'} - } + dynamic_axes = {"input": {0: "batch_size"}, "output": {0: "batch_size"}} torch.onnx.export( detector, val_image, @@ -136,7 +133,7 @@ def to_onnx( export_params=True, opset_version=17, do_constant_folding=True, - input_names=['input'], - output_names=['output'], - dynamic_axes=dynamic_axes + input_names=["input"], + output_names=["output"], + dynamic_axes=dynamic_axes, ) From d6390258d2901488b90c306e9553efcc65618b2a Mon Sep 17 00:00:00 2001 From: shijianjian Date: Fri, 6 Sep 2024 23:04:00 +0300 Subject: [PATCH 23/46] update --- kornia/models/detector/rtdetr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kornia/models/detector/rtdetr.py b/kornia/models/detector/rtdetr.py index d16bb9e372..d0421ff50d 100644 --- a/kornia/models/detector/rtdetr.py +++ b/kornia/models/detector/rtdetr.py @@ -65,8 +65,8 @@ def build( return ObjectDetector( model, - ResizePreProcessor(image_size) if image_size is not None else 
nn.Identity(), - DETRPostProcessor(confidence_threshold), + ResizePreProcessor((image_size, image_size)) if image_size is not None else nn.Identity(), + DETRPostProcessor(confidence_threshold) ) @staticmethod From a902739010341113b242867a6e5dee8d1ab921d8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 6 Sep 2024 20:04:40 +0000 Subject: [PATCH 24/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- kornia/models/detector/rtdetr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kornia/models/detector/rtdetr.py b/kornia/models/detector/rtdetr.py index d0421ff50d..bfcb1944f7 100644 --- a/kornia/models/detector/rtdetr.py +++ b/kornia/models/detector/rtdetr.py @@ -66,7 +66,7 @@ def build( return ObjectDetector( model, ResizePreProcessor((image_size, image_size)) if image_size is not None else nn.Identity(), - DETRPostProcessor(confidence_threshold) + DETRPostProcessor(confidence_threshold), ) @staticmethod From 292f410ee1629df69013d23f461fe00bb513f7db Mon Sep 17 00:00:00 2001 From: shijianjian Date: Sat, 7 Sep 2024 12:54:12 +0300 Subject: [PATCH 25/46] doc update --- docs/source/models/rt_detr.rst | 22 ++++++++++++++++++++++ kornia/io/io.py | 6 +++++- kornia/models/detector/rtdetr.py | 1 + 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/docs/source/models/rt_detr.rst b/docs/source/models/rt_detr.rst index cfeaa06c8a..ca67cb06b0 100644 --- a/docs/source/models/rt_detr.rst +++ b/docs/source/models/rt_detr.rst @@ -1,6 +1,28 @@ Real-Time Detection Transformer (RT-DETR) ========================================= +.. code-block:: python + + from kornia.io import load_image + from kornia.models.detector.rtdetr import RTDETRDetectorBuilder + + input_img = load_image(img_path)[None] # Load image to BCHW + + # NOTE: available models: 'rtdetr_r18vd', 'rtdetr_r34vd', 'rtdetr_r50vd_m', 'rtdetr_r50vd', 'rtdetr_r101vd'. + # NOTE: recommended image scales: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800] + detector = RTDETRDetectorBuilder.build("rtdetr_r18vd", image_size=640) + + # get the output boxes + boxes = detector(input_img) + + # draw the bounding boxes on the images directly. + output = detector.draw(input_img, output_type="pil") + output[0].save("Kornia-RTDETR-output.png") + + # convert the whole model to ONNX directly + RTDETRDetectorBuilder.to_onnx("RTDETR-640.onnx", model_name="rtdetr_r18vd", image_size=640) + + .. card:: :link: https://arxiv.org/abs/2304.08069 diff --git a/kornia/io/io.py b/kornia/io/io.py index 62d9ed45b9..334ecb2828 100644 --- a/kornia/io/io.py +++ b/kornia/io/io.py @@ -65,7 +65,11 @@ def _to_uint8(image: Tensor) -> Tensor: return image.mul(255.0).byte() -def load_image(path_file: str | Path, desired_type: ImageLoadType, device: Device = "cpu") -> Tensor: +def load_image( + path_file: str | Path, + desired_type: ImageLoadType = ImageLoadType.RGB32, + device: Device = "cpu" +) -> Tensor: """Read an image file and decode using the Kornia Rust backend. Args: diff --git a/kornia/models/detector/rtdetr.py b/kornia/models/detector/rtdetr.py index bfcb1944f7..e8a963cd0d 100644 --- a/kornia/models/detector/rtdetr.py +++ b/kornia/models/detector/rtdetr.py @@ -34,6 +34,7 @@ def build( Args: model_name: Name of the RT-DETR model to load. Can be one of the available pretrained models. + Including 'rtdetr_r18vd', 'rtdetr_r34vd', 'rtdetr_r50vd_m', 'rtdetr_r50vd', 'rtdetr_r101vd'. 
config: A custom configuration object for building the RT-DETR model. pretrained: From 7e87160a3e64dc44177eda566a55985b40764322 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 7 Sep 2024 09:54:38 +0000 Subject: [PATCH 26/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- kornia/io/io.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kornia/io/io.py b/kornia/io/io.py index 334ecb2828..1e8797d8b2 100644 --- a/kornia/io/io.py +++ b/kornia/io/io.py @@ -66,9 +66,7 @@ def _to_uint8(image: Tensor) -> Tensor: def load_image( - path_file: str | Path, - desired_type: ImageLoadType = ImageLoadType.RGB32, - device: Device = "cpu" + path_file: str | Path, desired_type: ImageLoadType = ImageLoadType.RGB32, device: Device = "cpu" ) -> Tensor: """Read an image file and decode using the Kornia Rust backend. From db1cb534d99cf87beb704bcb4fd56995cc684f65 Mon Sep 17 00:00:00 2001 From: edgar Date: Sat, 7 Sep 2024 22:42:52 +0200 Subject: [PATCH 27/46] post processor as in the original codew --- .../contrib/models/rt_detr/post_processor.py | 47 ++++++++++++++----- kornia/contrib/object_detection.py | 16 ++++--- 2 files changed, 44 insertions(+), 19 deletions(-) diff --git a/kornia/contrib/models/rt_detr/post_processor.py b/kornia/contrib/models/rt_detr/post_processor.py index 95ad6109c7..0ae898ad5e 100644 --- a/kornia/contrib/models/rt_detr/post_processor.py +++ b/kornia/contrib/models/rt_detr/post_processor.py @@ -1,11 +1,35 @@ +"""Post-processor for the RT-DETR model.""" + from __future__ import annotations -# TODO: import torch from kornia.core import Module, Tensor, concatenate +def mod(a, b): + """Compute the modulo operation for two numbers. + + This function calculates the remainder of the division of 'a' by 'b' + using the formula: a - (a // b) * b, which is equivalent to the modulo operation. + + Args: + a: The dividend. + b: The divisor. + + Returns: + The remainder of a divided by b. + + Example: + >>> mod(7, 3) + 1 + >>> mod(8.5, 3.2) + 2.1 + """ + return a - (a // b) * b + + +# TODO: deprecate the confidence threshold and add the num_top_queries as a parameter and num_classes as a parameter class DETRPostProcessor(Module): def __init__(self, confidence_threshold: float) -> None: super().__init__() @@ -45,16 +69,13 @@ def forward(self, logits: Tensor, boxes: Tensor, original_sizes: Tensor) -> list boxes_xy = boxes_xy * sizes_wh scores = logits.sigmoid() # RT-DETR was trained with focal loss. 
thus sigmoid is used instead of softmax - # the original code is slightly different - # it allows 1 bounding box to have multiple classes (multi-label) - scores, labels = scores.max(-1) - - detections: list[Tensor] = [] - for i in range(scores.shape[0]): - mask = scores[i] >= self.confidence_threshold - labels_i = labels[i, mask].unsqueeze(-1) - scores_i = scores[i, mask].unsqueeze(-1) - boxes_i = boxes_xy[i, mask] - detections.append(concatenate([labels_i, scores_i, boxes_i], -1)) + # retrieve the boxes with the highest score for each class + # https://github.com/lyuwenyu/RT-DETR/blob/b6bf0200b249a6e35b44e0308b6058f55b99696b/rtdetrv2_pytorch/src/zoo/rtdetr/rtdetr_postprocessor.py#L55-L62 + num_top_queries = 300 # TODO: make this configurable + num_classes = 80 # TODO: make this configurable + scores, index = torch.topk(scores.flatten(1), num_top_queries, dim=-1) + labels = mod(index, num_classes) + index = index // num_classes + boxes = boxes_xy.gather(dim=1, index=index.unsqueeze(-1).repeat(1, 1, boxes_xy.shape[-1])) - return detections + return concatenate([labels[..., None], scores[..., None], boxes], -1) diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index b348899a3b..9a289b4f17 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -12,6 +12,7 @@ from kornia.core.check import KORNIA_CHECK_SHAPE from kornia.core.external import PILImage as Image from kornia.core.external import numpy as np +from kornia.geometry.transform import resize from kornia.io import write_image from kornia.utils.draw import draw_rectangle @@ -126,13 +127,12 @@ def forward(self, imgs: list[Tensor]) -> tuple[Tensor, Tensor]: """ # TODO: support other input formats e.g. file path, numpy resized_imgs, original_sizes = [], [] - for i in range(len(imgs)): + for i in range(imgs.shape[0]): img = imgs[i] - # NOTE: assume that image layout is CHW original_sizes.append([img.shape[-2], img.shape[-1]]) resized_imgs.append( - # TODO: fix kornia resize to support onnx - torch.nn.functional.interpolate(img.unsqueeze(0), size=self.size, mode=self.interpolation_mode) + # TODO: fix kornia resize warnings + resize(img[None], size=self.size, interpolation=self.interpolation_mode) ) return concatenate(resized_imgs), as_tensor(original_sizes) @@ -181,7 +181,8 @@ def draw(self, images: list[Tensor], output_type: str = "torch") -> list[Tensor] out_img = image[None].clone() for out in detection: out_img = draw_rectangle( - out_img, torch.Tensor([[[out[-4], out[-3], out[-4] + out[-2], out[-3] + out[-1]]]]) + out_img, + torch.Tensor([[[out[-4], out[-3], out[-4] + out[-2], out[-3] + out[-1]]]]), ) if output_type == "torch": output.append(out_img[0]) @@ -204,7 +205,10 @@ def save(self, images: list[Tensor], directory: Optional[str] = None) -> None: outputs = self.draw(images) os.makedirs(directory, exist_ok=True) for i, out_image in enumerate(outputs): - write_image(os.path.join(directory, f"{str(i).zfill(6)}.jpg"), out_image.mul(255.0).byte()) + write_image( + os.path.join(directory, f"{str(i).zfill(6)}.jpg"), + out_image.mul(255.0).byte(), + ) def compile( self, From 82f206220b31735a44da64d7ba50853876c71a38 Mon Sep 17 00:00:00 2001 From: edgar Date: Sun, 8 Sep 2024 11:45:38 +0200 Subject: [PATCH 28/46] fix typing --- kornia/contrib/models/rt_detr/post_processor.py | 8 ++++---- kornia/contrib/object_detection.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/kornia/contrib/models/rt_detr/post_processor.py 
b/kornia/contrib/models/rt_detr/post_processor.py index 0ae898ad5e..3a7d8117c3 100644 --- a/kornia/contrib/models/rt_detr/post_processor.py +++ b/kornia/contrib/models/rt_detr/post_processor.py @@ -7,7 +7,7 @@ from kornia.core import Module, Tensor, concatenate -def mod(a, b): +def mod(a: Tensor, b: int) -> Tensor: """Compute the modulo operation for two numbers. This function calculates the remainder of the division of 'a' by 'b' @@ -35,7 +35,7 @@ def __init__(self, confidence_threshold: float) -> None: super().__init__() self.confidence_threshold = confidence_threshold - def forward(self, logits: Tensor, boxes: Tensor, original_sizes: Tensor) -> list[Tensor]: + def forward(self, logits: Tensor, boxes: Tensor, original_sizes: Tensor) -> Tensor: """Post-process outputs from DETR. Args: @@ -71,8 +71,8 @@ def forward(self, logits: Tensor, boxes: Tensor, original_sizes: Tensor) -> list # retrieve the boxes with the highest score for each class # https://github.com/lyuwenyu/RT-DETR/blob/b6bf0200b249a6e35b44e0308b6058f55b99696b/rtdetrv2_pytorch/src/zoo/rtdetr/rtdetr_postprocessor.py#L55-L62 - num_top_queries = 300 # TODO: make this configurable - num_classes = 80 # TODO: make this configurable + num_top_queries: int = 300 # TODO: make this configurable + num_classes: int = 80 # TODO: make this configurable scores, index = torch.topk(scores.flatten(1), num_top_queries, dim=-1) labels = mod(index, num_classes) index = index // num_classes diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index 9a289b4f17..c609214837 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -119,7 +119,7 @@ def __init__(self, size: tuple[int, int], interpolation_mode: str = "bilinear") self.size = size self.interpolation_mode = interpolation_mode - def forward(self, imgs: list[Tensor]) -> tuple[Tensor, Tensor]: + def forward(self, imgs: Tensor) -> tuple[Tensor, Tensor]: """ Returns: resized_imgs: resized images in a batch. From fea1f92f85a617ff2e0ef372f0703e825852b13a Mon Sep 17 00:00:00 2001 From: Jian S Date: Mon, 9 Sep 2024 01:42:13 +0800 Subject: [PATCH 29/46] update --- kornia/color/yuv.py | 2 +- .../contrib/models/rt_detr/post_processor.py | 11 ++++--- kornia/contrib/object_detection.py | 33 ++++++++++++------- kornia/core/external.py | 28 +++++++++++++--- kornia/geometry/transform/affwarp.py | 6 ++-- kornia/models/detector/__init__.py | 1 + kornia/models/detector/rtdetr.py | 25 +++++++------- kornia/utils/image.py | 2 +- 8 files changed, 73 insertions(+), 35 deletions(-) diff --git a/kornia/color/yuv.py b/kornia/color/yuv.py index e250e1ac36..1334be1089 100644 --- a/kornia/color/yuv.py +++ b/kornia/color/yuv.py @@ -122,7 +122,7 @@ def yuv_to_rgb(image: Tensor) -> Tensor: if not isinstance(image, Tensor): raise TypeError(f"Input type is not a Tensor. Got {type(image)}") - if len(image.shape) < 3 or image.shape[-3] != 3: + if image.dim() < 3 or image.shape[-3] != 3: raise ValueError(f"Input size must have a shape of (*, 3, H, W). 
Got {image.shape}") y: Tensor = image[..., 0, :, :] diff --git a/kornia/contrib/models/rt_detr/post_processor.py b/kornia/contrib/models/rt_detr/post_processor.py index 3a7d8117c3..8f79953bfc 100644 --- a/kornia/contrib/models/rt_detr/post_processor.py +++ b/kornia/contrib/models/rt_detr/post_processor.py @@ -61,10 +61,8 @@ def forward(self, logits: Tensor, boxes: Tensor, original_sizes: Tensor) -> Tens cxcy, wh = boxes[..., :2], boxes[..., 2:] boxes_xy = concatenate([cxcy - wh * 0.5, wh], -1) - sizes_wh = torch.empty(1, 1, 2, device=boxes.device, dtype=boxes.dtype) - sizes_wh[..., 0] = original_sizes[0][1] - sizes_wh[..., 1] = original_sizes[0][0] - sizes_wh = sizes_wh.repeat(1, 1, 2) + # Get dynamic size from the input tensor itself + sizes_wh = original_sizes[0].flip(0).unsqueeze(0).unsqueeze(0).repeat(1, 1, 2) boxes_xy = boxes_xy * sizes_wh scores = logits.sigmoid() # RT-DETR was trained with focal loss. thus sigmoid is used instead of softmax @@ -78,4 +76,7 @@ def forward(self, logits: Tensor, boxes: Tensor, original_sizes: Tensor) -> Tens index = index // num_classes boxes = boxes_xy.gather(dim=1, index=index.unsqueeze(-1).repeat(1, 1, boxes_xy.shape[-1])) - return concatenate([labels[..., None], scores[..., None], boxes], -1) + all_boxes = concatenate([labels[..., None], scores[..., None], boxes], -1) + + return all_boxes[(all_boxes[:, :, 1] > self.confidence_threshold).unsqueeze(-1).expand_as(all_boxes)].view( + all_boxes.shape[0], -1, all_boxes.shape[-1]) diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index c609214837..cffbac333c 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -4,7 +4,7 @@ import os from dataclasses import dataclass from enum import Enum -from typing import Optional +from typing import Optional, Union import torch @@ -119,7 +119,7 @@ def __init__(self, size: tuple[int, int], interpolation_mode: str = "bilinear") self.size = size self.interpolation_mode = interpolation_mode - def forward(self, imgs: Tensor) -> tuple[Tensor, Tensor]: + def forward(self, imgs: Union[Tensor, list[Tensor]]) -> tuple[Tensor, Tensor]: """ Returns: resized_imgs: resized images in a batch. @@ -127,14 +127,18 @@ def forward(self, imgs: Tensor) -> tuple[Tensor, Tensor]: """ # TODO: support other input formats e.g. file path, numpy resized_imgs, original_sizes = [], [] - for i in range(imgs.shape[0]): + + iters = len(imgs) if isinstance(imgs, list) else imgs.shape[0] + original_sizes = imgs.new_zeros((imgs.shape[0], 2)) + for i in range(iters): img = imgs[i] - original_sizes.append([img.shape[-2], img.shape[-1]]) + original_sizes[i, 0] = img.shape[-2] # Height + original_sizes[i, 1] = img.shape[-1] # Width resized_imgs.append( # TODO: fix kornia resize warnings resize(img[None], size=self.size, interpolation=self.interpolation_mode) ) - return concatenate(resized_imgs), as_tensor(original_sizes) + return concatenate(resized_imgs), original_sizes # TODO: move this to kornia.models as AlgorithmicModel api @@ -155,11 +159,12 @@ def __init__(self, model: Module, pre_processor: Module, post_processor: Module) self.post_processor = post_processor.eval() @torch.inference_mode() - def forward(self, images: list[Tensor]) -> list[Tensor]: + def forward(self, images: Union[Tensor, list[Tensor]]) -> list[Tensor]: """Detect objects in a given list of images. Args: - images: list of RGB images. Each image is a Tensor with shape :math:`(3, H, W)`. + images: If list of RGB images. 
Each image is a Tensor with shape :math:`(3, H, W)`. + If Tensor, a Tensor with shape :math:`(B, 3, H, W)`. Returns: list of detections found in each image. For item in a batch, shape is :math:`(D, 6)`, where :math:`D` is the @@ -170,12 +175,15 @@ def forward(self, images: list[Tensor]) -> list[Tensor]: detections = self.post_processor(logits, boxes, images_sizes) return detections - def draw(self, images: list[Tensor], output_type: str = "torch") -> list[Tensor] | list[Image.Image]: # type: ignore + def draw( + self, images: Union[Tensor, list[Tensor]], detections: Optional[Tensor] = None, output_type: str = "torch" + ) -> Union[Tensor, list[Tensor], list[Image.Image]]: # type: ignore """Very simple drawing. Needs to be more fancy later. """ - detections = self.forward(images) + if detections is None: + detections = self.forward(images) output = [] for image, detection in zip(images, detections): out_img = image[None].clone() @@ -192,7 +200,9 @@ def draw(self, images: list[Tensor], output_type: str = "torch") -> list[Tensor] raise RuntimeError(f"Unsupported output type `{output_type}`.") return output - def save(self, images: list[Tensor], directory: Optional[str] = None) -> None: + def save( + self, images: Union[Tensor, list[Tensor]], detections: Optional[Tensor] = None, directory: Optional[str] = None + ) -> None: """Saves the output image(s) to a directory. Args: @@ -202,13 +212,14 @@ def save(self, images: list[Tensor], directory: Optional[str] = None) -> None: if directory is None: name = f"detection-{datetime.datetime.now(tz=datetime.timezone.utc).strftime('%Y%m%d%H%M%S')!s}" directory = os.path.join("Kornia_outputs", name) - outputs = self.draw(images) + outputs = self.draw(images, detections) os.makedirs(directory, exist_ok=True) for i, out_image in enumerate(outputs): write_image( os.path.join(directory, f"{str(i).zfill(6)}.jpg"), out_image.mul(255.0).byte(), ) + print(f"Outputs are saved in {directory}") def compile( self, diff --git a/kornia/core/external.py b/kornia/core/external.py index 4efdbfe189..cee492f250 100644 --- a/kornia/core/external.py +++ b/kornia/core/external.py @@ -1,4 +1,6 @@ import importlib +import subprocess + from types import ModuleType from typing import List, Optional @@ -14,6 +16,7 @@ class LazyLoader: module_name: The name of the module to be lazily loaded. module: The actual module object, initialized to None and loaded upon first access. """ + auto_install: bool = False def __init__(self, module_name: str) -> None: """Initializes the LazyLoader with the name of the module. @@ -24,6 +27,10 @@ def __init__(self, module_name: str) -> None: self.module_name = module_name self.module: Optional[ModuleType] = None + def _install_package(self, module_name: str) -> None: + print(f"Installing `{self.module_name}` ...") + subprocess.run(["pip", "install", "-U", self.module_name]) + def _load(self) -> None: """Loads the module if it hasn't been loaded yet. @@ -34,10 +41,23 @@ def _load(self) -> None: try: self.module = importlib.import_module(self.module_name) except ImportError as e: - raise ImportError( - f"Optional dependency '{self.module_name}' is not installed. " - f"Please install it to use this functionality." - ) from e + if self.auto_install: + self._install_package(self.module_name) + else: + if_install = input( + f"Optional dependency '{self.module_name}' is not installed. " + "Do you wish to install the dependency? [Y]es, [N]o, [A]ll." 
+ ) + if if_install.lower() == "y": + subprocess.run(["pip", "install", "-U", self.module_name]) + elif if_install.lower() == "a": + subprocess.run(["pip", "install", "-U", self.module_name]) + self.auto_install = True + else: + raise ImportError( + f"Optional dependency '{self.module_name}' is not installed. " + f"Please install it to use this functionality." + ) from e def __getattr__(self, item: str) -> object: """Loads the module (if not already loaded) and returns the requested attribute. diff --git a/kornia/geometry/transform/affwarp.py b/kornia/geometry/transform/affwarp.py index b0abb14fe1..61cf05f4d4 100644 --- a/kornia/geometry/transform/affwarp.py +++ b/kornia/geometry/transform/affwarp.py @@ -570,8 +570,10 @@ def resize( aspect_ratio = w / h size = _side_to_image_size(size, aspect_ratio, side) - if size == input_size: - return input + # Skip this dangerous if-else when converting to ONNX. + if not torch.onnx.is_in_onnx_export(): + if size == input_size: + return input factors = (h / size[0], w / size[1]) diff --git a/kornia/models/detector/__init__.py b/kornia/models/detector/__init__.py index e69de29bb2..55a62efc22 100644 --- a/kornia/models/detector/__init__.py +++ b/kornia/models/detector/__init__.py @@ -0,0 +1 @@ +from .rtdetr import * diff --git a/kornia/models/detector/rtdetr.py b/kornia/models/detector/rtdetr.py index e8a963cd0d..492d7ea167 100644 --- a/kornia/models/detector/rtdetr.py +++ b/kornia/models/detector/rtdetr.py @@ -9,6 +9,8 @@ from kornia.contrib.object_detection import ObjectDetector, ResizePreProcessor from kornia.core import rand +__all__ = ["RTDETRDetectorBuilder"] + class RTDETRDetectorBuilder: """A builder class for constructing RT-DETR object detection models. @@ -72,13 +74,13 @@ def build( @staticmethod def to_onnx( - onnx_name: Optional[str] = None, model_name: Optional[str] = None, + onnx_name: Optional[str] = None, config: Optional[RTDETRConfig] = None, pretrained: bool = True, image_size: Optional[int] = 640, confidence_threshold: float = 0.5, - ) -> None: + ) -> tuple[str, ObjectDetector]: """Exports an RT-DETR object detection model to ONNX format. Either `model_name` or `config` must be provided. If neither is provided, @@ -99,8 +101,8 @@ def to_onnx( The confidence threshold used during post-processing to filter detections. Returns: - ObjectDetector - An object detector instance initialized with the specified model, preprocessor, and post-processor. + - The name of the ONNX model. + - The exported torch model. 
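+
+        Example:
+            A minimal sketch of consuming the returned pair (all arguments other than
+            `model_name` keep their defaults):
+
+            >>> onnx_name, detector = RTDETRDetectorBuilder.to_onnx(model_name="rtdetr_r18vd")  # doctest: +SKIP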
""" detector = RTDETRDetectorBuilder.build( @@ -118,15 +120,14 @@ def to_onnx( _model_name = "rtdetr_r18vd" onnx_name = f"Kornia-RTDETR-{_model_name}-{image_size}.onnx" + val_image = rand(1, 3, image_size, image_size) if image_size is None: val_image = rand(1, 3, 640, 640) - dynamic_axes = { - "input": {0: "batch_size", 2: "height", 3: "width"}, - "output": {0: "batch_size", 2: "height", 3: "width"}, - } - else: - val_image = rand(1, 3, image_size, image_size) - dynamic_axes = {"input": {0: "batch_size"}, "output": {0: "batch_size"}} + + dynamic_axes = { + "input": {0: "batch_size", 2: "height", 3: "width"}, + "output": {0: "batch_size"} + } torch.onnx.export( detector, val_image, @@ -138,3 +139,5 @@ def to_onnx( output_names=["output"], dynamic_axes=dynamic_axes, ) + + return onnx_name, detector diff --git a/kornia/utils/image.py b/kornia/utils/image.py index a3f6a76393..05293e757d 100644 --- a/kornia/utils/image.py +++ b/kornia/utils/image.py @@ -264,7 +264,7 @@ def _wrapper(input: Tensor, *args: Any, **kwargs: Any) -> Tensor: if not isinstance(input, Tensor): raise TypeError(f"Input input type is not a Tensor. Got {type(input)}") - if input.numel() == 0: + if input.shape.numel() == 0: raise ValueError("Invalid input tensor, it is empty.") input_shape = input.shape From 6f4b5f306e47403e0d49229e50fa0143524b5ecc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 8 Sep 2024 17:42:33 +0000 Subject: [PATCH 30/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- kornia/contrib/models/rt_detr/post_processor.py | 3 ++- kornia/contrib/object_detection.py | 2 +- kornia/core/external.py | 8 ++++---- kornia/models/detector/rtdetr.py | 5 +---- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/kornia/contrib/models/rt_detr/post_processor.py b/kornia/contrib/models/rt_detr/post_processor.py index 8f79953bfc..17ff06315d 100644 --- a/kornia/contrib/models/rt_detr/post_processor.py +++ b/kornia/contrib/models/rt_detr/post_processor.py @@ -79,4 +79,5 @@ def forward(self, logits: Tensor, boxes: Tensor, original_sizes: Tensor) -> Tens all_boxes = concatenate([labels[..., None], scores[..., None], boxes], -1) return all_boxes[(all_boxes[:, :, 1] > self.confidence_threshold).unsqueeze(-1).expand_as(all_boxes)].view( - all_boxes.shape[0], -1, all_boxes.shape[-1]) + all_boxes.shape[0], -1, all_boxes.shape[-1] + ) diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index cffbac333c..ed0131511e 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -8,7 +8,7 @@ import torch -from kornia.core import Module, Tensor, as_tensor, concatenate +from kornia.core import Module, Tensor, concatenate from kornia.core.check import KORNIA_CHECK_SHAPE from kornia.core.external import PILImage as Image from kornia.core.external import numpy as np diff --git a/kornia/core/external.py b/kornia/core/external.py index cee492f250..fa0af1a906 100644 --- a/kornia/core/external.py +++ b/kornia/core/external.py @@ -1,6 +1,5 @@ import importlib import subprocess - from types import ModuleType from typing import List, Optional @@ -16,6 +15,7 @@ class LazyLoader: module_name: The name of the module to be lazily loaded. module: The actual module object, initialized to None and loaded upon first access. 
""" + auto_install: bool = False def __init__(self, module_name: str) -> None: @@ -29,7 +29,7 @@ def __init__(self, module_name: str) -> None: def _install_package(self, module_name: str) -> None: print(f"Installing `{self.module_name}` ...") - subprocess.run(["pip", "install", "-U", self.module_name]) + subprocess.run(["pip", "install", "-U", self.module_name], check=False) def _load(self) -> None: """Loads the module if it hasn't been loaded yet. @@ -49,9 +49,9 @@ def _load(self) -> None: "Do you wish to install the dependency? [Y]es, [N]o, [A]ll." ) if if_install.lower() == "y": - subprocess.run(["pip", "install", "-U", self.module_name]) + subprocess.run(["pip", "install", "-U", self.module_name], check=False) elif if_install.lower() == "a": - subprocess.run(["pip", "install", "-U", self.module_name]) + subprocess.run(["pip", "install", "-U", self.module_name], check=False) self.auto_install = True else: raise ImportError( diff --git a/kornia/models/detector/rtdetr.py b/kornia/models/detector/rtdetr.py index 492d7ea167..d62a638df7 100644 --- a/kornia/models/detector/rtdetr.py +++ b/kornia/models/detector/rtdetr.py @@ -124,10 +124,7 @@ def to_onnx( if image_size is None: val_image = rand(1, 3, 640, 640) - dynamic_axes = { - "input": {0: "batch_size", 2: "height", 3: "width"}, - "output": {0: "batch_size"} - } + dynamic_axes = {"input": {0: "batch_size", 2: "height", 3: "width"}, "output": {0: "batch_size"}} torch.onnx.export( detector, val_image, From f4a8b128a0e03169f894f89e950cca1b07c726f7 Mon Sep 17 00:00:00 2001 From: Jian S Date: Mon, 9 Sep 2024 02:34:51 +0800 Subject: [PATCH 31/46] update --- kornia/contrib/models/rt_detr/post_processor.py | 8 +++++++- kornia/models/detector/rtdetr.py | 15 ++++++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/kornia/contrib/models/rt_detr/post_processor.py b/kornia/contrib/models/rt_detr/post_processor.py index 17ff06315d..9538364def 100644 --- a/kornia/contrib/models/rt_detr/post_processor.py +++ b/kornia/contrib/models/rt_detr/post_processor.py @@ -1,6 +1,7 @@ """Post-processor for the RT-DETR model.""" from __future__ import annotations +from typing import Optional import torch @@ -31,9 +32,11 @@ def mod(a: Tensor, b: int) -> Tensor: # TODO: deprecate the confidence threshold and add the num_top_queries as a parameter and num_classes as a parameter class DETRPostProcessor(Module): - def __init__(self, confidence_threshold: float) -> None: + def __init__(self, confidence_threshold: Optional[float] = None, num_classes: int = 80, confidence_filtering: bool = True) -> None: super().__init__() self.confidence_threshold = confidence_threshold + self.num_classes = num_classes + self.confidence_filtering = confidence_filtering def forward(self, logits: Tensor, boxes: Tensor, original_sizes: Tensor) -> Tensor: """Post-process outputs from DETR. 
@@ -78,6 +81,9 @@ def forward(self, logits: Tensor, boxes: Tensor, original_sizes: Tensor) -> Tens all_boxes = concatenate([labels[..., None], scores[..., None], boxes], -1) + if not self.confidence_filtering or self.confidence_threshold == 0: + return all_boxes + return all_boxes[(all_boxes[:, :, 1] > self.confidence_threshold).unsqueeze(-1).expand_as(all_boxes)].view( all_boxes.shape[0], -1, all_boxes.shape[-1] ) diff --git a/kornia/models/detector/rtdetr.py b/kornia/models/detector/rtdetr.py index d62a638df7..523c4564ea 100644 --- a/kornia/models/detector/rtdetr.py +++ b/kornia/models/detector/rtdetr.py @@ -27,6 +27,7 @@ def build( pretrained: bool = True, image_size: Optional[int] = 640, confidence_threshold: float = 0.5, + confidence_filtering: Optional[bool] = None, ) -> ObjectDetector: """Builds and returns an RT-DETR object detector model. @@ -47,6 +48,9 @@ def build( [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]. confidence_threshold: The confidence threshold used during post-processing to filter detections. + confidence_filtering: + If to perform filtering on resulting boxes. If None, the filtering will be blocked when exporting + to ONNX, while it would perform as per confidence_threshold when build the model. Returns: ObjectDetector @@ -69,7 +73,11 @@ def build( return ObjectDetector( model, ResizePreProcessor((image_size, image_size)) if image_size is not None else nn.Identity(), - DETRPostProcessor(confidence_threshold), + DETRPostProcessor( + confidence_threshold, + num_classes=config.num_classes if config is not None else 80, + confidence_filtering=confidence_filtering or not torch.onnx.is_in_onnx_export + ), ) @staticmethod @@ -80,6 +88,7 @@ def to_onnx( pretrained: bool = True, image_size: Optional[int] = 640, confidence_threshold: float = 0.5, + confidence_filtering: Optional[bool] = None, ) -> tuple[str, ObjectDetector]: """Exports an RT-DETR object detection model to ONNX format. @@ -99,6 +108,9 @@ def to_onnx( [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]. confidence_threshold: The confidence threshold used during post-processing to filter detections. + confidence_filtering: + If to perform filtering on resulting boxes. If None, the filtering will be blocked when exporting + to ONNX, while it would perform as per confidence_threshold when build the model. Returns: - The name of the ONNX model. 
@@ -111,6 +123,7 @@ def to_onnx( pretrained=pretrained, image_size=image_size, confidence_threshold=confidence_threshold, + confidence_filtering=confidence_filtering, ) if onnx_name is None: _model_name = model_name From 9f6a0c2070a1603e9ec0dadbbb5ccce23ecaa7d5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 8 Sep 2024 18:35:26 +0000 Subject: [PATCH 32/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- kornia/contrib/models/rt_detr/post_processor.py | 5 ++++- kornia/models/detector/rtdetr.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/kornia/contrib/models/rt_detr/post_processor.py b/kornia/contrib/models/rt_detr/post_processor.py index 9538364def..a977e940e3 100644 --- a/kornia/contrib/models/rt_detr/post_processor.py +++ b/kornia/contrib/models/rt_detr/post_processor.py @@ -1,6 +1,7 @@ """Post-processor for the RT-DETR model.""" from __future__ import annotations + from typing import Optional import torch @@ -32,7 +33,9 @@ def mod(a: Tensor, b: int) -> Tensor: # TODO: deprecate the confidence threshold and add the num_top_queries as a parameter and num_classes as a parameter class DETRPostProcessor(Module): - def __init__(self, confidence_threshold: Optional[float] = None, num_classes: int = 80, confidence_filtering: bool = True) -> None: + def __init__( + self, confidence_threshold: Optional[float] = None, num_classes: int = 80, confidence_filtering: bool = True + ) -> None: super().__init__() self.confidence_threshold = confidence_threshold self.num_classes = num_classes diff --git a/kornia/models/detector/rtdetr.py b/kornia/models/detector/rtdetr.py index 523c4564ea..0e35953b00 100644 --- a/kornia/models/detector/rtdetr.py +++ b/kornia/models/detector/rtdetr.py @@ -76,7 +76,7 @@ def build( DETRPostProcessor( confidence_threshold, num_classes=config.num_classes if config is not None else 80, - confidence_filtering=confidence_filtering or not torch.onnx.is_in_onnx_export + confidence_filtering=confidence_filtering or not torch.onnx.is_in_onnx_export, ), ) From 33e454c75ed89b523c24b5378616d834b6a95ee0 Mon Sep 17 00:00:00 2001 From: Jian S Date: Mon, 9 Sep 2024 02:44:59 +0800 Subject: [PATCH 33/46] update --- kornia/contrib/object_detection.py | 11 +++++++---- kornia/core/external.py | 5 ++++- kornia/models/detector/rtdetr.py | 3 ++- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index ed0131511e..322e787839 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -4,6 +4,7 @@ import os from dataclasses import dataclass from enum import Enum +import logging from typing import Optional, Union import torch @@ -25,6 +26,8 @@ "ObjectDetectorResult", ] +logger = logging.getLogger(__name__) + class BoundingBoxDataFormat(Enum): """Enum class that maps bounding box data format.""" @@ -126,10 +129,10 @@ def forward(self, imgs: Union[Tensor, list[Tensor]]) -> tuple[Tensor, Tensor]: original_sizes: the original image sizes of (height, width). """ # TODO: support other input formats e.g. 
file path, numpy - resized_imgs, original_sizes = [], [] + resized_imgs: list[Tensor] = [] iters = len(imgs) if isinstance(imgs, list) else imgs.shape[0] - original_sizes = imgs.new_zeros((imgs.shape[0], 2)) + original_sizes = imgs.new_zeros((iters, 2)) for i in range(iters): img = imgs[i] original_sizes[i, 0] = img.shape[-2] # Height @@ -159,7 +162,7 @@ def __init__(self, model: Module, pre_processor: Module, post_processor: Module) self.post_processor = post_processor.eval() @torch.inference_mode() - def forward(self, images: Union[Tensor, list[Tensor]]) -> list[Tensor]: + def forward(self, images: Union[Tensor, list[Tensor]]) -> Tensor: """Detect objects in a given list of images. Args: @@ -219,7 +222,7 @@ def save( os.path.join(directory, f"{str(i).zfill(6)}.jpg"), out_image.mul(255.0).byte(), ) - print(f"Outputs are saved in {directory}") + logger.info(f"Outputs are saved in {directory}") def compile( self, diff --git a/kornia/core/external.py b/kornia/core/external.py index fa0af1a906..2d61c2fb30 100644 --- a/kornia/core/external.py +++ b/kornia/core/external.py @@ -2,6 +2,9 @@ import subprocess from types import ModuleType from typing import List, Optional +import logging + +logger = logging.getLogger(__name__) class LazyLoader: @@ -28,7 +31,7 @@ def __init__(self, module_name: str) -> None: self.module: Optional[ModuleType] = None def _install_package(self, module_name: str) -> None: - print(f"Installing `{self.module_name}` ...") + logger.info(f"Installing `{self.module_name}` ...") subprocess.run(["pip", "install", "-U", self.module_name], check=False) def _load(self) -> None: diff --git a/kornia/models/detector/rtdetr.py b/kornia/models/detector/rtdetr.py index 523c4564ea..c84c3f876a 100644 --- a/kornia/models/detector/rtdetr.py +++ b/kornia/models/detector/rtdetr.py @@ -133,9 +133,10 @@ def to_onnx( _model_name = "rtdetr_r18vd" onnx_name = f"Kornia-RTDETR-{_model_name}-{image_size}.onnx" - val_image = rand(1, 3, image_size, image_size) if image_size is None: val_image = rand(1, 3, 640, 640) + else: + val_image = rand(1, 3, image_size, image_size) dynamic_axes = {"input": {0: "batch_size", 2: "height", 3: "width"}, "output": {0: "batch_size"}} torch.onnx.export( From 2f3f531a4f9c9aab757951db7a7d8fbea687d025 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 8 Sep 2024 18:45:48 +0000 Subject: [PATCH 34/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- kornia/contrib/object_detection.py | 2 +- kornia/core/external.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index 322e787839..2fdb50cae2 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -1,10 +1,10 @@ from __future__ import annotations import datetime +import logging import os from dataclasses import dataclass from enum import Enum -import logging from typing import Optional, Union import torch diff --git a/kornia/core/external.py b/kornia/core/external.py index 2d61c2fb30..0f467eb60f 100644 --- a/kornia/core/external.py +++ b/kornia/core/external.py @@ -1,8 +1,8 @@ import importlib +import logging import subprocess from types import ModuleType from typing import List, Optional -import logging logger = logging.getLogger(__name__) From 6790ed1c65f6745a4bcb9952e00f48eebb08c365 Mon Sep 17 00:00:00 2001 From: Jian S Date: Mon, 9 Sep 2024 02:51:40 +0800 Subject: [PATCH 
35/46] update --- kornia/contrib/object_detection.py | 3 +-- kornia/core/external.py | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index 322e787839..d669b426e8 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -132,13 +132,12 @@ def forward(self, imgs: Union[Tensor, list[Tensor]]) -> tuple[Tensor, Tensor]: resized_imgs: list[Tensor] = [] iters = len(imgs) if isinstance(imgs, list) else imgs.shape[0] - original_sizes = imgs.new_zeros((iters, 2)) + original_sizes = imgs[0].new_zeros((iters, 2)) for i in range(iters): img = imgs[i] original_sizes[i, 0] = img.shape[-2] # Height original_sizes[i, 1] = img.shape[-1] # Width resized_imgs.append( - # TODO: fix kornia resize warnings resize(img[None], size=self.size, interpolation=self.interpolation_mode) ) return concatenate(resized_imgs), original_sizes diff --git a/kornia/core/external.py b/kornia/core/external.py index 2d61c2fb30..33ccf18c01 100644 --- a/kornia/core/external.py +++ b/kornia/core/external.py @@ -32,7 +32,7 @@ def __init__(self, module_name: str) -> None: def _install_package(self, module_name: str) -> None: logger.info(f"Installing `{self.module_name}` ...") - subprocess.run(["pip", "install", "-U", self.module_name], check=False) + subprocess.run(["pip", "install", "-U", self.module_name], shell=False) def _load(self) -> None: """Loads the module if it hasn't been loaded yet. @@ -52,10 +52,10 @@ def _load(self) -> None: "Do you wish to install the dependency? [Y]es, [N]o, [A]ll." ) if if_install.lower() == "y": - subprocess.run(["pip", "install", "-U", self.module_name], check=False) + self._install_package(self.module_name) elif if_install.lower() == "a": - subprocess.run(["pip", "install", "-U", self.module_name], check=False) self.auto_install = True + self._install_package(self.module_name) else: raise ImportError( f"Optional dependency '{self.module_name}' is not installed. 
" From fb07bb9ff99dbd765ab39a30d0ddf8afc51e6591 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 8 Sep 2024 18:52:07 +0000 Subject: [PATCH 36/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- kornia/contrib/object_detection.py | 4 +--- kornia/core/external.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index acdbae0c9f..4a4b965dc2 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -137,9 +137,7 @@ def forward(self, imgs: Union[Tensor, list[Tensor]]) -> tuple[Tensor, Tensor]: img = imgs[i] original_sizes[i, 0] = img.shape[-2] # Height original_sizes[i, 1] = img.shape[-1] # Width - resized_imgs.append( - resize(img[None], size=self.size, interpolation=self.interpolation_mode) - ) + resized_imgs.append(resize(img[None], size=self.size, interpolation=self.interpolation_mode)) return concatenate(resized_imgs), original_sizes diff --git a/kornia/core/external.py b/kornia/core/external.py index 5ecf97a5b8..40bb7ca7b3 100644 --- a/kornia/core/external.py +++ b/kornia/core/external.py @@ -32,7 +32,7 @@ def __init__(self, module_name: str) -> None: def _install_package(self, module_name: str) -> None: logger.info(f"Installing `{self.module_name}` ...") - subprocess.run(["pip", "install", "-U", self.module_name], shell=False) + subprocess.run(["pip", "install", "-U", self.module_name], shell=False, check=False) def _load(self) -> None: """Loads the module if it hasn't been loaded yet. From e84ab3a761c6322e1bf631f5987de1a9e0687a8a Mon Sep 17 00:00:00 2001 From: Jian S Date: Mon, 9 Sep 2024 02:55:35 +0800 Subject: [PATCH 37/46] update --- kornia/core/external.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kornia/core/external.py b/kornia/core/external.py index 5ecf97a5b8..66c022d779 100644 --- a/kornia/core/external.py +++ b/kornia/core/external.py @@ -32,7 +32,7 @@ def __init__(self, module_name: str) -> None: def _install_package(self, module_name: str) -> None: logger.info(f"Installing `{self.module_name}` ...") - subprocess.run(["pip", "install", "-U", self.module_name], shell=False) + subprocess.run(["pip", "install", "-U", self.module_name], shell=True) def _load(self) -> None: """Loads the module if it hasn't been loaded yet. From 5f49f2801d4fed242e102c42f6b5544856080b1b Mon Sep 17 00:00:00 2001 From: Jian S Date: Mon, 9 Sep 2024 02:56:29 +0800 Subject: [PATCH 38/46] update --- kornia/core/external.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kornia/core/external.py b/kornia/core/external.py index 66c022d779..ab992fb4a7 100644 --- a/kornia/core/external.py +++ b/kornia/core/external.py @@ -31,8 +31,8 @@ def __init__(self, module_name: str) -> None: self.module: Optional[ModuleType] = None def _install_package(self, module_name: str) -> None: - logger.info(f"Installing `{self.module_name}` ...") - subprocess.run(["pip", "install", "-U", self.module_name], shell=True) + logger.info(f"Installing `{module_name}` ...") + subprocess.run(["pip", "install", "-U", module_name], shell=True) def _load(self) -> None: """Loads the module if it hasn't been loaded yet. 
From 41d94fffd449325e324bcf575da3d73d5ddf3c69 Mon Sep 17 00:00:00 2001 From: Jian S Date: Mon, 9 Sep 2024 02:58:29 +0800 Subject: [PATCH 39/46] update --- kornia/core/external.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/kornia/core/external.py b/kornia/core/external.py index 6d793a61cf..5f56c669c6 100644 --- a/kornia/core/external.py +++ b/kornia/core/external.py @@ -31,13 +31,8 @@ def __init__(self, module_name: str) -> None: self.module: Optional[ModuleType] = None def _install_package(self, module_name: str) -> None: -<<<<<<< HEAD logger.info(f"Installing `{module_name}` ...") - subprocess.run(["pip", "install", "-U", module_name], shell=True) -======= - logger.info(f"Installing `{self.module_name}` ...") - subprocess.run(["pip", "install", "-U", self.module_name], shell=False, check=False) ->>>>>>> fb07bb9ff99dbd765ab39a30d0ddf8afc51e6591 + subprocess.run(["pip", "install", "-U", module_name], shell=False, check=False) def _load(self) -> None: """Loads the module if it hasn't been loaded yet. From 0dd67b5690bb50fbe93b2db4279cddf56695c484 Mon Sep 17 00:00:00 2001 From: Jian S Date: Mon, 9 Sep 2024 05:22:59 +0800 Subject: [PATCH 40/46] update --- kornia/contrib/models/rt_detr/post_processor.py | 12 ++++++------ tests/contrib/test_object_detector.py | 4 ++-- tests/core/test_lazyloader.py | 4 +++- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/kornia/contrib/models/rt_detr/post_processor.py b/kornia/contrib/models/rt_detr/post_processor.py index a977e940e3..5a600a3765 100644 --- a/kornia/contrib/models/rt_detr/post_processor.py +++ b/kornia/contrib/models/rt_detr/post_processor.py @@ -34,12 +34,14 @@ def mod(a: Tensor, b: int) -> Tensor: # TODO: deprecate the confidence threshold and add the num_top_queries as a parameter and num_classes as a parameter class DETRPostProcessor(Module): def __init__( - self, confidence_threshold: Optional[float] = None, num_classes: int = 80, confidence_filtering: bool = True + self, confidence_threshold: Optional[float] = None, num_classes: int = 80, + num_top_queries: int = 300, confidence_filtering: bool = True ) -> None: super().__init__() self.confidence_threshold = confidence_threshold self.num_classes = num_classes self.confidence_filtering = confidence_filtering + self.num_top_queries = num_top_queries def forward(self, logits: Tensor, boxes: Tensor, original_sizes: Tensor) -> Tensor: """Post-process outputs from DETR. 
@@ -75,11 +77,9 @@ def forward(self, logits: Tensor, boxes: Tensor, original_sizes: Tensor) -> Tens # retrieve the boxes with the highest score for each class # https://github.com/lyuwenyu/RT-DETR/blob/b6bf0200b249a6e35b44e0308b6058f55b99696b/rtdetrv2_pytorch/src/zoo/rtdetr/rtdetr_postprocessor.py#L55-L62 - num_top_queries: int = 300 # TODO: make this configurable - num_classes: int = 80 # TODO: make this configurable - scores, index = torch.topk(scores.flatten(1), num_top_queries, dim=-1) - labels = mod(index, num_classes) - index = index // num_classes + scores, index = torch.topk(scores.flatten(1), self.num_top_queries, dim=-1) + labels = mod(index, self.num_classes) + index = index // self.num_classes boxes = boxes_xy.gather(dim=1, index=index.unsqueeze(-1).repeat(1, 1, boxes_xy.shape[-1])) all_boxes = concatenate([labels[..., None], scores[..., None], boxes], -1) diff --git a/tests/contrib/test_object_detector.py b/tests/contrib/test_object_detector.py index dd5c07aacb..e82facace0 100644 --- a/tests/contrib/test_object_detector.py +++ b/tests/contrib/test_object_detector.py @@ -17,7 +17,7 @@ def test_smoke(self, device, dtype): config = RTDETRConfig("resnet50d", 10, head_num_queries=10) model = RTDETR.from_config(config).to(device, dtype).eval() pre_processor = kornia.contrib.object_detection.ResizePreProcessor((32, 32)) - post_processor = DETRPostProcessor(confidence).to(device, dtype).eval() + post_processor = DETRPostProcessor(confidence, num_top_queries=3).to(device, dtype).eval() detector = kornia.contrib.ObjectDetector(model, pre_processor, post_processor) sizes = torch.randint(5, 10, (batch_size, 2)) * 32 @@ -40,7 +40,7 @@ def test_onnx(self, device, dtype, tmp_path: Path, variant: str): config = RTDETRConfig(variant, 1) model = RTDETR.from_config(config).to(device=device, dtype=dtype).eval() pre_processor = kornia.contrib.object_detection.ResizePreProcessor(640) - post_processor = DETRPostProcessor(0.3) + post_processor = DETRPostProcessor(0.3, num_top_queries=3) detector = kornia.contrib.ObjectDetector(model, pre_processor, post_processor) data = torch.rand(3, 400, 640, device=device, dtype=dtype) diff --git a/tests/core/test_lazyloader.py b/tests/core/test_lazyloader.py index 1025f97827..13c227a32a 100644 --- a/tests/core/test_lazyloader.py +++ b/tests/core/test_lazyloader.py @@ -1,4 +1,5 @@ import pytest +from io import StringIO from kornia.core.external import LazyLoader @@ -19,7 +20,8 @@ def test_lazy_loader_loading_module(self): assert loader.sqrt(4) == 2.0 assert loader.module is not None # Should be loaded now - def test_lazy_loader_invalid_module(self): + def test_lazy_loader_invalid_module(self, monkeypatch): + monkeypatch.setattr('sys.stdin', StringIO("n")) # Test that LazyLoader raises an ImportError for an invalid module loader = LazyLoader("non_existent_module") with pytest.raises(ImportError) as excinfo: From 03edce3f371a43d724ea09173424ce5599860afb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 8 Sep 2024 21:23:15 +0000 Subject: [PATCH 41/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- kornia/contrib/models/rt_detr/post_processor.py | 7 +++++-- tests/core/test_lazyloader.py | 5 +++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/kornia/contrib/models/rt_detr/post_processor.py b/kornia/contrib/models/rt_detr/post_processor.py index 5a600a3765..ca1d37cd08 100644 --- a/kornia/contrib/models/rt_detr/post_processor.py 
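The reworked forward() above selects the global top-k (query, class) pairs from the flattened score tensor and recovers the class id and query index from the flattened position. A standalone sketch of that selection in plain torch, with made-up tensor sizes and % in place of the file's ONNX-friendly mod() helper:

    import torch

    batch, num_queries, num_classes, num_top = 2, 5, 4, 3
    scores = torch.rand(batch, num_queries, num_classes)   # per-query class scores
    boxes_xy = torch.rand(batch, num_queries, 4)           # per-query boxes

    # Flatten (query, class) into one axis and take the top-k scores per image.
    top_scores, index = torch.topk(scores.flatten(1), num_top, dim=-1)
    labels = index % num_classes       # class id encoded in the flattened index
    query_idx = index // num_classes   # which query produced the score

    # Pull out the boxes belonging to the selected queries.
    top_boxes = boxes_xy.gather(1, query_idx.unsqueeze(-1).repeat(1, 1, boxes_xy.shape[-1]))
    assert top_boxes.shape == (batch, num_top, 4)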
+++ b/kornia/contrib/models/rt_detr/post_processor.py @@ -34,8 +34,11 @@ def mod(a: Tensor, b: int) -> Tensor: # TODO: deprecate the confidence threshold and add the num_top_queries as a parameter and num_classes as a parameter class DETRPostProcessor(Module): def __init__( - self, confidence_threshold: Optional[float] = None, num_classes: int = 80, - num_top_queries: int = 300, confidence_filtering: bool = True + self, + confidence_threshold: Optional[float] = None, + num_classes: int = 80, + num_top_queries: int = 300, + confidence_filtering: bool = True, ) -> None: super().__init__() self.confidence_threshold = confidence_threshold diff --git a/tests/core/test_lazyloader.py b/tests/core/test_lazyloader.py index 13c227a32a..b832de99c8 100644 --- a/tests/core/test_lazyloader.py +++ b/tests/core/test_lazyloader.py @@ -1,6 +1,7 @@ -import pytest from io import StringIO +import pytest + from kornia.core.external import LazyLoader @@ -21,7 +22,7 @@ def test_lazy_loader_loading_module(self): assert loader.module is not None # Should be loaded now def test_lazy_loader_invalid_module(self, monkeypatch): - monkeypatch.setattr('sys.stdin', StringIO("n")) + monkeypatch.setattr("sys.stdin", StringIO("n")) # Test that LazyLoader raises an ImportError for an invalid module loader = LazyLoader("non_existent_module") with pytest.raises(ImportError) as excinfo: From 11c6f7f9ecea37d41aa77115e816078dd880df8b Mon Sep 17 00:00:00 2001 From: Jian S Date: Mon, 9 Sep 2024 05:26:27 +0800 Subject: [PATCH 42/46] update --- kornia/core/external.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kornia/core/external.py b/kornia/core/external.py index 5f56c669c6..1e0160035d 100644 --- a/kornia/core/external.py +++ b/kornia/core/external.py @@ -1,6 +1,7 @@ import importlib import logging import subprocess +import sys from types import ModuleType from typing import List, Optional @@ -32,7 +33,7 @@ def __init__(self, module_name: str) -> None: def _install_package(self, module_name: str) -> None: logger.info(f"Installing `{module_name}` ...") - subprocess.run(["pip", "install", "-U", module_name], shell=False, check=False) + subprocess.run([sys.executable, "-m", "pip", "install", "-U", module_name], shell=False, check=False) # noqa: S603 def _load(self) -> None: """Loads the module if it hasn't been loaded yet. 
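Patch 42 above switches the installer to invoke pip through the running interpreter. Calling pip via sys.executable guarantees the package lands in the same environment that imports kornia, even when the bare `pip` on PATH belongs to a different Python; the stdin monkeypatch added to the test also suggests the loader asks for confirmation before installing. A sketch of the resulting helper in isolation:

    import subprocess
    import sys

    def install_package(package: str) -> None:
        # Install into the environment of the interpreter currently running,
        # not whichever `pip` happens to be first on PATH.
        subprocess.run(
            [sys.executable, "-m", "pip", "install", "-U", package],
            shell=False,
            check=False,
        )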
From 365de3cb5b8b8520f208cc6a4bb327e60fc546db Mon Sep 17 00:00:00 2001 From: Jian S Date: Mon, 9 Sep 2024 06:08:21 +0800 Subject: [PATCH 43/46] update --- kornia/contrib/models/rt_detr/post_processor.py | 2 -- kornia/geometry/transform/affwarp.py | 3 +++ tests/contrib/test_object_detector.py | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/kornia/contrib/models/rt_detr/post_processor.py b/kornia/contrib/models/rt_detr/post_processor.py index ca1d37cd08..95b35623df 100644 --- a/kornia/contrib/models/rt_detr/post_processor.py +++ b/kornia/contrib/models/rt_detr/post_processor.py @@ -25,8 +25,6 @@ def mod(a: Tensor, b: int) -> Tensor: Example: >>> mod(7, 3) 1 - >>> mod(8.5, 3.2) - 2.1 """ return a - (a // b) * b diff --git a/kornia/geometry/transform/affwarp.py b/kornia/geometry/transform/affwarp.py index 61cf05f4d4..3e85a1da72 100644 --- a/kornia/geometry/transform/affwarp.py +++ b/kornia/geometry/transform/affwarp.py @@ -1,6 +1,7 @@ from typing import Optional, Tuple, Union import torch +import warnings from torch import nn from kornia.core import ones, ones_like, zeros @@ -567,6 +568,8 @@ def resize( input_size = h, w = input.shape[-2:] if isinstance(size, int): + if torch.onnx.is_in_onnx_export(): + warnings.warn("Please pass the size with a tuple when exporting to ONNX to correct the tracing.") aspect_ratio = w / h size = _side_to_image_size(size, aspect_ratio, side) diff --git a/tests/contrib/test_object_detector.py b/tests/contrib/test_object_detector.py index e82facace0..00eee98b81 100644 --- a/tests/contrib/test_object_detector.py +++ b/tests/contrib/test_object_detector.py @@ -39,7 +39,7 @@ def test_smoke(self, device, dtype): def test_onnx(self, device, dtype, tmp_path: Path, variant: str): config = RTDETRConfig(variant, 1) model = RTDETR.from_config(config).to(device=device, dtype=dtype).eval() - pre_processor = kornia.contrib.object_detection.ResizePreProcessor(640) + pre_processor = kornia.contrib.object_detection.ResizePreProcessor((640, 640)) post_processor = DETRPostProcessor(0.3, num_top_queries=3) detector = kornia.contrib.ObjectDetector(model, pre_processor, post_processor) @@ -55,7 +55,7 @@ def test_onnx(self, device, dtype, tmp_path: Path, variant: str): input_names=["images"], output_names=["detections"], dynamic_axes=dynamic_axes, - opset_version=16, + opset_version=17, ) assert model_path.is_file() From 224392ee54710aac17bcd08960b8487e14d7adc6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 8 Sep 2024 22:08:39 +0000 Subject: [PATCH 44/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- kornia/geometry/transform/affwarp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kornia/geometry/transform/affwarp.py b/kornia/geometry/transform/affwarp.py index 3e85a1da72..c85c5ddf3d 100644 --- a/kornia/geometry/transform/affwarp.py +++ b/kornia/geometry/transform/affwarp.py @@ -1,7 +1,7 @@ +import warnings from typing import Optional, Tuple, Union import torch -import warnings from torch import nn from kornia.core import ones, ones_like, zeros From 0a2da30fbc72ac97891d8cb50dfad0524b4bc950 Mon Sep 17 00:00:00 2001 From: Jian S Date: Mon, 9 Sep 2024 15:33:30 +0800 Subject: [PATCH 45/46] update --- kornia/contrib/models/rt_detr/model.py | 10 +++++++++- kornia/models/detector/rtdetr.py | 4 ++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/kornia/contrib/models/rt_detr/model.py 
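Two behavioural points in patch 43 above. The dropped doctest presumably failed because the expression behind mod() does not reproduce the literal 2.1 under IEEE-754 doubles:

    a, b = 8.5, 3.2
    print(a - (a // b) * b)   # 2.0999999999999996, so the expected `2.1` cannot match

The new warning in resize() points the same direction as the test change from ResizePreProcessor(640) to ResizePreProcessor((640, 640)): when size is a single int, the output size is derived from the sample input's aspect ratio, which the tracer can bake into the exported ONNX graph, so a tuple is the safer choice for export.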
b/kornia/contrib/models/rt_detr/model.py index 6871b85260..512585b9e6 100644 --- a/kornia/contrib/models/rt_detr/model.py +++ b/kornia/contrib/models/rt_detr/model.py @@ -34,6 +34,7 @@ class RTDETRModelType(Enum): resnet101d = 3 hgnetv2_l = 4 hgnetv2_x = 5 + resnet50d_m = 6 @dataclass @@ -130,6 +131,13 @@ def from_config(config: RTDETRConfig) -> RTDETR: head_num_decoder_layers = config.head_num_decoder_layers or 6 neck_expansion = config.neck_expansion or 1.0 + elif model_type == RTDETRModelType.resnet50d_m: + backbone = ResNetD.from_config(50) + neck_hidden_dim = config.neck_hidden_dim or 256 + neck_dim_feedforward = config.neck_dim_feedforward or 1024 + head_num_decoder_layers = config.head_num_decoder_layers or 6 + neck_expansion = config.neck_expansion or .5 + elif model_type == RTDETRModelType.resnet101d: backbone = ResNetD.from_config(101) neck_hidden_dim = config.neck_hidden_dim or 384 @@ -227,7 +235,7 @@ def from_name(model_name: str, num_classes: int = 80) -> RTDETR: elif model_name == "rtdetr_r34vd": model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet34d, num_classes)) elif model_name == "rtdetr_r50vd_m": - model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet50d, num_classes)) + model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet50d_m, num_classes)) elif model_name == "rtdetr_r50vd": model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet50d, num_classes)) elif model_name == "rtdetr_r101vd": diff --git a/kornia/models/detector/rtdetr.py b/kornia/models/detector/rtdetr.py index d8ce489586..4bc1c6bd2d 100644 --- a/kornia/models/detector/rtdetr.py +++ b/kornia/models/detector/rtdetr.py @@ -45,7 +45,7 @@ def build( image_size: The size to which input images will be resized during preprocessing. If None, no resizing will be performed before passing to the model. Recommended scales include - [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]. + [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]. confidence_threshold: The confidence threshold used during post-processing to filter detections. confidence_filtering: @@ -105,7 +105,7 @@ def to_onnx( image_size: The size to which input images will be resized during preprocessing. If None, image_size will be dynamic. Recommended scales include - [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]. + [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]. confidence_threshold: The confidence threshold used during post-processing to filter detections. 
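Patch 45 above adds a distinct resnet50d_m model type (a ResNet-50-D backbone with neck expansion 0.5 instead of 1.0) and fixes from_name() so that "rtdetr_r50vd_m" no longer falls back to the full resnet50d configuration. A minimal construction sketch; the import path is inferred from the file paths in the diff:

    from kornia.contrib.models.rt_detr.model import RTDETR, RTDETRConfig, RTDETRModelType

    # Both routes should now resolve to the same half-expansion variant.
    model_a = RTDETR.from_name("rtdetr_r50vd_m", num_classes=80)
    model_b = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet50d_m, 80))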
confidence_filtering: From ab5fb53a3440287ecfd116c6e6124151a4a72197 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 9 Sep 2024 07:33:50 +0000 Subject: [PATCH 46/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- kornia/contrib/models/rt_detr/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kornia/contrib/models/rt_detr/model.py b/kornia/contrib/models/rt_detr/model.py index 512585b9e6..3e6e6226bb 100644 --- a/kornia/contrib/models/rt_detr/model.py +++ b/kornia/contrib/models/rt_detr/model.py @@ -136,7 +136,7 @@ def from_config(config: RTDETRConfig) -> RTDETR: neck_hidden_dim = config.neck_hidden_dim or 256 neck_dim_feedforward = config.neck_dim_feedforward or 1024 head_num_decoder_layers = config.head_num_decoder_layers or 6 - neck_expansion = config.neck_expansion or .5 + neck_expansion = config.neck_expansion or 0.5 elif model_type == RTDETRModelType.resnet101d: backbone = ResNetD.from_config(101)