From 859c67d23b9f1cb32d815c7c847e62036592b79e Mon Sep 17 00:00:00 2001 From: shijianjian Date: Wed, 4 Sep 2024 14:49:16 +0300 Subject: [PATCH 01/46] update --- kornia/contrib/models/common.py | 14 +++- .../rt_detr/architecture/hybrid_encoder.py | 33 ++++++-- .../models/rt_detr/architecture/resnet_d.py | 79 ++++++++++-------- .../rt_detr/architecture/rtdetr_head.py | 38 ++++----- kornia/contrib/models/rt_detr/model.py | 83 +++++++++++++++++-- 5 files changed, 176 insertions(+), 71 deletions(-) diff --git a/kornia/contrib/models/common.py b/kornia/contrib/models/common.py index 7ad86d54c6..375dec4b43 100644 --- a/kornia/contrib/models/common.py +++ b/kornia/contrib/models/common.py @@ -7,9 +7,11 @@ from kornia.core import Module, Tensor, pad + class ConvNormAct(nn.Sequential): def __init__( - self, in_channels: int, out_channels: int, kernel_size: int, stride: int = 1, act: str = "relu", groups: int = 1 + self, in_channels: int, out_channels: int, kernel_size: int, stride: int = 1, act: str = "relu", groups: int = 1, + conv_naming: str = "conv", norm_naming: str = "norm", act_naming: str = "act" ) -> None: super().__init__() if kernel_size % 2 == 0: @@ -23,9 +25,13 @@ def __init__( padding = 0 else: padding = (kernel_size - 1) // 2 - self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, 1, groups, False) - self.norm = nn.BatchNorm2d(out_channels) - self.act = {"relu": nn.ReLU, "silu": nn.SiLU, "none": nn.Identity}[act](inplace=True) + conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, 1, groups, False) + norm = nn.BatchNorm2d(out_channels) + act = {"relu": nn.ReLU, "silu": nn.SiLU, "none": nn.Identity}[act](inplace=True) + + self.__setattr__(conv_naming, conv) + self.__setattr__(norm_naming, norm) + self.__setattr__(act_naming, act) # Lightly adapted from diff --git a/kornia/contrib/models/rt_detr/architecture/hybrid_encoder.py b/kornia/contrib/models/rt_detr/architecture/hybrid_encoder.py index 6e9573abb9..c319c7cd5e 100644 --- a/kornia/contrib/models/rt_detr/architecture/hybrid_encoder.py +++ b/kornia/contrib/models/rt_detr/architecture/hybrid_encoder.py @@ -5,6 +5,7 @@ from __future__ import annotations from typing import Optional +import copy import torch import torch.nn.functional as F @@ -80,15 +81,16 @@ class AIFI(Module): def __init__(self, embed_dim: int, num_heads: int, dim_feedforward: int, dropout: float = 0.0) -> None: super().__init__() self.self_attn = nn.MultiheadAttention(embed_dim, num_heads, dropout) # NOTE: batch_first = False - self.dropout1 = nn.Dropout(dropout) - self.norm1 = nn.LayerNorm(embed_dim) self.linear1 = nn.Linear(embed_dim, dim_feedforward) - self.act = nn.GELU() self.dropout = nn.Dropout(dropout) self.linear2 = nn.Linear(dim_feedforward, embed_dim) + + self.dropout1 = nn.Dropout(dropout) self.dropout2 = nn.Dropout(dropout) + self.norm1 = nn.LayerNorm(embed_dim) self.norm2 = nn.LayerNorm(embed_dim) + self.act = nn.GELU() def forward(self, x: Tensor) -> Tensor: # using post-norm @@ -149,6 +151,20 @@ def build_2d_sincos_pos_emb( return pos_emb.unsqueeze(1) # (H * W, 1, C) +class TransformerEncoder(nn.Module): + def __init__(self, encoder_layer: nn.Module, num_layers: int) -> None: + super(TransformerEncoder, self).__init__() + self.layers = nn.ModuleList([copy.deepcopy(encoder_layer) for _ in range(num_layers)]) + self.num_layers = num_layers + + def forward(self, src: Tensor) -> Tensor: # NOTE: Missing src_mask: Tensor = None, pos_embed: Tensor = None + output = src + for layer in self.layers: + output = 
layer(output) + + return output + + class CCFM(Module): def __init__(self, num_fmaps: int, hidden_dim: int, expansion: float = 1.0) -> None: super().__init__() @@ -192,12 +208,17 @@ def forward(self, fmaps: list[Tensor]) -> list[Tensor]: class HybridEncoder(Module): def __init__(self, in_channels: list[int], hidden_dim: int, dim_feedforward: int, expansion: float = 1.0) -> None: super().__init__() - self.input_proj = nn.ModuleList([ConvNormAct(in_ch, hidden_dim, 1, act="none") for in_ch in in_channels]) - self.aifi = AIFI(hidden_dim, 8, dim_feedforward) + self.input_proj = nn.ModuleList([ + ConvNormAct( # To align the naming strategy for the official weights + in_ch, hidden_dim, 1, act="none", conv_naming="0", norm_naming="1", act_naming="2" + ) for in_ch in in_channels + ]) + encoder_layer = AIFI(hidden_dim, 8, dim_feedforward) + self.encoder = nn.Sequential(TransformerEncoder(encoder_layer, 1)) self.ccfm = CCFM(len(in_channels), hidden_dim, expansion) def forward(self, fmaps: list[Tensor]) -> list[Tensor]: projected_maps = [proj(fmap) for proj, fmap in zip(self.input_proj, fmaps)] - projected_maps[-1] = self.aifi(projected_maps[-1]) + projected_maps[-1] = self.encoder(projected_maps[-1]) new_fmaps = self.ccfm(projected_maps) return new_fmaps diff --git a/kornia/contrib/models/rt_detr/architecture/resnet_d.py b/kornia/contrib/models/rt_detr/architecture/resnet_d.py index 57fa171c82..65e3a7518b 100644 --- a/kornia/contrib/models/rt_detr/architecture/resnet_d.py +++ b/kornia/contrib/models/rt_detr/architecture/resnet_d.py @@ -5,6 +5,8 @@ """ from __future__ import annotations +from typing import List, Tuple +from collections import OrderedDict from torch import nn @@ -15,7 +17,10 @@ def _make_shortcut(in_channels: int, out_channels: int, stride: int) -> Module: return ( - nn.Sequential(nn.AvgPool2d(2, 2), ConvNormAct(in_channels, out_channels, 1, act="none")) + nn.Sequential(OrderedDict([ + ("pool", nn.AvgPool2d(2, 2)), + ("conv", ConvNormAct(in_channels, out_channels, 1, act="none")) + ])) if stride == 2 else ConvNormAct(in_channels, out_channels, 1, act="none") ) @@ -27,15 +32,15 @@ class BasicBlockD(Module): def __init__(self, in_channels: int, out_channels: int, stride: int, shortcut: bool) -> None: KORNIA_CHECK(stride in {1, 2}) super().__init__() - self.convs = nn.Sequential( - ConvNormAct(in_channels, out_channels, 3, stride=stride), - ConvNormAct(out_channels, out_channels, 3, act="none"), - ) - self.shortcut = nn.Identity() if shortcut else _make_shortcut(in_channels, out_channels, stride) + self.convs = nn.Sequential(OrderedDict([ + ("branch2a", ConvNormAct(in_channels, out_channels, 3, stride=stride)), + ("branch2b", ConvNormAct(out_channels, out_channels, 3, act="none")), + ])) + self.short = nn.Identity() if shortcut else _make_shortcut(in_channels, out_channels, stride) self.relu = nn.ReLU(inplace=True) def forward(self, x: Tensor) -> Tensor: - return self.relu(self.convs(x) + self.shortcut(x)) + return self.relu(self.convs(x) + self.short(x)) class BottleneckD(Module): @@ -45,16 +50,22 @@ def __init__(self, in_channels: int, out_channels: int, stride: int, shortcut: b KORNIA_CHECK(stride in {1, 2}) super().__init__() expanded_out_channels = out_channels * self.expansion - self.convs = nn.Sequential( - ConvNormAct(in_channels, out_channels, 1), - ConvNormAct(out_channels, out_channels, 3, stride=stride), - ConvNormAct(out_channels, expanded_out_channels, 1, act="none"), - ) - self.shortcut = nn.Identity() if shortcut else _make_shortcut(in_channels, expanded_out_channels, 
stride) + self.convs = nn.Sequential(OrderedDict([ + ("branch2a", ConvNormAct(in_channels, out_channels, 1)), + ("branch2b", ConvNormAct(out_channels, out_channels, 3, stride=stride)), + ("branch2c", ConvNormAct(out_channels, expanded_out_channels, 1, act="none")), + ])) + self.short = nn.Identity() if shortcut else _make_shortcut(in_channels, expanded_out_channels, stride) self.relu = nn.ReLU(inplace=True) def forward(self, x: Tensor) -> Tensor: - return self.relu(self.convs(x) + self.shortcut(x)) + return self.relu(self.convs(x) + self.short(x)) + + +class Block(nn.Sequential): + def __init__(self, blocks: Module) -> None: + super().__init__() + self.blocks = blocks class ResNetD(Module): @@ -62,36 +73,38 @@ def __init__(self, n_blocks: list[int], block: type[BasicBlockD | BottleneckD]) KORNIA_CHECK(len(n_blocks) == 4) super().__init__() in_channels = 64 - self.conv1 = nn.Sequential( - ConvNormAct(3, in_channels // 2, 3, stride=2), - ConvNormAct(in_channels // 2, in_channels // 2, 3), - ConvNormAct(in_channels // 2, in_channels, 3), - nn.MaxPool2d(3, stride=2, padding=1), - ) - - self.res2, in_channels = self.make_stage(in_channels, 64, 1, n_blocks[0], block) - self.res3, in_channels = self.make_stage(in_channels, 128, 2, n_blocks[1], block) - self.res4, in_channels = self.make_stage(in_channels, 256, 2, n_blocks[2], block) - self.res5, in_channels = self.make_stage(in_channels, 512, 2, n_blocks[3], block) + self.conv1 = nn.Sequential(OrderedDict([ + ("conv1_1", ConvNormAct(3, in_channels // 2, 3, stride=2)), + ("conv1_2", ConvNormAct(in_channels // 2, in_channels // 2, 3)), + ("conv1_3", ConvNormAct(in_channels // 2, in_channels, 3)), + ("pool", nn.MaxPool2d(3, stride=2, padding=1)), + ])) + + res2, in_channels = self.make_stage(in_channels, 64, 1, n_blocks[0], block) + res3, in_channels = self.make_stage(in_channels, 128, 2, n_blocks[1], block) + res4, in_channels = self.make_stage(in_channels, 256, 2, n_blocks[2], block) + res5, in_channels = self.make_stage(in_channels, 512, 2, n_blocks[3], block) + + self.res_layers = nn.ModuleList([res2, res3, res4, res5]) self.out_channels = [ch * block.expansion for ch in [128, 256, 512]] @staticmethod def make_stage( in_channels: int, out_channels: int, stride: int, n_blocks: int, block: type[BasicBlockD | BottleneckD] - ) -> tuple[Module, int]: - stage = nn.Sequential( + ) -> Tuple[Module, int]: + stage = Block(nn.Sequential( block(in_channels, out_channels, stride, False), *[block(out_channels * block.expansion, out_channels, 1, True) for _ in range(n_blocks - 1)], - ) + )) return stage, out_channels * block.expansion - def forward(self, x: Tensor) -> list[Tensor]: + def forward(self, x: Tensor) -> List[Tensor]: x = self.conv1(x) - res2 = self.res2(x) - res3 = self.res3(res2) - res4 = self.res4(res3) - res5 = self.res5(res4) + res2 = self.res_layers[0](x) + res3 = self.res_layers[1](res2) + res4 = self.res_layers[2](res3) + res5 = self.res_layers[3](res4) return [res3, res4, res5] @staticmethod diff --git a/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py b/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py index b75e56eca6..4b4dca4f5a 100644 --- a/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py +++ b/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py @@ -4,6 +4,7 @@ from __future__ import annotations from typing import Optional +import copy import torch from torch import nn @@ -192,14 +193,14 @@ def forward( return out -class TransformerDecoder: - def __init__(self, hidden_dim: int, decoder_layers: 
nn.ModuleList, num_layers: int, eval_idx: int = -1) -> None: +class TransformerDecoder(Module): + def __init__(self, hidden_dim: int, decoder_layer: nn.Module, num_layers: int, eval_idx: int = -1) -> None: super().__init__() - self.layers = decoder_layers + # self.layers = decoder_layers # TODO: come back to this later - # self.layers = nn.ModuleList([ - # copy.deepcopy(decoder_layer) for _ in range(num_layers) - # ]) + self.layers = nn.ModuleList([ + copy.deepcopy(decoder_layer) for _ in range(num_layers) + ]) self.hidden_dim = hidden_dim self.num_layers = num_layers self.eval_idx = eval_idx if eval_idx >= 0 else num_layers + eval_idx @@ -274,6 +275,7 @@ def __init__( num_decoder_points: int = 4, # num_levels: int = 3, dropout: float = 0.0, + num_denoising: int = 100, ) -> None: super().__init__() self.num_queries = num_queries @@ -288,25 +290,21 @@ def __init__( # https://github.com/lyuwenyu/RT-DETR/blob/main/rtdetr_pytorch/src/zoo/rtdetr/rtdetr_decoder.py#L403-L410 # NOTE: need to be integrated with the TransformerDecoderLayer - self.decoder_layers = nn.ModuleList( - [ - TransformerDecoderLayer( - embed_dim=hidden_dim, - num_heads=num_heads, - dropout=dropout, - num_levels=len(in_channels), - num_points=num_decoder_points, - ) - for _ in range(num_decoder_layers) - ] + decoder_layer = TransformerDecoderLayer( + embed_dim=hidden_dim, + num_heads=num_heads, + dropout=dropout, + num_levels=len(in_channels), + num_points=num_decoder_points, ) self.decoder = TransformerDecoder( - hidden_dim=hidden_dim, decoder_layers=self.decoder_layers, num_layers=num_decoder_layers + hidden_dim=hidden_dim, decoder_layer=decoder_layer, num_layers=num_decoder_layers ) # denoising part - self.denoising_class_embed = nn.Embedding(num_classes, hidden_dim) # not used in evaluation + if num_denoising > 0: + self.denoising_class_embed = nn.Embedding(num_classes + 1, hidden_dim, padding_idx=num_classes) # not used in evaluation # decoder embedding self.query_pos_head = MLP(4, 2 * hidden_dim, hidden_dim, num_layers=2) @@ -334,7 +332,7 @@ def forward(self, feats: Tensor) -> tuple[Tensor, Tensor]: ) # decoder - out_bboxes, out_logits = self.decoder.forward( + out_bboxes, out_logits = self.decoder( target, init_ref_points_unact, memory, diff --git a/kornia/contrib/models/rt_detr/model.py b/kornia/contrib/models/rt_detr/model.py index 1f250c77aa..dc167af218 100644 --- a/kornia/contrib/models/rt_detr/model.py +++ b/kornia/contrib/models/rt_detr/model.py @@ -4,8 +4,10 @@ from dataclasses import dataclass from enum import Enum -from typing import Optional +from typing import Optional, Dict +import re +import torch from kornia.contrib.models.base import ModelBase from kornia.contrib.models.rt_detr.architecture.hgnetv2 import PPHGNetV2 from kornia.contrib.models.rt_detr.architecture.hybrid_encoder import HybridEncoder @@ -14,6 +16,15 @@ from kornia.core import Tensor +URLs = { + 'rtdetr_r18vd': 'https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r18vd_dec3_6x_coco_from_paddle.pth', + 'rtdetr_r34vd': 'https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r34vd_dec4_6x_coco_from_paddle.pth', + 'rtdetr_r50vd_m': 'https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_m_6x_coco_from_paddle.pth', + 'rtdetr_r50vd': 'https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_6x_coco_from_paddle.pth', + 'rtdetr_r101vd': 'https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r101vd_6x_coco_from_paddle.pth', +} + + class RTDETRModelType(Enum): """Enum class that maps 
RT-DETR model type.""" @@ -65,7 +76,7 @@ class RTDETRConfig: class RTDETR(ModelBase[RTDETRConfig]): """RT-DETR Object Detection model, as described in https://arxiv.org/abs/2304.08069.""" - def __init__(self, backbone: ResNetD | PPHGNetV2, neck: HybridEncoder, head: RTDETRHead): + def __init__(self, backbone: ResNetD | PPHGNetV2, encoder: HybridEncoder, decoder: RTDETRHead): """Construct RT-DETR Object Detection model. Args: @@ -75,8 +86,8 @@ def __init__(self, backbone: ResNetD | PPHGNetV2, neck: HybridEncoder, head: RTD """ super().__init__() self.backbone = backbone - self.neck = neck - self.head = head + self.encoder = encoder + self.decoder = decoder @staticmethod def from_config(config: RTDETRConfig) -> RTDETR: @@ -156,6 +167,62 @@ def from_config(config: RTDETRConfig) -> RTDETR: model.load_checkpoint(config.checkpoint) return model + def from_pretrained(model_name: str) -> RTDETR: + """Load model from pretrained weights. + + Args: + model_name: 'rtdetr_r18vd', 'rtdetr_r34vd', 'rtdetr_r50vd_m', 'rtdetr_r50vd', 'rtdetr_r101vd'. + """ + + state_dict = torch.hub.load_state_dict_from_url( + URLs[model_name], + map_location="cuda:0" if torch.cuda.is_available() else "cpu" + ) + + def map_name(old_name: str) -> str: + # Start with the old name + new_name = old_name + + new_name = re.sub('encoder.pan_blocks', 'encoder.ccfm.pan_blocks', new_name) + new_name = re.sub('encoder.downsample_convs', 'encoder.ccfm.downsample_convs', new_name) + new_name = re.sub('encoder.fpn_blocks', 'encoder.ccfm.fpn_blocks', new_name) + new_name = re.sub('encoder.lateral_convs', 'encoder.ccfm.lateral_convs', new_name) + + # Backbone renaming + new_name = re.sub(f'.branch2b.', '.convs.branch2b.', new_name) + new_name = re.sub(f'.branch2a.', '.convs.branch2a.', new_name) + new_name = re.sub(f'.branch2c.', '.convs.branch2c.', new_name) + + return new_name + + def _state_dict_proc(state_dict: Dict[str, Tensor]) -> Dict[str, Tensor]: + + state_dict = state_dict["ema"]["module"] + new_state_dict = {} + + # Apply the regex-based mapping function to each key + for old_name in state_dict.keys(): + new_name = map_name(old_name) + new_state_dict[new_name] = state_dict[old_name] + + return new_state_dict + + if model_name == "rtdetr_r18vd": + model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet18d, 80)) + elif model_name == "rtdetr_r34vd": + model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet34d, 80)) + elif model_name == "rtdetr_r50vd_m": + model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet50d, 80)) + elif model_name == "rtdetr_r50vd": + model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet50d, 80)) + elif model_name == "rtdetr_r101vd": + model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet101d, 80)) + else: + raise ValueError + + model.load_state_dict(_state_dict_proc(state_dict)) + return model + def forward(self, images: Tensor) -> tuple[Tensor, Tensor]: """Detect objects in an image. @@ -167,10 +234,10 @@ def forward(self, images: Tensor) -> tuple[Tensor, Tensor]: :math:`K` is the number of classes. - **boxes** - Tensor of shape :math:`(N, Q, 4)`, where :math:`Q` is the number of queries. """ - if self.training: - raise RuntimeError("Only evaluation mode is supported. Please call model.eval().") + # if self.training: + # raise RuntimeError("Only evaluation mode is supported. 
Please call model.eval().") feats = self.backbone(images) - feats_buf = self.neck(feats) - logits, boxes = self.head(feats_buf) + feats_buf = self.encoder(feats) + logits, boxes = self.decoder(feats_buf) return logits, boxes From 6522333612d9ca4e6dcbe0050cd28174b71a4468 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 4 Sep 2024 11:50:53 +0000 Subject: [PATCH 02/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- kornia/contrib/models/common.py | 13 +++- .../rt_detr/architecture/hybrid_encoder.py | 15 +++-- .../models/rt_detr/architecture/resnet_d.py | 62 ++++++++++++------- .../rt_detr/architecture/rtdetr_head.py | 10 +-- kornia/contrib/models/rt_detr/model.py | 34 +++++----- 5 files changed, 78 insertions(+), 56 deletions(-) diff --git a/kornia/contrib/models/common.py b/kornia/contrib/models/common.py index 375dec4b43..fcb16ae61a 100644 --- a/kornia/contrib/models/common.py +++ b/kornia/contrib/models/common.py @@ -7,11 +7,18 @@ from kornia.core import Module, Tensor, pad - class ConvNormAct(nn.Sequential): def __init__( - self, in_channels: int, out_channels: int, kernel_size: int, stride: int = 1, act: str = "relu", groups: int = 1, - conv_naming: str = "conv", norm_naming: str = "norm", act_naming: str = "act" + self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + act: str = "relu", + groups: int = 1, + conv_naming: str = "conv", + norm_naming: str = "norm", + act_naming: str = "act", ) -> None: super().__init__() if kernel_size % 2 == 0: diff --git a/kornia/contrib/models/rt_detr/architecture/hybrid_encoder.py b/kornia/contrib/models/rt_detr/architecture/hybrid_encoder.py index c319c7cd5e..6d25bcf53f 100644 --- a/kornia/contrib/models/rt_detr/architecture/hybrid_encoder.py +++ b/kornia/contrib/models/rt_detr/architecture/hybrid_encoder.py @@ -4,8 +4,8 @@ from __future__ import annotations -from typing import Optional import copy +from typing import Optional import torch import torch.nn.functional as F @@ -208,11 +208,14 @@ def forward(self, fmaps: list[Tensor]) -> list[Tensor]: class HybridEncoder(Module): def __init__(self, in_channels: list[int], hidden_dim: int, dim_feedforward: int, expansion: float = 1.0) -> None: super().__init__() - self.input_proj = nn.ModuleList([ - ConvNormAct( # To align the naming strategy for the official weights - in_ch, hidden_dim, 1, act="none", conv_naming="0", norm_naming="1", act_naming="2" - ) for in_ch in in_channels - ]) + self.input_proj = nn.ModuleList( + [ + ConvNormAct( # To align the naming strategy for the official weights + in_ch, hidden_dim, 1, act="none", conv_naming="0", norm_naming="1", act_naming="2" + ) + for in_ch in in_channels + ] + ) encoder_layer = AIFI(hidden_dim, 8, dim_feedforward) self.encoder = nn.Sequential(TransformerEncoder(encoder_layer, 1)) self.ccfm = CCFM(len(in_channels), hidden_dim, expansion) diff --git a/kornia/contrib/models/rt_detr/architecture/resnet_d.py b/kornia/contrib/models/rt_detr/architecture/resnet_d.py index 65e3a7518b..3453ae2a7c 100644 --- a/kornia/contrib/models/rt_detr/architecture/resnet_d.py +++ b/kornia/contrib/models/rt_detr/architecture/resnet_d.py @@ -5,8 +5,9 @@ """ from __future__ import annotations -from typing import List, Tuple + from collections import OrderedDict +from typing import List, Tuple from torch import nn @@ -17,10 +18,9 @@ def _make_shortcut(in_channels: int, out_channels: int, stride: int) -> Module: return ( - 
nn.Sequential(OrderedDict([ - ("pool", nn.AvgPool2d(2, 2)), - ("conv", ConvNormAct(in_channels, out_channels, 1, act="none")) - ])) + nn.Sequential( + OrderedDict([("pool", nn.AvgPool2d(2, 2)), ("conv", ConvNormAct(in_channels, out_channels, 1, act="none"))]) + ) if stride == 2 else ConvNormAct(in_channels, out_channels, 1, act="none") ) @@ -32,10 +32,14 @@ class BasicBlockD(Module): def __init__(self, in_channels: int, out_channels: int, stride: int, shortcut: bool) -> None: KORNIA_CHECK(stride in {1, 2}) super().__init__() - self.convs = nn.Sequential(OrderedDict([ - ("branch2a", ConvNormAct(in_channels, out_channels, 3, stride=stride)), - ("branch2b", ConvNormAct(out_channels, out_channels, 3, act="none")), - ])) + self.convs = nn.Sequential( + OrderedDict( + [ + ("branch2a", ConvNormAct(in_channels, out_channels, 3, stride=stride)), + ("branch2b", ConvNormAct(out_channels, out_channels, 3, act="none")), + ] + ) + ) self.short = nn.Identity() if shortcut else _make_shortcut(in_channels, out_channels, stride) self.relu = nn.ReLU(inplace=True) @@ -50,11 +54,15 @@ def __init__(self, in_channels: int, out_channels: int, stride: int, shortcut: b KORNIA_CHECK(stride in {1, 2}) super().__init__() expanded_out_channels = out_channels * self.expansion - self.convs = nn.Sequential(OrderedDict([ - ("branch2a", ConvNormAct(in_channels, out_channels, 1)), - ("branch2b", ConvNormAct(out_channels, out_channels, 3, stride=stride)), - ("branch2c", ConvNormAct(out_channels, expanded_out_channels, 1, act="none")), - ])) + self.convs = nn.Sequential( + OrderedDict( + [ + ("branch2a", ConvNormAct(in_channels, out_channels, 1)), + ("branch2b", ConvNormAct(out_channels, out_channels, 3, stride=stride)), + ("branch2c", ConvNormAct(out_channels, expanded_out_channels, 1, act="none")), + ] + ) + ) self.short = nn.Identity() if shortcut else _make_shortcut(in_channels, expanded_out_channels, stride) self.relu = nn.ReLU(inplace=True) @@ -73,12 +81,16 @@ def __init__(self, n_blocks: list[int], block: type[BasicBlockD | BottleneckD]) KORNIA_CHECK(len(n_blocks) == 4) super().__init__() in_channels = 64 - self.conv1 = nn.Sequential(OrderedDict([ - ("conv1_1", ConvNormAct(3, in_channels // 2, 3, stride=2)), - ("conv1_2", ConvNormAct(in_channels // 2, in_channels // 2, 3)), - ("conv1_3", ConvNormAct(in_channels // 2, in_channels, 3)), - ("pool", nn.MaxPool2d(3, stride=2, padding=1)), - ])) + self.conv1 = nn.Sequential( + OrderedDict( + [ + ("conv1_1", ConvNormAct(3, in_channels // 2, 3, stride=2)), + ("conv1_2", ConvNormAct(in_channels // 2, in_channels // 2, 3)), + ("conv1_3", ConvNormAct(in_channels // 2, in_channels, 3)), + ("pool", nn.MaxPool2d(3, stride=2, padding=1)), + ] + ) + ) res2, in_channels = self.make_stage(in_channels, 64, 1, n_blocks[0], block) res3, in_channels = self.make_stage(in_channels, 128, 2, n_blocks[1], block) @@ -93,10 +105,12 @@ def __init__(self, n_blocks: list[int], block: type[BasicBlockD | BottleneckD]) def make_stage( in_channels: int, out_channels: int, stride: int, n_blocks: int, block: type[BasicBlockD | BottleneckD] ) -> Tuple[Module, int]: - stage = Block(nn.Sequential( - block(in_channels, out_channels, stride, False), - *[block(out_channels * block.expansion, out_channels, 1, True) for _ in range(n_blocks - 1)], - )) + stage = Block( + nn.Sequential( + block(in_channels, out_channels, stride, False), + *[block(out_channels * block.expansion, out_channels, 1, True) for _ in range(n_blocks - 1)], + ) + ) return stage, out_channels * block.expansion def forward(self, x: Tensor) -> 
List[Tensor]: diff --git a/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py b/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py index 4b4dca4f5a..17fec6b7c7 100644 --- a/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py +++ b/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py @@ -3,8 +3,8 @@ from __future__ import annotations -from typing import Optional import copy +from typing import Optional import torch from torch import nn @@ -198,9 +198,7 @@ def __init__(self, hidden_dim: int, decoder_layer: nn.Module, num_layers: int, e super().__init__() # self.layers = decoder_layers # TODO: come back to this later - self.layers = nn.ModuleList([ - copy.deepcopy(decoder_layer) for _ in range(num_layers) - ]) + self.layers = nn.ModuleList([copy.deepcopy(decoder_layer) for _ in range(num_layers)]) self.hidden_dim = hidden_dim self.num_layers = num_layers self.eval_idx = eval_idx if eval_idx >= 0 else num_layers + eval_idx @@ -304,7 +302,9 @@ def __init__( # denoising part if num_denoising > 0: - self.denoising_class_embed = nn.Embedding(num_classes + 1, hidden_dim, padding_idx=num_classes) # not used in evaluation + self.denoising_class_embed = nn.Embedding( + num_classes + 1, hidden_dim, padding_idx=num_classes + ) # not used in evaluation # decoder embedding self.query_pos_head = MLP(4, 2 * hidden_dim, hidden_dim, num_layers=2) diff --git a/kornia/contrib/models/rt_detr/model.py b/kornia/contrib/models/rt_detr/model.py index dc167af218..49a9c510bd 100644 --- a/kornia/contrib/models/rt_detr/model.py +++ b/kornia/contrib/models/rt_detr/model.py @@ -2,12 +2,13 @@ from __future__ import annotations +import re from dataclasses import dataclass from enum import Enum -from typing import Optional, Dict -import re +from typing import Dict, Optional import torch + from kornia.contrib.models.base import ModelBase from kornia.contrib.models.rt_detr.architecture.hgnetv2 import PPHGNetV2 from kornia.contrib.models.rt_detr.architecture.hybrid_encoder import HybridEncoder @@ -15,13 +16,12 @@ from kornia.contrib.models.rt_detr.architecture.rtdetr_head import RTDETRHead from kornia.core import Tensor - URLs = { - 'rtdetr_r18vd': 'https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r18vd_dec3_6x_coco_from_paddle.pth', - 'rtdetr_r34vd': 'https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r34vd_dec4_6x_coco_from_paddle.pth', - 'rtdetr_r50vd_m': 'https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_m_6x_coco_from_paddle.pth', - 'rtdetr_r50vd': 'https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_6x_coco_from_paddle.pth', - 'rtdetr_r101vd': 'https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r101vd_6x_coco_from_paddle.pth', + "rtdetr_r18vd": "https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r18vd_dec3_6x_coco_from_paddle.pth", + "rtdetr_r34vd": "https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r34vd_dec4_6x_coco_from_paddle.pth", + "rtdetr_r50vd_m": "https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_m_6x_coco_from_paddle.pth", + "rtdetr_r50vd": "https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_6x_coco_from_paddle.pth", + "rtdetr_r101vd": "https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r101vd_6x_coco_from_paddle.pth", } @@ -175,28 +175,26 @@ def from_pretrained(model_name: str) -> RTDETR: """ state_dict = torch.hub.load_state_dict_from_url( - URLs[model_name], - map_location="cuda:0" if torch.cuda.is_available() 
else "cpu" + URLs[model_name], map_location="cuda:0" if torch.cuda.is_available() else "cpu" ) def map_name(old_name: str) -> str: # Start with the old name new_name = old_name - new_name = re.sub('encoder.pan_blocks', 'encoder.ccfm.pan_blocks', new_name) - new_name = re.sub('encoder.downsample_convs', 'encoder.ccfm.downsample_convs', new_name) - new_name = re.sub('encoder.fpn_blocks', 'encoder.ccfm.fpn_blocks', new_name) - new_name = re.sub('encoder.lateral_convs', 'encoder.ccfm.lateral_convs', new_name) + new_name = re.sub("encoder.pan_blocks", "encoder.ccfm.pan_blocks", new_name) + new_name = re.sub("encoder.downsample_convs", "encoder.ccfm.downsample_convs", new_name) + new_name = re.sub("encoder.fpn_blocks", "encoder.ccfm.fpn_blocks", new_name) + new_name = re.sub("encoder.lateral_convs", "encoder.ccfm.lateral_convs", new_name) # Backbone renaming - new_name = re.sub(f'.branch2b.', '.convs.branch2b.', new_name) - new_name = re.sub(f'.branch2a.', '.convs.branch2a.', new_name) - new_name = re.sub(f'.branch2c.', '.convs.branch2c.', new_name) + new_name = re.sub(".branch2b.", ".convs.branch2b.", new_name) + new_name = re.sub(".branch2a.", ".convs.branch2a.", new_name) + new_name = re.sub(".branch2c.", ".convs.branch2c.", new_name) return new_name def _state_dict_proc(state_dict: Dict[str, Tensor]) -> Dict[str, Tensor]: - state_dict = state_dict["ema"]["module"] new_state_dict = {} From 095a2685e9b8506925dc5001aa4060e079c86436 Mon Sep 17 00:00:00 2001 From: shijianjian Date: Wed, 4 Sep 2024 15:07:32 +0300 Subject: [PATCH 03/46] update --- kornia/contrib/models/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kornia/contrib/models/common.py b/kornia/contrib/models/common.py index fcb16ae61a..b46049f93f 100644 --- a/kornia/contrib/models/common.py +++ b/kornia/contrib/models/common.py @@ -34,11 +34,11 @@ def __init__( padding = (kernel_size - 1) // 2 conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, 1, groups, False) norm = nn.BatchNorm2d(out_channels) - act = {"relu": nn.ReLU, "silu": nn.SiLU, "none": nn.Identity}[act](inplace=True) + activation = {"relu": nn.ReLU, "silu": nn.SiLU, "none": nn.Identity}[act](inplace=True) self.__setattr__(conv_naming, conv) self.__setattr__(norm_naming, norm) - self.__setattr__(act_naming, act) + self.__setattr__(act_naming, activation) # Lightly adapted from From bdeee27a3fdf60919586384dfa79877b00b5056b Mon Sep 17 00:00:00 2001 From: shijianjian Date: Wed, 4 Sep 2024 15:11:05 +0300 Subject: [PATCH 04/46] update --- kornia/contrib/models/rt_detr/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kornia/contrib/models/rt_detr/model.py b/kornia/contrib/models/rt_detr/model.py index 49a9c510bd..6c6653dd0a 100644 --- a/kornia/contrib/models/rt_detr/model.py +++ b/kornia/contrib/models/rt_detr/model.py @@ -195,7 +195,7 @@ def map_name(old_name: str) -> str: return new_name def _state_dict_proc(state_dict: Dict[str, Tensor]) -> Dict[str, Tensor]: - state_dict = state_dict["ema"]["module"] + state_dict = state_dict["ema"]["module"] # type:ignore new_state_dict = {} # Apply the regex-based mapping function to each key From d7da930738e52eed860d703ce34f06c21c19604b Mon Sep 17 00:00:00 2001 From: shijianjian Date: Wed, 4 Sep 2024 15:14:39 +0300 Subject: [PATCH 05/46] update --- kornia/contrib/models/rt_detr/architecture/hybrid_encoder.py | 2 +- kornia/contrib/models/rt_detr/architecture/resnet_d.py | 5 ++--- kornia/contrib/models/rt_detr/model.py | 4 ++-- 3 files changed, 5 insertions(+), 6 
deletions(-) diff --git a/kornia/contrib/models/rt_detr/architecture/hybrid_encoder.py b/kornia/contrib/models/rt_detr/architecture/hybrid_encoder.py index 6d25bcf53f..5e2a8cc98e 100644 --- a/kornia/contrib/models/rt_detr/architecture/hybrid_encoder.py +++ b/kornia/contrib/models/rt_detr/architecture/hybrid_encoder.py @@ -153,7 +153,7 @@ def build_2d_sincos_pos_emb( class TransformerEncoder(nn.Module): def __init__(self, encoder_layer: nn.Module, num_layers: int) -> None: - super(TransformerEncoder, self).__init__() + super().__init__() self.layers = nn.ModuleList([copy.deepcopy(encoder_layer) for _ in range(num_layers)]) self.num_layers = num_layers diff --git a/kornia/contrib/models/rt_detr/architecture/resnet_d.py b/kornia/contrib/models/rt_detr/architecture/resnet_d.py index 3453ae2a7c..4432b38ac3 100644 --- a/kornia/contrib/models/rt_detr/architecture/resnet_d.py +++ b/kornia/contrib/models/rt_detr/architecture/resnet_d.py @@ -7,7 +7,6 @@ from __future__ import annotations from collections import OrderedDict -from typing import List, Tuple from torch import nn @@ -104,7 +103,7 @@ def __init__(self, n_blocks: list[int], block: type[BasicBlockD | BottleneckD]) @staticmethod def make_stage( in_channels: int, out_channels: int, stride: int, n_blocks: int, block: type[BasicBlockD | BottleneckD] - ) -> Tuple[Module, int]: + ) -> tuple[Module, int]: stage = Block( nn.Sequential( block(in_channels, out_channels, stride, False), @@ -113,7 +112,7 @@ def make_stage( ) return stage, out_channels * block.expansion - def forward(self, x: Tensor) -> List[Tensor]: + def forward(self, x: Tensor) -> list[Tensor]: x = self.conv1(x) res2 = self.res_layers[0](x) res3 = self.res_layers[1](res2) diff --git a/kornia/contrib/models/rt_detr/model.py b/kornia/contrib/models/rt_detr/model.py index 6c6653dd0a..67cc495520 100644 --- a/kornia/contrib/models/rt_detr/model.py +++ b/kornia/contrib/models/rt_detr/model.py @@ -5,7 +5,7 @@ import re from dataclasses import dataclass from enum import Enum -from typing import Dict, Optional +from typing import Optional import torch @@ -194,7 +194,7 @@ def map_name(old_name: str) -> str: return new_name - def _state_dict_proc(state_dict: Dict[str, Tensor]) -> Dict[str, Tensor]: + def _state_dict_proc(state_dict: dict[str, Tensor]) -> dict[str, Tensor]: state_dict = state_dict["ema"]["module"] # type:ignore new_state_dict = {} From 33120ab232ae0772eae3a1303a757113bfdaa117 Mon Sep 17 00:00:00 2001 From: shijianjian Date: Wed, 4 Sep 2024 15:17:17 +0300 Subject: [PATCH 06/46] update --- kornia/contrib/models/rt_detr/model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/kornia/contrib/models/rt_detr/model.py b/kornia/contrib/models/rt_detr/model.py index 67cc495520..c7ee40ec63 100644 --- a/kornia/contrib/models/rt_detr/model.py +++ b/kornia/contrib/models/rt_detr/model.py @@ -167,6 +167,7 @@ def from_config(config: RTDETRConfig) -> RTDETR: model.load_checkpoint(config.checkpoint) return model + @staticmethod def from_pretrained(model_name: str) -> RTDETR: """Load model from pretrained weights. 
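
Note: a minimal usage sketch for the pretrained-loading path introduced in the patch above. The model name "rtdetr_r18vd" and the 640x640 input size are illustrative assumptions (any key of the URLs dict added in PATCH 01 should work the same way); they are not values fixed by these patches.

    import torch
    from kornia.contrib.models.rt_detr.model import RTDETR

    # Load the COCO-pretrained weights referenced in the URLs dict and remap
    # their names to this implementation via from_pretrained (added above).
    model = RTDETR.from_pretrained("rtdetr_r18vd").eval()

    # Dummy batch of RGB images; 640x640 is an assumed input size.
    images = torch.rand(1, 3, 640, 640)
    with torch.no_grad():
        logits, boxes = model(images)  # logits: (N, Q, K), boxes: (N, Q, 4), per the forward docstring
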
From a5ded631c7803b9cfb89f8cc02f61d5644ce9629 Mon Sep 17 00:00:00 2001 From: shijianjian Date: Wed, 4 Sep 2024 15:24:58 +0300 Subject: [PATCH 07/46] update --- kornia/contrib/models/rt_detr/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kornia/contrib/models/rt_detr/model.py b/kornia/contrib/models/rt_detr/model.py index c7ee40ec63..0bc2cfad40 100644 --- a/kornia/contrib/models/rt_detr/model.py +++ b/kornia/contrib/models/rt_detr/model.py @@ -180,9 +180,9 @@ def from_pretrained(model_name: str) -> RTDETR: ) def map_name(old_name: str) -> str: - # Start with the old name new_name = old_name + # Encoder renaming new_name = re.sub("encoder.pan_blocks", "encoder.ccfm.pan_blocks", new_name) new_name = re.sub("encoder.downsample_convs", "encoder.ccfm.downsample_convs", new_name) new_name = re.sub("encoder.fpn_blocks", "encoder.ccfm.fpn_blocks", new_name) From e8019f01e93161ae55a84fed8db7b6cf356775a3 Mon Sep 17 00:00:00 2001 From: shijianjian Date: Wed, 4 Sep 2024 17:50:05 +0300 Subject: [PATCH 08/46] update --- kornia/contrib/models/rt_detr/post_processor.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kornia/contrib/models/rt_detr/post_processor.py b/kornia/contrib/models/rt_detr/post_processor.py index d5fce83870..a189f4ebd0 100644 --- a/kornia/contrib/models/rt_detr/post_processor.py +++ b/kornia/contrib/models/rt_detr/post_processor.py @@ -4,7 +4,6 @@ import torch from kornia.core import Module, Tensor, concatenate -from kornia.image.base import ImageSize class DETRPostProcessor(Module): @@ -12,7 +11,7 @@ def __init__(self, confidence_threshold: float) -> None: super().__init__() self.confidence_threshold = confidence_threshold - def forward(self, logits: Tensor, boxes: Tensor, original_sizes: list[ImageSize]) -> list[Tensor]: + def forward(self, logits: Tensor, boxes: Tensor, original_sizes: Tensor) -> list[Tensor]: """Post-process outputs from DETR. Args: @@ -20,7 +19,8 @@ def forward(self, logits: Tensor, boxes: Tensor, original_sizes: list[ImageSize] queries, :math:`K` is the number of classes. boxes: tensor with shape :math:`(N, Q, 4)`, where :math:`N` is the batch size, :math:`Q` is the number of queries. - original_sizes: list of tuples, each tuple represent (img_height, img_width). + original_sizes: tensor with shape :math:`(N, 2)`, where :math:`N` is the batch size and each element + represents the image size of (img_height, img_width). Returns: Processed detections. 
For each image, the detections have shape (D, 6), where D is the number of detections @@ -38,8 +38,8 @@ def forward(self, logits: Tensor, boxes: Tensor, original_sizes: list[ImageSize] boxes_xy = concatenate([cxcy - wh * 0.5, wh], -1) sizes_wh = torch.empty(1, 1, 2, device=boxes.device, dtype=boxes.dtype) - sizes_wh[..., 0] = original_sizes[0].width - sizes_wh[..., 1] = original_sizes[0].height + sizes_wh[..., 0] = original_sizes[0][0] + sizes_wh[..., 1] = original_sizes[0][0] sizes_wh = sizes_wh.repeat(1, 1, 2) boxes_xy = boxes_xy * sizes_wh From 85715a6eeed187cdac97bd2a1edbf30307de310d Mon Sep 17 00:00:00 2001 From: shijianjian Date: Thu, 5 Sep 2024 12:30:54 +0300 Subject: [PATCH 09/46] update --- kornia/contrib/object_detection.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index ad1849378f..a148a4c210 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -6,9 +6,8 @@ import torch -from kornia.core import Module, Tensor, concatenate +from kornia.core import Module, Tensor, as_tensor, concatenate from kornia.core.check import KORNIA_CHECK_SHAPE -from kornia.image.base import ImageSize __all__ = [ "BoundingBoxDataFormat", @@ -113,18 +112,18 @@ def __init__(self, size: tuple[int, int], interpolation_mode: str = "bilinear") self.size = size self.interpolation_mode = interpolation_mode - def forward(self, imgs: list[Tensor]) -> tuple[Tensor, list[ImageSize]]: + def forward(self, imgs: list[Tensor]) -> tuple[Tensor, Tensor]: # TODO: support other input formats e.g. file path, numpy resized_imgs, original_sizes = [], [] for i in range(len(imgs)): img = imgs[i] # NOTE: assume that image layout is CHW - original_sizes.append(ImageSize(height=img.shape[1], width=img.shape[2])) + original_sizes.append([img.shape[1], img.shape[2]]) resized_imgs.append( # TODO: fix kornia resize to support onnx torch.nn.functional.interpolate(img.unsqueeze(0), size=self.size, mode=self.interpolation_mode) ) - return concatenate(resized_imgs), original_sizes + return concatenate(resized_imgs), as_tensor(original_sizes) # TODO: move this to kornia.models as AlgorithmicModel api From 05106d35c913998abdd3c4d0e0451d2d5a3237ca Mon Sep 17 00:00:00 2001 From: shijianjian Date: Thu, 5 Sep 2024 21:08:49 +0300 Subject: [PATCH 10/46] update --- .../contrib/models/rt_detr/architecture/rtdetr_head.py | 9 ++++----- kornia/contrib/models/rt_detr/model.py | 2 -- kornia/contrib/models/rt_detr/post_processor.py | 2 +- kornia/contrib/object_detection.py | 7 ++++++- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py b/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py index 17fec6b7c7..23a343c18b 100644 --- a/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py +++ b/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py @@ -196,8 +196,6 @@ def forward( class TransformerDecoder(Module): def __init__(self, hidden_dim: int, decoder_layer: nn.Module, num_layers: int, eval_idx: int = -1) -> None: super().__init__() - # self.layers = decoder_layers - # TODO: come back to this later self.layers = nn.ModuleList([copy.deepcopy(decoder_layer) for _ in range(num_layers)]) self.hidden_dim = hidden_dim self.num_layers = num_layers @@ -271,14 +269,15 @@ def __init__( num_decoder_layers: int, num_heads: int = 8, num_decoder_points: int = 4, - # num_levels: int = 3, + num_levels: int = 3, dropout: float = 0.0, num_denoising: int 
= 100, ) -> None: super().__init__() self.num_queries = num_queries # TODO: verify this is correct - self.num_levels = len(in_channels) + assert len(in_channels) <= num_levels + self.num_levels = num_levels # build the input projection layers self.input_proj = nn.ModuleList() @@ -292,7 +291,7 @@ def __init__( embed_dim=hidden_dim, num_heads=num_heads, dropout=dropout, - num_levels=len(in_channels), + num_levels=self.num_levels, num_points=num_decoder_points, ) diff --git a/kornia/contrib/models/rt_detr/model.py b/kornia/contrib/models/rt_detr/model.py index 0bc2cfad40..1239702043 100644 --- a/kornia/contrib/models/rt_detr/model.py +++ b/kornia/contrib/models/rt_detr/model.py @@ -233,8 +233,6 @@ def forward(self, images: Tensor) -> tuple[Tensor, Tensor]: :math:`K` is the number of classes. - **boxes** - Tensor of shape :math:`(N, Q, 4)`, where :math:`Q` is the number of queries. """ - # if self.training: - # raise RuntimeError("Only evaluation mode is supported. Please call model.eval().") feats = self.backbone(images) feats_buf = self.encoder(feats) diff --git a/kornia/contrib/models/rt_detr/post_processor.py b/kornia/contrib/models/rt_detr/post_processor.py index a189f4ebd0..fb73e17e56 100644 --- a/kornia/contrib/models/rt_detr/post_processor.py +++ b/kornia/contrib/models/rt_detr/post_processor.py @@ -39,7 +39,7 @@ def forward(self, logits: Tensor, boxes: Tensor, original_sizes: Tensor) -> list sizes_wh = torch.empty(1, 1, 2, device=boxes.device, dtype=boxes.dtype) sizes_wh[..., 0] = original_sizes[0][0] - sizes_wh[..., 1] = original_sizes[0][0] + sizes_wh[..., 1] = original_sizes[0][1] sizes_wh = sizes_wh.repeat(1, 1, 2) boxes_xy = boxes_xy * sizes_wh diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index a148a4c210..35c9beea64 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -113,12 +113,17 @@ def __init__(self, size: tuple[int, int], interpolation_mode: str = "bilinear") self.interpolation_mode = interpolation_mode def forward(self, imgs: list[Tensor]) -> tuple[Tensor, Tensor]: + """ + Returns: + resized_imgs: resized images in a batch. + original_sizes: the original image sizes of (height, width). + """ # TODO: support other input formats e.g. file path, numpy resized_imgs, original_sizes = [], [] for i in range(len(imgs)): img = imgs[i] # NOTE: assume that image layout is CHW - original_sizes.append([img.shape[1], img.shape[2]]) + original_sizes.append([img.shape[-2], img.shape[-1]]) resized_imgs.append( # TODO: fix kornia resize to support onnx torch.nn.functional.interpolate(img.unsqueeze(0), size=self.size, mode=self.interpolation_mode) From 2dcba2c1e8cd6015afb7b2d3e0ea57362a50b6e5 Mon Sep 17 00:00:00 2001 From: shijianjian Date: Thu, 5 Sep 2024 21:16:01 +0300 Subject: [PATCH 11/46] update --- kornia/contrib/models/rt_detr/architecture/rtdetr_head.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py b/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py index 23a343c18b..6345577e5a 100644 --- a/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py +++ b/kornia/contrib/models/rt_detr/architecture/rtdetr_head.py @@ -276,7 +276,8 @@ def __init__( super().__init__() self.num_queries = num_queries # TODO: verify this is correct - assert len(in_channels) <= num_levels + if len(in_channels) > num_levels: + raise ValueError(f"`num_levels` cannot be greater than {len(in_channels)}. 
Got {num_levels}.") self.num_levels = num_levels # build the input projection layers From 83fa545c5bbe45f49fc71bf4ed8d1f763569352a Mon Sep 17 00:00:00 2001 From: shijianjian Date: Fri, 6 Sep 2024 11:40:32 +0300 Subject: [PATCH 12/46] update --- kornia/contrib/models/rt_detr/model.py | 29 ++++++++++++++----- kornia/models/__init__.py | 0 kornia/models/detector/__init__.py | 0 kornia/models/detector/rtdetr.py | 39 ++++++++++++++++++++++++++ 4 files changed, 61 insertions(+), 7 deletions(-) create mode 100644 kornia/models/__init__.py create mode 100644 kornia/models/detector/__init__.py create mode 100644 kornia/models/detector/rtdetr.py diff --git a/kornia/contrib/models/rt_detr/model.py b/kornia/contrib/models/rt_detr/model.py index 1239702043..3692788ed1 100644 --- a/kornia/contrib/models/rt_detr/model.py +++ b/kornia/contrib/models/rt_detr/model.py @@ -175,6 +175,9 @@ def from_pretrained(model_name: str) -> RTDETR: model_name: 'rtdetr_r18vd', 'rtdetr_r34vd', 'rtdetr_r50vd_m', 'rtdetr_r50vd', 'rtdetr_r101vd'. """ + if model_name not in URLs: + raise ValueError(f"No pretrained model for '{model_name}'. Please select from {list(URLs.keys())}.") + state_dict = torch.hub.load_state_dict_from_url( URLs[model_name], map_location="cuda:0" if torch.cuda.is_available() else "cpu" ) @@ -206,20 +209,32 @@ def _state_dict_proc(state_dict: dict[str, Tensor]) -> dict[str, Tensor]: return new_state_dict + model = RTDETR.from_name(model_name, num_classes=80) + + model.load_state_dict(_state_dict_proc(state_dict)) + return model + + @staticmethod + def from_name(model_name: str, num_classes: int = 80) -> RTDETR: + """Load model without pretrained weights. + + Args: + model_name: 'rtdetr_r18vd', 'rtdetr_r34vd', 'rtdetr_r50vd_m', 'rtdetr_r50vd', 'rtdetr_r101vd'. + """ + if model_name == "rtdetr_r18vd": - model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet18d, 80)) + model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet18d, num_classes)) elif model_name == "rtdetr_r34vd": - model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet34d, 80)) + model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet34d, num_classes)) elif model_name == "rtdetr_r50vd_m": - model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet50d, 80)) + model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet50d, num_classes)) elif model_name == "rtdetr_r50vd": - model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet50d, 80)) + model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet50d, num_classes)) elif model_name == "rtdetr_r101vd": - model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet101d, 80)) + model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet101d, num_classes)) else: raise ValueError - - model.load_state_dict(_state_dict_proc(state_dict)) + return model def forward(self, images: Tensor) -> tuple[Tensor, Tensor]: diff --git a/kornia/models/__init__.py b/kornia/models/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/kornia/models/detector/__init__.py b/kornia/models/detector/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/kornia/models/detector/rtdetr.py b/kornia/models/detector/rtdetr.py new file mode 100644 index 0000000000..6630c4632e --- /dev/null +++ b/kornia/models/detector/rtdetr.py @@ -0,0 +1,39 @@ +from typing import Optional +import warnings + +from kornia.core import Module +from kornia.contrib.models.rt_detr.model import RTDETR, RTDETRConfig +from kornia.contrib.models.rt_detr import 
DETRPostProcessor +from kornia.contrib.object_detection import ResizePreProcessor, ObjectDetector + + +class RTDETRDetectorBuilder: + + @staticmethod + def build( + model_name: Optional[str] = None, + config: Optional[RTDETRConfig] = None, + pretrained: bool = True, + image_size: int = 640, + confidence_threshold: float = 0.5 + ) -> ObjectDetector: + if (model_name is not None and config is not None): + raise ValueError("Either `model_name` or `config` should be `None`.") + + if model_name is None and config is None: + warnings.warn("No `model_name` or `config` found. Will build `rtdetr_r18vd`.") + model_name = "rtdetr_r18vd" + + if config is not None: + model = RTDETR.from_config(config) + else: + if pretrained: + model = RTDETR.from_pretrained(model_name) + else: + model = RTDETR.from_name(model_name) + + return ObjectDetector( + model, + ResizePreProcessor(image_size), + DETRPostProcessor(confidence_threshold) + ) From 023133a1062cda0f8ad1ae6c14666840e44b43ed Mon Sep 17 00:00:00 2001 From: shijianjian Date: Fri, 6 Sep 2024 11:47:21 +0300 Subject: [PATCH 13/46] update --- kornia/contrib/object_detection.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index 35c9beea64..4c90d9c6bb 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -8,6 +8,7 @@ from kornia.core import Module, Tensor, as_tensor, concatenate from kornia.core.check import KORNIA_CHECK_SHAPE +from kornia.utils.draw import draw_rectangle __all__ = [ "BoundingBoxDataFormat", @@ -164,6 +165,21 @@ def forward(self, images: list[Tensor]) -> list[Tensor]: detections = self.post_processor(logits, boxes, images_sizes) return detections + def draw(self, images: list[Tensor]) -> list[Tensor]: + """Very simple drawing. Needs to be more fancy later. 
+ """ + detections = self.forward(images) + output = [] + for image, detection in zip(images, detections): + out_img = image.clone() + for out in detection: + out_img = draw_rectangle( + out_img, + torch.Tensor([[[out[-4], out[-3], out[-4] + out[-2], out[-3] + out[-1]]]]) + ) + output.append(out_img) + return output + def compile( self, *, From b69d535b17191e029f4a98a40846c1e220465146 Mon Sep 17 00:00:00 2001 From: shijianjian Date: Fri, 6 Sep 2024 11:54:56 +0300 Subject: [PATCH 14/46] update --- kornia/contrib/models/rt_detr/post_processor.py | 4 ++-- kornia/contrib/object_detection.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kornia/contrib/models/rt_detr/post_processor.py b/kornia/contrib/models/rt_detr/post_processor.py index fb73e17e56..95ad6109c7 100644 --- a/kornia/contrib/models/rt_detr/post_processor.py +++ b/kornia/contrib/models/rt_detr/post_processor.py @@ -38,8 +38,8 @@ def forward(self, logits: Tensor, boxes: Tensor, original_sizes: Tensor) -> list boxes_xy = concatenate([cxcy - wh * 0.5, wh], -1) sizes_wh = torch.empty(1, 1, 2, device=boxes.device, dtype=boxes.dtype) - sizes_wh[..., 0] = original_sizes[0][0] - sizes_wh[..., 1] = original_sizes[0][1] + sizes_wh[..., 0] = original_sizes[0][1] + sizes_wh[..., 1] = original_sizes[0][0] sizes_wh = sizes_wh.repeat(1, 1, 2) boxes_xy = boxes_xy * sizes_wh diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index 4c90d9c6bb..688eee98d1 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -171,7 +171,7 @@ def draw(self, images: list[Tensor]) -> list[Tensor]: detections = self.forward(images) output = [] for image, detection in zip(images, detections): - out_img = image.clone() + out_img = image[None].clone() for out in detection: out_img = draw_rectangle( out_img, From a9412fc0782890ede16f8385c661f86e293a6fa6 Mon Sep 17 00:00:00 2001 From: shijianjian Date: Fri, 6 Sep 2024 12:01:52 +0300 Subject: [PATCH 15/46] update --- kornia/contrib/object_detection.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index 688eee98d1..121593c6ca 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -8,6 +8,8 @@ from kornia.core import Module, Tensor, as_tensor, concatenate from kornia.core.check import KORNIA_CHECK_SHAPE +from kornia.core.external import PILImage as Image +from kornia.core.external import numpy as np from kornia.utils.draw import draw_rectangle __all__ = [ @@ -165,7 +167,7 @@ def forward(self, images: list[Tensor]) -> list[Tensor]: detections = self.post_processor(logits, boxes, images_sizes) return detections - def draw(self, images: list[Tensor]) -> list[Tensor]: + def draw(self, images: list[Tensor], output_type: str = "torch") -> list[Tensor] | Image.Image: # type: ignore """Very simple drawing. Needs to be more fancy later. 
""" detections = self.forward(images) @@ -177,7 +179,11 @@ def draw(self, images: list[Tensor]) -> list[Tensor]: out_img, torch.Tensor([[[out[-4], out[-3], out[-4] + out[-2], out[-3] + out[-1]]]]) ) - output.append(out_img) + if output_type == "torch": + output.append(out_img) + elif output_type == "pil": + output.append(Image.fromarray( + (out_img[0] * 255).permute(1, 2, 0).numpy().astype(np.uint8))) # type: ignore return output def compile( From 5f2aae546d70960837df5c6f65ffec59335d3397 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 6 Sep 2024 09:03:21 +0000 Subject: [PATCH 16/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- kornia/contrib/models/rt_detr/model.py | 4 ++-- kornia/contrib/object_detection.py | 10 ++++----- kornia/models/detector/rtdetr.py | 31 ++++++++++---------------- 3 files changed, 19 insertions(+), 26 deletions(-) diff --git a/kornia/contrib/models/rt_detr/model.py b/kornia/contrib/models/rt_detr/model.py index 3692788ed1..6871b85260 100644 --- a/kornia/contrib/models/rt_detr/model.py +++ b/kornia/contrib/models/rt_detr/model.py @@ -213,7 +213,7 @@ def _state_dict_proc(state_dict: dict[str, Tensor]) -> dict[str, Tensor]: model.load_state_dict(_state_dict_proc(state_dict)) return model - + @staticmethod def from_name(model_name: str, num_classes: int = 80) -> RTDETR: """Load model without pretrained weights. @@ -234,7 +234,7 @@ def from_name(model_name: str, num_classes: int = 80) -> RTDETR: model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet101d, num_classes)) else: raise ValueError - + return model def forward(self, images: Tensor) -> tuple[Tensor, Tensor]: diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index 121593c6ca..ed75882f63 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -168,7 +168,9 @@ def forward(self, images: list[Tensor]) -> list[Tensor]: return detections def draw(self, images: list[Tensor], output_type: str = "torch") -> list[Tensor] | Image.Image: # type: ignore - """Very simple drawing. Needs to be more fancy later. + """Very simple drawing. + + Needs to be more fancy later. 
""" detections = self.forward(images) output = [] @@ -176,14 +178,12 @@ def draw(self, images: list[Tensor], output_type: str = "torch") -> list[Tensor] out_img = image[None].clone() for out in detection: out_img = draw_rectangle( - out_img, - torch.Tensor([[[out[-4], out[-3], out[-4] + out[-2], out[-3] + out[-1]]]]) + out_img, torch.Tensor([[[out[-4], out[-3], out[-4] + out[-2], out[-3] + out[-1]]]]) ) if output_type == "torch": output.append(out_img) elif output_type == "pil": - output.append(Image.fromarray( - (out_img[0] * 255).permute(1, 2, 0).numpy().astype(np.uint8))) # type: ignore + output.append(Image.fromarray((out_img[0] * 255).permute(1, 2, 0).numpy().astype(np.uint8))) # type: ignore return output def compile( diff --git a/kornia/models/detector/rtdetr.py b/kornia/models/detector/rtdetr.py index 6630c4632e..2d808b3d0f 100644 --- a/kornia/models/detector/rtdetr.py +++ b/kornia/models/detector/rtdetr.py @@ -1,39 +1,32 @@ -from typing import Optional import warnings +from typing import Optional -from kornia.core import Module -from kornia.contrib.models.rt_detr.model import RTDETR, RTDETRConfig from kornia.contrib.models.rt_detr import DETRPostProcessor -from kornia.contrib.object_detection import ResizePreProcessor, ObjectDetector +from kornia.contrib.models.rt_detr.model import RTDETR, RTDETRConfig +from kornia.contrib.object_detection import ObjectDetector, ResizePreProcessor class RTDETRDetectorBuilder: - @staticmethod def build( model_name: Optional[str] = None, config: Optional[RTDETRConfig] = None, pretrained: bool = True, image_size: int = 640, - confidence_threshold: float = 0.5 + confidence_threshold: float = 0.5, ) -> ObjectDetector: - if (model_name is not None and config is not None): + if model_name is not None and config is not None: raise ValueError("Either `model_name` or `config` should be `None`.") - + if model_name is None and config is None: warnings.warn("No `model_name` or `config` found. Will build `rtdetr_r18vd`.") model_name = "rtdetr_r18vd" - + if config is not None: model = RTDETR.from_config(config) + elif pretrained: + model = RTDETR.from_pretrained(model_name) else: - if pretrained: - model = RTDETR.from_pretrained(model_name) - else: - model = RTDETR.from_name(model_name) - - return ObjectDetector( - model, - ResizePreProcessor(image_size), - DETRPostProcessor(confidence_threshold) - ) + model = RTDETR.from_name(model_name) + + return ObjectDetector(model, ResizePreProcessor(image_size), DETRPostProcessor(confidence_threshold)) From 928aec765c5f6d977ac24e951aa10bc73d499694 Mon Sep 17 00:00:00 2001 From: shijianjian Date: Fri, 6 Sep 2024 12:56:54 +0300 Subject: [PATCH 17/46] update --- kornia/contrib/object_detection.py | 10 ++++++---- kornia/models/detector/rtdetr.py | 20 ++++++++++++-------- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index ed75882f63..5ff63566e1 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -167,10 +167,12 @@ def forward(self, images: list[Tensor]) -> list[Tensor]: detections = self.post_processor(logits, boxes, images_sizes) return detections - def draw(self, images: list[Tensor], output_type: str = "torch") -> list[Tensor] | Image.Image: # type: ignore - """Very simple drawing. - - Needs to be more fancy later. + def draw( + self, + images: list[Tensor], + output_type: str = "torch" + ) -> list[Tensor] | list[Image.Image]: # type: ignore + """Very simple drawing. 
Needs to be more fancy later. """ detections = self.forward(images) output = [] diff --git a/kornia/models/detector/rtdetr.py b/kornia/models/detector/rtdetr.py index 2d808b3d0f..a2a53d401a 100644 --- a/kornia/models/detector/rtdetr.py +++ b/kornia/models/detector/rtdetr.py @@ -18,15 +18,19 @@ def build( if model_name is not None and config is not None: raise ValueError("Either `model_name` or `config` should be `None`.") - if model_name is None and config is None: - warnings.warn("No `model_name` or `config` found. Will build `rtdetr_r18vd`.") - model_name = "rtdetr_r18vd" - if config is not None: model = RTDETR.from_config(config) - elif pretrained: - model = RTDETR.from_pretrained(model_name) + elif model_name is not None: + if pretrained: + model = RTDETR.from_pretrained(model_name) + else: + model = RTDETR.from_name(model_name) else: - model = RTDETR.from_name(model_name) + warnings.warn("No `model_name` or `config` found. Will build pretrained `rtdetr_r18vd`.") + model = RTDETR.from_pretrained("rtdetr_r18vd") - return ObjectDetector(model, ResizePreProcessor(image_size), DETRPostProcessor(confidence_threshold)) + return ObjectDetector( + model, + ResizePreProcessor(image_size), + DETRPostProcessor(confidence_threshold) + ) From 3d7ac8419432ebad0d11bb42a76bf6469a53651c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 6 Sep 2024 09:58:14 +0000 Subject: [PATCH 18/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- kornia/contrib/object_detection.py | 10 ++++------ kornia/models/detector/rtdetr.py | 6 +----- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index 5ff63566e1..47aff20a6f 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -167,12 +167,10 @@ def forward(self, images: list[Tensor]) -> list[Tensor]: detections = self.post_processor(logits, boxes, images_sizes) return detections - def draw( - self, - images: list[Tensor], - output_type: str = "torch" - ) -> list[Tensor] | list[Image.Image]: # type: ignore - """Very simple drawing. Needs to be more fancy later. + def draw(self, images: list[Tensor], output_type: str = "torch") -> list[Tensor] | list[Image.Image]: # type: ignore + """Very simple drawing. + + Needs to be more fancy later. """ detections = self.forward(images) output = [] diff --git a/kornia/models/detector/rtdetr.py b/kornia/models/detector/rtdetr.py index a2a53d401a..d5f11b9a8c 100644 --- a/kornia/models/detector/rtdetr.py +++ b/kornia/models/detector/rtdetr.py @@ -29,8 +29,4 @@ def build( warnings.warn("No `model_name` or `config` found. 
Will build pretrained `rtdetr_r18vd`.") model = RTDETR.from_pretrained("rtdetr_r18vd") - return ObjectDetector( - model, - ResizePreProcessor(image_size), - DETRPostProcessor(confidence_threshold) - ) + return ObjectDetector(model, ResizePreProcessor(image_size), DETRPostProcessor(confidence_threshold)) From db87c53c523a0c31bdb7a2cd4dcd899ed1041a61 Mon Sep 17 00:00:00 2001 From: shijianjian Date: Fri, 6 Sep 2024 13:17:28 +0300 Subject: [PATCH 19/46] update --- kornia/contrib/object_detection.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index 47aff20a6f..497cae17a3 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -1,5 +1,7 @@ from __future__ import annotations +import os +import datetime from dataclasses import dataclass from enum import Enum from typing import Optional @@ -11,6 +13,7 @@ from kornia.core.external import PILImage as Image from kornia.core.external import numpy as np from kornia.utils.draw import draw_rectangle +from kornia.io import write_image __all__ = [ "BoundingBoxDataFormat", @@ -181,11 +184,28 @@ def draw(self, images: list[Tensor], output_type: str = "torch") -> list[Tensor] out_img, torch.Tensor([[[out[-4], out[-3], out[-4] + out[-2], out[-3] + out[-1]]]]) ) if output_type == "torch": - output.append(out_img) + output.append(out_img[0]) elif output_type == "pil": output.append(Image.fromarray((out_img[0] * 255).permute(1, 2, 0).numpy().astype(np.uint8))) # type: ignore + else: + raise RuntimeError(f"Unsupported output type `{output_type}`.") return output + def save(self, images: list[Tensor], directory: Optional[str] = None) -> None: + """Saves the output image(s) to a directory. + + Args: + name: Directory to save the images. + n_row: Number of images displayed in each row of the grid. 
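+
+        If `directory` is None, a timestamped folder under `Kornia_outputs/` is created.
+
+        Example:
+            A minimal sketch (the `detector` instance and `images` list are illustrative
+            placeholders for an already-built detector and its inputs):
+
+            >>> detector.save(images, directory="./detections")  # doctest: +SKIP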
+ """ + if directory is None: + name = f"detection-{datetime.datetime.now(tz=datetime.timezone.utc).strftime('%Y%m%d%H%M%S')!s}" + directory = os.path.join("Kornia_outputs", name) + outputs = self.draw(images) + os.makedirs(directory, exist_ok=True) + for i, out_image in enumerate(outputs): + write_image(os.path.join(directory, f"{str(i).zfill(6)}.jpg"), out_image.mul(255.0).byte()) + def compile( self, *, From 70ae085cd693b5d6cb92e8b44aaf0f9c3be6bfe5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 6 Sep 2024 10:19:23 +0000 Subject: [PATCH 20/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- kornia/contrib/object_detection.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index 497cae17a3..b348899a3b 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -1,7 +1,7 @@ from __future__ import annotations -import os import datetime +import os from dataclasses import dataclass from enum import Enum from typing import Optional @@ -12,8 +12,8 @@ from kornia.core.check import KORNIA_CHECK_SHAPE from kornia.core.external import PILImage as Image from kornia.core.external import numpy as np -from kornia.utils.draw import draw_rectangle from kornia.io import write_image +from kornia.utils.draw import draw_rectangle __all__ = [ "BoundingBoxDataFormat", From 82e3240ca8cf1cc3240a519ae3a0decff1db109b Mon Sep 17 00:00:00 2001 From: shijianjian Date: Fri, 6 Sep 2024 14:27:04 +0300 Subject: [PATCH 21/46] update --- kornia/models/detector/rtdetr.py | 114 ++++++++++++++++++++++++++++++- 1 file changed, 112 insertions(+), 2 deletions(-) diff --git a/kornia/models/detector/rtdetr.py b/kornia/models/detector/rtdetr.py index d5f11b9a8c..cabf060027 100644 --- a/kornia/models/detector/rtdetr.py +++ b/kornia/models/detector/rtdetr.py @@ -1,20 +1,54 @@ import warnings from typing import Optional +import torch +import torch.nn as nn + from kornia.contrib.models.rt_detr import DETRPostProcessor from kornia.contrib.models.rt_detr.model import RTDETR, RTDETRConfig from kornia.contrib.object_detection import ObjectDetector, ResizePreProcessor +from kornia.core import rand class RTDETRDetectorBuilder: + """A builder class for constructing RT-DETR object detection models. + + This class provides static methods to: + - Build an object detection model from a model name or configuration. + - Export the model to ONNX format for inference. + """ + @staticmethod def build( model_name: Optional[str] = None, config: Optional[RTDETRConfig] = None, pretrained: bool = True, - image_size: int = 640, + image_size: Optional[int] = 640, confidence_threshold: float = 0.5, ) -> ObjectDetector: + """Builds and returns an RT-DETR object detector model. + + Either `model_name` or `config` must be provided. If neither is provided, + a default pretrained model (`rtdetr_r18vd`) will be built. + + Args: + model_name: + Name of the RT-DETR model to load. Can be one of the available pretrained models. + config: + A custom configuration object for building the RT-DETR model. + pretrained: + Whether to load a pretrained version of the model (applies when `model_name` is provided). + image_size: + The size to which input images will be resized during preprocessing. + If None, no resizing will be performed before passing to the model. 
Recommended scales include + [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]. + confidence_threshold: + The confidence threshold used during post-processing to filter detections. + + Returns: + ObjectDetector + An object detector instance initialized with the specified model, preprocessor, and post-processor. + """ if model_name is not None and config is not None: raise ValueError("Either `model_name` or `config` should be `None`.") @@ -29,4 +63,80 @@ def build( warnings.warn("No `model_name` or `config` found. Will build pretrained `rtdetr_r18vd`.") model = RTDETR.from_pretrained("rtdetr_r18vd") - return ObjectDetector(model, ResizePreProcessor(image_size), DETRPostProcessor(confidence_threshold)) + return ObjectDetector( + model, + ResizePreProcessor(image_size) if image_size is not None else nn.Identity(), + DETRPostProcessor(confidence_threshold) + ) + + @staticmethod + def to_onnx( + onnx_name: Optional[str] = None, + model_name: Optional[str] = None, + config: Optional[RTDETRConfig] = None, + pretrained: bool = True, + image_size: Optional[int] = 640, + confidence_threshold: float = 0.5, + ) -> None: + """Exports an RT-DETR object detection model to ONNX format. + + Either `model_name` or `config` must be provided. If neither is provided, + a default pretrained model (`rtdetr_r18vd`) will be built. + + Args: + model_name: + Name of the RT-DETR model to load. Can be one of the available pretrained models. + config: + A custom configuration object for building the RT-DETR model. + pretrained: + Whether to load a pretrained version of the model (applies when `model_name` is provided). + image_size: + The size to which input images will be resized during preprocessing. + If None, image_size will be dynamic. Recommended scales include + [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]. + confidence_threshold: + The confidence threshold used during post-processing to filter detections. + + Returns: + ObjectDetector + An object detector instance initialized with the specified model, preprocessor, and post-processor. 
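+
+        Example:
+            A minimal export sketch (only documented arguments are used; the output file
+            name is generated by this method when `onnx_name` is not given):
+
+            >>> RTDETRDetectorBuilder.to_onnx(model_name="rtdetr_r18vd", image_size=640)  # doctest: +SKIP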
+ """ + + detector = RTDETRDetectorBuilder.build( + model_name=model_name, + config=config, + pretrained=pretrained, + image_size=image_size, + confidence_threshold=confidence_threshold, + ) + if onnx_name is None: + _model_name = model_name + if model_name is None and config is not None: + _model_name = "rtdetr-customized" + elif model_name is None and config is None: + _model_name = "rtdetr_r18vd" + onnx_name = f"Kornia-RTDETR-{_model_name}-{image_size}.onnx" + + if image_size is None: + val_image = rand(1, 3, 640, 640) + dynamic_axes={ + 'input' : {0 : 'batch_size', 2: 'height', 3: 'width'}, + 'output' : {0 : 'batch_size', 2: 'height', 3: 'width'} + } + else: + val_image = rand(1, 3, image_size, image_size) + dynamic_axes={ + 'input' : {0 : 'batch_size'}, + 'output' : {0 : 'batch_size'} + } + torch.onnx.export( + detector, + val_image, + onnx_name, + export_params=True, + opset_version=17, + do_constant_folding=True, + input_names=['input'], + output_names=['output'], + dynamic_axes=dynamic_axes + ) From 22bb115683d50682038c8b4a769fb82b78ca4c76 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 6 Sep 2024 11:27:28 +0000 Subject: [PATCH 22/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- kornia/models/detector/rtdetr.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/kornia/models/detector/rtdetr.py b/kornia/models/detector/rtdetr.py index cabf060027..d16bb9e372 100644 --- a/kornia/models/detector/rtdetr.py +++ b/kornia/models/detector/rtdetr.py @@ -2,7 +2,7 @@ from typing import Optional import torch -import torch.nn as nn +from torch import nn from kornia.contrib.models.rt_detr import DETRPostProcessor from kornia.contrib.models.rt_detr.model import RTDETR, RTDETRConfig @@ -66,7 +66,7 @@ def build( return ObjectDetector( model, ResizePreProcessor(image_size) if image_size is not None else nn.Identity(), - DETRPostProcessor(confidence_threshold) + DETRPostProcessor(confidence_threshold), ) @staticmethod @@ -119,16 +119,13 @@ def to_onnx( if image_size is None: val_image = rand(1, 3, 640, 640) - dynamic_axes={ - 'input' : {0 : 'batch_size', 2: 'height', 3: 'width'}, - 'output' : {0 : 'batch_size', 2: 'height', 3: 'width'} + dynamic_axes = { + "input": {0: "batch_size", 2: "height", 3: "width"}, + "output": {0: "batch_size", 2: "height", 3: "width"}, } else: val_image = rand(1, 3, image_size, image_size) - dynamic_axes={ - 'input' : {0 : 'batch_size'}, - 'output' : {0 : 'batch_size'} - } + dynamic_axes = {"input": {0: "batch_size"}, "output": {0: "batch_size"}} torch.onnx.export( detector, val_image, @@ -136,7 +133,7 @@ def to_onnx( export_params=True, opset_version=17, do_constant_folding=True, - input_names=['input'], - output_names=['output'], - dynamic_axes=dynamic_axes + input_names=["input"], + output_names=["output"], + dynamic_axes=dynamic_axes, ) From d6390258d2901488b90c306e9553efcc65618b2a Mon Sep 17 00:00:00 2001 From: shijianjian Date: Fri, 6 Sep 2024 23:04:00 +0300 Subject: [PATCH 23/46] update --- kornia/models/detector/rtdetr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kornia/models/detector/rtdetr.py b/kornia/models/detector/rtdetr.py index d16bb9e372..d0421ff50d 100644 --- a/kornia/models/detector/rtdetr.py +++ b/kornia/models/detector/rtdetr.py @@ -65,8 +65,8 @@ def build( return ObjectDetector( model, - ResizePreProcessor(image_size) if image_size is not None else 
nn.Identity(), - DETRPostProcessor(confidence_threshold), + ResizePreProcessor((image_size, image_size)) if image_size is not None else nn.Identity(), + DETRPostProcessor(confidence_threshold) ) @staticmethod From a902739010341113b242867a6e5dee8d1ab921d8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 6 Sep 2024 20:04:40 +0000 Subject: [PATCH 24/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- kornia/models/detector/rtdetr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kornia/models/detector/rtdetr.py b/kornia/models/detector/rtdetr.py index d0421ff50d..bfcb1944f7 100644 --- a/kornia/models/detector/rtdetr.py +++ b/kornia/models/detector/rtdetr.py @@ -66,7 +66,7 @@ def build( return ObjectDetector( model, ResizePreProcessor((image_size, image_size)) if image_size is not None else nn.Identity(), - DETRPostProcessor(confidence_threshold) + DETRPostProcessor(confidence_threshold), ) @staticmethod From 292f410ee1629df69013d23f461fe00bb513f7db Mon Sep 17 00:00:00 2001 From: shijianjian Date: Sat, 7 Sep 2024 12:54:12 +0300 Subject: [PATCH 25/46] doc update --- docs/source/models/rt_detr.rst | 22 ++++++++++++++++++++++ kornia/io/io.py | 6 +++++- kornia/models/detector/rtdetr.py | 1 + 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/docs/source/models/rt_detr.rst b/docs/source/models/rt_detr.rst index cfeaa06c8a..ca67cb06b0 100644 --- a/docs/source/models/rt_detr.rst +++ b/docs/source/models/rt_detr.rst @@ -1,6 +1,28 @@ Real-Time Detection Transformer (RT-DETR) ========================================= +.. code-block:: python + + from kornia.io import load_image + from kornia.models.detector.rtdetr import RTDETRDetectorBuilder + + input_img = load_image(img_path)[None] # Load image to BCHW + + # NOTE: available models: 'rtdetr_r18vd', 'rtdetr_r34vd', 'rtdetr_r50vd_m', 'rtdetr_r50vd', 'rtdetr_r101vd'. + # NOTE: recommended image scales: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800] + detector = RTDETRDetectorBuilder.build("rtdetr_r18vd", image_size=640) + + # get the output boxes + boxes = detector(input_img) + + # draw the bounding boxes on the images directly. + output = detector.draw(input_img, output_type="pil") + output[0].save("Kornia-RTDETR-output.png") + + # convert the whole model to ONNX directly + RTDETRDetectorBuilder.to_onnx("RTDETR-640.onnx", model_name="rtdetr_r18vd", image_size=640) + + .. card:: :link: https://arxiv.org/abs/2304.08069 diff --git a/kornia/io/io.py b/kornia/io/io.py index 62d9ed45b9..334ecb2828 100644 --- a/kornia/io/io.py +++ b/kornia/io/io.py @@ -65,7 +65,11 @@ def _to_uint8(image: Tensor) -> Tensor: return image.mul(255.0).byte() -def load_image(path_file: str | Path, desired_type: ImageLoadType, device: Device = "cpu") -> Tensor: +def load_image( + path_file: str | Path, + desired_type: ImageLoadType = ImageLoadType.RGB32, + device: Device = "cpu" +) -> Tensor: """Read an image file and decode using the Kornia Rust backend. Args: diff --git a/kornia/models/detector/rtdetr.py b/kornia/models/detector/rtdetr.py index bfcb1944f7..e8a963cd0d 100644 --- a/kornia/models/detector/rtdetr.py +++ b/kornia/models/detector/rtdetr.py @@ -34,6 +34,7 @@ def build( Args: model_name: Name of the RT-DETR model to load. Can be one of the available pretrained models. + Including 'rtdetr_r18vd', 'rtdetr_r34vd', 'rtdetr_r50vd_m', 'rtdetr_r50vd', 'rtdetr_r101vd'. 
config: A custom configuration object for building the RT-DETR model. pretrained: From 7e87160a3e64dc44177eda566a55985b40764322 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 7 Sep 2024 09:54:38 +0000 Subject: [PATCH 26/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- kornia/io/io.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kornia/io/io.py b/kornia/io/io.py index 334ecb2828..1e8797d8b2 100644 --- a/kornia/io/io.py +++ b/kornia/io/io.py @@ -66,9 +66,7 @@ def _to_uint8(image: Tensor) -> Tensor: def load_image( - path_file: str | Path, - desired_type: ImageLoadType = ImageLoadType.RGB32, - device: Device = "cpu" + path_file: str | Path, desired_type: ImageLoadType = ImageLoadType.RGB32, device: Device = "cpu" ) -> Tensor: """Read an image file and decode using the Kornia Rust backend. From db1cb534d99cf87beb704bcb4fd56995cc684f65 Mon Sep 17 00:00:00 2001 From: edgar Date: Sat, 7 Sep 2024 22:42:52 +0200 Subject: [PATCH 27/46] post processor as in the original codew --- .../contrib/models/rt_detr/post_processor.py | 47 ++++++++++++++----- kornia/contrib/object_detection.py | 16 ++++--- 2 files changed, 44 insertions(+), 19 deletions(-) diff --git a/kornia/contrib/models/rt_detr/post_processor.py b/kornia/contrib/models/rt_detr/post_processor.py index 95ad6109c7..0ae898ad5e 100644 --- a/kornia/contrib/models/rt_detr/post_processor.py +++ b/kornia/contrib/models/rt_detr/post_processor.py @@ -1,11 +1,35 @@ +"""Post-processor for the RT-DETR model.""" + from __future__ import annotations -# TODO: import torch from kornia.core import Module, Tensor, concatenate +def mod(a, b): + """Compute the modulo operation for two numbers. + + This function calculates the remainder of the division of 'a' by 'b' + using the formula: a - (a // b) * b, which is equivalent to the modulo operation. + + Args: + a: The dividend. + b: The divisor. + + Returns: + The remainder of a divided by b. + + Example: + >>> mod(7, 3) + 1 + >>> mod(8.5, 3.2) + 2.1 + """ + return a - (a // b) * b + + +# TODO: deprecate the confidence threshold and add the num_top_queries as a parameter and num_classes as a parameter class DETRPostProcessor(Module): def __init__(self, confidence_threshold: float) -> None: super().__init__() @@ -45,16 +69,13 @@ def forward(self, logits: Tensor, boxes: Tensor, original_sizes: Tensor) -> list boxes_xy = boxes_xy * sizes_wh scores = logits.sigmoid() # RT-DETR was trained with focal loss. 
thus sigmoid is used instead of softmax - # the original code is slightly different - # it allows 1 bounding box to have multiple classes (multi-label) - scores, labels = scores.max(-1) - - detections: list[Tensor] = [] - for i in range(scores.shape[0]): - mask = scores[i] >= self.confidence_threshold - labels_i = labels[i, mask].unsqueeze(-1) - scores_i = scores[i, mask].unsqueeze(-1) - boxes_i = boxes_xy[i, mask] - detections.append(concatenate([labels_i, scores_i, boxes_i], -1)) + # retrieve the boxes with the highest score for each class + # https://github.com/lyuwenyu/RT-DETR/blob/b6bf0200b249a6e35b44e0308b6058f55b99696b/rtdetrv2_pytorch/src/zoo/rtdetr/rtdetr_postprocessor.py#L55-L62 + num_top_queries = 300 # TODO: make this configurable + num_classes = 80 # TODO: make this configurable + scores, index = torch.topk(scores.flatten(1), num_top_queries, dim=-1) + labels = mod(index, num_classes) + index = index // num_classes + boxes = boxes_xy.gather(dim=1, index=index.unsqueeze(-1).repeat(1, 1, boxes_xy.shape[-1])) - return detections + return concatenate([labels[..., None], scores[..., None], boxes], -1) diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index b348899a3b..9a289b4f17 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -12,6 +12,7 @@ from kornia.core.check import KORNIA_CHECK_SHAPE from kornia.core.external import PILImage as Image from kornia.core.external import numpy as np +from kornia.geometry.transform import resize from kornia.io import write_image from kornia.utils.draw import draw_rectangle @@ -126,13 +127,12 @@ def forward(self, imgs: list[Tensor]) -> tuple[Tensor, Tensor]: """ # TODO: support other input formats e.g. file path, numpy resized_imgs, original_sizes = [], [] - for i in range(len(imgs)): + for i in range(imgs.shape[0]): img = imgs[i] - # NOTE: assume that image layout is CHW original_sizes.append([img.shape[-2], img.shape[-1]]) resized_imgs.append( - # TODO: fix kornia resize to support onnx - torch.nn.functional.interpolate(img.unsqueeze(0), size=self.size, mode=self.interpolation_mode) + # TODO: fix kornia resize warnings + resize(img[None], size=self.size, interpolation=self.interpolation_mode) ) return concatenate(resized_imgs), as_tensor(original_sizes) @@ -181,7 +181,8 @@ def draw(self, images: list[Tensor], output_type: str = "torch") -> list[Tensor] out_img = image[None].clone() for out in detection: out_img = draw_rectangle( - out_img, torch.Tensor([[[out[-4], out[-3], out[-4] + out[-2], out[-3] + out[-1]]]]) + out_img, + torch.Tensor([[[out[-4], out[-3], out[-4] + out[-2], out[-3] + out[-1]]]]), ) if output_type == "torch": output.append(out_img[0]) @@ -204,7 +205,10 @@ def save(self, images: list[Tensor], directory: Optional[str] = None) -> None: outputs = self.draw(images) os.makedirs(directory, exist_ok=True) for i, out_image in enumerate(outputs): - write_image(os.path.join(directory, f"{str(i).zfill(6)}.jpg"), out_image.mul(255.0).byte()) + write_image( + os.path.join(directory, f"{str(i).zfill(6)}.jpg"), + out_image.mul(255.0).byte(), + ) def compile( self, From 82f206220b31735a44da64d7ba50853876c71a38 Mon Sep 17 00:00:00 2001 From: edgar Date: Sun, 8 Sep 2024 11:45:38 +0200 Subject: [PATCH 28/46] fix typing --- kornia/contrib/models/rt_detr/post_processor.py | 8 ++++---- kornia/contrib/object_detection.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/kornia/contrib/models/rt_detr/post_processor.py 
b/kornia/contrib/models/rt_detr/post_processor.py index 0ae898ad5e..3a7d8117c3 100644 --- a/kornia/contrib/models/rt_detr/post_processor.py +++ b/kornia/contrib/models/rt_detr/post_processor.py @@ -7,7 +7,7 @@ from kornia.core import Module, Tensor, concatenate -def mod(a, b): +def mod(a: Tensor, b: int) -> Tensor: """Compute the modulo operation for two numbers. This function calculates the remainder of the division of 'a' by 'b' @@ -35,7 +35,7 @@ def __init__(self, confidence_threshold: float) -> None: super().__init__() self.confidence_threshold = confidence_threshold - def forward(self, logits: Tensor, boxes: Tensor, original_sizes: Tensor) -> list[Tensor]: + def forward(self, logits: Tensor, boxes: Tensor, original_sizes: Tensor) -> Tensor: """Post-process outputs from DETR. Args: @@ -71,8 +71,8 @@ def forward(self, logits: Tensor, boxes: Tensor, original_sizes: Tensor) -> list # retrieve the boxes with the highest score for each class # https://github.com/lyuwenyu/RT-DETR/blob/b6bf0200b249a6e35b44e0308b6058f55b99696b/rtdetrv2_pytorch/src/zoo/rtdetr/rtdetr_postprocessor.py#L55-L62 - num_top_queries = 300 # TODO: make this configurable - num_classes = 80 # TODO: make this configurable + num_top_queries: int = 300 # TODO: make this configurable + num_classes: int = 80 # TODO: make this configurable scores, index = torch.topk(scores.flatten(1), num_top_queries, dim=-1) labels = mod(index, num_classes) index = index // num_classes diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index 9a289b4f17..c609214837 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -119,7 +119,7 @@ def __init__(self, size: tuple[int, int], interpolation_mode: str = "bilinear") self.size = size self.interpolation_mode = interpolation_mode - def forward(self, imgs: list[Tensor]) -> tuple[Tensor, Tensor]: + def forward(self, imgs: Tensor) -> tuple[Tensor, Tensor]: """ Returns: resized_imgs: resized images in a batch. From fea1f92f85a617ff2e0ef372f0703e825852b13a Mon Sep 17 00:00:00 2001 From: Jian S Date: Mon, 9 Sep 2024 01:42:13 +0800 Subject: [PATCH 29/46] update --- kornia/color/yuv.py | 2 +- .../contrib/models/rt_detr/post_processor.py | 11 ++++--- kornia/contrib/object_detection.py | 33 ++++++++++++------- kornia/core/external.py | 28 +++++++++++++--- kornia/geometry/transform/affwarp.py | 6 ++-- kornia/models/detector/__init__.py | 1 + kornia/models/detector/rtdetr.py | 25 +++++++------- kornia/utils/image.py | 2 +- 8 files changed, 73 insertions(+), 35 deletions(-) diff --git a/kornia/color/yuv.py b/kornia/color/yuv.py index e250e1ac36..1334be1089 100644 --- a/kornia/color/yuv.py +++ b/kornia/color/yuv.py @@ -122,7 +122,7 @@ def yuv_to_rgb(image: Tensor) -> Tensor: if not isinstance(image, Tensor): raise TypeError(f"Input type is not a Tensor. Got {type(image)}") - if len(image.shape) < 3 or image.shape[-3] != 3: + if image.dim() < 3 or image.shape[-3] != 3: raise ValueError(f"Input size must have a shape of (*, 3, H, W). 
Got {image.shape}") y: Tensor = image[..., 0, :, :] diff --git a/kornia/contrib/models/rt_detr/post_processor.py b/kornia/contrib/models/rt_detr/post_processor.py index 3a7d8117c3..8f79953bfc 100644 --- a/kornia/contrib/models/rt_detr/post_processor.py +++ b/kornia/contrib/models/rt_detr/post_processor.py @@ -61,10 +61,8 @@ def forward(self, logits: Tensor, boxes: Tensor, original_sizes: Tensor) -> Tens cxcy, wh = boxes[..., :2], boxes[..., 2:] boxes_xy = concatenate([cxcy - wh * 0.5, wh], -1) - sizes_wh = torch.empty(1, 1, 2, device=boxes.device, dtype=boxes.dtype) - sizes_wh[..., 0] = original_sizes[0][1] - sizes_wh[..., 1] = original_sizes[0][0] - sizes_wh = sizes_wh.repeat(1, 1, 2) + # Get dynamic size from the input tensor itself + sizes_wh = original_sizes[0].flip(0).unsqueeze(0).unsqueeze(0).repeat(1, 1, 2) boxes_xy = boxes_xy * sizes_wh scores = logits.sigmoid() # RT-DETR was trained with focal loss. thus sigmoid is used instead of softmax @@ -78,4 +76,7 @@ def forward(self, logits: Tensor, boxes: Tensor, original_sizes: Tensor) -> Tens index = index // num_classes boxes = boxes_xy.gather(dim=1, index=index.unsqueeze(-1).repeat(1, 1, boxes_xy.shape[-1])) - return concatenate([labels[..., None], scores[..., None], boxes], -1) + all_boxes = concatenate([labels[..., None], scores[..., None], boxes], -1) + + return all_boxes[(all_boxes[:, :, 1] > self.confidence_threshold).unsqueeze(-1).expand_as(all_boxes)].view( + all_boxes.shape[0], -1, all_boxes.shape[-1]) diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index c609214837..cffbac333c 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -4,7 +4,7 @@ import os from dataclasses import dataclass from enum import Enum -from typing import Optional +from typing import Optional, Union import torch @@ -119,7 +119,7 @@ def __init__(self, size: tuple[int, int], interpolation_mode: str = "bilinear") self.size = size self.interpolation_mode = interpolation_mode - def forward(self, imgs: Tensor) -> tuple[Tensor, Tensor]: + def forward(self, imgs: Union[Tensor, list[Tensor]]) -> tuple[Tensor, Tensor]: """ Returns: resized_imgs: resized images in a batch. @@ -127,14 +127,18 @@ def forward(self, imgs: Tensor) -> tuple[Tensor, Tensor]: """ # TODO: support other input formats e.g. file path, numpy resized_imgs, original_sizes = [], [] - for i in range(imgs.shape[0]): + + iters = len(imgs) if isinstance(imgs, list) else imgs.shape[0] + original_sizes = imgs.new_zeros((imgs.shape[0], 2)) + for i in range(iters): img = imgs[i] - original_sizes.append([img.shape[-2], img.shape[-1]]) + original_sizes[i, 0] = img.shape[-2] # Height + original_sizes[i, 1] = img.shape[-1] # Width resized_imgs.append( # TODO: fix kornia resize warnings resize(img[None], size=self.size, interpolation=self.interpolation_mode) ) - return concatenate(resized_imgs), as_tensor(original_sizes) + return concatenate(resized_imgs), original_sizes # TODO: move this to kornia.models as AlgorithmicModel api @@ -155,11 +159,12 @@ def __init__(self, model: Module, pre_processor: Module, post_processor: Module) self.post_processor = post_processor.eval() @torch.inference_mode() - def forward(self, images: list[Tensor]) -> list[Tensor]: + def forward(self, images: Union[Tensor, list[Tensor]]) -> list[Tensor]: """Detect objects in a given list of images. Args: - images: list of RGB images. Each image is a Tensor with shape :math:`(3, H, W)`. + images: If list of RGB images. 
Each image is a Tensor with shape :math:`(3, H, W)`. + If Tensor, a Tensor with shape :math:`(B, 3, H, W)`. Returns: list of detections found in each image. For item in a batch, shape is :math:`(D, 6)`, where :math:`D` is the @@ -170,12 +175,15 @@ def forward(self, images: list[Tensor]) -> list[Tensor]: detections = self.post_processor(logits, boxes, images_sizes) return detections - def draw(self, images: list[Tensor], output_type: str = "torch") -> list[Tensor] | list[Image.Image]: # type: ignore + def draw( + self, images: Union[Tensor, list[Tensor]], detections: Optional[Tensor] = None, output_type: str = "torch" + ) -> Union[Tensor, list[Tensor], list[Image.Image]]: # type: ignore """Very simple drawing. Needs to be more fancy later. """ - detections = self.forward(images) + if detections is None: + detections = self.forward(images) output = [] for image, detection in zip(images, detections): out_img = image[None].clone() @@ -192,7 +200,9 @@ def draw(self, images: list[Tensor], output_type: str = "torch") -> list[Tensor] raise RuntimeError(f"Unsupported output type `{output_type}`.") return output - def save(self, images: list[Tensor], directory: Optional[str] = None) -> None: + def save( + self, images: Union[Tensor, list[Tensor]], detections: Optional[Tensor] = None, directory: Optional[str] = None + ) -> None: """Saves the output image(s) to a directory. Args: @@ -202,13 +212,14 @@ def save(self, images: list[Tensor], directory: Optional[str] = None) -> None: if directory is None: name = f"detection-{datetime.datetime.now(tz=datetime.timezone.utc).strftime('%Y%m%d%H%M%S')!s}" directory = os.path.join("Kornia_outputs", name) - outputs = self.draw(images) + outputs = self.draw(images, detections) os.makedirs(directory, exist_ok=True) for i, out_image in enumerate(outputs): write_image( os.path.join(directory, f"{str(i).zfill(6)}.jpg"), out_image.mul(255.0).byte(), ) + print(f"Outputs are saved in {directory}") def compile( self, diff --git a/kornia/core/external.py b/kornia/core/external.py index 4efdbfe189..cee492f250 100644 --- a/kornia/core/external.py +++ b/kornia/core/external.py @@ -1,4 +1,6 @@ import importlib +import subprocess + from types import ModuleType from typing import List, Optional @@ -14,6 +16,7 @@ class LazyLoader: module_name: The name of the module to be lazily loaded. module: The actual module object, initialized to None and loaded upon first access. """ + auto_install: bool = False def __init__(self, module_name: str) -> None: """Initializes the LazyLoader with the name of the module. @@ -24,6 +27,10 @@ def __init__(self, module_name: str) -> None: self.module_name = module_name self.module: Optional[ModuleType] = None + def _install_package(self, module_name: str) -> None: + print(f"Installing `{self.module_name}` ...") + subprocess.run(["pip", "install", "-U", self.module_name]) + def _load(self) -> None: """Loads the module if it hasn't been loaded yet. @@ -34,10 +41,23 @@ def _load(self) -> None: try: self.module = importlib.import_module(self.module_name) except ImportError as e: - raise ImportError( - f"Optional dependency '{self.module_name}' is not installed. " - f"Please install it to use this functionality." - ) from e + if self.auto_install: + self._install_package(self.module_name) + else: + if_install = input( + f"Optional dependency '{self.module_name}' is not installed. " + "Do you wish to install the dependency? [Y]es, [N]o, [A]ll." 
+ ) + if if_install.lower() == "y": + subprocess.run(["pip", "install", "-U", self.module_name]) + elif if_install.lower() == "a": + subprocess.run(["pip", "install", "-U", self.module_name]) + self.auto_install = True + else: + raise ImportError( + f"Optional dependency '{self.module_name}' is not installed. " + f"Please install it to use this functionality." + ) from e def __getattr__(self, item: str) -> object: """Loads the module (if not already loaded) and returns the requested attribute. diff --git a/kornia/geometry/transform/affwarp.py b/kornia/geometry/transform/affwarp.py index b0abb14fe1..61cf05f4d4 100644 --- a/kornia/geometry/transform/affwarp.py +++ b/kornia/geometry/transform/affwarp.py @@ -570,8 +570,10 @@ def resize( aspect_ratio = w / h size = _side_to_image_size(size, aspect_ratio, side) - if size == input_size: - return input + # Skip this dangerous if-else when converting to ONNX. + if not torch.onnx.is_in_onnx_export(): + if size == input_size: + return input factors = (h / size[0], w / size[1]) diff --git a/kornia/models/detector/__init__.py b/kornia/models/detector/__init__.py index e69de29bb2..55a62efc22 100644 --- a/kornia/models/detector/__init__.py +++ b/kornia/models/detector/__init__.py @@ -0,0 +1 @@ +from .rtdetr import * diff --git a/kornia/models/detector/rtdetr.py b/kornia/models/detector/rtdetr.py index e8a963cd0d..492d7ea167 100644 --- a/kornia/models/detector/rtdetr.py +++ b/kornia/models/detector/rtdetr.py @@ -9,6 +9,8 @@ from kornia.contrib.object_detection import ObjectDetector, ResizePreProcessor from kornia.core import rand +__all__ = ["RTDETRDetectorBuilder"] + class RTDETRDetectorBuilder: """A builder class for constructing RT-DETR object detection models. @@ -72,13 +74,13 @@ def build( @staticmethod def to_onnx( - onnx_name: Optional[str] = None, model_name: Optional[str] = None, + onnx_name: Optional[str] = None, config: Optional[RTDETRConfig] = None, pretrained: bool = True, image_size: Optional[int] = 640, confidence_threshold: float = 0.5, - ) -> None: + ) -> tuple[str, ObjectDetector]: """Exports an RT-DETR object detection model to ONNX format. Either `model_name` or `config` must be provided. If neither is provided, @@ -99,8 +101,8 @@ def to_onnx( The confidence threshold used during post-processing to filter detections. Returns: - ObjectDetector - An object detector instance initialized with the specified model, preprocessor, and post-processor. + - The name of the ONNX model. + - The exported torch model. 
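+
+        Example:
+            A minimal sketch of consuming the returned pair (all arguments other than
+            `model_name` keep their defaults):
+
+            >>> onnx_name, detector = RTDETRDetectorBuilder.to_onnx(model_name="rtdetr_r18vd")  # doctest: +SKIP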
""" detector = RTDETRDetectorBuilder.build( @@ -118,15 +120,14 @@ def to_onnx( _model_name = "rtdetr_r18vd" onnx_name = f"Kornia-RTDETR-{_model_name}-{image_size}.onnx" + val_image = rand(1, 3, image_size, image_size) if image_size is None: val_image = rand(1, 3, 640, 640) - dynamic_axes = { - "input": {0: "batch_size", 2: "height", 3: "width"}, - "output": {0: "batch_size", 2: "height", 3: "width"}, - } - else: - val_image = rand(1, 3, image_size, image_size) - dynamic_axes = {"input": {0: "batch_size"}, "output": {0: "batch_size"}} + + dynamic_axes = { + "input": {0: "batch_size", 2: "height", 3: "width"}, + "output": {0: "batch_size"} + } torch.onnx.export( detector, val_image, @@ -138,3 +139,5 @@ def to_onnx( output_names=["output"], dynamic_axes=dynamic_axes, ) + + return onnx_name, detector diff --git a/kornia/utils/image.py b/kornia/utils/image.py index a3f6a76393..05293e757d 100644 --- a/kornia/utils/image.py +++ b/kornia/utils/image.py @@ -264,7 +264,7 @@ def _wrapper(input: Tensor, *args: Any, **kwargs: Any) -> Tensor: if not isinstance(input, Tensor): raise TypeError(f"Input input type is not a Tensor. Got {type(input)}") - if input.numel() == 0: + if input.shape.numel() == 0: raise ValueError("Invalid input tensor, it is empty.") input_shape = input.shape From 6f4b5f306e47403e0d49229e50fa0143524b5ecc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 8 Sep 2024 17:42:33 +0000 Subject: [PATCH 30/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- kornia/contrib/models/rt_detr/post_processor.py | 3 ++- kornia/contrib/object_detection.py | 2 +- kornia/core/external.py | 8 ++++---- kornia/models/detector/rtdetr.py | 5 +---- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/kornia/contrib/models/rt_detr/post_processor.py b/kornia/contrib/models/rt_detr/post_processor.py index 8f79953bfc..17ff06315d 100644 --- a/kornia/contrib/models/rt_detr/post_processor.py +++ b/kornia/contrib/models/rt_detr/post_processor.py @@ -79,4 +79,5 @@ def forward(self, logits: Tensor, boxes: Tensor, original_sizes: Tensor) -> Tens all_boxes = concatenate([labels[..., None], scores[..., None], boxes], -1) return all_boxes[(all_boxes[:, :, 1] > self.confidence_threshold).unsqueeze(-1).expand_as(all_boxes)].view( - all_boxes.shape[0], -1, all_boxes.shape[-1]) + all_boxes.shape[0], -1, all_boxes.shape[-1] + ) diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index cffbac333c..ed0131511e 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -8,7 +8,7 @@ import torch -from kornia.core import Module, Tensor, as_tensor, concatenate +from kornia.core import Module, Tensor, concatenate from kornia.core.check import KORNIA_CHECK_SHAPE from kornia.core.external import PILImage as Image from kornia.core.external import numpy as np diff --git a/kornia/core/external.py b/kornia/core/external.py index cee492f250..fa0af1a906 100644 --- a/kornia/core/external.py +++ b/kornia/core/external.py @@ -1,6 +1,5 @@ import importlib import subprocess - from types import ModuleType from typing import List, Optional @@ -16,6 +15,7 @@ class LazyLoader: module_name: The name of the module to be lazily loaded. module: The actual module object, initialized to None and loaded upon first access. 
""" + auto_install: bool = False def __init__(self, module_name: str) -> None: @@ -29,7 +29,7 @@ def __init__(self, module_name: str) -> None: def _install_package(self, module_name: str) -> None: print(f"Installing `{self.module_name}` ...") - subprocess.run(["pip", "install", "-U", self.module_name]) + subprocess.run(["pip", "install", "-U", self.module_name], check=False) def _load(self) -> None: """Loads the module if it hasn't been loaded yet. @@ -49,9 +49,9 @@ def _load(self) -> None: "Do you wish to install the dependency? [Y]es, [N]o, [A]ll." ) if if_install.lower() == "y": - subprocess.run(["pip", "install", "-U", self.module_name]) + subprocess.run(["pip", "install", "-U", self.module_name], check=False) elif if_install.lower() == "a": - subprocess.run(["pip", "install", "-U", self.module_name]) + subprocess.run(["pip", "install", "-U", self.module_name], check=False) self.auto_install = True else: raise ImportError( diff --git a/kornia/models/detector/rtdetr.py b/kornia/models/detector/rtdetr.py index 492d7ea167..d62a638df7 100644 --- a/kornia/models/detector/rtdetr.py +++ b/kornia/models/detector/rtdetr.py @@ -124,10 +124,7 @@ def to_onnx( if image_size is None: val_image = rand(1, 3, 640, 640) - dynamic_axes = { - "input": {0: "batch_size", 2: "height", 3: "width"}, - "output": {0: "batch_size"} - } + dynamic_axes = {"input": {0: "batch_size", 2: "height", 3: "width"}, "output": {0: "batch_size"}} torch.onnx.export( detector, val_image, From f4a8b128a0e03169f894f89e950cca1b07c726f7 Mon Sep 17 00:00:00 2001 From: Jian S Date: Mon, 9 Sep 2024 02:34:51 +0800 Subject: [PATCH 31/46] update --- kornia/contrib/models/rt_detr/post_processor.py | 8 +++++++- kornia/models/detector/rtdetr.py | 15 ++++++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/kornia/contrib/models/rt_detr/post_processor.py b/kornia/contrib/models/rt_detr/post_processor.py index 17ff06315d..9538364def 100644 --- a/kornia/contrib/models/rt_detr/post_processor.py +++ b/kornia/contrib/models/rt_detr/post_processor.py @@ -1,6 +1,7 @@ """Post-processor for the RT-DETR model.""" from __future__ import annotations +from typing import Optional import torch @@ -31,9 +32,11 @@ def mod(a: Tensor, b: int) -> Tensor: # TODO: deprecate the confidence threshold and add the num_top_queries as a parameter and num_classes as a parameter class DETRPostProcessor(Module): - def __init__(self, confidence_threshold: float) -> None: + def __init__(self, confidence_threshold: Optional[float] = None, num_classes: int = 80, confidence_filtering: bool = True) -> None: super().__init__() self.confidence_threshold = confidence_threshold + self.num_classes = num_classes + self.confidence_filtering = confidence_filtering def forward(self, logits: Tensor, boxes: Tensor, original_sizes: Tensor) -> Tensor: """Post-process outputs from DETR. 
@@ -78,6 +81,9 @@ def forward(self, logits: Tensor, boxes: Tensor, original_sizes: Tensor) -> Tens all_boxes = concatenate([labels[..., None], scores[..., None], boxes], -1) + if not self.confidence_filtering or self.confidence_threshold == 0: + return all_boxes + return all_boxes[(all_boxes[:, :, 1] > self.confidence_threshold).unsqueeze(-1).expand_as(all_boxes)].view( all_boxes.shape[0], -1, all_boxes.shape[-1] ) diff --git a/kornia/models/detector/rtdetr.py b/kornia/models/detector/rtdetr.py index d62a638df7..523c4564ea 100644 --- a/kornia/models/detector/rtdetr.py +++ b/kornia/models/detector/rtdetr.py @@ -27,6 +27,7 @@ def build( pretrained: bool = True, image_size: Optional[int] = 640, confidence_threshold: float = 0.5, + confidence_filtering: Optional[bool] = None, ) -> ObjectDetector: """Builds and returns an RT-DETR object detector model. @@ -47,6 +48,9 @@ def build( [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]. confidence_threshold: The confidence threshold used during post-processing to filter detections. + confidence_filtering: + If to perform filtering on resulting boxes. If None, the filtering will be blocked when exporting + to ONNX, while it would perform as per confidence_threshold when build the model. Returns: ObjectDetector @@ -69,7 +73,11 @@ def build( return ObjectDetector( model, ResizePreProcessor((image_size, image_size)) if image_size is not None else nn.Identity(), - DETRPostProcessor(confidence_threshold), + DETRPostProcessor( + confidence_threshold, + num_classes=config.num_classes if config is not None else 80, + confidence_filtering=confidence_filtering or not torch.onnx.is_in_onnx_export + ), ) @staticmethod @@ -80,6 +88,7 @@ def to_onnx( pretrained: bool = True, image_size: Optional[int] = 640, confidence_threshold: float = 0.5, + confidence_filtering: Optional[bool] = None, ) -> tuple[str, ObjectDetector]: """Exports an RT-DETR object detection model to ONNX format. @@ -99,6 +108,9 @@ def to_onnx( [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]. confidence_threshold: The confidence threshold used during post-processing to filter detections. + confidence_filtering: + If to perform filtering on resulting boxes. If None, the filtering will be blocked when exporting + to ONNX, while it would perform as per confidence_threshold when build the model. Returns: - The name of the ONNX model. 
@@ -111,6 +123,7 @@ def to_onnx( pretrained=pretrained, image_size=image_size, confidence_threshold=confidence_threshold, + confidence_filtering=confidence_filtering, ) if onnx_name is None: _model_name = model_name From 9f6a0c2070a1603e9ec0dadbbb5ccce23ecaa7d5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 8 Sep 2024 18:35:26 +0000 Subject: [PATCH 32/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- kornia/contrib/models/rt_detr/post_processor.py | 5 ++++- kornia/models/detector/rtdetr.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/kornia/contrib/models/rt_detr/post_processor.py b/kornia/contrib/models/rt_detr/post_processor.py index 9538364def..a977e940e3 100644 --- a/kornia/contrib/models/rt_detr/post_processor.py +++ b/kornia/contrib/models/rt_detr/post_processor.py @@ -1,6 +1,7 @@ """Post-processor for the RT-DETR model.""" from __future__ import annotations + from typing import Optional import torch @@ -32,7 +33,9 @@ def mod(a: Tensor, b: int) -> Tensor: # TODO: deprecate the confidence threshold and add the num_top_queries as a parameter and num_classes as a parameter class DETRPostProcessor(Module): - def __init__(self, confidence_threshold: Optional[float] = None, num_classes: int = 80, confidence_filtering: bool = True) -> None: + def __init__( + self, confidence_threshold: Optional[float] = None, num_classes: int = 80, confidence_filtering: bool = True + ) -> None: super().__init__() self.confidence_threshold = confidence_threshold self.num_classes = num_classes diff --git a/kornia/models/detector/rtdetr.py b/kornia/models/detector/rtdetr.py index 523c4564ea..0e35953b00 100644 --- a/kornia/models/detector/rtdetr.py +++ b/kornia/models/detector/rtdetr.py @@ -76,7 +76,7 @@ def build( DETRPostProcessor( confidence_threshold, num_classes=config.num_classes if config is not None else 80, - confidence_filtering=confidence_filtering or not torch.onnx.is_in_onnx_export + confidence_filtering=confidence_filtering or not torch.onnx.is_in_onnx_export, ), ) From 33e454c75ed89b523c24b5378616d834b6a95ee0 Mon Sep 17 00:00:00 2001 From: Jian S Date: Mon, 9 Sep 2024 02:44:59 +0800 Subject: [PATCH 33/46] update --- kornia/contrib/object_detection.py | 11 +++++++---- kornia/core/external.py | 5 ++++- kornia/models/detector/rtdetr.py | 3 ++- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index ed0131511e..322e787839 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -4,6 +4,7 @@ import os from dataclasses import dataclass from enum import Enum +import logging from typing import Optional, Union import torch @@ -25,6 +26,8 @@ "ObjectDetectorResult", ] +logger = logging.getLogger(__name__) + class BoundingBoxDataFormat(Enum): """Enum class that maps bounding box data format.""" @@ -126,10 +129,10 @@ def forward(self, imgs: Union[Tensor, list[Tensor]]) -> tuple[Tensor, Tensor]: original_sizes: the original image sizes of (height, width). """ # TODO: support other input formats e.g. 
file path, numpy - resized_imgs, original_sizes = [], [] + resized_imgs: list[Tensor] = [] iters = len(imgs) if isinstance(imgs, list) else imgs.shape[0] - original_sizes = imgs.new_zeros((imgs.shape[0], 2)) + original_sizes = imgs.new_zeros((iters, 2)) for i in range(iters): img = imgs[i] original_sizes[i, 0] = img.shape[-2] # Height @@ -159,7 +162,7 @@ def __init__(self, model: Module, pre_processor: Module, post_processor: Module) self.post_processor = post_processor.eval() @torch.inference_mode() - def forward(self, images: Union[Tensor, list[Tensor]]) -> list[Tensor]: + def forward(self, images: Union[Tensor, list[Tensor]]) -> Tensor: """Detect objects in a given list of images. Args: @@ -219,7 +222,7 @@ def save( os.path.join(directory, f"{str(i).zfill(6)}.jpg"), out_image.mul(255.0).byte(), ) - print(f"Outputs are saved in {directory}") + logger.info(f"Outputs are saved in {directory}") def compile( self, diff --git a/kornia/core/external.py b/kornia/core/external.py index fa0af1a906..2d61c2fb30 100644 --- a/kornia/core/external.py +++ b/kornia/core/external.py @@ -2,6 +2,9 @@ import subprocess from types import ModuleType from typing import List, Optional +import logging + +logger = logging.getLogger(__name__) class LazyLoader: @@ -28,7 +31,7 @@ def __init__(self, module_name: str) -> None: self.module: Optional[ModuleType] = None def _install_package(self, module_name: str) -> None: - print(f"Installing `{self.module_name}` ...") + logger.info(f"Installing `{self.module_name}` ...") subprocess.run(["pip", "install", "-U", self.module_name], check=False) def _load(self) -> None: diff --git a/kornia/models/detector/rtdetr.py b/kornia/models/detector/rtdetr.py index 523c4564ea..c84c3f876a 100644 --- a/kornia/models/detector/rtdetr.py +++ b/kornia/models/detector/rtdetr.py @@ -133,9 +133,10 @@ def to_onnx( _model_name = "rtdetr_r18vd" onnx_name = f"Kornia-RTDETR-{_model_name}-{image_size}.onnx" - val_image = rand(1, 3, image_size, image_size) if image_size is None: val_image = rand(1, 3, 640, 640) + else: + val_image = rand(1, 3, image_size, image_size) dynamic_axes = {"input": {0: "batch_size", 2: "height", 3: "width"}, "output": {0: "batch_size"}} torch.onnx.export( From 2f3f531a4f9c9aab757951db7a7d8fbea687d025 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 8 Sep 2024 18:45:48 +0000 Subject: [PATCH 34/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- kornia/contrib/object_detection.py | 2 +- kornia/core/external.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index 322e787839..2fdb50cae2 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -1,10 +1,10 @@ from __future__ import annotations import datetime +import logging import os from dataclasses import dataclass from enum import Enum -import logging from typing import Optional, Union import torch diff --git a/kornia/core/external.py b/kornia/core/external.py index 2d61c2fb30..0f467eb60f 100644 --- a/kornia/core/external.py +++ b/kornia/core/external.py @@ -1,8 +1,8 @@ import importlib +import logging import subprocess from types import ModuleType from typing import List, Optional -import logging logger = logging.getLogger(__name__) From 6790ed1c65f6745a4bcb9952e00f48eebb08c365 Mon Sep 17 00:00:00 2001 From: Jian S Date: Mon, 9 Sep 2024 02:51:40 +0800 Subject: [PATCH 
35/46] update --- kornia/contrib/object_detection.py | 3 +-- kornia/core/external.py | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index 322e787839..d669b426e8 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -132,13 +132,12 @@ def forward(self, imgs: Union[Tensor, list[Tensor]]) -> tuple[Tensor, Tensor]: resized_imgs: list[Tensor] = [] iters = len(imgs) if isinstance(imgs, list) else imgs.shape[0] - original_sizes = imgs.new_zeros((iters, 2)) + original_sizes = imgs[0].new_zeros((iters, 2)) for i in range(iters): img = imgs[i] original_sizes[i, 0] = img.shape[-2] # Height original_sizes[i, 1] = img.shape[-1] # Width resized_imgs.append( - # TODO: fix kornia resize warnings resize(img[None], size=self.size, interpolation=self.interpolation_mode) ) return concatenate(resized_imgs), original_sizes diff --git a/kornia/core/external.py b/kornia/core/external.py index 2d61c2fb30..33ccf18c01 100644 --- a/kornia/core/external.py +++ b/kornia/core/external.py @@ -32,7 +32,7 @@ def __init__(self, module_name: str) -> None: def _install_package(self, module_name: str) -> None: logger.info(f"Installing `{self.module_name}` ...") - subprocess.run(["pip", "install", "-U", self.module_name], check=False) + subprocess.run(["pip", "install", "-U", self.module_name], shell=False) def _load(self) -> None: """Loads the module if it hasn't been loaded yet. @@ -52,10 +52,10 @@ def _load(self) -> None: "Do you wish to install the dependency? [Y]es, [N]o, [A]ll." ) if if_install.lower() == "y": - subprocess.run(["pip", "install", "-U", self.module_name], check=False) + self._install_package(self.module_name) elif if_install.lower() == "a": - subprocess.run(["pip", "install", "-U", self.module_name], check=False) self.auto_install = True + self._install_package(self.module_name) else: raise ImportError( f"Optional dependency '{self.module_name}' is not installed. 
" From fb07bb9ff99dbd765ab39a30d0ddf8afc51e6591 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 8 Sep 2024 18:52:07 +0000 Subject: [PATCH 36/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- kornia/contrib/object_detection.py | 4 +--- kornia/core/external.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/kornia/contrib/object_detection.py b/kornia/contrib/object_detection.py index acdbae0c9f..4a4b965dc2 100644 --- a/kornia/contrib/object_detection.py +++ b/kornia/contrib/object_detection.py @@ -137,9 +137,7 @@ def forward(self, imgs: Union[Tensor, list[Tensor]]) -> tuple[Tensor, Tensor]: img = imgs[i] original_sizes[i, 0] = img.shape[-2] # Height original_sizes[i, 1] = img.shape[-1] # Width - resized_imgs.append( - resize(img[None], size=self.size, interpolation=self.interpolation_mode) - ) + resized_imgs.append(resize(img[None], size=self.size, interpolation=self.interpolation_mode)) return concatenate(resized_imgs), original_sizes diff --git a/kornia/core/external.py b/kornia/core/external.py index 5ecf97a5b8..40bb7ca7b3 100644 --- a/kornia/core/external.py +++ b/kornia/core/external.py @@ -32,7 +32,7 @@ def __init__(self, module_name: str) -> None: def _install_package(self, module_name: str) -> None: logger.info(f"Installing `{self.module_name}` ...") - subprocess.run(["pip", "install", "-U", self.module_name], shell=False) + subprocess.run(["pip", "install", "-U", self.module_name], shell=False, check=False) def _load(self) -> None: """Loads the module if it hasn't been loaded yet. From e84ab3a761c6322e1bf631f5987de1a9e0687a8a Mon Sep 17 00:00:00 2001 From: Jian S Date: Mon, 9 Sep 2024 02:55:35 +0800 Subject: [PATCH 37/46] update --- kornia/core/external.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kornia/core/external.py b/kornia/core/external.py index 5ecf97a5b8..66c022d779 100644 --- a/kornia/core/external.py +++ b/kornia/core/external.py @@ -32,7 +32,7 @@ def __init__(self, module_name: str) -> None: def _install_package(self, module_name: str) -> None: logger.info(f"Installing `{self.module_name}` ...") - subprocess.run(["pip", "install", "-U", self.module_name], shell=False) + subprocess.run(["pip", "install", "-U", self.module_name], shell=True) def _load(self) -> None: """Loads the module if it hasn't been loaded yet. From 5f49f2801d4fed242e102c42f6b5544856080b1b Mon Sep 17 00:00:00 2001 From: Jian S Date: Mon, 9 Sep 2024 02:56:29 +0800 Subject: [PATCH 38/46] update --- kornia/core/external.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kornia/core/external.py b/kornia/core/external.py index 66c022d779..ab992fb4a7 100644 --- a/kornia/core/external.py +++ b/kornia/core/external.py @@ -31,8 +31,8 @@ def __init__(self, module_name: str) -> None: self.module: Optional[ModuleType] = None def _install_package(self, module_name: str) -> None: - logger.info(f"Installing `{self.module_name}` ...") - subprocess.run(["pip", "install", "-U", self.module_name], shell=True) + logger.info(f"Installing `{module_name}` ...") + subprocess.run(["pip", "install", "-U", module_name], shell=True) def _load(self) -> None: """Loads the module if it hasn't been loaded yet. 
From 41d94fffd449325e324bcf575da3d73d5ddf3c69 Mon Sep 17 00:00:00 2001 From: Jian S Date: Mon, 9 Sep 2024 02:58:29 +0800 Subject: [PATCH 39/46] update --- kornia/core/external.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/kornia/core/external.py b/kornia/core/external.py index 6d793a61cf..5f56c669c6 100644 --- a/kornia/core/external.py +++ b/kornia/core/external.py @@ -31,13 +31,8 @@ def __init__(self, module_name: str) -> None: self.module: Optional[ModuleType] = None def _install_package(self, module_name: str) -> None: -<<<<<<< HEAD logger.info(f"Installing `{module_name}` ...") - subprocess.run(["pip", "install", "-U", module_name], shell=True) -======= - logger.info(f"Installing `{self.module_name}` ...") - subprocess.run(["pip", "install", "-U", self.module_name], shell=False, check=False) ->>>>>>> fb07bb9ff99dbd765ab39a30d0ddf8afc51e6591 + subprocess.run(["pip", "install", "-U", module_name], shell=False, check=False) def _load(self) -> None: """Loads the module if it hasn't been loaded yet. From 0dd67b5690bb50fbe93b2db4279cddf56695c484 Mon Sep 17 00:00:00 2001 From: Jian S Date: Mon, 9 Sep 2024 05:22:59 +0800 Subject: [PATCH 40/46] update --- kornia/contrib/models/rt_detr/post_processor.py | 12 ++++++------ tests/contrib/test_object_detector.py | 4 ++-- tests/core/test_lazyloader.py | 4 +++- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/kornia/contrib/models/rt_detr/post_processor.py b/kornia/contrib/models/rt_detr/post_processor.py index a977e940e3..5a600a3765 100644 --- a/kornia/contrib/models/rt_detr/post_processor.py +++ b/kornia/contrib/models/rt_detr/post_processor.py @@ -34,12 +34,14 @@ def mod(a: Tensor, b: int) -> Tensor: # TODO: deprecate the confidence threshold and add the num_top_queries as a parameter and num_classes as a parameter class DETRPostProcessor(Module): def __init__( - self, confidence_threshold: Optional[float] = None, num_classes: int = 80, confidence_filtering: bool = True + self, confidence_threshold: Optional[float] = None, num_classes: int = 80, + num_top_queries: int = 300, confidence_filtering: bool = True ) -> None: super().__init__() self.confidence_threshold = confidence_threshold self.num_classes = num_classes self.confidence_filtering = confidence_filtering + self.num_top_queries = num_top_queries def forward(self, logits: Tensor, boxes: Tensor, original_sizes: Tensor) -> Tensor: """Post-process outputs from DETR. 
@@ -75,11 +77,9 @@ def forward(self, logits: Tensor, boxes: Tensor, original_sizes: Tensor) -> Tens # retrieve the boxes with the highest score for each class # https://github.com/lyuwenyu/RT-DETR/blob/b6bf0200b249a6e35b44e0308b6058f55b99696b/rtdetrv2_pytorch/src/zoo/rtdetr/rtdetr_postprocessor.py#L55-L62 - num_top_queries: int = 300 # TODO: make this configurable - num_classes: int = 80 # TODO: make this configurable - scores, index = torch.topk(scores.flatten(1), num_top_queries, dim=-1) - labels = mod(index, num_classes) - index = index // num_classes + scores, index = torch.topk(scores.flatten(1), self.num_top_queries, dim=-1) + labels = mod(index, self.num_classes) + index = index // self.num_classes boxes = boxes_xy.gather(dim=1, index=index.unsqueeze(-1).repeat(1, 1, boxes_xy.shape[-1])) all_boxes = concatenate([labels[..., None], scores[..., None], boxes], -1) diff --git a/tests/contrib/test_object_detector.py b/tests/contrib/test_object_detector.py index dd5c07aacb..e82facace0 100644 --- a/tests/contrib/test_object_detector.py +++ b/tests/contrib/test_object_detector.py @@ -17,7 +17,7 @@ def test_smoke(self, device, dtype): config = RTDETRConfig("resnet50d", 10, head_num_queries=10) model = RTDETR.from_config(config).to(device, dtype).eval() pre_processor = kornia.contrib.object_detection.ResizePreProcessor((32, 32)) - post_processor = DETRPostProcessor(confidence).to(device, dtype).eval() + post_processor = DETRPostProcessor(confidence, num_top_queries=3).to(device, dtype).eval() detector = kornia.contrib.ObjectDetector(model, pre_processor, post_processor) sizes = torch.randint(5, 10, (batch_size, 2)) * 32 @@ -40,7 +40,7 @@ def test_onnx(self, device, dtype, tmp_path: Path, variant: str): config = RTDETRConfig(variant, 1) model = RTDETR.from_config(config).to(device=device, dtype=dtype).eval() pre_processor = kornia.contrib.object_detection.ResizePreProcessor(640) - post_processor = DETRPostProcessor(0.3) + post_processor = DETRPostProcessor(0.3, num_top_queries=3) detector = kornia.contrib.ObjectDetector(model, pre_processor, post_processor) data = torch.rand(3, 400, 640, device=device, dtype=dtype) diff --git a/tests/core/test_lazyloader.py b/tests/core/test_lazyloader.py index 1025f97827..13c227a32a 100644 --- a/tests/core/test_lazyloader.py +++ b/tests/core/test_lazyloader.py @@ -1,4 +1,5 @@ import pytest +from io import StringIO from kornia.core.external import LazyLoader @@ -19,7 +20,8 @@ def test_lazy_loader_loading_module(self): assert loader.sqrt(4) == 2.0 assert loader.module is not None # Should be loaded now - def test_lazy_loader_invalid_module(self): + def test_lazy_loader_invalid_module(self, monkeypatch): + monkeypatch.setattr('sys.stdin', StringIO("n")) # Test that LazyLoader raises an ImportError for an invalid module loader = LazyLoader("non_existent_module") with pytest.raises(ImportError) as excinfo: From 03edce3f371a43d724ea09173424ce5599860afb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 8 Sep 2024 21:23:15 +0000 Subject: [PATCH 41/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- kornia/contrib/models/rt_detr/post_processor.py | 7 +++++-- tests/core/test_lazyloader.py | 5 +++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/kornia/contrib/models/rt_detr/post_processor.py b/kornia/contrib/models/rt_detr/post_processor.py index 5a600a3765..ca1d37cd08 100644 --- a/kornia/contrib/models/rt_detr/post_processor.py 
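The reworked forward() above selects the global top-k (query, class) pairs from the flattened score tensor and recovers the class id and query index from the flattened position. A standalone sketch of that selection in plain torch, with made-up tensor sizes and % in place of the file's ONNX-friendly mod() helper:

    import torch

    batch, num_queries, num_classes, num_top = 2, 5, 4, 3
    scores = torch.rand(batch, num_queries, num_classes)   # per-query class scores
    boxes_xy = torch.rand(batch, num_queries, 4)           # per-query boxes

    # Flatten (query, class) into one axis and take the top-k scores per image.
    top_scores, index = torch.topk(scores.flatten(1), num_top, dim=-1)
    labels = index % num_classes       # class id encoded in the flattened index
    query_idx = index // num_classes   # which query produced the score

    # Pull out the boxes belonging to the selected queries.
    top_boxes = boxes_xy.gather(1, query_idx.unsqueeze(-1).repeat(1, 1, boxes_xy.shape[-1]))
    assert top_boxes.shape == (batch, num_top, 4)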
+++ b/kornia/contrib/models/rt_detr/post_processor.py @@ -34,8 +34,11 @@ def mod(a: Tensor, b: int) -> Tensor: # TODO: deprecate the confidence threshold and add the num_top_queries as a parameter and num_classes as a parameter class DETRPostProcessor(Module): def __init__( - self, confidence_threshold: Optional[float] = None, num_classes: int = 80, - num_top_queries: int = 300, confidence_filtering: bool = True + self, + confidence_threshold: Optional[float] = None, + num_classes: int = 80, + num_top_queries: int = 300, + confidence_filtering: bool = True, ) -> None: super().__init__() self.confidence_threshold = confidence_threshold diff --git a/tests/core/test_lazyloader.py b/tests/core/test_lazyloader.py index 13c227a32a..b832de99c8 100644 --- a/tests/core/test_lazyloader.py +++ b/tests/core/test_lazyloader.py @@ -1,6 +1,7 @@ -import pytest from io import StringIO +import pytest + from kornia.core.external import LazyLoader @@ -21,7 +22,7 @@ def test_lazy_loader_loading_module(self): assert loader.module is not None # Should be loaded now def test_lazy_loader_invalid_module(self, monkeypatch): - monkeypatch.setattr('sys.stdin', StringIO("n")) + monkeypatch.setattr("sys.stdin", StringIO("n")) # Test that LazyLoader raises an ImportError for an invalid module loader = LazyLoader("non_existent_module") with pytest.raises(ImportError) as excinfo: From 11c6f7f9ecea37d41aa77115e816078dd880df8b Mon Sep 17 00:00:00 2001 From: Jian S Date: Mon, 9 Sep 2024 05:26:27 +0800 Subject: [PATCH 42/46] update --- kornia/core/external.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kornia/core/external.py b/kornia/core/external.py index 5f56c669c6..1e0160035d 100644 --- a/kornia/core/external.py +++ b/kornia/core/external.py @@ -1,6 +1,7 @@ import importlib import logging import subprocess +import sys from types import ModuleType from typing import List, Optional @@ -32,7 +33,7 @@ def __init__(self, module_name: str) -> None: def _install_package(self, module_name: str) -> None: logger.info(f"Installing `{module_name}` ...") - subprocess.run(["pip", "install", "-U", module_name], shell=False, check=False) + subprocess.run([sys.executable, "-m", "pip", "install", "-U", module_name], shell=False, check=False) # noqa: S603 def _load(self) -> None: """Loads the module if it hasn't been loaded yet. 
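Patch 42 above switches the installer to invoke pip through the running interpreter. Calling pip via sys.executable guarantees the package lands in the same environment that imports kornia, even when the bare `pip` on PATH belongs to a different Python; the stdin monkeypatch added to the test also suggests the loader asks for confirmation before installing. A sketch of the resulting helper in isolation:

    import subprocess
    import sys

    def install_package(package: str) -> None:
        # Install into the environment of the interpreter currently running,
        # not whichever `pip` happens to be first on PATH.
        subprocess.run(
            [sys.executable, "-m", "pip", "install", "-U", package],
            shell=False,
            check=False,
        )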
From 365de3cb5b8b8520f208cc6a4bb327e60fc546db Mon Sep 17 00:00:00 2001 From: Jian S Date: Mon, 9 Sep 2024 06:08:21 +0800 Subject: [PATCH 43/46] update --- kornia/contrib/models/rt_detr/post_processor.py | 2 -- kornia/geometry/transform/affwarp.py | 3 +++ tests/contrib/test_object_detector.py | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/kornia/contrib/models/rt_detr/post_processor.py b/kornia/contrib/models/rt_detr/post_processor.py index ca1d37cd08..95b35623df 100644 --- a/kornia/contrib/models/rt_detr/post_processor.py +++ b/kornia/contrib/models/rt_detr/post_processor.py @@ -25,8 +25,6 @@ def mod(a: Tensor, b: int) -> Tensor: Example: >>> mod(7, 3) 1 - >>> mod(8.5, 3.2) - 2.1 """ return a - (a // b) * b diff --git a/kornia/geometry/transform/affwarp.py b/kornia/geometry/transform/affwarp.py index 61cf05f4d4..3e85a1da72 100644 --- a/kornia/geometry/transform/affwarp.py +++ b/kornia/geometry/transform/affwarp.py @@ -1,6 +1,7 @@ from typing import Optional, Tuple, Union import torch +import warnings from torch import nn from kornia.core import ones, ones_like, zeros @@ -567,6 +568,8 @@ def resize( input_size = h, w = input.shape[-2:] if isinstance(size, int): + if torch.onnx.is_in_onnx_export(): + warnings.warn("Please pass the size with a tuple when exporting to ONNX to correct the tracing.") aspect_ratio = w / h size = _side_to_image_size(size, aspect_ratio, side) diff --git a/tests/contrib/test_object_detector.py b/tests/contrib/test_object_detector.py index e82facace0..00eee98b81 100644 --- a/tests/contrib/test_object_detector.py +++ b/tests/contrib/test_object_detector.py @@ -39,7 +39,7 @@ def test_smoke(self, device, dtype): def test_onnx(self, device, dtype, tmp_path: Path, variant: str): config = RTDETRConfig(variant, 1) model = RTDETR.from_config(config).to(device=device, dtype=dtype).eval() - pre_processor = kornia.contrib.object_detection.ResizePreProcessor(640) + pre_processor = kornia.contrib.object_detection.ResizePreProcessor((640, 640)) post_processor = DETRPostProcessor(0.3, num_top_queries=3) detector = kornia.contrib.ObjectDetector(model, pre_processor, post_processor) @@ -55,7 +55,7 @@ def test_onnx(self, device, dtype, tmp_path: Path, variant: str): input_names=["images"], output_names=["detections"], dynamic_axes=dynamic_axes, - opset_version=16, + opset_version=17, ) assert model_path.is_file() From 224392ee54710aac17bcd08960b8487e14d7adc6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 8 Sep 2024 22:08:39 +0000 Subject: [PATCH 44/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- kornia/geometry/transform/affwarp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kornia/geometry/transform/affwarp.py b/kornia/geometry/transform/affwarp.py index 3e85a1da72..c85c5ddf3d 100644 --- a/kornia/geometry/transform/affwarp.py +++ b/kornia/geometry/transform/affwarp.py @@ -1,7 +1,7 @@ +import warnings from typing import Optional, Tuple, Union import torch -import warnings from torch import nn from kornia.core import ones, ones_like, zeros From 0a2da30fbc72ac97891d8cb50dfad0524b4bc950 Mon Sep 17 00:00:00 2001 From: Jian S Date: Mon, 9 Sep 2024 15:33:30 +0800 Subject: [PATCH 45/46] update --- kornia/contrib/models/rt_detr/model.py | 10 +++++++++- kornia/models/detector/rtdetr.py | 4 ++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/kornia/contrib/models/rt_detr/model.py 
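Two behavioural points in patch 43 above. The dropped doctest presumably failed because the expression behind mod() does not reproduce the literal 2.1 under IEEE-754 doubles:

    a, b = 8.5, 3.2
    print(a - (a // b) * b)   # 2.0999999999999996, so the expected `2.1` cannot match

The new warning in resize() points the same direction as the test change from ResizePreProcessor(640) to ResizePreProcessor((640, 640)): when size is a single int, the output size is derived from the sample input's aspect ratio, which the tracer can bake into the exported ONNX graph, so a tuple is the safer choice for export.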
b/kornia/contrib/models/rt_detr/model.py index 6871b85260..512585b9e6 100644 --- a/kornia/contrib/models/rt_detr/model.py +++ b/kornia/contrib/models/rt_detr/model.py @@ -34,6 +34,7 @@ class RTDETRModelType(Enum): resnet101d = 3 hgnetv2_l = 4 hgnetv2_x = 5 + resnet50d_m = 6 @dataclass @@ -130,6 +131,13 @@ def from_config(config: RTDETRConfig) -> RTDETR: head_num_decoder_layers = config.head_num_decoder_layers or 6 neck_expansion = config.neck_expansion or 1.0 + elif model_type == RTDETRModelType.resnet50d_m: + backbone = ResNetD.from_config(50) + neck_hidden_dim = config.neck_hidden_dim or 256 + neck_dim_feedforward = config.neck_dim_feedforward or 1024 + head_num_decoder_layers = config.head_num_decoder_layers or 6 + neck_expansion = config.neck_expansion or .5 + elif model_type == RTDETRModelType.resnet101d: backbone = ResNetD.from_config(101) neck_hidden_dim = config.neck_hidden_dim or 384 @@ -227,7 +235,7 @@ def from_name(model_name: str, num_classes: int = 80) -> RTDETR: elif model_name == "rtdetr_r34vd": model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet34d, num_classes)) elif model_name == "rtdetr_r50vd_m": - model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet50d, num_classes)) + model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet50d_m, num_classes)) elif model_name == "rtdetr_r50vd": model = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet50d, num_classes)) elif model_name == "rtdetr_r101vd": diff --git a/kornia/models/detector/rtdetr.py b/kornia/models/detector/rtdetr.py index d8ce489586..4bc1c6bd2d 100644 --- a/kornia/models/detector/rtdetr.py +++ b/kornia/models/detector/rtdetr.py @@ -45,7 +45,7 @@ def build( image_size: The size to which input images will be resized during preprocessing. If None, no resizing will be performed before passing to the model. Recommended scales include - [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]. + [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]. confidence_threshold: The confidence threshold used during post-processing to filter detections. confidence_filtering: @@ -105,7 +105,7 @@ def to_onnx( image_size: The size to which input images will be resized during preprocessing. If None, image_size will be dynamic. Recommended scales include - [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]. + [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]. confidence_threshold: The confidence threshold used during post-processing to filter detections. 
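Patch 45 above adds a distinct resnet50d_m model type (a ResNet-50-D backbone with neck expansion 0.5 instead of 1.0) and fixes from_name() so that "rtdetr_r50vd_m" no longer falls back to the full resnet50d configuration. A minimal construction sketch; the import path is inferred from the file paths in the diff:

    from kornia.contrib.models.rt_detr.model import RTDETR, RTDETRConfig, RTDETRModelType

    # Both routes should now resolve to the same half-expansion variant.
    model_a = RTDETR.from_name("rtdetr_r50vd_m", num_classes=80)
    model_b = RTDETR.from_config(RTDETRConfig(RTDETRModelType.resnet50d_m, 80))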
confidence_filtering: From ab5fb53a3440287ecfd116c6e6124151a4a72197 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 9 Sep 2024 07:33:50 +0000 Subject: [PATCH 46/46] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- kornia/contrib/models/rt_detr/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kornia/contrib/models/rt_detr/model.py b/kornia/contrib/models/rt_detr/model.py index 512585b9e6..3e6e6226bb 100644 --- a/kornia/contrib/models/rt_detr/model.py +++ b/kornia/contrib/models/rt_detr/model.py @@ -136,7 +136,7 @@ def from_config(config: RTDETRConfig) -> RTDETR: neck_hidden_dim = config.neck_hidden_dim or 256 neck_dim_feedforward = config.neck_dim_feedforward or 1024 head_num_decoder_layers = config.head_num_decoder_layers or 6 - neck_expansion = config.neck_expansion or .5 + neck_expansion = config.neck_expansion or 0.5 elif model_type == RTDETRModelType.resnet101d: backbone = ResNetD.from_config(101)