diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 1e2dd3bad914..b70e0eb89245 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -18,18 +18,18 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest] - python-version: ["3.11"] # requires python<=3.10 + python-version: ["3.11"] # requires python<=3.11 model: [yolov5n] steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - cache: "pip" # caching pip dependencies + cache: "pip" # cache pip dependencies - name: Install requirements run: | python -m pip install --upgrade pip wheel - pip install -r requirements.txt coremltools openvino-dev tensorflow-cpu --extra-index-url https://download.pytorch.org/whl/cpu + pip install -r requirements.txt coremltools openvino-dev "tensorflow-cpu<2.15.1" --extra-index-url https://download.pytorch.org/whl/cpu yolo checks pip list - name: Benchmark DetectionModel @@ -51,16 +51,10 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, windows-latest] # macos-latest bug https://github.com/ultralytics/yolov5/pull/9049 + os: [ubuntu-latest, windows-latest, macos-14] # macos-latest bug https://github.com/ultralytics/yolov5/pull/9049 python-version: ["3.11"] model: [yolov5n] include: - - os: ubuntu-latest - python-version: "3.8" # '3.6.8' min - model: yolov5n - - os: ubuntu-latest - python-version: "3.9" - model: yolov5n - os: ubuntu-latest python-version: "3.8" # torch 1.8.0 requires python >=3.6, <=3.8 model: yolov5n @@ -147,7 +141,7 @@ jobs: steps: - name: Check for failure and notify if: (needs.Benchmarks.result == 'failure' || needs.Tests.result == 'failure' || needs.Benchmarks.result == 'cancelled' || needs.Tests.result == 'cancelled') && github.repository == 'ultralytics/yolov5' && (github.event_name == 'schedule' || github.event_name == 'push') - uses: slackapi/slack-github-action@v1.25.0 + uses: slackapi/slack-github-action@v1.26.0 with: 
payload: | {"text": " GitHub Actions error for ${{ github.workflow }} ❌\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* ${{ github.event_name }}\n"} diff --git a/.github/workflows/cla.yml b/.github/workflows/cla.yml new file mode 100644 index 000000000000..4f87c77701dd --- /dev/null +++ b/.github/workflows/cla.yml @@ -0,0 +1,39 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# Ultralytics Contributor License Agreement (CLA) action https://docs.ultralytics.com/help/CLA +# This workflow automatically requests Pull Requests (PR) authors to sign the Ultralytics CLA before PRs can be merged + +name: CLA Assistant +on: + issue_comment: + types: + - created + pull_request_target: + types: + - reopened + - opened + - synchronize + +jobs: + CLA: + if: github.repository == 'ultralytics/yolov5' + runs-on: ubuntu-latest + steps: + - name: CLA Assistant + if: (github.event.comment.body == 'recheck' || github.event.comment.body == 'I have read the CLA Document and I sign the CLA') || github.event_name == 'pull_request_target' + uses: contributor-assistant/github-action@v2.3.2 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # must be repository secret token + PERSONAL_ACCESS_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }} + with: + path-to-signatures: "signatures/version1/cla.json" + path-to-document: "https://docs.ultralytics.com/help/CLA" # CLA document + # branch should not be protected + branch: "main" + allowlist: dependabot[bot],github-actions,[pre-commit*,pre-commit*,bot* + + remote-organization-name: ultralytics + remote-repository-name: cla + custom-pr-sign-comment: "I have read the CLA Document and I sign the CLA" + custom-allsigned-prcomment: All Contributors have signed the CLA. 
✅ + #custom-notsigned-prcomment: 'pull request comment with Introductory message to ask new contributors to sign' diff --git a/.github/workflows/merge-main-into-prs.yml b/.github/workflows/merge-main-into-prs.yml new file mode 100644 index 000000000000..9ed945c78978 --- /dev/null +++ b/.github/workflows/merge-main-into-prs.yml @@ -0,0 +1,56 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# Automatically merges repository 'main' branch into all open PRs to keep them up-to-date +# Action runs on updates to main branch so when one PR merges to main all others update + +name: Merge main into PRs + +on: + workflow_dispatch: + push: + branches: + - main + - master + +jobs: + Merge: + if: github.repository == 'ultralytics/yolov5' + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: "pip" # caching pip dependencies + - name: Install requirements + run: | + pip install pygithub + - name: Merge main into PRs + shell: python + run: | + from github import Github + import os + + # Authenticate with the GitHub Token + g = Github(os.getenv('GITHUB_TOKEN')) + + # Get the repository dynamically + repo = g.get_repo(os.getenv('GITHUB_REPOSITORY')) + + # List all open pull requests + open_pulls = repo.get_pulls(state='open', sort='created') + + for pr in open_pulls: + # Compare PR head with main to see if it's behind + try: + # Merge main into the PR branch + success = pr.update_branch() + assert success, "Branch update failed" + print(f"Merged 'master' into PR #{pr.number} ({pr.head.ref}) successfully.") + except Exception as e: + print(f"Could not merge 'master' into PR #{pr.number} ({pr.head.ref}): {e}") + env: + GITHUB_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }} + GITHUB_REPOSITORY: ${{ github.repository }} diff --git a/README.md b/README.md index dc4f9dd7169f..5cf1e00a1e7f 100644 --- a/README.md +++ b/README.md @@ -78,12 +78,13 @@ python 
test_patch.py -h # to get a list of all testing options -->

-[中文](https://docs.ultralytics.com/zh/) | [한국어](https://docs.ultralytics.com/ko/) | [日本語](https://docs.ultralytics.com/ja/) | [Русский](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Français](https://docs.ultralytics.com/fr/) | [Español](https://docs.ultralytics.com/es/) | [Português](https://docs.ultralytics.com/pt/) | [हिन्दी](https://docs.ultralytics.com/hi/) | [العربية](https://docs.ultralytics.com/ar/) +[中文](https://docs.ultralytics.com/zh/) | [한국어](https://docs.ultralytics.com/ko/) | [日本語](https://docs.ultralytics.com/ja/) | [Русский](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Français](https://docs.ultralytics.com/fr/) | [Español](https://docs.ultralytics.com/es/) | [Português](https://docs.ultralytics.com/pt/) | [Türkçe](https://docs.ultralytics.com/tr/) | [Tiếng Việt](https://docs.ultralytics.com/vi/) | [हिन्दी](https://docs.ultralytics.com/hi/) | [العربية](https://docs.ultralytics.com/ar/)
YOLOv5 CI YOLOv5 Citation Docker Pulls + Discord
Run on Gradient Open In Colab diff --git a/README.zh-CN.md b/README.zh-CN.md index c81feb86c202..69ce9b72d332 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -8,7 +8,7 @@ -->

-[中文](https://docs.ultralytics.com/zh/) | [한국어](https://docs.ultralytics.com/ko/) | [日本語](https://docs.ultralytics.com/ja/) | [Русский](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Français](https://docs.ultralytics.com/fr/) | [Español](https://docs.ultralytics.com/es/) | [Português](https://docs.ultralytics.com/pt/) | [हिन्दी](https://docs.ultralytics.com/hi/) | [العربية](https://docs.ultralytics.com/ar/) +[中文](https://docs.ultralytics.com/zh/) | [한국어](https://docs.ultralytics.com/ko/) | [日本語](https://docs.ultralytics.com/ja/) | [Русский](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Français](https://docs.ultralytics.com/fr/) | [Español](https://docs.ultralytics.com/es/) | [Português](https://docs.ultralytics.com/pt/) | [Türkçe](https://docs.ultralytics.com/tr/) | [Tiếng Việt](https://docs.ultralytics.com/vi/) | [हिन्दी](https://docs.ultralytics.com/hi/) | [العربية](https://docs.ultralytics.com/ar/)
YOLOv5 CI diff --git a/export.py b/export.py index 8fe7ce8fdce1..214d903c2998 100644 --- a/export.py +++ b/export.py @@ -346,6 +346,7 @@ def export_engine(model, im, file, half, dynamic, simplify, workspace=4, verbose onnx = file.with_suffix(".onnx") LOGGER.info(f"\n{prefix} starting export with TensorRT {trt.__version__}...") + is_trt10 = int(trt.__version__.split(".")[0]) >= 10 # is TensorRT >= 10 assert onnx.exists(), f"failed to export ONNX file: {onnx}" f = file.with_suffix(".engine") # TensorRT engine file logger = trt.Logger(trt.Logger.INFO) @@ -354,9 +355,10 @@ def export_engine(model, im, file, half, dynamic, simplify, workspace=4, verbose builder = trt.Builder(logger) config = builder.create_builder_config() - config.max_workspace_size = workspace * 1 << 30 - # config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace << 30) # fix TRT 8.4 deprecation notice - + if is_trt10: + config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace << 30) + else: # TensorRT versions 7, 8 + config.max_workspace_size = workspace * 1 << 30 flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) network = builder.create_network(flag) parser = trt.OnnxParser(network, logger) @@ -381,8 +383,10 @@ def export_engine(model, im, file, half, dynamic, simplify, workspace=4, verbose LOGGER.info(f"{prefix} building FP{16 if builder.platform_has_fast_fp16 and half else 32} engine as {f}") if builder.platform_has_fast_fp16 and half: config.set_flag(trt.BuilderFlag.FP16) - with builder.build_engine(network, config) as engine, open(f, "wb") as t: - t.write(engine.serialize()) + + build = builder.build_serialized_network if is_trt10 else builder.build_engine + with build(network, config) as engine, open(f, "wb") as t: + t.write(engine if is_trt10 else engine.serialize()) return f, None @@ -405,7 +409,8 @@ def export_saved_model( try: import tensorflow as tf except Exception: - check_requirements(f"tensorflow{'' if torch.cuda.is_available() else '-macos' if 
MACOS else '-cpu'}") + check_requirements(f"tensorflow{'' if torch.cuda.is_available() else '-macos' if MACOS else '-cpu'}<=2.15.1") + import tensorflow as tf from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2 diff --git a/models/common.py b/models/common.py index fd8c998149f5..12244fd4b3cf 100644 --- a/models/common.py +++ b/models/common.py @@ -57,8 +57,12 @@ from utils.torch_utils import copy_attr, smart_inference_mode -def autopad(k, p=None, d=1): # kernel, padding, dilation - # Pad to 'same' shape outputs +def autopad(k, p=None, d=1): + """ + Pads kernel to 'same' output shape, adjusting for optional dilation; returns padding size. + + `k`: kernel, `p`: padding, `d`: dilation. + """ if d > 1: k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k] # actual kernel-size if p is None: @@ -88,13 +92,19 @@ def forward_fuse(self, x): class DWConv(Conv): # Depth-wise convolution - def __init__(self, c1, c2, k=1, s=1, d=1, act=True): # ch_in, ch_out, kernel, stride, dilation, activation + def __init__(self, c1, c2, k=1, s=1, d=1, act=True): + """Initializes a depth-wise convolution layer with optional activation; args: input channels (c1), output + channels (c2), kernel size (k), stride (s), dilation (d), and activation flag (act). + """ super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act) class DWConvTranspose2d(nn.ConvTranspose2d): # Depth-wise transpose convolution - def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0): # ch_in, ch_out, kernel, stride, padding, padding_out + def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0): + """Initializes a depth-wise transpose convolutional layer for YOLOv5; args: input channels (c1), output channels + (c2), kernel size (k), stride (s), input padding (p1), output padding (p2). 
+ """ super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2)) @@ -148,7 +158,10 @@ def forward(self, x): class Bottleneck(nn.Module): # Standard bottleneck - def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion + def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): + """Initializes a standard bottleneck layer with optional shortcut and group convolution, supporting channel + expansion. + """ super().__init__() c_ = int(c2 * e) # hidden channels self.cv1 = Conv(c1, c_, 1, 1) @@ -164,7 +177,10 @@ def forward(self, x): class BottleneckCSP(nn.Module): # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks - def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): + """Initializes CSP bottleneck with optional shortcuts; args: ch_in, ch_out, number of repeats, shortcut bool, + groups, expansion. + """ super().__init__() c_ = int(c2 * e) # hidden channels self.cv1 = Conv(c1, c_, 1, 1) @@ -206,7 +222,10 @@ def forward(self, x): class C3(nn.Module): # CSP Bottleneck with 3 convolutions - def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): + """Initializes C3 module with options for channel count, bottleneck repetition, shortcut usage, group + convolutions, and expansion. + """ super().__init__() c_ = int(c2 * e) # hidden channels self.cv1 = Conv(c1, c_, 1, 1) @@ -283,7 +302,13 @@ def forward(self, x): class SPPF(nn.Module): # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher - def __init__(self, c1, c2, k=5): # equivalent to SPP(k=(5, 9, 13)) + def __init__(self, c1, c2, k=5): + """ + Initializes YOLOv5 SPPF layer with given channels and kernel size for YOLOv5 model, combining convolution and + max pooling. + + Equivalent to SPP(k=(5, 9, 13)). 
+ """ super().__init__() c_ = c1 // 2 # hidden channels self.cv1 = Conv(c1, c_, 1, 1) @@ -302,19 +327,26 @@ def forward(self, x): class Focus(nn.Module): # Focus wh information into c-space - def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups + def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): + """Initializes Focus module to concentrate width-height info into channel space with configurable convolution + parameters. + """ super().__init__() self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act) # self.contract = Contract(gain=2) - def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) + def forward(self, x): + """Processes input through Focus mechanism, reshaping (b,c,w,h) to (b,4c,w/2,h/2) then applies convolution.""" return self.conv(torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1)) # return self.conv(self.contract(x)) class GhostConv(nn.Module): # Ghost Convolution https://github.com/huawei-noah/ghostnet - def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups + def __init__(self, c1, c2, k=1, s=1, g=1, act=True): + """Initializes GhostConv with in/out channels, kernel size, stride, groups, and activation; halves out channels + for efficiency. 
+ """ super().__init__() c_ = c2 // 2 # hidden channels self.cv1 = Conv(c1, c_, k, s, None, g, act=act) @@ -328,7 +360,8 @@ def forward(self, x): class GhostBottleneck(nn.Module): # Ghost Bottleneck https://github.com/huawei-noah/ghostnet - def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride + def __init__(self, c1, c2, k=3, s=1): + """Initializes GhostBottleneck with ch_in `c1`, ch_out `c2`, kernel size `k`, stride `s`; see https://github.com/huawei-noah/ghostnet.""" super().__init__() c_ = c2 // 2 self.conv = nn.Sequential( @@ -494,18 +527,34 @@ def __init__(self, weights="yolov5s.pt", device=torch.device("cpu"), dnn=False, output_names = [] fp16 = False # default updated below dynamic = False - for i in range(model.num_bindings): - name = model.get_binding_name(i) - dtype = trt.nptype(model.get_binding_dtype(i)) - if model.binding_is_input(i): - if -1 in tuple(model.get_binding_shape(i)): # dynamic - dynamic = True - context.set_binding_shape(i, tuple(model.get_profile_shape(0, i)[2])) - if dtype == np.float16: - fp16 = True - else: # output - output_names.append(name) - shape = tuple(context.get_binding_shape(i)) + is_trt10 = not hasattr(model, "num_bindings") + num = range(model.num_io_tensors) if is_trt10 else range(model.num_bindings) + for i in num: + if is_trt10: + name = model.get_tensor_name(i) + dtype = trt.nptype(model.get_tensor_dtype(name)) + is_input = model.get_tensor_mode(name) == trt.TensorIOMode.INPUT + if is_input: + if -1 in tuple(model.get_tensor_shape(name)): # dynamic + dynamic = True + context.set_input_shape(name, tuple(model.get_profile_shape(name, 0)[2])) + if dtype == np.float16: + fp16 = True + else: # output + output_names.append(name) + shape = tuple(context.get_tensor_shape(name)) + else: + name = model.get_binding_name(i) + dtype = trt.nptype(model.get_binding_dtype(i)) + if model.binding_is_input(i): + if -1 in tuple(model.get_binding_shape(i)): # dynamic + dynamic = True + context.set_binding_shape(i, 
tuple(model.get_profile_shape(0, i)[2])) + if dtype == np.float16: + fp16 = True + else: # output + output_names.append(name) + shape = tuple(context.get_binding_shape(i)) im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device) bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr())) binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items()) @@ -982,10 +1031,14 @@ def print(self): """Logs the string representation of the current object's state via the LOGGER.""" LOGGER.info(self.__str__()) - def __len__(self): # override len(results) + def __len__(self): + """Returns the number of results stored, overrides the default len(results).""" return self.n - def __str__(self): # override print(results) + def __str__(self): + """Returns a string representation of the model's results, suitable for printing, overrides default + print(results). + """ return self._run(pprint=True) # print results def __repr__(self): @@ -995,7 +1048,8 @@ def __repr__(self): class Proto(nn.Module): # YOLOv5 mask Proto module for segmentation models - def __init__(self, c1, c_=256, c2=32): # ch_in, number of protos, number of masks + def __init__(self, c1, c_=256, c2=32): + """Initializes YOLOv5 Proto module for segmentation with input, proto, and mask channels configuration.""" super().__init__() self.cv1 = Conv(c1, c_, k=3) self.upsample = nn.Upsample(scale_factor=2, mode="nearest") diff --git a/models/experimental.py b/models/experimental.py index 62ee802f6248..6152cef1b389 100644 --- a/models/experimental.py +++ b/models/experimental.py @@ -11,8 +11,12 @@ class Sum(nn.Module): - # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 - def __init__(self, n, weight=False): # n: number of inputs + """Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070.""" + + def __init__(self, n, weight=False): + """Initializes a module to sum outputs of layers with number of inputs `n` and optional weighting, supporting 2+ + inputs. 
+ """ super().__init__() self.weight = weight # apply weights boolean self.iter = range(n - 1) # iter object @@ -33,8 +37,12 @@ def forward(self, x): class MixConv2d(nn.Module): - # Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595 - def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): # ch_in, ch_out, kernel, stride, ch_strategy + """Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595.""" + + def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): + """Initializes MixConv2d with mixed depth-wise convolutional layers, taking input and output channels (c1, c2), + kernel sizes (k), stride (s), and channel distribution strategy (equal_ch). + """ super().__init__() n = len(k) # number of convolutions if equal_ch: # equal c_ per group diff --git a/models/tf.py b/models/tf.py index 006a66d2b0f6..2a5cd566c406 100644 --- a/models/tf.py +++ b/models/tf.py @@ -190,15 +190,25 @@ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None): super().__init__() self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv) - def call(self, inputs): # x(b,w,h,c) -> y(b,w/2,h/2,4c) - # inputs = inputs / 255 # normalize 0-255 to 0-1 + def call(self, inputs): + """ + Performs pixel shuffling and convolution on input tensor, downsampling by 2 and expanding channels by 4. + + Example x(b,w,h,c) -> y(b,w/2,h/2,4c). + """ inputs = [inputs[:, ::2, ::2, :], inputs[:, 1::2, ::2, :], inputs[:, ::2, 1::2, :], inputs[:, 1::2, 1::2, :]] return self.conv(tf.concat(inputs, 3)) class TFBottleneck(keras.layers.Layer): # Standard bottleneck - def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None): # ch_in, ch_out, shortcut, groups, expansion + def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None): + """ + Initializes a standard bottleneck layer for TensorFlow models, expanding and contracting channels with optional + shortcut. + + Arguments are ch_in, ch_out, shortcut, groups, expansion. 
+ """ super().__init__() c_ = int(c2 * e) # hidden channels self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1) @@ -364,7 +374,10 @@ def call(self, inputs): class TFDetect(keras.layers.Layer): # TF YOLOv5 Detect layer - def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None): # detection layer + def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None): + """Initializes YOLOv5 detection layer for TensorFlow with configurable classes, anchors, channels, and image + size. + """ super().__init__() self.stride = tf.convert_to_tensor(w.stride.numpy(), dtype=tf.float32) self.nc = nc # number of classes @@ -454,7 +467,13 @@ def call(self, inputs): class TFUpsample(keras.layers.Layer): # TF version of torch.nn.Upsample() - def __init__(self, size, scale_factor, mode, w=None): # warning: all arguments needed including 'w' + def __init__(self, size, scale_factor, mode, w=None): + """ + Initializes a TensorFlow upsampling layer with specified size, scale_factor, and mode, ensuring scale_factor is + even. 
+ + Warning: all arguments needed including 'w' + """ super().__init__() assert scale_factor % 2 == 0, "scale_factor must be multiple of 2" self.upsample = lambda x: tf.image.resize(x, (x.shape[1] * scale_factor, x.shape[2] * scale_factor), mode) @@ -481,7 +500,8 @@ def call(self, inputs): return tf.concat(inputs, self.d) -def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3) +def parse_model(d, ch, model, imgsz): + """Parses a model definition dict `d` to create YOLOv5 model layers, including dynamic channel adjustments.""" LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}") anchors, nc, gd, gw, ch_mul = ( d["anchors"], @@ -562,7 +582,10 @@ def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3) class TFModel: # TF YOLOv5 model - def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None, model=None, imgsz=(640, 640)): # model, channels, classes + def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None, model=None, imgsz=(640, 640)): + """Initializes TF YOLOv5 model with specified configuration, channels, classes, model instance, and input + size. + """ super().__init__() if isinstance(cfg, dict): self.yaml = cfg # model dict @@ -640,7 +663,10 @@ def call(self, input, topk_all, iou_thres, conf_thres): ) @staticmethod - def _nms(x, topk_all=100, iou_thres=0.45, conf_thres=0.25): # agnostic NMS + def _nms(x, topk_all=100, iou_thres=0.45, conf_thres=0.25): + """Performs agnostic non-maximum suppression (NMS) on detected objects, filtering based on IoU and confidence + thresholds. 
+ """ boxes, classes, scores = x class_inds = tf.cast(tf.argmax(classes, axis=-1), tf.float32) scores_inp = tf.reduce_max(scores, -1) diff --git a/models/yolo.py b/models/yolo.py index ef6c1015f41e..ca62f934fc3f 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -75,7 +75,8 @@ class Detect(nn.Module): dynamic = False # force grid reconstruction export = False # export mode - def __init__(self, nc=80, anchors=(), ch=(), inplace=True): # detection layer + def __init__(self, nc=80, anchors=(), ch=(), inplace=True): + """Initializes YOLOv5 detection layer with specified classes, anchors, channels, and inplace operations.""" super().__init__() self.nc = nc # number of classes self.no = nc + 5 # number of outputs per anchor @@ -183,7 +184,8 @@ def _profile_one_layer(self, m, x, dt): if c: LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s} Total") - def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers + def fuse(self): + """Fuses Conv2d() and BatchNorm2d() layers in the model to improve inference speed.""" LOGGER.info("Fusing layers... 
") for m in self.model.modules(): if isinstance(m, (Conv, DWConv)) and hasattr(m, "bn"): @@ -193,7 +195,8 @@ def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers self.info() return self - def info(self, verbose=False, img_size=640): # print model information + def info(self, verbose=False, img_size=640): + """Prints model information given verbosity and image size, e.g., `info(verbose=True, img_size=640)`.""" model_info(self, verbose, img_size) def _apply(self, fn): @@ -212,7 +215,8 @@ def _apply(self, fn): class DetectionModel(BaseModel): # YOLOv5 detection model - def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None, anchors=None): # model, input channels, number of classes + def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None, anchors=None): + """Initializes YOLOv5 model with configuration file, input channels, number of classes, and custom anchors.""" super().__init__() if isinstance(cfg, dict): self.yaml = cfg # model dict @@ -303,8 +307,12 @@ def _clip_augmented(self, y): y[-1] = y[-1][:, i:] # small return y - def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency - # https://arxiv.org/abs/1708.02002 section 3.3 + def _initialize_biases(self, cf=None): + """ + Initializes biases for YOLOv5's Detect() module, optionally using class frequencies (cf). + + For details see https://arxiv.org/abs/1708.02002 section 3.3. + """ # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. 
m = self.model[-1] # Detect() module for mi, s in zip(m.m, m.stride): # from @@ -328,7 +336,10 @@ def __init__(self, cfg="yolov5s-seg.yaml", ch=3, nc=None, anchors=None): class ClassificationModel(BaseModel): # YOLOv5 classification model - def __init__(self, cfg=None, model=None, nc=1000, cutoff=10): # yaml, model, number of classes, cutoff index + def __init__(self, cfg=None, model=None, nc=1000, cutoff=10): + """Initializes YOLOv5 classification model from config file `cfg` or an existing `model`, with number of classes `nc` and `cutoff` + index. + """ super().__init__() self._from_detection_model(model, nc, cutoff) if model is not None else self._from_yaml(cfg) @@ -354,8 +365,8 @@ def _from_yaml(self, cfg): self.model = None -def parse_model(d, ch): # model_dict, input_channels(3) - # Parse a YOLOv5 model.yaml dictionary +def parse_model(d, ch): + """Parses a YOLOv5 model from a dict `d`, configuring layers based on input channels `ch` and model architecture.""" LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}") anchors, nc, gd, gw, act, ch_mul = ( d["anchors"], diff --git a/pyproject.toml b/pyproject.toml index 26765dfadada..5748b907cf30 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ build-backend = "setuptools.build_meta" # Project settings ----------------------------------------------------------------------------------------------------- [project] +version = "7.0.0" name = "YOLOv5" description = "Ultralytics YOLOv5 for SOTA object detection, instance segmentation and image classification."
readme = "README.md" @@ -77,7 +78,7 @@ dependencies = [ "thop>=0.1.1", # FLOPs computation "pandas>=1.1.4", "seaborn>=0.11.0", # plotting - "ultralytics>=8.0.232" + "ultralytics>=8.1.47" ] # Optional dependencies ------------------------------------------------------------------------------------------------ diff --git a/requirements.txt b/requirements.txt index 545d3c1c2921..3892abe07308 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ gitpython>=3.1.30 matplotlib>=3.3 numpy>=1.23.5 opencv-python>=4.1.1 -Pillow>=9.4.0 +pillow>=10.3.0 psutil # system resources PyYAML>=5.3.1 requests>=2.23.0 @@ -47,3 +47,4 @@ setuptools>=65.5.1 # Snyk vulnerability fix # mss # screenshots # albumentations>=1.0.3 # pycocotools>=2.0.6 # COCO mAP +wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/segment/train.py b/segment/train.py index ce59df9c635b..5a6e9afb8ec0 100644 --- a/segment/train.py +++ b/segment/train.py @@ -95,7 +95,12 @@ GIT_INFO = check_git_info() -def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictionary +def train(hyp, opt, device, callbacks): + """ + Trains the YOLOv5 model on a dataset, managing hyperparameters, model optimization, logging, and validation. + + `hyp` is path/to/hyp.yaml or hyp dictionary. + """ ( save_dir, epochs, diff --git a/train.py b/train.py index dfc45d78e5ce..493642adbcc7 100644 --- a/train.py +++ b/train.py @@ -100,7 +100,13 @@ GIT_INFO = check_git_info() -def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictionary +def train(hyp, opt, device, callbacks): + """ + Trains YOLOv5 model with given hyperparameters, options, and device, managing datasets, model architecture, loss + computation, and optimizer steps. + + `hyp` argument is path/to/hyp.yaml or hyp dictionary. 
+ """ save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze = ( Path(opt.save_dir), opt.epochs, diff --git a/utils/activations.py b/utils/activations.py index 6218eb58440a..928ae55a0b60 100644 --- a/utils/activations.py +++ b/utils/activations.py @@ -59,8 +59,10 @@ def forward(self, x): class FReLU(nn.Module): - # FReLU activation https://arxiv.org/abs/2007.11824 + """FReLU activation https://arxiv.org/abs/2007.11824.""" + def __init__(self, c1, k=3): # ch_in, kernel + """Initializes FReLU activation with channel `c1` and kernel size `k`.""" super().__init__() self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False) self.bn = nn.BatchNorm2d(c1) @@ -103,7 +105,8 @@ class MetaAconC(nn.Module): See "Activate or Not: Learning Customized Activation" https://arxiv.org/pdf/2009.04759.pdf. """ - def __init__(self, c1, k=1, s=1, r=16): # ch_in, kernel, stride, r + def __init__(self, c1, k=1, s=1, r=16): + """Initializes MetaAconC with params: channel_in (c1), kernel size (k=1), stride (s=1), reduction (r=16).""" super().__init__() c2 = max(r, c1 // r) self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1)) diff --git a/utils/augmentations.py b/utils/augmentations.py index dfac11f28776..872e7259560e 100644 --- a/utils/augmentations.py +++ b/utils/augmentations.py @@ -412,8 +412,13 @@ def mixup(im, labels, im2, labels2): return im, labels -def box_candidates(box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n) - # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio +def box_candidates(box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16): + """ + Filters bounding box candidates by minimum width-height threshold `wh_thr` (pixels), aspect ratio threshold + `ar_thr`, and area ratio threshold `area_thr`. + + box1(4,n) is before augmentation, box2(4,n) is after augmentation. 
+ """ w1, h1 = box1[2] - box1[0], box1[3] - box1[1] w2, h2 = box2[2] - box2[0], box2[3] - box2[1] ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio @@ -482,7 +487,12 @@ def __init__(self, size=(640, 640), auto=False, stride=32): self.auto = auto # pass max size integer, automatically solve for short side using stride self.stride = stride # used with auto - def __call__(self, im): # im = np.array HWC + def __call__(self, im): + """ + Resizes and pads input image `im` (HWC format) to specified dimensions, maintaining aspect ratio. + + im = np.array HWC + """ imh, imw = im.shape[:2] r = min(self.h / imh, self.w / imw) # ratio of new/old h, w = round(imh * r), round(imw * r) # resized image @@ -500,7 +510,12 @@ def __init__(self, size=640): super().__init__() self.h, self.w = (size, size) if isinstance(size, int) else size - def __call__(self, im): # im = np.array HWC + def __call__(self, im): + """ + Applies center crop to the input image and resizes it to a specified size, maintaining aspect ratio. + + im = np.array HWC + """ imh, imw = im.shape[:2] m = min(imh, imw) # min dimension top, left = (imh - m) // 2, (imw - m) // 2 @@ -514,7 +529,13 @@ def __init__(self, half=False): super().__init__() self.half = half - def __call__(self, im): # im = np.array HWC in BGR order + def __call__(self, im): + """ + Converts BGR np.array image from HWC to RGB CHW format, and normalizes to [0, 1], with support for FP16 if + `half=True`. 
+ + im = np.array HWC in BGR order + """ im = np.ascontiguousarray(im.transpose((2, 0, 1))[::-1]) # HWC to CHW -> BGR to RGB -> contiguous im = torch.from_numpy(im) # to torch im = im.half() if self.half else im.float() # uint8 to fp16/32 diff --git a/utils/dataloaders.py b/utils/dataloaders.py index 3991563e7910..6dba2c8d897a 100644 --- a/utils/dataloaders.py +++ b/utils/dataloaders.py @@ -1069,8 +1069,13 @@ def flatten_recursive(path=DATASETS_DIR / "coco128"): shutil.copyfile(file, new_path / Path(file).name) -def extract_boxes(path=DATASETS_DIR / "coco128"): # from utils.dataloaders import *; extract_boxes() - # Convert detection dataset into classification dataset, with one directory per class +def extract_boxes(path=DATASETS_DIR / "coco128"): + """ + Converts a detection dataset to a classification dataset, creating a directory for each class and extracting + bounding boxes. + + Example: from utils.dataloaders import *; extract_boxes() + """ path = Path(path) # images dir shutil.rmtree(path / "classification") if (path / "classification").is_dir() else None # remove existing files = list(path.rglob("*.*")) @@ -1089,7 +1094,7 @@ def extract_boxes(path=DATASETS_DIR / "coco128"): # from utils.dataloaders impo for j, x in enumerate(lb): c = int(x[0]) # class - f = (path / "classifier") / f"{c}" / f"{path.stem}_{im_file.stem}_{j}.jpg" # new filename + f = (path / "classification") / f"{c}" / f"{path.stem}_{im_file.stem}_{j}.jpg" # new filename if not f.parent.is_dir(): f.parent.mkdir(parents=True) @@ -1262,7 +1267,7 @@ def get_json(self, save=False, verbose=False): """Generates dataset JSON for Ultralytics HUB, optionally saves or prints it; save=bool, verbose=bool.""" def _round(labels): - # Update labels to integer class and 6 decimal place floats + """Rounds class labels to integers and coordinates to 4 decimal places.""" return [[int(c), *(round(x, 4) for x in points)] for c, *points in labels] for split in "train", "val", "test":
diff --git a/utils/general.py b/utils/general.py index 661475354adc..5a9325eec757 100644 --- a/utils/general.py +++ b/utils/general.py @@ -351,8 +351,12 @@ def run_once(): return run_once() or run_once() # check twice to increase robustness to intermittent connectivity issues -def git_describe(path=ROOT): # path must be a directory - # Return human-readable git description, i.e. v5.0-5-g3e25f1e https://git-scm.com/docs/git-describe +def git_describe(path=ROOT): + """ + Returns a human-readable git description of the repository at `path`, or an empty string on failure. + + Example output is 'v5.0-5-g3e25f1e'. See https://git-scm.com/docs/git-describe. + """ try: assert (Path(path) / ".git").is_dir() return check_output(f"git -C {path} describe --tags --long --always", shell=True).decode()[:-1] @@ -767,8 +771,12 @@ def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)): return (class_weights.reshape(1, nc) * class_counts).sum(1) -def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) - # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/ +def coco80_to_coco91_class(): + """ + Converts COCO 80-class index to COCO 91-class index used in the paper. + + Reference: https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/ + """ # a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n') # b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n') # x1 = [list(a[i] == b).index(True) + 1 for i in range(80)] # darknet to coco @@ -1108,8 +1116,13 @@ def non_max_suppression( return output -def strip_optimizer(f="best.pt", s=""): # from utils.general import *; strip_optimizer() - # Strip optimizer from 'f' to finalize training, optionally save as 's' +def strip_optimizer(f="best.pt", s=""): + """ + Strips optimizer and optionally saves checkpoint to finalize training; arguments are file path 'f' and save path + 's'.
+ + Example: from utils.general import *; strip_optimizer() + """ x = torch.load(f, map_location=torch.device("cpu")) if x.get("ema"): x["model"] = x["ema"] # replace model with ema diff --git a/utils/google_app_engine/additional_requirements.txt b/utils/google_app_engine/additional_requirements.txt index c1a2af2c1145..821c3caf3cbf 100644 --- a/utils/google_app_engine/additional_requirements.txt +++ b/utils/google_app_engine/additional_requirements.txt @@ -1,5 +1,5 @@ # add these requirements in your app on top of the existing ones pip==23.3 Flask==2.3.2 -gunicorn==19.10.0 +gunicorn==22.0.0 werkzeug>=3.0.1 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index 2a2c5d734c2e..011ec7c8915b 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -382,7 +382,7 @@ def __init__(self, opt, console_logger, include=("tb", "wandb", "clearml")): prefix = colorstr("ClearML: ") LOGGER.warning( f"{prefix}WARNING ⚠️ ClearML is installed but not configured, skipping ClearML logging." - f" See https://github.com/ultralytics/yolov5/tree/master/utils/loggers/clearml#readme" + f" See https://docs.ultralytics.com/yolov5/tutorials/clearml_logging_integration" ) else: self.clearml = None diff --git a/utils/loss.py b/utils/loss.py index 8a910e12ad6f..9d09f9df0261 100644 --- a/utils/loss.py +++ b/utils/loss.py @@ -8,8 +8,8 @@ from utils.torch_utils import de_parallel -def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441 - # return positive, negative label smoothing BCE targets +def smooth_BCE(eps=0.1): + """Returns label smoothing BCE targets for reducing overfitting; pos: `1.0 - 0.5*eps`, neg: `0.5*eps`. 
For details see https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441""" return 1.0 - 0.5 * eps, 0.5 * eps @@ -132,6 +132,7 @@ def __init__(self, model, autobalance=False): self.device = device def __call__(self, p, targets): # predictions, targets + """Performs forward pass, calculating class, box, and object loss for given predictions and targets.""" lcls = torch.zeros(1, device=self.device) # class loss lbox = torch.zeros(1, device=self.device) # box loss lobj = torch.zeros(1, device=self.device) # object loss diff --git a/utils/plots.py b/utils/plots.py index e1b073dfb1ad..cb5edabc6c41 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -67,7 +67,8 @@ def __call__(self, i, bgr=False): return (c[2], c[1], c[0]) if bgr else c @staticmethod - def hex2rgb(h): # rgb order (PIL) + def hex2rgb(h): + """Converts hexadecimal color `h` to an RGB tuple (PIL-compatible) with order (R, G, B).""" return tuple(int(h[1 + i : 1 + i + 2], 16) for i in (0, 2, 4)) @@ -225,8 +226,13 @@ def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=""): plt.close() -def plot_val_txt(): # from utils.plots import *; plot_val() - # Plot val.txt histograms +def plot_val_txt(): + """ + Plots 2D and 1D histograms of bounding box centers from 'val.txt' using matplotlib, saving as 'hist2d.png' and + 'hist1d.png'. + + Example: from utils.plots import *; plot_val_txt() + """ x = np.loadtxt("val.txt", dtype=np.float32) box = xyxy2xywh(x[:, :4]) cx, cy = box[:, 0], box[:, 1] @@ -242,8 +248,12 @@ def plot_val_txt(): # from utils.plots import *; plot_val() plt.savefig("hist1d.png", dpi=200) -def plot_targets_txt(): # from utils.plots import *; plot_targets_txt() - # Plot targets.txt histograms +def plot_targets_txt(): + """ + Plots histograms of object detection targets from 'targets.txt', saving the figure as 'targets.jpg'.
+ + Example: from utils.plots import *; plot_targets_txt() + """ x = np.loadtxt("targets.txt", dtype=np.float32).T s = ["x targets", "y targets", "width targets", "height targets"] fig, ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True) @@ -255,8 +265,13 @@ def plot_targets_txt(): # from utils.plots import *; plot_targets_txt() plt.savefig("targets.jpg", dpi=200) -def plot_val_study(file="", dir="", x=None): # from utils.plots import *; plot_val_study() - # Plot file=study.txt generated by val.py (or plot all study*.txt in dir) +def plot_val_study(file="", dir="", x=None): + """ + Plots validation study results from 'study*.txt' files in a directory or a specific file, comparing model + performance and speed. + + Example: from utils.plots import *; plot_val_study() + """ save_dir = Path(file).parent if file else Path(dir) plot2 = False # plot additional results if plot2: @@ -381,8 +396,12 @@ def imshow_cls(im, labels=None, pred=None, names=None, nmax=25, verbose=False, f return f -def plot_evolve(evolve_csv="path/to/evolve.csv"): # from utils.plots import *; plot_evolve() - # Plot evolve.csv hyp evolution results +def plot_evolve(evolve_csv="path/to/evolve.csv"): + """ + Plots hyperparameter evolution results from a given CSV, saving the plot and displaying best results. 
+ + Example: from utils.plots import *; plot_evolve() + """ evolve_csv = Path(evolve_csv) data = pd.read_csv(evolve_csv) keys = [x.strip() for x in data.columns] diff --git a/utils/segment/loss.py b/utils/segment/loss.py index 29f1bcbb7e77..fa0c10939b70 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -44,6 +44,7 @@ def __init__(self, model, autobalance=False, overlap=False): self.device = device def __call__(self, preds, targets, masks): # predictions, targets, model + """Evaluates YOLOv5 model's loss for given predictions, targets, and masks; returns total loss components.""" p, proto = preds bs, nm, mask_h, mask_w = proto.shape # batch size, number of masks, mask height, mask width lcls = torch.zeros(1, device=self.device) diff --git a/utils/torch_utils.py b/utils/torch_utils.py index c2c760efa404..4929d21cdf83 100644 --- a/utils/torch_utils.py +++ b/utils/torch_utils.py @@ -325,7 +325,9 @@ def model_info(model, verbose=False, imgsz=640): def scale_img(img, ratio=1.0, same_shape=False, gs=32): # img(16,3,256,416) - # Scales img(bs,3,y,x) by ratio constrained to gs-multiple + """Scales an image tensor `img` of shape (bs,3,y,x) by `ratio`, optionally maintaining the original shape, padded to + multiples of `gs`. + """ if ratio == 1.0: return img h, w = img.shape[2:]