diff --git a/.git_archival.txt b/.git_archival.txt new file mode 100644 index 000000000000..7c5100942aae --- /dev/null +++ b/.git_archival.txt @@ -0,0 +1,3 @@ +node: $Format:%H$ +node-date: $Format:%cI$ +describe-name: $Format:%(describe:tags=true,match=*[0-9]*)$ diff --git a/.gitattributes b/.gitattributes index 82d52fc38c9e..9841775c5e3b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -11,3 +11,6 @@ *.png binary *.jpg binary *.tiff binary + +# Freeze version information in git archives +.git_archival.txt export-subst diff --git a/.github/workflows/pip.yml b/.github/workflows/pip.yml index fb2b77da87a9..457527bcc98c 100644 --- a/.github/workflows/pip.yml +++ b/.github/workflows/pip.yml @@ -6,227 +6,254 @@ name: Build PyPI package on: push: - branches: [ main ] + branches: [ main, build/pip-packaging ] release: types: [ created ] +env: + # TODO: detect this from repo somehow: https://github.com/halide/Halide/issues/8406 + LLVM_VERSION: 19.1.0 + FLATBUFFERS_VERSION: 23.5.26 + WABT_VERSION: 1.0.36 + concurrency: group: '${{ github.workflow }}-${{ github.event.pull_request.head.label || github.head_ref || github.ref }}' cancel-in-progress: true -env: - LLVM_VER: 15.0.7 - permissions: contents: read # to fetch code (actions/checkout) packages: read # to fetch packages (docker) jobs: - # When creating 'dev' (e.g. nightly) PyPI packages, we need to create a unique - # label for each upload. For simplicity, we choose the Unix time-since-epoch in - # UTC form (aka `date +%s`). 
- pip-labels: - name: Create Label for PyPI Packages - runs-on: ubuntu-latest - outputs: - halide_pypi_label: ${{ steps.make_label.outputs.unix_time_utc }} - steps: - - id: make_label - run: echo "unix_time_utc=$(date +%s)" >> "$GITHUB_OUTPUT" - - pip-linux: - name: Package Halide Python bindings - - runs-on: ubuntu-latest - needs: pip-labels + build-wheels: + name: Build Halide wheels for ${{ matrix.platform_tag }} + runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: - arch: [ x86_64, aarch64 ] + include: + - os: ubuntu-latest + platform_tag: manylinux_x86_64 + - os: windows-latest + platform_tag: win_amd64 + - os: macos-13 + platform_tag: macosx_x86_64 + - os: macos-14 + platform_tag: macosx_arm64 + + env: + MACOSX_DEPLOYMENT_TARGET: 11 steps: - - uses: actions/checkout@v3 - - - name: Log in to GitHub Container Registry - uses: docker/login-action@v2.1.0 + - name: Login to GitHub Container Registry + if: matrix.os == 'ubuntu-latest' + uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Set up QEMU - uses: docker/setup-qemu-action@v2.1.0 + - uses: actions/checkout@v4 with: - platforms: all + fetch-depth: 0 + fetch-tags: true - - name: Build wheels - uses: pypa/cibuildwheel@v2.11.2 - env: - CIBW_ARCHS_LINUX: "${{ matrix.arch }}" - CIBW_BUILD: "cp38-manylinux* cp39-manylinux* cp310-manylinux*" - CIBW_CONFIG_SETTINGS: "--global-option=egg_info --global-option=-b.dev${{ needs.pip-labels.outputs.halide_pypi_label }} --global-option=--build-number --global-option=${{github.run_id}}" - CIBW_MANYLINUX_X86_64_IMAGE: ghcr.io/halide/manylinux2014_x86_64-llvm:${{ env.LLVM_VER }} - # CIBW_MANYLINUX_I686_IMAGE: ghcr.io/halide/manylinux2014_i686-llvm:${{ env.LLVM_VER }} - CIBW_MANYLINUX_AARCH64_IMAGE: ghcr.io/halide/manylinux2014_aarch64-llvm:${{ env.LLVM_VER }} - CIBW_BEFORE_ALL_LINUX: > - cmake -G Ninja -S . 
-B build - -DCMAKE_BUILD_TYPE=Release -DWITH_DOCS=NO -DWITH_PYTHON_BINDINGS=NO -DWITH_TESTS=NO - -DWITH_TUTORIALS=NO -DWITH_UTILS=NO -DWITH_PYTHON_STUBS=NO && - cmake --build build --target install - - - uses: actions/upload-artifact@v3 + # See: https://github.com/pypa/setuptools-scm/issues/455 + - name: Suppress git version tag + if: github.event_name == 'push' && github.ref_name == 'main' + run: | + echo 'local_scheme = "no-local-version"' >> pyproject.toml + git update-index --assume-unchanged pyproject.toml + + - uses: ilammy/msvc-dev-cmd@v1 + - uses: lukka/get-cmake@v3.28.4 + + - uses: actions/setup-python@v5 with: - name: wheels - path: ./wheelhouse/*.whl + python-version: 3.8 + if: runner.os == 'macOS' && runner.arch == 'ARM64' - pip-other: - name: Package Halide Python bindings + ######################################################################## + # flatbuffers + ######################################################################## - runs-on: ${{ matrix.runner }} - needs: pip-labels + - name: Cache flatbuffers build folder + if: matrix.os != 'ubuntu-latest' + id: cache-flatbuffers + uses: actions/cache@v4 + with: + path: opt/flatbuffers + key: flatbuffers-${{ env.FLATBUFFERS_VERSION }}-${{ matrix.platform_tag }} - strategy: - fail-fast: false - matrix: - include: - - runner: windows-latest - pytag: win_amd64 - arch: x64 + - uses: actions/checkout@v4 + if: matrix.os != 'ubuntu-latest' && steps.cache-flatbuffers.outputs.cache-hit != 'true' + with: + path: flatbuffers-src + repository: google/flatbuffers + ref: v${{ env.FLATBUFFERS_VERSION }} - - runner: macos-latest - pytag: macosx_universal2 - arch: x86_64;arm64 + - name: Configure flatbuffers + if: matrix.os != 'ubuntu-latest' && steps.cache-flatbuffers.outputs.cache-hit != 'true' + run: > + cmake -G Ninja -S flatbuffers-src -B flatbuffers-build + "-DCMAKE_BUILD_TYPE=Release" + "-DCMAKE_INSTALL_PREFIX=${{ github.workspace }}/opt/flatbuffers" + "-DFLATBUFFERS_BUILD_TESTS=NO" + + - name: Install 
flatbuffers + if: matrix.os != 'ubuntu-latest' && steps.cache-flatbuffers.outputs.cache-hit != 'true' + run: | + cmake --build flatbuffers-build --target install + cmake -E rm -rf flatbuffers-src flatbuffers-build + + ######################################################################## + # wabt + ######################################################################## + + - name: Cache wabt build folder + if: matrix.os != 'ubuntu-latest' && matrix.os != 'windows-latest' + id: cache-wabt + uses: actions/cache@v4 + with: + path: opt/wabt + key: wabt-${{ env.WABT_VERSION }}-${{ matrix.platform_tag }} - steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 + if: > + matrix.os != 'ubuntu-latest' && matrix.os != 'windows-latest' + && steps.cache-wabt.outputs.cache-hit != 'true' + with: + submodules: 'true' + path: wabt-src + repository: WebAssembly/wabt + ref: ${{ env.WABT_VERSION }} + + - name: Configure wabt + if: > + matrix.os != 'ubuntu-latest' && matrix.os != 'windows-latest' + && steps.cache-wabt.outputs.cache-hit != 'true' + run: > + cmake -G Ninja -S wabt-src -B wabt-build + "-DCMAKE_BUILD_TYPE=Release" + "-DCMAKE_INSTALL_PREFIX=${{ github.workspace }}/opt/wabt" + "-DWITH_EXCEPTIONS=ON" + "-DBUILD_TESTS=OFF" + "-DBUILD_TOOLS=OFF" + "-DBUILD_LIBWASM=OFF" + "-DUSE_INTERNAL_SHA256=ON" + + - name: Install wabt + if: > + matrix.os != 'ubuntu-latest' && matrix.os != 'windows-latest' + && steps.cache-wabt.outputs.cache-hit != 'true' + run: | + cmake --build wabt-build --target install + cmake -E rm -rf wabt-src wabt-build + + ######################################################################## + # LLVM + ######################################################################## - name: Cache LLVM build folder + if: matrix.os != 'ubuntu-latest' id: cache-llvm - uses: actions/cache@v3.0.11 + uses: actions/cache@v4 with: - path: local-llvm - key: llvmorg-${{ env.LLVM_VER }}-${{ runner.os }} - - - uses: ilammy/msvc-dev-cmd@v1 - - uses: 
lukka/get-cmake@latest + path: opt/llvm + key: llvm-${{ env.LLVM_VERSION }}-${{ matrix.platform_tag }} - - uses: actions/checkout@v3 - if: steps.cache-llvm.outputs.cache-hit != 'true' + - uses: actions/checkout@v4 + if: matrix.os != 'ubuntu-latest' && steps.cache-llvm.outputs.cache-hit != 'true' with: path: llvm-src repository: llvm/llvm-project - ref: llvmorg-${{ env.LLVM_VER }} + ref: llvmorg-${{ env.LLVM_VERSION }} - name: Configure LLVM - if: steps.cache-llvm.outputs.cache-hit != 'true' + if: matrix.os != 'ubuntu-latest' && steps.cache-llvm.outputs.cache-hit != 'true' run: > cmake -G Ninja -S llvm-src/llvm -B llvm-build - -DCMAKE_BUILD_TYPE=Release - "-DCMAKE_OSX_ARCHITECTURES=arm64;x86_64" - "-DLLVM_TARGETS_TO_BUILD=X86;ARM;NVPTX;AArch64;Hexagon;WebAssembly" + "-DCMAKE_BUILD_TYPE=Release" + "-DCMAKE_INSTALL_PREFIX=${{ github.workspace }}/opt/llvm" + "-DLLVM_ENABLE_ASSERTIONS=ON" + "-DLLVM_ENABLE_BINDINGS=OFF" + "-DLLVM_ENABLE_CURL=OFF" + "-DLLVM_ENABLE_DIA_SDK=OFF" + "-DLLVM_ENABLE_EH=ON" + "-DLLVM_ENABLE_HTTPLIB=OFF" + "-DLLVM_ENABLE_IDE=OFF" + "-DLLVM_ENABLE_LIBEDIT=OFF" + "-DLLVM_ENABLE_LIBXML2=OFF" + "-DLLVM_ENABLE_OCAMLDOC=OFF" "-DLLVM_ENABLE_PROJECTS=clang;lld" - -DLLVM_ENABLE_ASSERTIONS=ON - -DLLVM_ENABLE_RTTI=ON - -DLLVM_ENABLE_EH=ON - -DLLVM_ENABLE_LIBXML2=OFF - -DLLVM_ENABLE_TERMINFO=OFF - -DLLVM_ENABLE_ZSTD=OFF - -DLLVM_ENABLE_ZLIB=OFF - -DLLVM_ENABLE_OCAMLDOC=OFF - -DLLVM_ENABLE_BINDINGS=OFF - -DLLVM_ENABLE_IDE=OFF - - - name: Build LLVM - if: steps.cache-llvm.outputs.cache-hit != 'true' - run: cmake --build llvm-build + "-DLLVM_ENABLE_RTTI=ON" + "-DLLVM_ENABLE_RUNTIMES=compiler-rt" + "-DLLVM_ENABLE_TERMINFO=OFF" + "-DLLVM_ENABLE_WARNINGS=OFF" + "-DLLVM_ENABLE_ZLIB=OFF" + "-DLLVM_ENABLE_ZSTD=OFF" + "-DLLVM_INCLUDE_BENCHMARKS=OFF" + "-DLLVM_INCLUDE_EXAMPLES=OFF" + "-DLLVM_INCLUDE_TESTS=OFF" + "-DLLVM_TARGETS_TO_BUILD=WebAssembly;X86;AArch64;ARM;Hexagon;NVPTX;PowerPC;RISCV" - name: Install LLVM - if: steps.cache-llvm.outputs.cache-hit != 'true' - 
run: cmake --install llvm-build --prefix local-llvm - - # Remove the LLVM source tree after building it, otherwise we can - # run out of local space while building halide - - name: Clean LLVM Source - if: steps.cache-llvm.outputs.cache-hit != 'true' - shell: bash - run: rm -rf llvm-src - - - name: Configure Halide - if: runner.os == 'Windows' - run: > - cmake -G "Visual Studio 17 2022" -T ClangCL -A "${{ matrix.arch }}" -S . -B halide-build - -DWITH_DOCS=NO - -DWITH_PYTHON_BINDINGS=NO - -DWITH_TESTS=NO - -DWITH_TUTORIALS=NO - -DWITH_UTILS=NO - -DWITH_PYTHON_STUBS=NO - -DLLVM_DIR=${{ github.workspace }}/local-llvm/lib/cmake/llvm - - - name: Configure Halide - if: runner.os != 'Windows' - run: > - cmake -G Ninja -S . -B halide-build - -DCMAKE_BUILD_TYPE=Release - "-DCMAKE_OSX_ARCHITECTURES=${{ matrix.arch }}" - -DWITH_DOCS=NO - -DWITH_PYTHON_BINDINGS=NO - -DWITH_TESTS=NO - -DWITH_TUTORIALS=NO - -DWITH_UTILS=NO - -DWITH_PYTHON_STUBS=NO - -DLLVM_DIR=${{ github.workspace }}/local-llvm/lib/cmake/llvm - - - name: Build Halide - run: cmake --build halide-build --config Release - - - name: Install Halide - run: cmake --install halide-build --config Release --prefix local-halide + if: matrix.os != 'ubuntu-latest' && steps.cache-llvm.outputs.cache-hit != 'true' + run: | + cmake --build llvm-build --target install + cmake -E rm -rf llvm-src llvm-build + + ######################################################################## + # Wheels + ######################################################################## + + #- uses: mxschmitt/action-tmate@v3 - name: Build wheels - uses: pypa/cibuildwheel@v2.10.2 + uses: pypa/cibuildwheel@v2.20.0 env: - CMAKE_PREFIX_PATH: ${{ github.workspace }}/local-halide - CIBW_BUILD: "cp38-${{ matrix.pytag }} cp39-${{ matrix.pytag }} cp310-${{ matrix.pytag }}" - CIBW_CONFIG_SETTINGS: "--global-option=egg_info --global-option=-b.dev${{ needs.pip-labels.outputs.halide_pypi_label }} --global-option=--build-number --global-option=${{github.run_id}}" - 
CIBW_ARCHS_MACOS: "universal2" - - - uses: actions/upload-artifact@v3 + CIBW_BUILD: "cp3*-${{ matrix.platform_tag }}" + CIBW_SKIP: "cp3{5,6,7}*" + CIBW_ENVIRONMENT_MACOS: > + CMAKE_PREFIX_PATH='${{ github.workspace }}/opt' + Python_ROOT_DIR='' + CIBW_ENVIRONMENT_WINDOWS: > + CMAKE_GENERATOR=Ninja + CMAKE_PREFIX_PATH='${{ github.workspace }}\opt' + CIBW_MANYLINUX_X86_64_IMAGE: "ghcr.io/halide/manylinux_2_28_x86_64-llvm:${{ env.LLVM_VERSION }}" + CIBW_TEST_COMMAND: > + cmake -G Ninja -S {project}/python_bindings/apps -B build -DCMAKE_BUILD_TYPE=Release && + cmake --build build && + ctest --test-dir build --output-on-failure + # Windows puts the Python interpreter in /Scripts, rather than /bin, which CMake doesn't understand. + CIBW_TEST_COMMAND_WINDOWS: > + cmake -G Ninja -S {project}/python_bindings/apps -B build -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=%VIRTUAL_ENV% && + cmake --build build && + ctest --test-dir build --output-on-failure + + - uses: actions/upload-artifact@v4 with: - name: wheels + name: wheels-${{ matrix.platform_tag }} path: ./wheelhouse/*.whl - pip-sdist: - name: Make SDist - runs-on: ubuntu-latest - needs: pip-labels - steps: - - uses: actions/checkout@v3 - - run: pipx run build --sdist -C--global-option=egg_info -C--global-option=-b.dev${{ needs.pip-labels.outputs.halide_pypi_label }} - - uses: actions/upload-artifact@v3 - with: - name: wheels - path: dist/*.tar.gz - publish: name: Publish on PyPI - needs: [ pip-linux, pip-other, pip-sdist ] + needs: build-wheels runs-on: ubuntu-latest + permissions: + id-token: write steps: - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: - name: wheels + pattern: wheels-* + merge-multiple: true path: dist - - uses: pypa/gh-action-pypi-publish@v1.5.1 + - uses: pypa/gh-action-pypi-publish@release/v1 + if: github.event_name == 'push' && github.ref_name == 'main' with: - user: __token__ - password: ${{ secrets.TEST_PYPI_TOKEN }} repository_url: 
https://test.pypi.org/legacy/ - - uses: pypa/gh-action-pypi-publish@v1.5.1 + - uses: pypa/gh-action-pypi-publish@release/v1 if: github.event_name == 'release' && github.event.action == 'published' - with: - user: __token__ - password: ${{ secrets.PYPI_TOKEN }} diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index e30a606bd8d0..1b6b549214f4 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -1,13 +1,14 @@ name: Halide Presubmit Checks on: - # We don't want 'edited' (that's basically just the description, title, etc) + # We don't want 'edited' (that's basically just the description, title, etc.) # We don't want 'review_requested' (that's redundant to the ones below for our purposes) pull_request: - types: [opened, synchronize, reopened] + types: [ opened, synchronize, reopened ] paths: - '**.h' - '**.c' - '**.cpp' + - 'run-clang-tidy.sh' permissions: contents: read @@ -23,29 +24,17 @@ jobs: source: '.' extensions: 'h,c,cpp' clangFormatVersion: 17 - # As of Aug 2023, the macOS runners have more RAM (14GB vs 7GB) and CPU (3 cores vs 2) - # than the Linux and Windows runners, so let's use those instead, since clang-tidy is - # a bit of a sluggard check_clang_tidy: name: Check clang-tidy - runs-on: ubuntu-20.04 + runs-on: macos-14 steps: - uses: actions/checkout@v3 - name: Install clang-tidy - run: | - # from apt.llvm.org - # wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - - sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 15CF4D18AF4F7421 - sudo apt-add-repository "deb https://apt.llvm.org/$(lsb_release -sc)/ llvm-toolchain-$(lsb_release -sc)-17 main" - sudo apt-get update - sudo apt-get install llvm-17 clang-17 liblld-17-dev libclang-17-dev clang-tidy-17 ninja-build + run: brew install llvm@17 ninja - name: Run clang-tidy - run: | - export CC=clang-17 - export CXX=clang++-17 - export CLANG_TIDY_LLVM_INSTALL_DIR=/usr/lib/llvm-17 - export CMAKE_GENERATOR=Ninja - 
./run-clang-tidy.sh + run: ./run-clang-tidy.sh + env: + CLANG_TIDY_LLVM_INSTALL_DIR: /opt/homebrew/opt/llvm@17 check_cmake_file_lists: name: Check CMake file lists runs-on: ubuntu-20.04 diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 2fafbbfc0b1d..000000000000 --- a/MANIFEST.in +++ /dev/null @@ -1,6 +0,0 @@ -graft python_bindings -prune python_bindings/apps -prune python_bindings/test -prune python_bindings/tutorial -prune python_bindings/stub -include README_python.md diff --git a/Makefile b/Makefile index c12f2c8ad947..e433db1ee900 100644 --- a/Makefile +++ b/Makefile @@ -2560,15 +2560,17 @@ CLASS_DIAGRAMS = NO DISTRIBUTE_GROUP_DOC = YES EXAMPLE_PATH = "${Halide_SOURCE_DIR}/tutorial" EXCLUDE = bin +EXCLUDE_PATTERNS = README.md EXTRACT_ALL = YES EXTRACT_LOCAL_CLASSES = NO -FILE_PATTERNS = *.h +FILE_PATTERNS = *.h *.md GENERATE_TREEVIEW = YES HIDE_FRIEND_COMPOUNDS = YES HIDE_IN_BODY_DOCS = YES HIDE_UNDOC_CLASSES = YES HIDE_UNDOC_MEMBERS = YES JAVADOC_AUTOBRIEF = YES +MARKDOWN_ID_STYLE = GITHUB QT_AUTOBRIEF = YES QUIET = YES RECURSIVE = YES @@ -2583,7 +2585,7 @@ STRIP_CODE_COMMENTS = NO GENERATE_LATEX = NO HAVE_DOT = NO HTML_OUTPUT = . -INPUT = "${Halide_SOURCE_DIR}/src" "${Halide_SOURCE_DIR}/test" +INPUT = "${Halide_SOURCE_DIR}/doc" "${Halide_SOURCE_DIR}/src" "${Halide_SOURCE_DIR}/test" OUTPUT_DIRECTORY = ${DOC_DIR} PROJECT_NAME = Halide endef diff --git a/README.md b/README.md index 839785441292..dfbf68324742 100644 --- a/README.md +++ b/README.md @@ -6,8 +6,7 @@ currently targets: - CPU architectures: X86, ARM, Hexagon, PowerPC, RISC-V - Operating systems: Linux, Windows, macOS, Android, iOS, Qualcomm QuRT -- GPU Compute APIs: CUDA, OpenCL, Apple Metal, Microsoft - Direct X 12, Vulkan +- GPU Compute APIs: CUDA, OpenCL, Apple Metal, Microsoft Direct X 12, Vulkan Rather than being a standalone programming language, Halide is embedded in C++. 
This means you write C++ code that builds an in-memory representation of a @@ -18,26 +17,80 @@ in Python without C++. Halide requires C++17 (or later) to use. -For more detail about what Halide is, see http://halide-lang.org. +For more detail about what Halide is, see https://halide-lang.org. -For API documentation see http://halide-lang.org/docs +For API documentation see https://halide-lang.org/docs. -To see some example code, look in the tutorials directory. +For some example code, read through the tutorials online +at https://halide-lang.org/tutorials. The corresponding code is in the +`tutorial/` directory. Larger examples are in the `apps/` directory. If you've acquired a full source distribution and want to build Halide, see the -[notes below](#building-halide-with-cmake). +[notes below](#building-halide). # Getting Halide +## Pip + +As of Halide 19.0.0, we provide binary wheels on PyPI. Halide provides bindings +for C++ and Python. Even if you only intend to use Halide from C++, pip may be +the easiest way to get a binary build of Halide. + +Full releases may be installed with `pip` like so: + +```shell +$ pip install halide +``` + +Every commit to `main` is published to Test PyPI as a development version and +these may be installed with a few extra flags: + +```shell +$ pip install halide --pre --extra-index-url https://test.pypi.org/simple +``` + +Currently, we provide wheels for: Windows x86-64, macOS x86-64, macOS arm64, and +Linux x86-64. The Linux wheels are built for manylinux_2_28, which makes them +broadly compatible (Debian 10, Ubuntu 18.10, Fedora 29). + +*For C++ usage of the pip package:* On Linux and macOS, CMake's `find_package` +command should find Halide as long as you're in the same virtual environment you +installed it in. On Windows, you will need to add the virtual environment root +directory to `CMAKE_PREFIX_PATH`. This can be done by running +`set CMAKE_PREFIX_PATH=%VIRTUAL_ENV%` in `cmd`.
+ +Other build systems can find the Halide root path by running `python -c +"import halide; print(halide.install_dir())"`. + +## Homebrew + +Alternatively, if you use macOS, you can install Halide via +[Homebrew](https://brew.sh/) like so: + +``` +$ brew install halide +``` + ## Binary tarballs The latest version of Halide can always be found on GitHub at https://github.com/halide/Halide/releases -We provide binary releases for many popular platforms and architectures, -including 32/64-bit x86 Windows, 64-bit macOS, and 32/64-bit x86/ARM +We provide binary releases for many popular platforms and architectures, +including 32/64-bit x86 Windows, 64-bit x86/ARM macOS, and 32/64-bit x86/ARM Ubuntu Linux. +The macOS releases are built using Xcode's command-line tools with Apple Clang +500.2.76. This means that we link against libc++ instead of libstdc++. You may +need to adjust compiler options accordingly if you're using an older Xcode which +does not default to libc++. + +We use a recent Ubuntu LTS to build the Linux releases; if your distribution is +too old, it might not have the requisite glibc. + +Nightly builds of Halide and the LLVM versions we use in CI are also available +at https://buildbot.halide-lang.org/ + ## Vcpkg If you use [vcpkg](https://github.com/microsoft/vcpkg) to manage dependencies, @@ -52,27 +105,16 @@ code for the active platform. If you want to include all the backends, you should install `halide[target-all]:x64-windows` instead. Note that since this will build LLVM, it will take a _lot_ of disk space (up to 100GB). -## Homebrew - -Alternatively, if you use macOS, you can install Halide via -[Homebrew](https://brew.sh/) like so: - -``` -$ brew install halide -``` - ## Other package managers -We are interested in bringing Halide to other popular package managers and -Linux distribution repositories including, but not limited to, Conan, -Debian, [Ubuntu (or PPA)](https://github.com/halide/Halide/issues/5285), -CentOS/Fedora, and Arch.
If you have experience publishing packages we would be -happy to work with you! +We are interested in bringing Halide to other popular package managers and Linux +distribution repositories! We track the status of various distributions of +Halide [in this GitHub issue](https://github.com/halide/Halide/issues/4660). If +you have experience publishing packages we would be happy to work with you! -If you are a maintainer of any other package distribution platform, we would be -excited to work with you, too. +# Building Halide -# Platform Support +## Platform Support There are two sets of platform requirements relevant to Halide: those required to run the compiler library in either JIT or AOT mode, and those required to run @@ -81,13 +123,13 @@ the _binary outputs_ of the AOT compiler. These are the **tested** host toolchain and platform combinations for building and running the Halide compiler library. -| Compiler | Version | OS | Architectures | -|------------|--------------|------------------------|-----------------| -| GCC | 9.4 | Ubuntu Linux 20.04 LTS | x86, x64, ARM32 | -| GCC | 9.4 | Ubuntu Linux 18.04 LTS | ARM32, ARM64 | -| MSVC | 2019 (19.28) | Windows 10 (20H2) | x86, x64 | -| AppleClang | 14.0.3 | macOS 13.4 | x86_64 | -| AppleClang | 14.0.3 | macOS 13.4 | ARM64 | +| Compiler | Version | OS | Architectures | +|------------|--------------|------------------------|---------------| +| GCC | 9.5 | Ubuntu Linux 20.04 LTS | x86, x64 | +| GCC | 11.4 | Ubuntu Linux 22.04 LTS | ARM32, ARM64 | +| MSVC | 2022 (19.37) | Windows 11 (22631) | x86, x64 | +| AppleClang | 15.0.0 | macOS 14.4.1 | x64 | +| AppleClang | 14.0.0 | macOS 14.6 | ARM64 | Some users have successfully built Halide for Linux using Clang 9.0.0+, for Windows using ClangCL 11.0.0+, and for Windows ARM64 by cross-compiling with @@ -105,106 +147,93 @@ the generated headers correctly. The C++ bindings currently require C++17. 
If you discover a compatibility problem with a generated pipeline, please open an issue. -# Building Halide with Make - -### TL;DR - -Have llvm-16.0 (or greater) installed and run `make` in the root directory of -the repository (where this README is). - -### Acquiring LLVM +## Acquiring LLVM At any point in time, building Halide requires either the latest stable version -of LLVM, the previous stable version of LLVM, and trunk. At the time of writing, -this means versions 18, 17, and 16 are supported, but 15 is not. The commands -`llvm-config` and `clang` must be somewhere in the path. +of LLVM, the previous stable version of LLVM, or trunk. At the time of writing, +this means versions 19, 18, and 17 are supported, but 16 is not. -If your OS does not have packages for LLVM, you can find binaries for it at -http://llvm.org/releases/download.html. Download an appropriate package and then -either install it, or at least put the `bin` subdirectory in your path. (This -works well on OS X and Ubuntu.) +It is simplest to get a binary release of LLVM on macOS by using +[Homebrew](https://brew.sh). Just run `brew install llvm`. On Debian flavors of +Linux, the [LLVM APT repo](https://apt.llvm.org) is best; use the provided +installation script. We know of no suitable official binary releases for +Windows, however the ones we use in CI can usually be found at +https://buildbot.halide-lang.org, along with tarballs for our other tested +platforms. See [the section on Windows](#windows) below for further advice. -If you want to build it yourself, first check it out from GitHub: +If your OS does not have packages for LLVM, or you want more control over the +configuration, you can build it yourself. 
First check it out from GitHub: -``` -% git clone --depth 1 --branch llvmorg-16.0.6 https://github.com/llvm/llvm-project.git +```shell +$ git clone --depth 1 --branch llvmorg-18.1.8 https://github.com/llvm/llvm-project.git ``` -(If you want to build LLVM 17.x, use branch `release/17.x`; for current trunk, -use `main`) +(LLVM 18.1.8 is the most recent released LLVM at the time of writing. For +current trunk, use `main` instead) Then build it like so: -``` -% cmake -DCMAKE_BUILD_TYPE=Release \ +```shell +$ cmake -G Ninja -S llvm-project/llvm -B build \ + -DCMAKE_BUILD_TYPE=Release \ -DLLVM_ENABLE_PROJECTS="clang;lld;clang-tools-extra" \ - -DLLVM_TARGETS_TO_BUILD="X86;ARM;NVPTX;AArch64;Hexagon;WebAssembly;RISCV" \ - -DLLVM_ENABLE_TERMINFO=OFF -DLLVM_ENABLE_ASSERTIONS=ON \ - -DLLVM_ENABLE_EH=ON -DLLVM_ENABLE_RTTI=ON -DLLVM_BUILD_32_BITS=OFF \ - -DLLVM_ENABLE_RUNTIMES="compiler-rt" \ - -S llvm-project/llvm -B llvm-build -% cmake --build llvm-build -% cmake --install llvm-build --prefix llvm-install -``` - -Running a serial build will be slow. To improve speed, try running a parallel -build. That's done by default in Ninja; for make, use the option -j NNN, -where NNN is the number of parallel jobs, e.g. the number of CPUs you have. -Then, point Halide to it: - -``` -% export LLVM_ROOT=$PWD/llvm-install -% export LLVM_CONFIG=$LLVM_ROOT/bin/llvm-config -``` - -Note that you _must_ add `clang` to `LLVM_ENABLE_PROJECTS`; adding `lld` to -`LLVM_ENABLE_PROJECTS` is only required when using WebAssembly, -`LLVM_ENABLE_RUNTIMES="compiler-rt"` is only required if building the fuzz -tests, and adding `clang-tools-extra` is only necessary if you plan to -contribute code to Halide (so that you can run `clang-tidy` on your pull -requests). We recommend enabling both in all cases to simplify builds. You can -disable exception handling (EH) and RTTI if you don't want the Python bindings. 
- -### Building Halide with make - -With `LLVM_CONFIG` set (or `llvm-config` in your path), you should be able to -just run `make` in the root directory of the Halide source tree. -`make run_tests` will run the JIT test suite, and `make test_apps` will make -sure all the apps compile and run (but won't check their output). - -There is no `make install`. If you want to make an install package, use CMake. - -### Building Halide out-of-tree with make - -If you wish to build Halide in a separate directory, you can do that like so: - - % cd .. - % mkdir halide_build - % cd halide_build - % make -f ../Halide/Makefile - -# Building Halide with CMake + -DLLVM_ENABLE_RUNTIMES=compiler-rt \ + -DLLVM_TARGETS_TO_BUILD="WebAssembly;X86;AArch64;ARM;Hexagon;NVPTX;PowerPC;RISCV" \ + -DLLVM_ENABLE_ASSERTIONS=ON \ + -DLLVM_ENABLE_EH=ON \ + -DLLVM_ENABLE_RTTI=ON \ + -DLLVM_ENABLE_HTTPLIB=OFF \ + -DLLVM_ENABLE_LIBEDIT=OFF \ + -DLLVM_ENABLE_LIBXML2=OFF \ + -DLLVM_ENABLE_TERMINFO=OFF \ + -DLLVM_ENABLE_ZLIB=OFF \ + -DLLVM_ENABLE_ZSTD=OFF \ + -DLLVM_BUILD_32_BITS=OFF +$ cmake --build build +$ cmake --install build --prefix llvm-install +``` + +This will produce a working LLVM installation in `$PWD/llvm-install`. We refer +to this path as `LLVM_ROOT` later. **Do not confuse this installation tree with +the build tree!** + +LLVM takes a long time to build, so the above command uses Ninja to maximize +parallelism. If you choose to omit `-G Ninja`, Makefiles will be generated +instead. In this case, enable parallelism with `cmake --build build -j NNN` +where `NNN` is the number of parallel jobs, i.e. the number of CPUs you have. + +Note that you _must_ add `clang` and `lld` to `LLVM_ENABLE_PROJECTS` and +`WebAssembly` and `X86` _must_ be included in `LLVM_TARGETS_TO_BUILD`. +`LLVM_ENABLE_RUNTIMES=compiler-rt` is only required to build the fuzz tests, and +`clang-tools-extra` is only necessary if you plan to contribute code to Halide +(so that you can run `clang-tidy` on your pull requests). 
You can disable +exception handling (EH) and RTTI if you don't want the Python bindings. We +recommend enabling the full set to simplify builds during development. + +## Building Halide with CMake + +This is discussed in greater detail in [BuildingHalideWithCMake.md]. CMake +version 3.28+ is required to build Halide. + +[BuildingHalideWithCMake.md]: doc/BuildingHalideWithCMake.md ### MacOS and Linux Follow the above instructions to build LLVM or acquire a suitable binary release. Then change directory to the Halide repository and run: -``` -% cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DLLVM_DIR=$LLVM_ROOT/lib/cmake/llvm -S . -B build -% cmake --build build +```shell +$ cmake -G Ninja -S . -B build -DCMAKE_BUILD_TYPE=Release -DHalide_LLVM_ROOT=$LLVM_ROOT +$ cmake --build build ``` -`LLVM_DIR` is the folder in the LLVM installation tree **(do not use the build -tree by mistake)** that contains `LLVMConfig.cmake`. It is not required to set -this variable if you have a suitable system-wide version installed. If you have -multiple system-wide versions installed, you can specify the version with -`Halide_REQUIRE_LLVM_VERSION`. Remove `-G Ninja` if you prefer to build with a -different generator. +Setting `-DHalide_LLVM_ROOT` is not required if you have a suitable system-wide +version installed. However, if you have multiple LLVMs installed, it can pick +between them. ### Windows -We suggest building with Visual Studio 2019. Your mileage may vary with earlier +We suggest building with Visual Studio 2022. Your mileage may vary with earlier versions. Be sure to install the "C++ CMake tools for Windows" in the Visual Studio installer. For older versions of Visual Studio, do not install the CMake tools, but instead acquire CMake and Ninja from their respective project @@ -215,13 +244,13 @@ to `D:\Halide`. We also assume that your shell environment is set up correctly. 
For a 64-bit build, run: ``` -D:\> "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat" x64 +D:\> "C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvarsall.bat" x64 ``` For a 32-bit build, run: ``` -D:\> "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat" x64_x86 +D:\> "C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvarsall.bat" x64_x86 ``` #### Managing dependencies with vcpkg @@ -232,23 +261,19 @@ The best way to get compatible dependencies on Windows is to use ``` D:\> git clone https://github.com/Microsoft/vcpkg.git D:\> cd vcpkg -D:\> .\bootstrap-vcpkg.bat -D:\vcpkg> .\vcpkg integrate install +D:\vcpkg> .\bootstrap-vcpkg.bat -disableMetrics ... CMake projects should use: "-DCMAKE_TOOLCHAIN_FILE=D:/vcpkg/scripts/buildsystems/vcpkg.cmake" ``` -Then install the libraries. For a 64-bit build, run: +When using the toolchain file, vcpkg will automatically build all the necessary +dependencies. However, as stated above, be aware that acquiring LLVM this way +may use over 100 GB of disk space for its build trees and take a very long time +to build. You can manually delete the build trees afterward, but vcpkg will not +do this automatically. -``` -D:\vcpkg> .\vcpkg install libpng:x64-windows libjpeg-turbo:x64-windows llvm[target-all,clang-tools-extra]:x64-windows -``` - -To support 32-bit builds, also run: - -``` -D:\vcpkg> .\vcpkg install libpng:x86-windows libjpeg-turbo:x86-windows llvm[target-all,clang-tools-extra]:x86-windows -``` +See [BuildingHalideWithCMake.md](./doc/BuildingHalideWithCMake.md#vcpkg-presets) +for directions to use Vcpkg for everything _except_ LLVM. #### Building Halide @@ -257,189 +282,157 @@ build in either 32-bit or 64-bit depending on the environment script (`vcvars`) that was run earlier.
``` -D:\Halide> cmake -G Ninja ^ - -DCMAKE_BUILD_TYPE=Release ^ - -DCMAKE_TOOLCHAIN_FILE=D:/vcpkg/scripts/buildsystems/vcpkg.cmake ^ - -S . -B build +D:\Halide> cmake -G Ninja -S . -B build ^ + --toolchain D:/vcpkg/scripts/buildsystems/vcpkg.cmake ^ + -DCMAKE_BUILD_TYPE=Release ``` -**Note:** If building with Python bindings on 32-bit (enabled by default), be -sure to point CMake to the installation path of a 32-bit Python 3. You can do -this by specifying, for example: -`"-DPython3_ROOT_DIR=C:\Program Files (x86)\Python38-32"`. - Then run the build with: ``` -D:\Halide> cmake --build build --config Release +D:\Halide> cmake --build build ``` To run all the tests: ``` -D:\Halide> cd build -D:\Halide\build> ctest -C Release +D:\Halide> ctest --test-dir build --output-on-failure ``` Subsets of the tests can be selected with `-L` and include `correctness`, -`python`, `error`, and the other directory names under `/tests`. +`generator`, `error`, and the other directory names under `tests/`. #### Building LLVM (optional) Follow these steps if you want to build LLVM yourself. First, download LLVM's -sources (these instructions use the latest 17.0 release) +sources (these instructions use the 18.1.8 release). ``` -D:\> git clone --depth 1 --branch release/17.x https://github.com/llvm/llvm-project.git +D:\> git clone --depth 1 --branch llvm-org-18.1.8 https://github.com/llvm/llvm-project.git ``` -For a 64-bit build, run: +As above, run `vcvarsall.bat` to pick between x86 and x64. 
Then configure LLVM +with the following command (for 32-bit, set `-DLLVM_BUILD_32_BITS=ON` instead): ``` -D:\> cmake -G Ninja ^ +D:\> cmake -G Ninja -S llvm-project\llvm -B build ^ -DCMAKE_BUILD_TYPE=Release ^ -DLLVM_ENABLE_PROJECTS=clang;lld;clang-tools-extra ^ - -DLLVM_ENABLE_TERMINFO=OFF ^ - -DLLVM_TARGETS_TO_BUILD=X86;ARM;NVPTX;AArch64;Hexagon;RISCV ^ + -DLLVM_ENABLE_RUNTIMES=compiler-rt ^ + -DLLVM_TARGETS_TO_BUILD=WebAssembly;X86;AArch64;ARM;Hexagon;NVPTX;PowerPC;RISCV ^ -DLLVM_ENABLE_ASSERTIONS=ON ^ -DLLVM_ENABLE_EH=ON ^ -DLLVM_ENABLE_RTTI=ON ^ - -DLLVM_BUILD_32_BITS=OFF ^ - -S llvm-project\llvm -B llvm-build -``` - -For a 32-bit build, run: - -``` -D:\> cmake -G Ninja ^ - -DCMAKE_BUILD_TYPE=Release ^ - -DLLVM_ENABLE_PROJECTS=clang;lld;clang-tools-extra ^ + -DLLVM_ENABLE_HTTPLIB=OFF ^ + -DLLVM_ENABLE_LIBEDIT=OFF ^ + -DLLVM_ENABLE_LIBXML2=OFF ^ -DLLVM_ENABLE_TERMINFO=OFF ^ - -DLLVM_TARGETS_TO_BUILD=X86;ARM;NVPTX;AArch64;Hexagon;RISCV ^ - -DLLVM_ENABLE_ASSERTIONS=ON ^ - -DLLVM_ENABLE_EH=ON ^ - -DLLVM_ENABLE_RTTI=ON ^ - -DLLVM_BUILD_32_BITS=ON ^ - -S llvm-project\llvm -B llvm32-build + -DLLVM_ENABLE_ZLIB=OFF ^ + -DLLVM_ENABLE_ZSTD=OFF ^ + -DLLVM_BUILD_32_BITS=OFF ``` -Finally, run: +**MSBuild:** If you want to build LLVM with MSBuild instead of Ninja, use +`-G "Visual Studio 17 2022" -Thost=x64 -A x64` or +`-G "Visual Studio 17 2022" -Thost=x64 -A Win32` in place of `-G Ninja`. + +Finally, run the build and install to a local directory: ``` -D:\> cmake --build llvm-build --config Release -D:\> cmake --install llvm-build --prefix llvm-install +D:\> cmake --build build --config Release +D:\> cmake --install build --prefix llvm-install ``` You can substitute `Debug` for `Release` in the above `cmake` commands if you -want a debug build. Make sure to add `-DLLVM_DIR=D:/llvm-install/lib/cmake/llvm` -to the Halide CMake command to override `vcpkg`'s LLVM. +want a debug build. 
-**MSBuild:** If you want to build LLVM with MSBuild instead of Ninja, use -`-G "Visual Studio 16 2019" -Thost=x64 -A x64` or -`-G "Visual Studio 16 2019" -Thost=x64 -A Win32` in place of `-G Ninja`. +To use this with Halide, but still allow vcpkg to manage other dependencies, you +must add two flags to Halide's CMake configure command line. First, disable +vcpkg's LLVM with `-DVCPKG_OVERLAY_PORTS=cmake/vcpkg`. Second, point CMake to our +newly built LLVM with `-DHalide_LLVM_ROOT=D:/llvm-install`. #### If all else fails... -Do what the build-bots do: https://buildbot.halide-lang.org/master/#/builders - -If the column that best matches your system is red, then maybe things aren't -just broken for you. If it's green, then you can click the "stdio" links in the -latest build to see what commands the build bots run, and what the output was. - -# Some useful environment variables - -`HL_TARGET=...` will set Halide's AOT compilation target. - -`HL_JIT_TARGET=...` will set Halide's JIT compilation target. - -`HL_DEBUG_CODEGEN=1` will print out pseudocode for what Halide is compiling. -Higher numbers will print more detail. - -`HL_NUM_THREADS=...` specifies the number of threads to create for the thread -pool. When the async scheduling directive is used, more threads than this number -may be required and thus allocated. A maximum of 256 threads is allowed. (By -default, the number of cores on the host is used.) - -`HL_TRACE_FILE=...` specifies a binary target file to dump tracing data into -(ignored unless at least one `trace_` feature is enabled in `HL_TARGET` or -`HL_JIT_TARGET`). The output can be parsed programmatically by starting from the -code in `utils/HalideTraceViz.cpp`. - -# Using Halide on OSX +Do what the buildbots do: https://buildbot.halide-lang.org/master/#/builders -Precompiled Halide distributions are built using XCode's command-line tools with -Apple clang 500.2.76. This means that we link against libc++ instead of -libstdc++. 
You may need to adjust compiler options accordingly if you're using -an older XCode which does not default to libc++. +If the row that best matches your system is red, then maybe things aren't just +broken for you. If it's green, then you can click through to the latest build +and see the commands that the build bots run. Open a step ("Configure Halide" is +useful) and look at the "stdio" logs in the viewer. These logs contain the full +commands that were run, as well as the environment variables they were run with. -# Halide for Hexagon HVX +## Building Halide with make -Halide supports offloading work to Qualcomm Hexagon DSP on Qualcomm Snapdragon -845/710 devices or newer. The Hexagon DSP provides a set of 128 byte vector -instruction extensions - the Hexagon Vector eXtensions (HVX). HVX is well suited -for image processing, and Halide for Hexagon HVX will generate the appropriate -HVX vector instructions from a program authored in Halide. +> [!WARNING] +> We do not provide support for the Makefile. Feel free to use it, but if +> anything goes wrong, switch to the CMake build. Note also that the Makefile +> cannot build the Python bindings or produce install packages. -Halide can be used to compile Hexagon object files directly, by using a target -such as `hexagon-32-qurt-hvx`. +*TL;DR*: Have LLVM 17 (or greater) installed and run `make` in the root +directory of the repository (where this README is). -Halide can also be used to offload parts of a pipeline to Hexagon using the -`hexagon` scheduling directive. To enable the `hexagon` scheduling directive, -include the `hvx` target feature in your target. The currently supported -combination of targets is to use the HVX target features with an x86 linux -host (to use the simulator) or with an ARM android target (to use Hexagon DSP -hardware). For examples of using the `hexagon` scheduling directive on both the -simulator and a Hexagon DSP, see the blur example app. 
+By default, `make` will use the `llvm-config` tool found in the `PATH`. If you +want to use a different LLVM, such as a custom-built one following the +instructions above, set the following environment variable: -To build and run an example app using the Hexagon target, - -1. Obtain and build trunk LLVM and Clang. (Earlier versions of LLVM may work but - are not actively tested and thus not recommended.) -2. Download and install the Hexagon SDK and Hexagon Tools. Hexagon SDK 4.3.0 or - later is needed. Hexagon Tools 8.4 or later is needed. -3. Build and run an example for Hexagon HVX - -### 1. Obtain and build trunk LLVM and Clang - -(Follow the instructions given previously, just be sure to check out the `main` -branch.) - -### 2. Download and install the Hexagon SDK and Hexagon Tools - -Go to https://qpm.qualcomm.com/#/main/home +```shell +$ export LLVM_CONFIG="$LLVM_ROOT/bin/llvm-config" +``` -1. Go to Tools, and download Qualcomm Package Manager 3. Install the package manager on your machine. -2. Run the installed Qualcomm Package Manager and install the Qualcomm Hexagon SDK 5.x (or 4.x). - The SDK can be selected from the Qualcomm Hexagon SDK Products. -3. Set an environment variable to point to the SDK installation location - ``` - export SDK_LOC=/location/of/SDK - ``` +Now you should be able to just run `make` in the root directory of the Halide +source tree. `make run_tests` will run the JIT test suite, and `make test_apps` +will make sure all the apps compile and run (but won't check their output). -### 3. Build and run an example for Hexagon HVX +When building the tests, you can set the AOT compilation target with the +`HL_TARGET` environment variable. -In addition to running Hexagon code on device, Halide also supports running -Hexagon code on the simulator from the Hexagon tools. 
+### Building Halide out-of-tree with make -To build and run the blur example in Halide/apps/blur on the simulator: +If you wish to build Halide in a separate directory, you can do that like so: -``` -cd apps/blur -export HL_HEXAGON_SIM_REMOTE=../../src/runtime/hexagon_remote/bin/v65/hexagon_sim_remote -export HL_HEXAGON_TOOLS=$SDK_LOC/Hexagon_Tools/8.x/Tools/ -LD_LIBRARY_PATH=../../src/runtime/hexagon_remote/bin/host/:$HL_HEXAGON_TOOLS/lib/iss/:. HL_TARGET=host-hvx make test +```shell +$ cd .. +$ mkdir halide_build +$ cd halide_build +$ make -f ../Halide/Makefile ``` -### To build and run the blur example in Halide/apps/blur on Android: +# Some useful environment variables -To build the example for Android, first ensure that you have Android NDK r19b or -later installed, and the ANDROID_NDK_ROOT environment variable points to it. -(Note that Qualcomm Hexagon SDK v4.3.0 includes Android NDK r19c, which is -fine.) +`HL_JIT_TARGET=...` will set Halide's JIT compilation target. -Now build and run the blur example using the script to run it on device: +`HL_DEBUG_CODEGEN=1` will print out pseudocode for what Halide is compiling. +Higher numbers will print more detail. -``` -export HL_HEXAGON_TOOLS=$SDK_LOC/HEXAGON_Tools/8.4.11/Tools/ -HL_TARGET=arm-64-android-hvx ./adb_run_on_device.sh -``` +`HL_NUM_THREADS=...` specifies the number of threads to create for the thread +pool. When the async scheduling directive is used, more threads than this number +may be required and thus allocated. A maximum of 256 threads is allowed. (By +default, the number of cores on the host is used.) + +`HL_TRACE_FILE=...` specifies a binary target file to dump tracing data into +(ignored unless at least one `trace_` feature is enabled in the target). The +output can be parsed programmatically by starting from the code in +`utils/HalideTraceViz.cpp`. 
+ +# Further references + +We have more documentation in `doc/`; the following links might be helpful: + +| Document | Description | +|-----------------------------------------------|---------------------------------------------------------------------------| +| [CMake build](doc/BuildingHalideWithCMake.md) | How to configure and build Halide using CMake. | +| [CMake package](doc/HalideCMakePackage.md) | How to use the Halide CMake package to build your code. | +| [Hexagon](doc/Hexagon.md) | How to use the Hexagon backend. | +| [Python](doc/Python.md) | Documentation for the Python bindings. | +| [RunGen](doc/RunGen.md) | How to use the RunGen interface to run and benchmark arbitrary pipelines. | +| [Vulkan](doc/Vulkan.md) | How to use the Halide Vulkan backend (BETA). | +| [WebAssembly](doc/WebAssembly.md) | How to use the WebAssembly backend and how to use V8 in place of wabt. | +| [WebGPU](doc/WebGPU.md) | How to run WebGPU pipelines (BETA). | + +The following links are of greater interest to developers wishing to contribute +code to Halide: + +| Document | Description | +|------------------------------------------|---------------------------------------------------------------------------------------------------------------| +| [CMake developer](doc/CodeStyleCMake.md) | Guidelines for authoring new CMake code. | +| [FuzzTesting](doc/FuzzTesting.md) | Information about fuzz testing the Halide compiler (rather than pipelines). Intended for internal developers. | diff --git a/README_cmake.md b/README_cmake.md deleted file mode 100644 index d58fb18b767a..000000000000 --- a/README_cmake.md +++ /dev/null @@ -0,0 +1,1411 +0,0 @@ -# Halide and CMake - -This is a comprehensive guide to the three main usage stories of the Halide -CMake build. - -1. Compiling or packaging Halide from source. -2. Building Halide programs using the official CMake package. -3. Contributing to Halide and updating the build files. - -The following sections cover each in detail. 
- -## Table of Contents - -- [Halide and CMake](#halide-and-cmake) - - [Table of Contents](#table-of-contents) -- [Getting started](#getting-started) - - [Installing CMake](#installing-cmake) - - [Cross-platform](#cross-platform) - - [Windows](#windows) - - [macOS](#macos) - - [Ubuntu Linux](#ubuntu-linux) - - [Installing dependencies](#installing-dependencies) - - [Windows](#windows-1) - - [macOS](#macos-1) - - [Ubuntu](#ubuntu) -- [Building Halide with CMake](#building-halide-with-cmake) - - [Basic build](#basic-build) - - [Windows](#windows-2) - - [macOS and Linux](#macos-and-linux) - - [CMake Presets](#cmake-presets) - - [Installing](#installing) - - [Build options](#build-options) - - [Find module options](#find-module-options) -- [Using Halide from your CMake build](#using-halide-from-your-cmake-build) - - [A basic CMake project](#a-basic-cmake-project) - - [JIT mode](#jit-mode) - - [AOT mode](#aot-mode) - - [Autoschedulers](#autoschedulers) - - [RunGenMain](#rungenmain) - - [Halide package documentation](#halide-package-documentation) - - [Components](#components) - - [Variables](#variables) - - [Imported targets](#imported-targets) - - [Functions](#functions) - - [`add_halide_library`](#add_halide_library) - - [`add_halide_generator`](#add_halide_generator) - - [`add_halide_python_extension_library`](#add_halide_python_extension_library) - - [`add_halide_runtime`](#add_halide_runtime) - - [Cross compiling](#cross-compiling) - - [Use `add_halide_generator`](#use-add_halide_generator) - - [Use a super-build](#use-a-super-build) - - [Use `ExternalProject` directly](#use-externalproject-directly) - - [Use an emulator or run on device](#use-an-emulator-or-run-on-device) - - [Bypass CMake](#bypass-cmake) -- [Contributing CMake code to Halide](#contributing-cmake-code-to-halide) - - [General guidelines and best practices](#general-guidelines-and-best-practices) - - [Prohibited commands list](#prohibited-commands-list) - - [Prohibited variables 
list](#prohibited-variables-list) - - [Adding tests](#adding-tests) - - [Adding apps](#adding-apps) - -# Getting started - -This section covers installing a recent version of CMake and the correct -dependencies for building and using Halide. If you have not used CMake before, -we strongly suggest reading through the [CMake documentation][cmake-docs] first. - -## Installing CMake - -Halide requires at least version 3.22, which was released in November 2021. -Fortunately, getting a recent version of CMake couldn't be easier, and there are -multiple good options on any system to do so. Generally, one should always have -the most recent version of CMake installed system-wide. CMake is committed to -backwards compatibility and even the most recent release can build projects over -a decade old. - -### Cross-platform - -The Python package manager `pip3` has the newest version of CMake at all times. -This might be the most convenient method since Python 3 is an optional -dependency for Halide, anyway. - -``` -$ pip3 install --upgrade cmake -``` - -See the [PyPI website][pypi-cmake] for more details. - -### Windows - -On Windows, there are three primary methods for installing an up-to-date CMake: - -1. If you have Visual Studio 2019 installed, you can get CMake 3.17 through the - Visual Studio installer. This is the recommended way of getting CMake if you - are able to use Visual Studio 2019. See Microsoft's - [documentation][vs2019-cmake-docs] for more details. -2. If you use [Chocolatey][chocolatey], its [CMake package][choco-cmake] is kept - up to date. It should be as simple as `choco install cmake`. -3. Otherwise, you should install CMake from [Kitware's website][cmake-download]. - -### macOS - -On macOS, the [Homebrew][homebrew] [CMake package][brew-cmake] is kept up to -date. Simply run: - -``` -$ brew update -$ brew install cmake -``` - -to install the newest version of CMake. 
If your environment prevents you from -installing Homebrew, the binary release on [Kitware's website][cmake-download] -is also a viable option. - -### Ubuntu Linux - -There are a few good ways to install a modern CMake on Ubuntu: - -1. If you're on Ubuntu Linux 22.04 (Jammy Jellyfish), then simply running - `sudo apt install cmake` will get you CMake 3.22. -2. If you are on an older Ubuntu release or would like to use the newest CMake, - try installing via the snap store: `snap install cmake`. Be sure you do not - already have `cmake` installed via APT. The snap package automatically stays - up to date. -3. For older versions of Debian, Ubuntu, Mint, and derivatives, Kitware provides - an [APT repository][cmake-apt] with up-to-date releases. Note that this is - still useful for Ubuntu 20.04 because it will remain up to date. -4. If all else fails, you might need to build CMake from source (eg. on old - Ubuntu versions running on ARM). In that case, follow the directions posted - on [Kitware's website][cmake-from-source]. - -For other Linux distributions, check with your distribution's package manager or -use pip as detailed above. Snap packages might also be available. - -**Note:** On WSL 1, the snap service is not available; in this case, prefer to -use the APT repository. On WSL 2, all methods are available. - -## Installing dependencies - -We generally recommend using a package manager to fetch Halide's dependencies. -Except where noted, we recommend using [vcpkg][vcpkg] on Windows, -[Homebrew][homebrew] on macOS, and APT on Ubuntu 20.04 LTS. - -Only LLVM and Clang are _absolutely_ required to build Halide. Halide always -supports three LLVM versions: the current major version, the previous major -version, and trunk. The LLVM and Clang versions must match exactly. For most -users, we recommend using a binary release of LLVM rather than building it -yourself. - -However, to run all of the tests and apps, an extended set is needed. 
This -includes [lld][lld], [Python 3][python], [libpng][libpng], [libjpeg][libjpeg], -[Doxygen][doxygen], [OpenBLAS][openblas], [ATLAS][atlas], and [Eigen3][eigen]. -While not required to build any part of Halide, we find that [Ninja][ninja] is -the best backend build tool across all platforms. - -Note that CMake has many special variables for overriding the locations of -packages and executables. A partial list can be found in the -["find module options"](#find-module-options) section below, and more can be -found in the documentation for the CMake [find_package][find_package] command. -Normally, you should prefer to make sure your environment is set up so that -CMake can find dependencies automatically. For instance, if you want CMake to -use a particular version of Python, create a [virtual environment][venv] and -activate it _before_ configuring Halide. - -### Windows - -We assume you have vcpkg installed at `D:\vcpkg`. Follow the instructions in the -[vcpkg README][vcpkg] to install. Start by installing LLVM. - -``` -D:\vcpkg> .\vcpkg install llvm[target-all,enable-assertions,clang-tools-extra]:x64-windows -D:\vcpkg> .\vcpkg install llvm[target-all,enable-assertions,clang-tools-extra]:x86-windows -``` - -This will also install Clang and LLD. The `enable-assertions` option is not -strictly necessary but will make debugging during development much smoother. -These builds will take a long time and a lot of disk space. After they are -built, it is safe to delete the intermediate build files and caches in -`D:\vcpkg\buildtrees` and `%APPDATA%\local\vcpkg`. - -Then install the other libraries: - -``` -D:\vcpkg> .\vcpkg install libpng:x64-windows libjpeg-turbo:x64-windows openblas:x64-windows eigen3:x64-windows -D:\vcpkg> .\vcpkg install libpng:x86-windows libjpeg-turbo:x86-windows openblas:x86-windows eigen3:x86-windows -``` - -To build the documentation, you will need to install [Doxygen][doxygen]. 
This -can be done either through [Chocolatey][choco-doxygen] or from the [Doxygen -website][doxygen-download]. - -``` -> choco install doxygen -``` - -To build the Python bindings, you will need to install Python 3. This should be -done by running the official installer from the [Python website][python]. Be -sure to download the debugging symbols through the installer. This will require -using the "Advanced Installation" workflow. Although it is not strictly -necessary, it is convenient to install Python system-wide on Windows (ie. -`C:\Program Files`). This makes it easy for CMake to find without needing to -manually set the `PATH`. - -Once Python is installed, you can install the Python module dependencies either -globally or in a [virtual environment][venv] by running - -``` -> pip3 install -r requirements.txt -``` - -from the root of the repository. - -If you would like to use [Ninja][ninja], note that it is installed alongside -CMake when using the Visual Studio 2019 installer. Alternatively, you can -install via [Chocolatey][choco-ninja] or place the [pre-built -binary][ninja-download] from their website in the PATH. - -``` -> choco install ninja -``` - -### macOS - -On macOS, it is possible to install all dependencies via [Homebrew][homebrew]: - -``` -$ brew install llvm libpng libjpeg python@3.8 openblas doxygen ninja -``` - -The `llvm` package includes `clang`, `clang-format`, and `lld`, too. 
Don't -forget to install the Python module dependencies: - -``` -$ pip3 install -r python_bindings/requirements.txt -``` - -### Ubuntu - -Finally, on Ubuntu 20.04 LTS, you should install the following packages (this -includes the Python module dependencies): - -``` -dev@ubuntu:~$ sudo apt install \ - clang-tools lld llvm-dev libclang-dev liblld-10-dev \ - libpng-dev libjpeg-dev libgl-dev \ - python3-dev python3-numpy python3-scipy python3-imageio python3-pybind11 \ - libopenblas-dev libeigen3-dev libatlas-base-dev \ - doxygen ninja-build -``` - -# Building Halide with CMake - -## Basic build - -These instructions assume that your working directory is the Halide repo root. - -### Windows - -If you plan to use the Ninja generator, be sure to be in the developer command -prompt corresponding to your intended environment. Note that whatever your -intended target system (x86, x64, or arm), you must use the 64-bit _host tools_ -because the 32-bit tools run out of memory during the linking step with LLVM. -More information is available from [Microsoft's documentation][msvc-cmd]. - -You should either open the correct Developer Command Prompt directly or run the -[`vcvarsall.bat`][vcvarsall] script with the correct argument, ie. one of the -following: - -``` -D:\> "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat" x64 -D:\> "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat" x64_x86 -D:\> "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat" x64_arm -``` - -Then, assuming that vcpkg is installed to `D:\vcpkg`, simply run: - -``` -> cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=D:\vcpkg\scripts\buildsystems\vcpkg.cmake -S . -B build -> cmake --build .\build -``` - -Valid values of [`CMAKE_BUILD_TYPE`][cmake_build_type] are `Debug`, -`RelWithDebInfo`, `MinSizeRel`, and `Release`. 
When using a single-configuration -generator (like Ninja) you must specify a build type when configuring Halide (or -any other CMake project). - -Otherwise, if you wish to create a Visual Studio based build system, you can -configure with: - -``` -> cmake -G "Visual Studio 16 2019" -Thost=x64 -A x64 ^ - -DCMAKE_TOOLCHAIN_FILE=D:\vcpkg\scripts\buildsystems\vcpkg.cmake ^ - -S . -B build -> cmake --build .\build --config Release -j %NUMBER_OF_PROCESSORS% -``` - -Because the Visual Studio generator is a _multi-config generator_, you don't set -`CMAKE_BUILD_TYPE` at configure-time, but instead pass the configuration to the -build (and test/install) commands with the `--config` flag. More documentation -is available in the [CMake User Interaction Guide][cmake-user-interaction]. - -The process is similar for 32-bit: - -``` -> cmake -G "Visual Studio 16 2019" -Thost=x64 -A Win32 ^ - -DCMAKE_TOOLCHAIN_FILE=D:\vcpkg\scripts\buildsystems\vcpkg.cmake ^ - -S . -B build -> cmake --build .\build --config Release -j %NUMBER_OF_PROCESSORS% -``` - -In both cases, the `-Thost=x64` flag ensures that the correct host tools are -used. - -**Note:** due to limitations in MSBuild, incremental builds using the VS -generators will not detect changes to headers in the `src/runtime` folder. We -recommend using Ninja for day-to-day development and use Visual Studio only if -you need it for packaging. - -### macOS and Linux - -The instructions here are straightforward. Assuming your environment is set up -correctly, just run: - -``` -dev@host:~/Halide$ cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -S . -B build -dev@host:~/Halide$ cmake --build ./build -``` - -If you omit `-G Ninja`, a Makefile-based generator will likely be used instead. -In either case, [`CMAKE_BUILD_TYPE`][cmake_build_type] must be set to one of the -standard types: `Debug`, `RelWithDebInfo`, `MinSizeRel`, or `Release`. 
- -### CMake Presets - -If you are using CMake 3.21+, we provide several [presets][cmake_presets] to -make the above commands more convenient. The following CMake preset commands -correspond to the longer ones above. - -``` -> cmake --preset=win64 # VS 2019 generator, 64-bit build, vcpkg deps -> cmake --preset=win32 # VS 2019 generator, 32-bit build, vcpkg deps -> cmake --preset=release # Release mode, any single-config generator / compiler - -$ cmake --list-presets # Get full list of presets. -``` - -The Windows presets assume that the environment variable `VCPKG_ROOT` is set and -points to the root of the vcpkg installation. - -There are also presets to use some Clang sanitizers with the CMake build; -at present, only Fuzzer and ASAN (Address Sanitizer) are supported, and -only on linux-x86-64. To use these, you must build LLVM with additional options: - -``` - -D LLVM_ENABLE_PROJECTS="clang;lld;clang-tools-extra" - -D LLVM_ENABLE_RUNTIMES="compiler-rt;libcxx;libcxxabi;libunwind" -``` - -To build / test with ASAN, use `--preset linux-x64-asan`. - -To build / test with the Fuzzer, use `--preset linux-x64-fuzzer`. - -## Installing - -Once built, Halide will need to be installed somewhere before using it in a -separate project. On any platform, this means running the -[`cmake --install`][cmake-install] command in one of two ways. For a -single-configuration generator (like Ninja), run either: - -``` -dev@host:~/Halide$ cmake --install ./build --prefix /path/to/Halide-install -> cmake --install .\build --prefix X:\path\to\Halide-install -``` - -For a multi-configuration generator (like Visual Studio) run: - -``` -dev@host:~/Halide$ cmake --install ./build --prefix /path/to/Halide-install --config Release -> cmake --install .\build --prefix X:\path\to\Halide-install --config Release -``` - -Of course, make sure that you build the corresponding config before attempting -to install it. 
- -## Build options - -Halide reads and understands several options that can configure the build. The -following are the most consequential and control how Halide is actually -compiled. - -| Option | Default | Description | -|------------------------------------------|-----------------------|---------------------------------------------------------------------------------------------------| -| [`BUILD_SHARED_LIBS`][build_shared_libs] | `ON` | Standard CMake variable that chooses whether to build as a static or shared library. | -| `Halide_BUNDLE_STATIC` | `OFF` | When building Halide as a static library, merge static library dependencies into libHalide.a. | -| `Halide_LLVM_SHARED_LIBS` | `OFF` | Link to the shared version of LLVM. Not available on Windows. | -| `Halide_ENABLE_RTTI` | _inherited from LLVM_ | Enable RTTI when building Halide. Recommended to be set to `ON` | -| `Halide_ENABLE_EXCEPTIONS` | `ON` | Enable exceptions when building Halide | -| `Halide_TARGET` | _empty_ | The default target triple to use for `add_halide_library` (and the generator tests, by extension) | - -The following options are _advanced_ and should not be required in typical workflows. Generally, these are used by -Halide's own CI infrastructure, or as escape hatches for third-party packagers. - -| Option | Default | Description | -|-----------------------------|--------------------------------------------------------------------|------------------------------------------------------------------------------------------| -| `Halide_CLANG_TIDY_BUILD` | `OFF` | Used internally to generate fake compile jobs for runtime files when running clang-tidy. | -| `Halide_CCACHE_BUILD` | `OFF` | Use ccache with Halide-recommended settings to accelerate rebuilds. | -| `Halide_CCACHE_PARAMS` | `CCACHE_CPP2=yes CCACHE_HASHDIR=yes CCACHE_SLOPPINESS=pch_defines` | Options to pass to `ccache` when using `Halide_CCACHE_BUILD`. 
| -| `Halide_SOVERSION_OVERRIDE` | `${Halide_VERSION_MAJOR}` | Override the SOVERSION for libHalide. Expects a positive integer (i.e. not a version). | - -The following options are only available when building Halide directly, ie. not -through the [`add_subdirectory`][add_subdirectory] or -[`FetchContent`][fetchcontent] mechanisms. They control whether non-essential -targets (like tests and documentation) are built. - -| Option | Default | Description | -|------------------------|---------|------------------------------------------------------------------| -| `WITH_TESTS` | `ON` | Enable building unit and integration tests | -| `WITH_PYTHON_BINDINGS` | `ON` | Enable building Python 3.x bindings | -| `WITH_DOCS` | `OFF` | Enable building the documentation via Doxygen | -| `WITH_UTILS` | `ON` | Enable building various utilities including the trace visualizer | -| `WITH_TUTORIALS` | `ON` | Enable building the tutorials | - -The following options control whether to build certain test subsets. They only -apply when `WITH_TESTS=ON`: - -| Option | Default | Description | -|---------------------------|---------|-----------------------------------| -| `WITH_TEST_AUTO_SCHEDULE` | `ON` | enable the auto-scheduling tests | -| `WITH_TEST_CORRECTNESS` | `ON` | enable the correctness tests | -| `WITH_TEST_ERROR` | `ON` | enable the expected-error tests | -| `WITH_TEST_WARNING` | `ON` | enable the expected-warning tests | -| `WITH_TEST_PERFORMANCE` | `ON` | enable performance testing | -| `WITH_TEST_GENERATOR` | `ON` | enable the AOT generator tests | - -The following options are WebAssembly-specific. They only apply when -`TARGET_WEBASSEMBLY=ON`: - -| Option | Default | Description | -|-----------------------|---------|------------------------------------------------------------------------------------------| -| `Halide_WASM_BACKEND` | `wabt` | Select the backend for WASM testing. Can be `wabt`, `V8` or a false value such as `OFF`. 
| - -### Find module options - -Halide uses the following find modules to search for certain dependencies. These -modules accept certain variables containing hints for the search process. Before -setting any of these variables, closely study the [`find_package`][find_package] -documentation. - -All of these variables should be set at the CMake command line via the `-D` -flag. - -First, Halide expects to find LLVM and Clang through the `CONFIG` mode of -`find_package`. You can tell Halide where to find these dependencies by setting -the corresponding `_DIR` variables: - -| Variable | Description | -|-------------|------------------------------------------------| -| `LLVM_DIR` | `$LLVM_ROOT/lib/cmake/LLVM/LLVMConfig.cmake` | -| `Clang_DIR` | `$LLVM_ROOT/lib/cmake/Clang/ClangConfig.cmake` | - -Here, `$LLVM_ROOT` is assumed to point to the root of an LLVM installation tree. -This is either a system path or one produced by running `cmake --install` (as -detailed in the main README.md). When building LLVM (and any other `CONFIG` -packages) manually, it is a common mistake to point CMake to a _build tree_ -rather than an _install tree_. Doing so often produces inscrutable errors. - -When using CMake 3.18 or above, some of Halide's tests will search for CUDA -using the [`FindCUDAToolkit`][findcudatoolkit] module. If it doesn't find your -CUDA installation automatically, you can point it to it by setting: - -| Variable | Description | -|--------------------|---------------------------------------------------| -| `CUDAToolkit_ROOT` | Path to the directory containing `bin/nvcc[.exe]` | -| `CUDA_PATH` | _Environment_ variable, same as above. | - -If the CMake version is lower than 3.18, the deprecated [`FindCUDA`][findcuda] -module will be used instead. It reads the variable `CUDA_TOOLKIT_ROOT_DIR` -instead of `CUDAToolkit_ROOT` above. - -Halide also searches for `libpng` and `libjpeg-turbo` through the -[`FindPNG`][findpng] and [`FindJPEG`][findjpeg] modules, respectively. 
They can -be overridden by setting the following variables. - -| Variable | Description | -|---------------------|----------------------------------------------------| -| `PNG_LIBRARIES` | Paths to the libraries to link against to use PNG. | -| `PNG_INCLUDE_DIRS` | Path to `png.h`, etc. | -| `JPEG_LIBRARIES` | Paths to the libraries needed to use JPEG. | -| `JPEG_INCLUDE_DIRS` | Paths to `jpeglib.h`, etc. | - -When `WITH_DOCS` is set to `ON`, Halide searches for Doxygen using the -[`FindDoxygen`][finddoxygen] module. It can be overridden by setting the -following variable. - -| Variable | Description | -|----------------------|---------------------------------| -| `DOXYGEN_EXECUTABLE` | Path to the Doxygen executable. | - -When compiling for an OpenCL target, Halide uses the [`FindOpenCL`][findopencl] -target to locate the libraries and include paths. These can be overridden by -setting the following variables: - -| Variable | Description | -|-----------------------|-------------------------------------------------------| -| `OpenCL_LIBRARIES` | Paths to the libraries to link against to use OpenCL. | -| `OpenCL_INCLUDE_DIRS` | Include directories for OpenCL. | - -Lastly, Halide searches for Python 3 using the [`FindPython3`][findpython3] -module, _not_ the deprecated `FindPythonInterp` and `FindPythonLibs` modules, -like other projects you might have encountered. You can select which Python -installation to use by setting the following variable. - -| Variable | Description | -|--------------------|-------------------------------------------------------| -| `Python3_ROOT_DIR` | Define the root directory of a Python 3 installation. | - -# Using Halide from your CMake build - -This section assumes some basic familiarity with CMake but tries to be explicit -in all its examples. To learn more about CMake, consult the -[documentation][cmake-docs] and engage with the community on the [CMake -Discourse][cmake-discourse]. 
- -Note: previous releases bundled a `halide.cmake` module that was meant to be -[`include()`][include]-ed into your project. This has been removed. Please -upgrade to the new package config module. - -## A basic CMake project - -There are two main ways to use Halide in your application: as a **JIT compiler** -for dynamic pipelines or an **ahead-of-time (AOT) compiler** for static -pipelines. CMake provides robust support for both use cases. - -No matter how you intend to use Halide, you will need some basic CMake -boilerplate. - -```cmake -cmake_minimum_required(VERSION 3.28) -project(HalideExample) - -set(CMAKE_CXX_STANDARD 17) # or newer -set(CMAKE_CXX_STANDARD_REQUIRED YES) -set(CMAKE_CXX_EXTENSIONS NO) - -find_package(Halide REQUIRED) -``` - -The [`cmake_minimum_required`][cmake_minimum_required] command is required to be -the first command executed in a CMake program. It disables all of the deprecated -behavior ("policies" in CMake lingo) from earlier versions. The -[`project`][project] command sets the name of the project (and has arguments for -versioning, language support, etc.) and is required by CMake to be called -immediately after setting the minimum version. - -The next three variables set the project-wide C++ standard. The first, -[`CMAKE_CXX_STANDARD`][cmake_cxx_standard], simply sets the standard version. -Halide requires at least C++17. The second, -[`CMAKE_CXX_STANDARD_REQUIRED`][cmake_cxx_standard_required], tells CMake to -fail if the compiler cannot provide the requested standard version. Lastly, -[`CMAKE_CXX_EXTENSIONS`][cmake_cxx_extensions] tells CMake to disable -vendor-specific extensions to C++. This is not necessary to simply use Halide, -but we require it when authoring new code in the Halide repo. - -Finally, we use [`find_package`][find_package] to locate Halide on your system. 
-If Halide is not globally installed, you will need to add the root of the Halide -installation directory to [`CMAKE_PREFIX_PATH`][cmake_prefix_path] at the CMake -command line. - -```console -dev@ubuntu:~/myproj$ cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH="/path/to/Halide-install" -S . -B build -``` - -## JIT mode - -To use Halide in JIT mode (like the [tutorials][halide-tutorials] do, for -example), you can simply link to `Halide::Halide`. - -```cmake -# ... same project setup as before ... -add_executable(my_halide_app main.cpp) -target_link_libraries(my_halide_app PRIVATE Halide::Halide) -``` - -Then `Halide.h` will be available to your code and everything should just work. -That's it! - -## AOT mode - -Using Halide in AOT mode is more complicated so we'll walk through it step by -step. Note that this only applies to Halide generators, so it might be useful to -re-read the [tutorial][halide-generator-tutorial] on generators. Assume (like in -the tutorial) that you have a source file named `my_generators.cpp` and that in -it you have generator classes `MyFirstGenerator` and `MySecondGenerator` with -registered names `my_first_generator` and `my_second_generator` respectively. - -Then the first step is to add a **generator executable** to your build: - -```cmake -# ... same project setup as before ... -add_executable(my_generators my_generators.cpp) -target_link_libraries(my_generators PRIVATE Halide::Generator) -``` - -Using the generator executable, we can add a Halide library corresponding to -`MyFirstGenerator`. - -```cmake -# ... continuing from above -add_halide_library(my_first_generator FROM my_generators) -``` - -This will create a static library target in CMake that corresponds to the output -of running your generator. The second generator in the file requires generator -parameters to be passed to it. These are also easy to handle: - -```cmake -# ... 
continuing from above -add_halide_library(my_second_generator FROM my_generators - PARAMS parallel=false scale=3.0 rotation=ccw output.type=uint16) -``` - -Adding multiple configurations is easy, too: - -```cmake -# ... continuing from above -add_halide_library(my_second_generator_2 FROM my_generators - GENERATOR my_second_generator - PARAMS scale=9.0 rotation=ccw output.type=float32) - -add_halide_library(my_second_generator_3 FROM my_generators - GENERATOR my_second_generator - PARAMS parallel=false output.type=float64) -``` - -Here, we had to specify which generator to use (`my_second_generator`) since it -uses the target name by default. The functions in these libraries will be named -after the target names, `my_second_generator_2` and `my_second_generator_3`, by -default, but it is possible to control this via the `FUNCTION_NAME` parameter. - -Each one of these targets, `<GEN>`, carries an associated `<GEN>.runtime` -target, which is also a static library containing the Halide runtime. It is -transitively linked through `<GEN>` to targets that link to `<GEN>`. On an -operating system like Linux, where weak linking is available, this is not an -issue. However, on Windows, this can fail due to symbol redefinitions. In these -cases, you must declare that two Halide libraries share a runtime, like so: - -```cmake -# ... updating above -add_halide_library(my_second_generator_2 FROM my_generators - GENERATOR my_second_generator - USE_RUNTIME my_first_generator.runtime - PARAMS scale=9.0 rotation=ccw output.type=float32) - -add_halide_library(my_second_generator_3 FROM my_generators - GENERATOR my_second_generator - USE_RUNTIME my_first_generator.runtime - PARAMS parallel=false output.type=float64) -``` - -This will even work correctly when different combinations of targets are -specified for each halide library. A "greatest common denominator" target will -be chosen that is compatible with all of them (or the build will fail).
- -### Autoschedulers - -When the autoschedulers are included in the release package, they are very -simple to apply to your own generators. For example, we could update the -definition of the `my_first_generator` library above to use the `Adams2019` -autoscheduler: - -```cmake -add_halide_library(my_second_generator FROM my_generators - AUTOSCHEDULER Halide::Adams2019) -``` - -### RunGenMain - -Halide provides a generic driver for generators to be used during development -for benchmarking and debugging. Suppose you have a generator executable called -`my_gen` and a generator within called `my_filter`. Then you can pass a variable -name to the `REGISTRATION` parameter of `add_halide_library` which will contain -the name of a generated C++ source that should be linked to `Halide::RunGenMain` -and `my_filter`. - -For example: - -```cmake -add_halide_library(my_filter FROM my_gen - REGISTRATION filter_reg_cpp) -add_executable(runner ${filter_reg_cpp}) -target_link_libraries(runner PRIVATE my_filter Halide::RunGenMain) -``` - -Then you can run, debug, and benchmark your generator through the `runner` -executable. - -## Halide package documentation - -Halide provides a CMake _package configuration_ module. The intended way to use -the CMake build is to run `find_package(Halide ...)` in your `CMakeLists.txt` -file. Closely read the [`find_package` documentation][find_package] before -proceeding. - -### Components - -The Halide package script understands a handful of optional components when -loading the package. - -First, if you plan to use the Halide Image IO library, you will want to include -the `png` and `jpeg` components when loading Halide. - -Second, Halide releases can contain a variety of configurations: static, shared, -debug, release, etc. CMake handles Debug/Release configurations automatically, -but generally only allows one type of library to be loaded. 
- -The package understands two components, `static` and `shared`, that specify -which type of library you would like to load. For example, if you want to make -sure that you link against shared Halide, you can write: - -```cmake -find_package(Halide REQUIRED COMPONENTS shared) -``` - -If the shared libraries are not available, this will result in a failure. - -If no component is specified, then the `Halide_SHARED_LIBS` variable is checked. -If it is defined and set to true, then the shared libraries will be loaded or -the package loading will fail. Similarly, if it is defined and set to false, the -static libraries will be loaded. - -If no component is specified and `Halide_SHARED_LIBS` is _not_ defined, then the -[`BUILD_SHARED_LIBS`][build_shared_libs] variable will be inspected. If it is -**not defined** or **defined and set to true**, then it will attempt to load the -shared libs and fall back to the static libs if they are not available. -Similarly, if `BUILD_SHARED_LIBS` is **defined and set to false**, then it will -try the static libs first then fall back to the shared libs. - -### Variables - -Variables that control package loading: - -| Variable | Description | -|----------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `Halide_SHARED_LIBS` | override `BUILD_SHARED_LIBS` when loading the Halide package via `find_package`. Has no effect when using Halide via `add_subdirectory` as a Git or `FetchContent` submodule. | -| `Halide_RUNTIME_NO_THREADS` | skip linking of Threads library to runtime. Should be set if your toolchain does not support it (e.g. baremetal). | -| `Halide_RUNTIME_NO_DL_LIBS` | skip linking of DL library to runtime. Should be set if your toolchain does not support it (e.g. baremetal). 
| - -Variables set by the package: - -| Variable | Description | -|----------------------------|--------------------------------------------------------------------| -| `Halide_VERSION` | The full version string of the loaded Halide package | -| `Halide_VERSION_MAJOR` | The major version of the loaded Halide package | -| `Halide_VERSION_MINOR` | The minor version of the loaded Halide package | -| `Halide_VERSION_PATCH` | The patch version of the loaded Halide package | -| `Halide_VERSION_TWEAK` | The tweak version of the loaded Halide package | -| `Halide_HOST_TARGET` | The Halide target triple corresponding to "host" for this build. | -| `Halide_CMAKE_TARGET` | The Halide target triple corresponding to the active CMake target. | -| `Halide_ENABLE_EXCEPTIONS` | Whether Halide was compiled with exception support | -| `Halide_ENABLE_RTTI` | Whether Halide was compiled with RTTI | - -Variables that control package behavior: - -| Variable | Description | -|---------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------| -| `Halide_PYTHON_LAUNCHER` | Semicolon separated list containing a command to launch the Python interpreter. Can be used to set environment variables for Python generators. | -| `Halide_NO_DEFAULT_FLAGS` | Off by default. When enabled, suppresses recommended compiler flags that would be added by `add_halide_generator` | - - -### Imported targets - -Halide defines the following targets that are available to users: - -| Imported target | Description | -|----------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `Halide::Halide` | this is the JIT-mode library to use when using Halide from C++. | -| `Halide::Generator` | this is the target to use when defining a generator executable. 
It supplies a `main()` function. | -| `Halide::Runtime` | adds include paths to the Halide runtime headers | -| `Halide::Tools` | adds include paths to the Halide tools, including the benchmarking utility. | -| `Halide::ImageIO` | adds include paths to the Halide image IO utility. Depends on `PNG::PNG` and `JPEG::JPEG` if they exist or were loaded through the corresponding package components. | -| `Halide::ThreadPool` | adds include paths to the Halide _simple_ thread pool utility library. This is not the same as the runtime's thread pool and is intended only for use by tests. Depends on `Threads::Threads`. | -| `Halide::RunGenMain` | used with the `REGISTRATION` parameter of `add_halide_library` to create simple runners and benchmarking tools for Halide libraries. | - -The following targets are not guaranteed to be available: - -| Imported target | Description | -|-------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `Halide::Python` | this is a Python 3 package that can be referenced as `$/..` when setting up `PYTHONPATH` for Python tests or the like from CMake. | -| `Halide::Adams19` | the Adams et.al. 2019 autoscheduler (no GPU support) | -| `Halide::Li18` | the Li et.al. 2018 gradient autoscheduler (limited GPU support) | -| `Halide::Mullapudi2016` | the Mullapudi et.al. 2016 autoscheduler (no GPU support) | - -### Functions - -Currently, only two functions are defined: - -#### `add_halide_library` - -This is the main function for managing generators in AOT compilation. 
The full -signature follows: - -``` -add_halide_library(<target> FROM <generator-target> - [GENERATOR generator-name] - [FUNCTION_NAME function-name] - [NAMESPACE cpp-namespace] - [USE_RUNTIME hl-target] - [PARAMS param1 [param2 ...]] - [TARGETS target1 [target2 ...]] - [FEATURES feature1 [feature2 ...]] - [PLUGINS plugin1 [plugin2 ...]] - [AUTOSCHEDULER scheduler-name] - [GRADIENT_DESCENT] - [C_BACKEND] - [REGISTRATION OUTVAR] - [HEADER OUTVAR] - [FUNCTION_INFO_HEADER OUTVAR] - [<extra-output> OUTVAR]) - -extra-output = ASSEMBLY | BITCODE | COMPILER_LOG | FEATURIZATION - | LLVM_ASSEMBLY | PYTHON_EXTENSION - | PYTORCH_WRAPPER | SCHEDULE | STMT | STMT_HTML -``` - -This function creates a target called `<target>` corresponding to running the -`<generator-target>` (an executable target which links to `Halide::Generator`) -one time, using command line arguments derived from the other parameters. - -The arguments `GENERATOR` and `FUNCTION_NAME` default to `<target>`. They -correspond to the `-g` and `-f` command line flags, respectively. - -`NAMESPACE` is syntactic sugar to specify the C++ namespace (if any) of the -generated function; you can also specify the C++ namespace (if any) directly -in the `FUNCTION_NAME` argument, but for repeated declarations or very long -namespaces, specifying this separately can provide more readable build files. - -If `USE_RUNTIME` is not specified, this function will create another target -called `<target>.runtime` which corresponds to running the generator with `-r` -and a compatible list of targets. This runtime target is an INTERFACE dependency -of `<target>`. If multiple runtime targets need to be linked together, setting -`USE_RUNTIME` to another Halide library, `<target2>`, will prevent the generation -of `<target>.runtime` and instead use `<target2>.runtime`. This argument is -most commonly used in conjunction with [`add_halide_runtime`](#add_halide_runtime). - -Parameters can be passed to a generator via the `PARAMS` argument. Parameters -should be space-separated.
Similarly, `TARGETS` is a space-separated list of -targets for which to generate code in a single function. They must all share the -same platform/bits/os triple (eg. `arm-32-linux`). Features that are in common -among all targets, including device libraries (like `cuda`) should go in -`FEATURES`. If `TARGETS` is not specified, the value of `Halide_TARGET` specified -at configure time will be used. - -Every element of `TARGETS` must begin with the same `arch-bits-os` triple. This -function understands two _meta-triples_, `host` and `cmake`. The meta-triple -`host` is equal to the `arch-bits-os` triple used to compile Halide along with -all of the supported instruction set extensions. On platforms that support -running both 32 and 64-bit programs, this will not necessarily equal the -platform the compiler is running on or that CMake is targeting. - -The meta-triple `cmake` is equal to the `arch-bits-os` of the current CMake -target. This is useful if you want to make sure you are not unintentionally -cross-compiling, which would result in an [`IMPORTED` target][imported-target] -being created. When `TARGETS` is empty and the `host` target would not -cross-compile, then `host` will be used. Otherwise, `cmake` will be used and an -author warning will be issued. - -To use an autoscheduler, set the `AUTOSCHEDULER` argument to a target -named like `Namespace::Scheduler`, for example `Halide::Adams19`. This will set -the `autoscheduler` GeneratorParam on the generator command line to `Scheduler` -and add the target to the list of plugins. Additional plugins can be loaded by -setting the `PLUGINS` argument. If the argument to `AUTOSCHEDULER` does not -contain `::` or it does not name a target, it will be passed to the `-s` flag -verbatim. - -If `GRADIENT_DESCENT` is set, then the module will be built suitably for -gradient descent calculation in TensorFlow or PyTorch. See -`Generator::build_gradient_module()` for more documentation. 
This corresponds to -passing `-d 1` at the generator command line. - -If the `C_BACKEND` option is set, this command will invoke the configured C++ -compiler on a generated source. Note that a `.runtime` target is _not_ -created in this case, and the `USE_RUNTIME` option is ignored. Other options -work as expected. - -If `REGISTRATION` is set, the path (relative to `CMAKE_CURRENT_BINARY_DIR`) -to the generated `.registration.cpp` file will be set in `OUTVAR`. This can be -used to generate a runner for a Halide library that is useful for benchmarking -and testing, as documented above. This is equivalent to setting -`-e registration` at the generator command line. - -If `HEADER` is set, the path (relative to `CMAKE_CURRENT_BINARY_DIR`) to the -generated `.h` header file will be set in `OUTVAR`. This can be used with -`install(FILES)` to conveniently deploy the generated header along with your -library. - -If `FUNCTION_INFO_HEADER` is set, the path (relative to -`CMAKE_CURRENT_BINARY_DIR`) to the generated `.function_info.h` header file -will be set in `OUTVAR`. This produces a file that contains `constexpr` -descriptions of information about the generated functions (e.g., argument -type and information). It is generated separately from the normal `HEADER` -file because `HEADER` is intended to work with basic `extern "C"` linkage, -while `FUNCTION_INFO_HEADER` requires C++17 or later to use effectively. -(This can be quite useful for advanced usages, such as producing automatic -call wrappers, etc.) Examples of usage can be found in the generated file. - -Lastly, each of the `extra-output` arguments directly correspond to an extra -output (via `-e`) from the generator. The value `OUTVAR` names a variable into -which a path (relative to -[`CMAKE_CURRENT_BINARY_DIR`][cmake_current_binary_dir]) to the extra file will -be written. - -#### `add_halide_generator` - -This function aids in creating cross-compilable builds that use Halide generators. 
- -``` -add_halide_generator( - target - [PACKAGE_NAME package-name] - [PACKAGE_NAMESPACE namespace] - [EXPORT_FILE export-file] - [PYSTUB generator-name] - [[SOURCES] source1 ...] -) -``` - -Every named argument is optional, and the function uses the following default arguments: - -- If `PACKAGE_NAME` is not provided, it defaults to `${PROJECT_NAME}-halide_generators`. -- If `PACKAGE_NAMESPACE` is not provided, it defaults to `${PROJECT_NAME}::halide_generators::`. -- If `EXPORT_FILE` is not provided, it defaults to `${PROJECT_BINARY_DIR}/cmake/${ARG_PACKAGE_NAME}-config.cmake` - -The `SOURCES` keyword marks the beginning of sources to be used to build -``, if it is not loaded. All unparsed arguments will be interpreted as -sources. - -This function guarantees that a Halide generator target named -`` is available. It will first search for a package named -`` using `find_package`; if it is found, it is assumed that it -provides the target. Otherwise, it will create an executable target named -`target` and an `ALIAS` target ``. This function also -creates a custom target named `` if it does not exist and -`` would exist. In this case, `` will depend on -``, this enables easy building of _just_ the Halide generators managed -by this function. - -After the call, `_FOUND` will be set to true if the host -generators were imported (and hence won't be built). Otherwise, it will be set -to false. This variable may be used to conditionally set properties on -``. - -Please see [test/integration/xc](https://github.com/halide/Halide/tree/main/test/integration/xc) for a simple example -and [apps/hannk](https://github.com/halide/Halide/tree/main/apps/hannk) for a complete app that uses it extensively. - -If `PYSTUB` is specified, then a Python Extension will be built that -wraps the Generator with CPython glue to allow use of the Generator -Python 3.x. 
The result will be a shared library of the form -`<target>_pystub.<soabi>.so`, where `<soabi>` describes the specific Python -version and platform (e.g., `cpython-310-darwin` for Python 3.10 on macOS). -See `README_python.md` for examples of use. - -#### `add_halide_python_extension_library` - -This function wraps the outputs of one or more `add_halide_library` targets with glue code to produce -a Python Extension library. - -``` -add_halide_python_extension_library( - target - [MODULE_NAME module-name] - HALIDE_LIBRARIES library1 ... -) -``` - -`FROM` specifies any valid Generator target. If omitted, - -`HALIDE_LIBRARIES` is a list of one or more `add_halide_library` targets. Each will be added to the -extension as a callable method of the module. Note that every library specified must be built with -the `PYTHON_EXTENSION` keyword specified, and all libraries must use the same Halide runtime. - -The result will be a shared library of the form -`<target>.<soabi>.so`, where `<soabi>` describes the specific Python version and -platform (e.g., `cpython-310-darwin` for Python 3.10 on macOS). - -#### `add_halide_runtime` - -This function generates a library containing a Halide runtime. Most user code will never -need to use this, as `add_halide_library()` will call it for you if necessary. The most common -use case is usually in conjunction with `add_halide_python_extension_library()`, as a way to -ensure that all the halide libraries share an identical runtime. - -``` -add_halide_runtime( - target - [TARGETS target1 [target2 ...]] -) -``` - -The `TARGETS` argument has identical semantics to the argument of the same name -for [`add_halide_library`](#add_halide_library). - -## Cross compiling - -Cross-compiling in CMake can be tricky, since CMake doesn't easily support -compiling for both the host platform and the cross-platform within the same -build. Unfortunately, Halide generator executables are just about always -designed to run on the host platform.
Each project will be set up differently -and have different requirements, but here are some suggestions for effective use -of CMake in these scenarios. - -### Use `add_halide_generator` - -If you are writing new programs that use Halide, you might wish to use our -helper, `add_halide_generator`. When using this helper, you are expected to -build your project twice: once for your build host and again for your intended -target. - -When building the host build, you can use the `` (see the -documentation above) target to build _just_ the generators. Then, in the -target build, set `_ROOT` to the host build directory. - -For example: - -``` -$ cmake -G Ninja -S . -B build-host -DCMAKE_BUILD_TYPE=Release -$ cmake --build build-host --target -$ cmake -G Ninja -S . -B build-target -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_TOOLCHAIN_FILE=/path/to/target-tc.cmake \ - -D_ROOT:FILEPATH=$PWD/build-host -$ cmake --build build-target -``` - -### Use a super-build - -A CMake super-build consists of breaking down a project into sub-projects that -are isolated by [toolchain][cmake-toolchains]. The basic structure is to have an -outermost project that only coordinates the sub-builds via the -[`ExternalProject`][externalproject] module. - -One would then use Halide to build a generator executable in one self-contained -project, then export that target to be used in a separate project. The second -project would be configured with the target [toolchain][cmake-toolchains] and -would call `add_halide_library` with no `TARGETS` option and set `FROM` equal to -the name of the imported generator executable. Obviously, this is a significant -increase in complexity over a typical CMake project. - -This is very compatible with the `add_halide_generator` strategy above. - -### Use `ExternalProject` directly - -A lighter weight alternative to the above is to use -[`ExternalProject`][externalproject] directly in your parent build. 
Configure -the parent build with the target [toolchain][cmake-toolchains], and configure -the inner project to use the host toolchain. Then, manually create an -[`IMPORTED` target][imported-executable] for your generator executable and call -`add_halide_library` as described above. - -The main drawback of this approach is that creating accurate `IMPORTED` targets -is difficult since predicting the names and locations of your binaries across -all possible platform and CMake project generators is difficult. In particular, -it is hard to predict executable extensions in cross-OS builds. - -### Use an emulator or run on device - -The [`CMAKE_CROSSCOMPILING_EMULATOR`][cmake_crosscompiling_emulator] variable -allows one to specify a command _prefix_ to run a target-system binary on the -host machine. One could set this to a custom shell script that uploads the -generator executable, runs it on the device and copies back the results. - -### Bypass CMake - -The previous two options ensure that the targets generated by -`add_halide_library` will be _normal_ static libraries. This approach does not -use [`ExternalProject`][externalproject], but instead produces `IMPORTED` -targets. The main drawback of `IMPORTED` targets is that they are considered -second-class in CMake. In particular, they cannot be installed with the typical -[`install(TARGETS)` command][install-targets]. Instead, they must be installed -using [`install(FILES)`][install-files] and the -[`$`][target-file] generator expression. - -# Contributing CMake code to Halide - -When contributing new CMake code to Halide, keep in mind that the minimum -version is 3.22. Therefore, it is possible (and indeed required) to use modern -CMake best practices. - -Like any large and complex system with a dedication to preserving backwards -compatibility, CMake is difficult to learn and full of traps. 
While not -comprehensive, the following serves as a guide for writing quality CMake code -and outlines the code quality expectations we have as they apply to CMake. - -## General guidelines and best practices - -The following are some common mistakes that lead to subtly broken builds. - -- **Reading the build directory.** While setting up the build, the build - directory should be considered _write only_. Using the build directory as a - read/write temporary directory is acceptable as long as all temp files are - cleaned up by the end of configuration. -- **Not using [generator expressions][cmake-genex].** Declarative is better than - imperative and this is no exception. Conditionally adding to a target property - can leak unwanted details about the build environment into packages. Some - information is not accurate or available except via generator expressions, eg. - the build configuration. -- **Using the wrong variable.** `CMAKE_SOURCE_DIR` doesn't always point to the - Halide source root. When someone uses Halide via - [`FetchContent`][fetchcontent], it will point to _their_ source root instead. - The correct variable is [`Halide_SOURCE_DIR`][project-name_source_dir]. If you - want to know if the compiler is MSVC, check it directly with the - [`MSVC`][msvc] variable; don't use [`WIN32`][win32]. That will be wrong when - compiling with clang on Windows. In most cases, however, a generator - expression will be more appropriate. -- **Using directory properties.** Directory properties have vexing behavior and - are essentially deprecated from CMake 3.0+. Propagating target properties is - the way of the future. -- **Using the wrong visibility.** Target properties can be `PRIVATE`, - `INTERFACE`, or both (aka `PUBLIC`). Pick the most conservative one for each - scenario. Refer to the [transitive usage requirements][cmake-propagation] docs - for more information. 
-- **Needlessly expanding variables** The [`if`][cmake_if] and - [`foreach`][cmake_foreach] commands generally expand variables when provided by - name. Expanding such variables manually can unintentionally change the behavior - of the command. Use `foreach (item IN LISTS list)` instead of - `foreach (item ${list})`. Similarly, use `if (varA STREQUAL varB)` instead of - `if ("${varA}" STREQUAL "${varB}")` and _definitely_ don't use - `if (${varA} STREQUAL ${varB})` since that will fail (in the best case) if - either variable's value contains a semi-colon (due to argument expansion). - -### Prohibited commands list - -As mentioned above, using directory properties is brittle and they are therefore -_not allowed_. The following functions may not appear in any new CMake code. - -| Command | Alternative | -|-------------------------------------|----------------------------------------------------------------------------------------------------| -| `add_compile_definitions` | Use [`target_compile_definitions`][target_compile_definitions] | -| `add_compile_options` | Use [`target_compile_options`][target_compile_options] | -| `add_definitions` | Use [`target_compile_definitions`][target_compile_definitions] | -| `add_link_options` | Use [`target_link_options`][target_link_options], but prefer not to use either | -| `get_directory_property` | Use cache variables or target properties | -| `get_property(... DIRECTORY)` | Use cache variables or target properties | -| `include_directories` | Use [`target_include_directories`][target_include_directories] | -| `link_directories` | Use [`target_link_libraries`][target_link_libraries] | -| `link_libraries` | Use [`target_link_libraries`][target_link_libraries] | -| `remove_definitions` | [Generator expressions][cmake-genex] in [`target_compile_definitions`][target_compile_definitions] | -| `set_directory_properties` | Use cache variables or target properties | -| `set_property(... 
DIRECTORY)` | Use cache variables or target properties | -| `target_link_libraries(target lib)` | Use [`target_link_libraries`][target_link_libraries] _with a visibility specifier_ (eg. `PRIVATE`) | - -As an example, it was once common practice to write code similar to this: - -```cmake -# WRONG: do not do this -include_directories(include) -add_library(my_lib source1.cpp ...) -``` - -However, this has two major pitfalls. First, it applies to _all_ targets created -in that directory, even those before the call to `include_directories` and those -created in [`include()`][include]-ed CMake files. As CMake files get larger and -more complex, this behavior gets harder to pinpoint. This is particularly vexing -when using the `link_libraries` or `add_definitions` commands. Second, this form -does not provide a way to _propagate_ the include directory to consumers of -`my_lib`. The correct way to do this is: - -```cmake -# CORRECT -add_library(my_lib source1.cpp ...) -target_include_directories(my_lib PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>) -``` - -This is better in many ways. It only affects the target in question. It -propagates the include path to the targets linking to it (via `PUBLIC`). It also -does not incorrectly export the host-filesystem-specific include path when -installing or packaging the target (via `$<BUILD_INTERFACE>`). - -If common properties need to be grouped together, use an INTERFACE target -(better) or write a function (worse). 
There are also several functions that are -disallowed for other reasons: - -| Command | Reason | Alternative | -|---------------------------------|-----------------------------------------------------------------------------------|----------------------------------------------------------------------------------------| -| `aux_source_directory` | Interacts poorly with incremental builds and Git | List source files explicitly | -| `build_command` | CTest internal function | Use CTest build-and-test mode via [`CMAKE_CTEST_COMMAND`][cmake_ctest_command] | -| `cmake_host_system_information` | Usually misleading information. | Inspect [toolchain][cmake-toolchains] variables and use generator expressions. | -| `cmake_policy(... OLD)` | OLD policies are deprecated by definition. | Instead, fix the code to work with the new policy. | -| `create_test_sourcelist` | We use our own unit testing solution | See the [adding tests](#adding-tests) section. | -| `define_property` | Adds unnecessary complexity | Use a cache variable. Exceptions under special circumstances. | -| `enable_language` | Halide is C/C++ only | [`FindCUDAToolkit`][findcudatoolkit] or [`FindCUDA`][findcuda], appropriately guarded. | -| `file(GLOB ...)` | Interacts poorly with incremental builds and Git | List source files explicitly. Allowed if not globbing for source files. | -| `fltk_wrap_ui` | Halide does not use FLTK | None | -| `include_external_msproject` | Halide must remain portable | Write a CMake package config file or find module. | -| `include_guard` | Use of recursive inclusion is not allowed | Write (recursive) functions. | -| `include_regular_expression` | Changes default dependency checking behavior | None | -| `load_cache` | Superseded by [`FetchContent`][fetchcontent]/[`ExternalProject`][externalproject] | Use aforementioned modules | -| `macro` | CMake macros are not hygienic and are therefore error-prone | Use functions instead. 
| -| `site_name` | Privacy: do not want to leak host name information | Provide a cache variable, generate a unique name. | -| `variable_watch` | Debugging helper | None. Not needed in production. | - -Lastly, do not introduce any dependencies via [`find_package`][find_package] -without broader approval. Confine dependencies to the `dependencies/` subtree. - -### Prohibited variables list - -Any variables that are specific to languages that are not enabled should, of -course, be avoided. But of greater concern are variables that are easy to misuse -or should not be overridden for our end-users. The following (non-exhaustive) -list of variables shall not be used in code merged into main. - -| Variable | Reason | Alternative | -|---------------------------------|-----------------------------------------------|---------------------------------------------------------------------------------------------------------| -| `CMAKE_ROOT` | Code smell | Rely on `find_package` search options; include `HINTS` if necessary | -| `CMAKE_DEBUG_TARGET_PROPERTIES` | Debugging helper | None | -| `CMAKE_FIND_DEBUG_MODE` | Debugging helper | None | -| `CMAKE_RULE_MESSAGES` | Debugging helper | None | -| `CMAKE_VERBOSE_MAKEFILE` | Debugging helper | None | -| `CMAKE_BACKWARDS_COMPATIBILITY` | Deprecated | None | -| `CMAKE_BUILD_TOOL` | Deprecated | `${CMAKE_COMMAND} --build` or [`CMAKE_MAKE_PROGRAM`][cmake_make_program] (but see below) | -| `CMAKE_CACHEFILE_DIR` | Deprecated | [`CMAKE_BINARY_DIR`][cmake_binary_dir], but see below | -| `CMAKE_CFG_INTDIR` | Deprecated | `$<CONFIG>`, `$<TARGET_FILE:..>`, target resolution of [`add_custom_command`][add_custom_command], etc. 
| -| `CMAKE_CL_64` | Deprecated | [`CMAKE_SIZEOF_VOID_P`][cmake_sizeof_void_p] | -| `CMAKE_COMPILER_IS_*` | Deprecated | [`CMAKE_<LANG>_COMPILER_ID`][cmake_lang_compiler_id] | -| `CMAKE_HOME_DIRECTORY` | Deprecated | [`CMAKE_SOURCE_DIR`][cmake_source_dir], but see below | -| `CMAKE_DIRECTORY_LABELS` | Directory property | None | -| `CMAKE_BUILD_TYPE` | Only applies to single-config generators. | `$<CONFIG>` | -| `CMAKE_*_FLAGS*` (w/o `_INIT`) | User-only | Write a [toolchain][cmake-toolchains] file with the corresponding `_INIT` variable | -| `CMAKE_COLOR_MAKEFILE` | User-only | None | -| `CMAKE_ERROR_DEPRECATED` | User-only | None | -| `CMAKE_CONFIGURATION_TYPES` | We only support the four standard build types | None | - -Of course feel free to insert debugging helpers _while developing_ but please -remove them before review. Finally, the following variables are allowed, but -their use must be motivated: - -| Variable | Reason | Alternative | -|------------------------------------------------|-----------------------------------------------------|----------------------------------------------------------------------------------------------| -| [`CMAKE_SOURCE_DIR`][cmake_source_dir] | Points to global source root, not Halide's. | [`Halide_SOURCE_DIR`][project-name_source_dir] or [`PROJECT_SOURCE_DIR`][project_source_dir] | -| [`CMAKE_BINARY_DIR`][cmake_binary_dir] | Points to global build root, not Halide's | [`Halide_BINARY_DIR`][project-name_binary_dir] or [`PROJECT_BINARY_DIR`][project_binary_dir] | -| [`CMAKE_MAKE_PROGRAM`][cmake_make_program] | CMake abstracts over differences in the build tool. | Prefer CTest's build and test mode or CMake's `--build` mode | -| [`CMAKE_CROSSCOMPILING`][cmake_crosscompiling] | Often misleading. | Inspect relevant variables directly, eg. 
[`CMAKE_SYSTEM_NAME`][cmake_system_name] | -| [`BUILD_SHARED_LIBS`][build_shared_libs] | Could override user setting | None, but be careful to restore value when overriding for a dependency | - -Any use of these functions and variables will block a PR. - -## Adding tests - -When adding a file to any of the folders under `test`, be aware that CI expects -that every `.c` and `.cpp` appears in the `CMakeLists.txt` file _on its own -line_, possibly as a comment. This is to avoid globbing and also to ensure that -added files are not missed. - -For most test types, it should be as simple as adding to the existing lists, -which must remain in alphabetical order. Generator tests are trickier, but -following the existing examples is a safe way to go. - -## Adding apps - -If you're contributing a new app to Halide: great! Thank you! There are a few -guidelines you should follow when writing a new app. - -- Write the app as if it were a top-level project. You should call - `find_package(Halide)` and set the C++ version to 11. -- Call [`enable_testing()`][enable_testing] and add a small test that runs the - app. -- Don't assume your app will have access to a GPU. Write your schedules to be - robust to varying buildbot hardware. -- Don't assume your app will be run on a specific OS, architecture, or bitness. - Write your apps to be robust (ideally efficient) on all supported platforms. -- If you rely on any additional packages, don't include them as `REQUIRED`, - instead test to see if their targets are available and, if not, call - `return()` before creating any targets. In this case, print a - `message(STATUS "[SKIP] ...")`, too. -- Look at the existing apps for examples. -- Test your app with ctest before opening a PR. Apps are built as part of the - test, rather than the main build. 
- -[add_custom_command]: - https://cmake.org/cmake/help/latest/command/add_custom_command.html -[add_library]: https://cmake.org/cmake/help/latest/command/add_library.html -[add_subdirectory]: - https://cmake.org/cmake/help/latest/command/add_subdirectory.html -[atlas]: http://math-atlas.sourceforge.net/ -[brew-cmake]: https://formulae.brew.sh/cask/cmake#default -[build_shared_libs]: - https://cmake.org/cmake/help/latest/variable/BUILD_SHARED_LIBS.html -[choco-cmake]: https://chocolatey.org/packages/cmake/ -[choco-doxygen]: https://chocolatey.org/packages/doxygen.install -[choco-ninja]: https://chocolatey.org/packages/ninja -[chocolatey]: https://chocolatey.org/ -[cmake-apt]: https://apt.kitware.com/ -[cmake-discourse]: https://discourse.cmake.org/ -[cmake-docs]: https://cmake.org/cmake/help/latest/ -[cmake-download]: https://cmake.org/download/ -[cmake-from-source]: https://cmake.org/install/ -[cmake-genex]: - https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html -[cmake-install]: - https://cmake.org/cmake/help/latest/manual/cmake.1.html#install-a-project -[cmake-propagation]: - https://cmake.org/cmake/help/latest/manual/cmake-buildsystem.7.html#transitive-usage-requirements -[cmake-toolchains]: - https://cmake.org/cmake/help/latest/manual/cmake-toolchains.7.html -[cmake-user-interaction]: - https://cmake.org/cmake/help/latest/guide/user-interaction/index.html#setting-build-variables -[cmake_binary_dir]: - https://cmake.org/cmake/help/latest/variable/CMAKE_BINARY_DIR.html -[cmake_build_type]: - https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html -[cmake_crosscompiling]: - https://cmake.org/cmake/help/latest/variable/CMAKE_CROSSCOMPILING.html -[cmake_crosscompiling_emulator]: - https://cmake.org/cmake/help/latest/variable/CMAKE_CROSSCOMPILING_EMULATOR.html -[cmake_ctest_command]: - https://cmake.org/cmake/help/latest/variable/CMAKE_CTEST_COMMAND.html -[cmake_current_binary_dir]: - 
https://cmake.org/cmake/help/latest/variable/CMAKE_CURRENT_BINARY_DIR.html -[cmake_cxx_extensions]: - https://cmake.org/cmake/help/latest/variable/CMAKE_CXX_EXTENSIONS.html -[cmake_cxx_standard]: - https://cmake.org/cmake/help/latest/variable/CMAKE_CXX_STANDARD.html -[cmake_cxx_standard_required]: - https://cmake.org/cmake/help/latest/variable/CMAKE_CXX_STANDARD_REQUIRED.html -[cmake_foreach]: - https://cmake.org/cmake/help/latest/command/foreach.html -[cmake_if]: - https://cmake.org/cmake/help/latest/command/if.html -[cmake_lang_compiler_id]: - https://cmake.org/cmake/help/latest/variable/CMAKE_LANG_COMPILER_ID.html -[cmake_make_program]: - https://cmake.org/cmake/help/latest/variable/CMAKE_MAKE_PROGRAM.html -[cmake_minimum_required]: - https://cmake.org/cmake/help/latest/command/cmake_minimum_required.html -[cmake_prefix_path]: - https://cmake.org/cmake/help/latest/variable/CMAKE_PREFIX_PATH.html -[cmake_presets]: - https://cmake.org/cmake/help/latest/manual/cmake-presets.7.html -[cmake_sizeof_void_p]: - https://cmake.org/cmake/help/latest/variable/CMAKE_SIZEOF_VOID_P.html -[cmake_source_dir]: - https://cmake.org/cmake/help/latest/variable/CMAKE_SOURCE_DIR.html -[cmake_system_name]: - https://cmake.org/cmake/help/latest/variable/CMAKE_SYSTEM_NAME.html -[doxygen-download]: https://www.doxygen.nl/download.html -[doxygen]: https://www.doxygen.nl/index.html -[eigen]: http://eigen.tuxfamily.org/index.php?title=Main_Page -[enable_testing]: - https://cmake.org/cmake/help/latest/command/enable_testing.html -[externalproject]: - https://cmake.org/cmake/help/latest/module/ExternalProject.html -[fetchcontent]: https://cmake.org/cmake/help/latest/module/FetchContent.html -[find_package]: https://cmake.org/cmake/help/latest/command/find_package.html -[findcuda]: https://cmake.org/cmake/help/latest/module/FindCUDA.html -[findcudatoolkit]: - https://cmake.org/cmake/help/latest/module/FindCUDAToolkit.html -[finddoxygen]: 
https://cmake.org/cmake/help/latest/module/FindDoxygen.html -[findjpeg]: https://cmake.org/cmake/help/latest/module/FindJPEG.html -[findopencl]: https://cmake.org/cmake/help/latest/module/FindOpenCL.html -[findpng]: https://cmake.org/cmake/help/latest/module/FindPNG.html -[findpython3]: https://cmake.org/cmake/help/latest/module/FindPython3.html -[findx11]: https://cmake.org/cmake/help/latest/module/FindX11.html -[halide-generator-tutorial]: - https://halide-lang.org/tutorials/tutorial_lesson_15_generators.html -[halide-tutorials]: https://halide-lang.org/tutorials/tutorial_introduction.html -[homebrew]: https://brew.sh -[imported-executable]: - https://cmake.org/cmake/help/latest/command/add_executable.html#imported-executables -[imported-target]: - https://cmake.org/cmake/help/latest/manual/cmake-buildsystem.7.html#imported-targets -[include]: https://cmake.org/cmake/help/latest/command/include.html -[install-files]: https://cmake.org/cmake/help/latest/command/install.html#files -[install-targets]: - https://cmake.org/cmake/help/latest/command/install.html#targets -[libjpeg]: https://www.libjpeg-turbo.org/ -[libpng]: http://www.libpng.org/pub/png/libpng.html -[lld]: https://lld.llvm.org/ -[msvc]: https://cmake.org/cmake/help/latest/variable/MSVC.html -[msvc-cmd]: - https://docs.microsoft.com/en-us/cpp/build/building-on-the-command-line?view=vs-2019 -[ninja-download]: https://github.com/ninja-build/ninja/releases -[ninja]: https://ninja-build.org/ -[openblas]: https://www.openblas.net/ -[project]: https://cmake.org/cmake/help/latest/command/project.html -[project-name_binary_dir]: - https://cmake.org/cmake/help/latest/variable/PROJECT-NAME_BINARY_DIR.html -[project-name_source_dir]: - https://cmake.org/cmake/help/latest/variable/PROJECT-NAME_SOURCE_DIR.html -[project_source_dir]: - https://cmake.org/cmake/help/latest/variable/PROJECT_SOURCE_DIR.html -[project_binary_dir]: - https://cmake.org/cmake/help/latest/variable/PROJECT_BINARY_DIR.html -[pypi-cmake]: 
https://pypi.org/project/cmake/ -[python]: https://www.python.org/downloads/ -[target-file]: - https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html#target-dependent-queries -[target_compile_definitions]: - https://cmake.org/cmake/help/latest/command/target_compile_definitions.html -[target_compile_options]: - https://cmake.org/cmake/help/latest/command/target_compile_options.html -[target_include_directories]: - https://cmake.org/cmake/help/latest/command/target_include_directories.html -[target_link_libraries]: - https://cmake.org/cmake/help/latest/command/target_link_libraries.html -[target_link_options]: - https://cmake.org/cmake/help/latest/command/target_link_options.html -[vcpkg]: https://github.com/Microsoft/vcpkg -[vcvarsall]: - https://docs.microsoft.com/en-us/cpp/build/building-on-the-command-line?view=vs-2019#vcvarsall-syntax -[venv]: https://docs.python.org/3/tutorial/venv.html -[vs2019-cmake-docs]: - https://docs.microsoft.com/en-us/cpp/build/cmake-projects-in-visual-studio?view=vs-2019 -[win32]: https://cmake.org/cmake/help/latest/variable/WIN32.html diff --git a/apps/cuda_mat_mul/CMakeLists.txt b/apps/cuda_mat_mul/CMakeLists.txt index 7a9341e888c7..a64b61bf2323 100644 --- a/apps/cuda_mat_mul/CMakeLists.txt +++ b/apps/cuda_mat_mul/CMakeLists.txt @@ -29,7 +29,6 @@ add_halide_generator(mat_mul.generator SOURCES mat_mul_generator.cpp) # Filters add_halide_library(mat_mul FROM mat_mul.generator - TARGETS host FEATURES cuda cuda_capability_50 PARAMS size=1024) diff --git a/apps/hannk/cmake/superbuild/CMakeLists.txt b/apps/hannk/cmake/superbuild/CMakeLists.txt index 7877eb00c565..5f07e4ebb21a 100644 --- a/apps/hannk/cmake/superbuild/CMakeLists.txt +++ b/apps/hannk/cmake/superbuild/CMakeLists.txt @@ -46,11 +46,8 @@ set(ep_opts SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/../.." 
BUILD_ALWAYS YES USES_TERMINAL_CONFIGURE YES - USES_TERMINAL_BUILD YES) - -if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.20) - list(APPEND ep_opts CONFIGURE_HANDLED_BY_BUILD TRUE) -endif () + USES_TERMINAL_BUILD YES + CONFIGURE_HANDLED_BY_BUILD TRUE) ## # Define host and target builds diff --git a/cmake/BundleStatic.cmake b/cmake/BundleStatic.cmake index 299666a75707..0357e584d011 100644 --- a/cmake/BundleStatic.cmake +++ b/cmake/BundleStatic.cmake @@ -22,12 +22,10 @@ function(_bundle_static_check_output VAR) set("${VAR}" "${${VAR}}" PARENT_SCOPE) endfunction() -function(_bundle_static_is_apple_libtool result item) +function(_bundle_static_is_apple_libtool result_var item) _bundle_static_check_output(version_info "${item}" -V) - if (version_info MATCHES "Apple, Inc.") - set(result 1 PARENT_SCOPE) - else () - set(result 0 PARENT_SCOPE) + if (NOT version_info MATCHES "Apple, Inc.") + set(${result_var} 0 PARENT_SCOPE) endif () endfunction() diff --git a/cmake/HalideGeneratorHelpers.cmake b/cmake/HalideGeneratorHelpers.cmake index ffa69377e15b..dcf802ca162f 100644 --- a/cmake/HalideGeneratorHelpers.cmake +++ b/cmake/HalideGeneratorHelpers.cmake @@ -57,7 +57,7 @@ function(add_halide_generator TARGET) set(ARG_SOURCES "${ARG_UNPARSED_ARGUMENTS}") endif () - _Halide_try_load_generators() + _Halide_try_load_generators("${ARG_PACKAGE_NAME}") # Communicate found information to the caller set(${ARG_PACKAGE_NAME}_FOUND "${${ARG_PACKAGE_NAME}_FOUND}" PARENT_SCOPE) @@ -96,6 +96,8 @@ function(add_halide_generator TARGET) add_executable(${gen} ALIAS ${TARGET}) target_link_libraries(${TARGET} PRIVATE Halide::Generator ${ARG_LINK_LIBRARIES}) + _Halide_place_dll(${TARGET}) + if (NOT ARG_NO_DEFAULT_FLAGS AND NOT Halide_NO_DEFAULT_FLAGS) # For crosscompiling builds, the Halide headers will be included using -isystem, # which will cause all warnings to be ignored. 
This is not helpful, since @@ -127,11 +129,9 @@ function(add_halide_generator TARGET) "HALIDE_GENERATOR_PYSTUB(${GEN_NAME}, ${MODULE_NAME})\n") set(stub_file "${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.${GEN_NAME}.${MODULE_NAME}.py_stub_generated.cpp") - if (NOT EXISTS "${stub_file}") - file(WRITE "${stub_file}" "${stub_text}") - endif () + file(CONFIGURE OUTPUT "${stub_file}" CONTENT "${stub_text}" @ONLY) - Python3_add_library(${TARGET}_pystub MODULE WITH_SOABI "${stub_file}" ${ARG_SOURCES}) + Python_add_library(${TARGET}_pystub MODULE WITH_SOABI "${stub_file}" ${ARG_SOURCES}) set_target_properties(${TARGET}_pystub PROPERTIES CXX_VISIBILITY_PRESET hidden VISIBILITY_INLINES_HIDDEN ON @@ -142,32 +142,289 @@ function(add_halide_generator TARGET) endif () endfunction() -# NOTE: this function must only be called by add_halide_generator -# since it reads from its scope. -function(_Halide_try_load_generators) +function(_Halide_try_load_generators package_name) # Don't repeatedly run the search for the tools package. - if (NOT DEFINED ${ARG_PACKAGE_NAME}_FOUND) + if (NOT DEFINED ${package_name}_FOUND) # Some toolchains, like Emscripten, try to disable finding packages # outside their sysroots, but we always want to find the native # generators. Setting CMAKE_FIND_ROOT_PATH_BOTH here overrides # the toolchain search preference. This is okay since a user can - # always override this call by setting ${ARG_PACKAGE_NAME}_ROOT. - find_package(${ARG_PACKAGE_NAME} QUIET - CMAKE_FIND_ROOT_PATH_BOTH) + # always override this call by setting ${package_name}_ROOT. 
+ find_package(${package_name} QUIET CMAKE_FIND_ROOT_PATH_BOTH) # Communicate found information to the caller - set(${ARG_PACKAGE_NAME}_FOUND "${${ARG_PACKAGE_NAME}_FOUND}" PARENT_SCOPE) + set(${package_name}_FOUND "${${package_name}_FOUND}" PARENT_SCOPE) - if (NOT ${ARG_PACKAGE_NAME}_FOUND AND CMAKE_CROSSCOMPILING AND NOT CMAKE_CROSSCOMPILING_EMULATOR) + if (NOT ${package_name}_FOUND AND CMAKE_CROSSCOMPILING AND NOT CMAKE_CROSSCOMPILING_EMULATOR) message(WARNING - "'${ARG_PACKAGE_NAME}' was not found and it looks like you " + "'${package_name}' was not found and it looks like you " "are cross-compiling without an emulator. This is likely to " - "fail. Please set -D${ARG_PACKAGE_NAME}_ROOT=... at the CMake " + "fail. Please set -D${package_name}_ROOT=... at the CMake " "command line to the build directory of a host-built ${PROJECT_NAME}.") endif () endif () endfunction() +function(_Halide_library_from_generator TARGET) + cmake_parse_arguments( + PARSE_ARGV 1 ARG "" + "FUNCTION_NAME;GENERATOR;GRADIENT_DESCENT;TYPE;USE_RUNTIME" + "COMMAND;DEPENDS;EXTRA_OUTPUTS;PARAMS;PLUGINS;TARGETS" + ) + + ## "hash table" of extra outputs to extensions + set(assembly_extension ".s") + set(bitcode_extension ".bc") + set(c_source_extension ".halide_generated.cpp") + set(compiler_log_extension ".halide_compiler_log") + set(featurization_extension ".featurization") + set(function_info_header_extension ".function_info.h") + set(hlpipe_extension ".hlpipe") + set(llvm_assembly_extension ".ll") + set(python_extension_extension ".py.cpp") + set(pytorch_wrapper_extension ".pytorch.h") + set(registration_extension ".registration.cpp") + set(schedule_extension ".schedule.h") + set(stmt_extension ".stmt") + set(stmt_html_extension ".stmt.html") + + ## Validate TYPE + if (NOT ARG_TYPE MATCHES "^(c_source|static_library|object)$") + message(FATAL_ERROR "`${ARG_TYPE}` not among: c_source, object, static_library") + endif () + + ## Validate plugins + foreach (plugin IN LISTS ARG_PLUGINS) + if (NOT 
TARGET "${plugin}") + message(FATAL_ERROR "Plugin `${plugin}` is not a target.") + endif () + endforeach () + + ## Always omit the runtime + list(TRANSFORM ARG_TARGETS APPEND "-no_runtime") + + ## Resolve plugins + if (ARG_PLUGINS) + list(TRANSFORM ARG_PLUGINS REPLACE "(.+)" "$" OUTPUT_VARIABLE plugins_args) + list(JOIN plugins_args "," plugins_args) + list(PREPEND plugins_args -p) + else () + set(plugins_args "") + endif () + + ## Gather platform information + _Halide_get_platform_extensions( + object_extension + static_library_extension + "${ARG_TARGETS}" + ) + + macro(_Halide_add_output type base_name) + list(APPEND outputs "${type}") + list(APPEND output_files "${base_name}${${type}_extension}") + endmacro() + + ## Check the type to determine outputs + set(outputs c_header) + set(output_files "${TARGET}.h") + + list(LENGTH ARG_TARGETS num_targets) + if (ARG_TYPE STREQUAL "object" AND num_targets GREATER 1) + foreach (t IN LISTS ARG_TARGETS) + _Halide_add_output("${ARG_TYPE}" "${TARGET}-${t}") + endforeach () + _Halide_add_output("${ARG_TYPE}" "${TARGET}_wrapper") + else () + # c_source, static_library, or object with one target + _Halide_add_output("${ARG_TYPE}" "${TARGET}") + endif () + + foreach (output IN LISTS ARG_EXTRA_OUTPUTS) + if (output STREQUAL "c_source" AND ARG_TYPE STREQUAL "c_source") + # Skip this as it was handled above + else () + _Halide_add_output("${output}" "${TARGET}") + endif () + endforeach () + + ## Run the generator + add_custom_command( + OUTPUT ${output_files} + COMMAND ${ARG_COMMAND} + -n "${TARGET}" + -d "$" + -g "${ARG_GENERATOR}" + -f "${ARG_FUNCTION_NAME}" + -e "$,$>>" + ${plugins_args} + -o . 
+ "target=$>" + ${ARG_PARAMS} + DEPENDS ${ARG_DEPENDS} ${ARG_PLUGINS} + VERBATIM + ) + + ## Populate output variables + list(TRANSFORM output_files PREPEND "${CMAKE_CURRENT_BINARY_DIR}/") + + foreach (out IN LISTS outputs) + set("local_out_${out}" "") + endforeach () + + foreach (out file IN ZIP_LISTS outputs output_files) + list(APPEND "local_out_${out}" "${file}") + list(APPEND "OUT_${out}" "${file}") + endforeach () + + foreach (out IN LISTS outputs) + set("OUT_${out}" "${OUT_${out}}" PARENT_SCOPE) + endforeach () + + # Create the filter's library target + if (ARG_TYPE STREQUAL "static_library") + add_library("${TARGET}" STATIC IMPORTED GLOBAL) + set_target_properties("${TARGET}" PROPERTIES IMPORTED_LOCATION "${local_out_${ARG_TYPE}}") + else () + add_library("${TARGET}" STATIC ${local_out_${ARG_TYPE}}) + set_property(TARGET "${TARGET}" PROPERTY POSITION_INDEPENDENT_CODE ON) + set_property(TARGET "${TARGET}" PROPERTY LINKER_LANGUAGE CXX) + + if (NOT Halide_NO_DEFAULT_FLAGS) + # Silence many useless warnings in generated C++ code compilation + target_compile_options( + "${TARGET}" PRIVATE $<$:-Wno-psabi> + ) + endif () + _Halide_fix_xcode("${TARGET}") + endif () + + add_custom_target("${TARGET}.update" DEPENDS ${output_files}) + add_dependencies("${TARGET}" "${TARGET}.update") + + target_link_libraries("${TARGET}" INTERFACE "${ARG_USE_RUNTIME}") + add_dependencies("${TARGET}" "${ARG_USE_RUNTIME}") + + if (NOT ARG_TYPE STREQUAL "c_source") + _Halide_add_targets_to_runtime("${ARG_USE_RUNTIME}" TARGETS ${ARG_TARGETS}) + endif () + + target_sources("${TARGET}" INTERFACE + FILE_SET HEADERS + BASE_DIRS "${CMAKE_CURRENT_BINARY_DIR}" + FILES "${local_out_c_header}") +endfunction() + +function(_Halide_lipo) + cmake_parse_arguments(PARSE_ARGV 0 ARG "OVERWRITE" "TARGET" "INPUTS") + + set(merged_libs ${ARG_INPUTS}) + + list(TRANSFORM merged_libs REPLACE "^(.+)$" "$" + OUTPUT_VARIABLE merged_libs_files) + + if (ARG_OVERWRITE) + list(APPEND merged_libs_files "$") + endif () + 
+ find_program(LIPO lipo REQUIRED) + add_custom_command( + TARGET "${ARG_TARGET}" POST_BUILD + COMMAND "${LIPO}" -create ${merged_libs_files} -output "$" + VERBATIM + ) + + list(TRANSFORM merged_libs REPLACE "^(.+)$" "$" + OUTPUT_VARIABLE merged_libs_targets) + target_link_libraries("${ARG_TARGET}" INTERFACE ${merged_libs_targets}) + + ## Visual Studio and Xcode lack any way to specify that the link step of + # building a target has dependencies outside the object files that go inside + # the target. CMake models this with LINK_DEPENDS, which only has an effect + # on the Ninja and Makefile generators. However, it does not consider static + # libraries to have a link step! So instead, we create a dummy file and use + # OBJECT_DEPENDS (subject to the same generator restrictions). + + if (NOT CMAKE_GENERATOR MATCHES "Ninja|Make") + message( + WARNING + "Generator ${CMAKE_GENERATOR} does not support OBJECT_DEPENDS! " + "${ARG_TARGET} will not be rebuilt accurately in incremental builds." + ) + endif () + + set(sources "") + foreach (lib IN LISTS merged_libs) + get_property(lib_sources TARGET "${lib}" PROPERTY SOURCES) + foreach (source IN LISTS lib_sources) + cmake_path(ABSOLUTE_PATH source BASE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}") + list(APPEND sources ${source}) + endforeach () + endforeach () + + string(MAKE_C_IDENTIFIER "Halide_lipo_dummy_symbol_${ARG_TARGET}" dummy_symbol) + set(dep_src "${CMAKE_CURRENT_BINARY_DIR}/${dummy_symbol}.cpp") + file(CONFIGURE OUTPUT "${dep_src}" CONTENT [[ + /* suppresses warning from ranlib */ + void @dummy_symbol@() {} + ]] @ONLY) + target_sources("${ARG_TARGET}" PRIVATE "${dep_src}") + set_source_files_properties("${dep_src}" PROPERTIES OBJECT_DEPENDS "${sources}") +endfunction() + +function(_Halide_compute_generator_cmd) + cmake_parse_arguments(PARSE_ARGV 0 ARG "" "FROM;OUT_COMMAND;OUT_DEPENDS" "") + + if (NOT ARG_FROM) + message(FATAL_ERROR "Missing FROM argument specifying a Halide generator target") + endif () + + if (NOT 
TARGET "${ARG_FROM}") + # FROM is usually an unqualified name; if we are crosscompiling, we might need a + # fully-qualified name, so add the default package name and retry + set(FQ_ARG_FROM "${PROJECT_NAME}::halide_generators::${ARG_FROM}") + if (NOT TARGET "${FQ_ARG_FROM}") + message(FATAL_ERROR "Unable to locate FROM as either ${ARG_FROM} or ${FQ_ARG_FROM}") + endif () + + set(ARG_FROM "${FQ_ARG_FROM}") + endif () + + get_property(py_src TARGET "${ARG_FROM}" PROPERTY Halide_PYTHON_GENERATOR_SOURCE) + if (NOT py_src) + set("${ARG_OUT_COMMAND}" "${ARG_FROM}" PARENT_SCOPE) + set("${ARG_OUT_DEPENDS}" "${ARG_FROM}" PARENT_SCOPE) + return() + endif () + + # TODO: Python Generators need work to support crosscompiling (https://github.com/halide/Halide/issues/7014) + if (NOT TARGET Halide::Python) + message(FATAL_ERROR "Missing Halide::Python. Load the Python component " + "in find_package() or set WITH_PYTHON_BINDINGS=ON if in tree.") + endif () + + if (NOT TARGET Python::Interpreter) + message(FATAL_ERROR "Missing Python::Interpreter. Missing call to find_package(Python 3)?") + endif () + + set("${ARG_OUT_COMMAND}" + ${CMAKE_COMMAND} -E env "PYTHONPATH=$/..>" -- + ${Halide_PYTHON_LAUNCHER} "$" $ + PARENT_SCOPE) + set("${ARG_OUT_DEPENDS}" ${ARG_FROM} Halide::Python ${py_src} PARENT_SCOPE) +endfunction() + +function(_Halide_set_osx_arch TARGET TRIPLE) + if (APPLE) + if (TRIPLE STREQUAL "arm-64-osx") + set_property(TARGET "${TARGET}" PROPERTY OSX_ARCHITECTURES "arm64") + elseif (TRIPLE STREQUAL "x86-64-osx") + set_property(TARGET "${TARGET}" PROPERTY OSX_ARCHITECTURES "x86_64") + else () + message(FATAL_ERROR "Could not set OSX_ARCHITECTURES for ${TRIPLE}") + endif () + endif () +endfunction() + ## # Function to simplify writing the CMake rules for invoking a generator executable # and getting a usable CMake library out of it. 
@@ -188,8 +445,10 @@ function(add_halide_library TARGET) ASSEMBLY BITCODE COMPILER_LOG + C_SOURCE FEATURIZATION FUNCTION_INFO_HEADER + HLPIPE LLVM_ASSEMBLY PYTHON_EXTENSION PYTORCH_WRAPPER @@ -198,88 +457,32 @@ function(add_halide_library TARGET) STMT STMT_HTML) - # "hash table" of extra outputs to extensions - set(ASSEMBLY_extension ".s") - set(BITCODE_extension ".bc") - set(COMPILER_LOG_extension ".halide_compiler_log") - set(FEATURIZATION_extension ".featurization") - set(FUNCTION_INFO_HEADER_extension ".function_info.h") - set(LLVM_ASSEMBLY_extension ".ll") - set(PYTHON_EXTENSION_extension ".py.cpp") - set(PYTORCH_WRAPPER_extension ".pytorch.h") - set(REGISTRATION_extension ".registration.cpp") - set(SCHEDULE_extension ".schedule.h") - set(STMT_extension ".stmt") - set(STMT_HTML_extension ".stmt.html") - ## # Parse the arguments and set defaults for missing values. ## + set(features_args FEATURES) + foreach (arch IN ITEMS x86 arm powerpc hexagon wasm riscv) + foreach (bits IN ITEMS 32 64) + foreach (os IN ITEMS linux windows osx android ios qurt noos fuchsia wasmrt) + list(APPEND features_args "FEATURES[${arch}-${bits}-${os}]") + endforeach () + endforeach () + endforeach () + set(options C_BACKEND GRADIENT_DESCENT) set(oneValueArgs FROM GENERATOR FUNCTION_NAME NAMESPACE USE_RUNTIME AUTOSCHEDULER HEADER ${extra_output_names} NO_THREADS NO_DL_LIBS) - set(multiValueArgs TARGETS FEATURES PARAMS PLUGINS) + set(multiValueArgs TARGETS PARAMS PLUGINS ${features_args}) cmake_parse_arguments(ARG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) if (NOT "${ARG_UNPARSED_ARGUMENTS}" STREQUAL "") message(AUTHOR_WARNING "Arguments to add_halide_library were not recognized: ${ARG_UNPARSED_ARGUMENTS}") endif () - if (NOT ARG_FROM) - message(FATAL_ERROR "Missing FROM argument specifying a Halide generator target") - endif () - - if (NOT TARGET ${ARG_FROM}) - # FROM is usually an unqualified name; if we are crosscompiling, we might need a - # fully-qualified name, so 
add the default package name and retry - set(FQ_ARG_FROM "${PROJECT_NAME}::halide_generators::${ARG_FROM}") - if (NOT TARGET ${FQ_ARG_FROM}) - message(FATAL_ERROR "Unable to locate FROM as either ${ARG_FROM} or ${FQ_ARG_FROM}") - endif () - set(ARG_FROM "${FQ_ARG_FROM}") - endif () - - get_property(py_src TARGET ${ARG_FROM} PROPERTY Halide_PYTHON_GENERATOR_SOURCE) - if (py_src) - # TODO: Python Generators need work to support crosscompiling (https://github.com/halide/Halide/issues/7014) - if (NOT TARGET Halide::Python) - message(FATAL_ERROR "This version of Halide was built without support for Python bindings; rebuild using WITH_PYTHON_BINDINGS=ON to use this rule with Python Generators.") - endif () - - if (NOT TARGET Python3::Interpreter) - message(FATAL_ERROR "You must call find_package(Python3) in your CMake code in order to use this rule with Python Generators.") - endif () - - if (CMAKE_VERSION VERSION_LESS 3.24) - set(arg_sep "") - else () - set(arg_sep "--") - endif () - - set( - GENERATOR_CMD - ${CMAKE_COMMAND} -E env "PYTHONPATH=$/.." ${arg_sep} - ${Halide_PYTHON_LAUNCHER} - "$" $ - ) - set(GENERATOR_CMD_DEPS ${ARG_FROM} Halide::Python ${py_src}) - else () - set(GENERATOR_CMD "${ARG_FROM}") - set(GENERATOR_CMD_DEPS ${ARG_FROM}) - _Halide_place_dll(${ARG_FROM}) - endif () - - if (ARG_C_BACKEND) - if (ARG_USE_RUNTIME) - message(AUTHOR_WARNING "The C backend does not use a runtime.") - endif () - if (ARG_TARGETS) - message(AUTHOR_WARNING "The C backend sources will be compiled with the current CMake toolchain.") - endif () + if (ARG_C_BACKEND AND ARG_TARGETS) + message(AUTHOR_WARNING "The C backend sources will be compiled with the current CMake toolchain.") endif () - set(gradient_descent "$") - if (NOT ARG_GENERATOR) set(ARG_GENERATOR "${TARGET}") endif () @@ -308,15 +511,18 @@ function(add_halide_library TARGET) "The default 'host' target ${Halide_HOST_TARGET} differs from the active CMake " "target ${Halide_CMAKE_TARGET}. 
Using ${Halide_CMAKE_TARGET} to compile ${TARGET}. " "This might result in performance degradation from missing arch flags (eg. avx).") - set(ARG_TARGETS "${Halide_CMAKE_TARGET}") + set(ARG_TARGETS cmake) endif () endif () - list(TRANSFORM ARG_TARGETS REPLACE "cmake" "${Halide_CMAKE_TARGET}") + _Halide_validate_multitarget(common_triple ${ARG_TARGETS}) - list(APPEND ARG_FEATURES no_runtime) - list(JOIN ARG_FEATURES "-" ARG_FEATURES) - list(TRANSFORM ARG_TARGETS APPEND "-${ARG_FEATURES}") + _Halide_is_crosscompiling(is_crosscompiling "${common_triple}") + _Halide_get_platform_extensions( + object_extension + static_library_extension + "${common_triple}" + ) ## # Set up the runtime library, if needed @@ -324,19 +530,26 @@ function(add_halide_library TARGET) if (ARG_C_BACKEND) # The C backend does not provide a runtime, so just supply headers. + if (ARG_USE_RUNTIME) + message(AUTHOR_WARNING "The C backend does not use a runtime.") + endif () set(ARG_USE_RUNTIME Halide::Runtime) elseif (NOT ARG_USE_RUNTIME) # If we're not using an existing runtime, create one. - - # To forward NO_THREADS/NO_DL_LIBS args to add_halide_runtime() + set(runtime_args "") if (DEFINED ARG_NO_THREADS) - set(CALL_ARG_NO_THREADS NO_THREADS ${ARG_NO_THREADS}) + list(APPEND runtime_args NO_THREADS "${ARG_NO_THREADS}") endif () if (DEFINED ARG_NO_DL_LIBS) - set(CALL_ARG_NO_DL_LIBS NO_DL_LIBS ${ARG_NO_DL_LIBS}) + list(APPEND runtime_args NO_DL_LIBS "${ARG_NO_DL_LIBS}") endif () - add_halide_runtime("${TARGET}.runtime" TARGETS ${ARG_TARGETS} FROM ${ARG_FROM} ${CALL_ARG_NO_THREADS} ${CALL_ARG_NO_DL_LIBS}) + add_halide_runtime( + "${TARGET}.runtime" + NO_DEFAULT_TARGETS TARGETS ${ARG_TARGETS} + ${runtime_args} + ) + set(ARG_USE_RUNTIME "${TARGET}.runtime") elseif (NOT TARGET ${ARG_USE_RUNTIME}) message(FATAL_ERROR "Invalid runtime target ${ARG_USE_RUNTIME}") @@ -348,49 +561,20 @@ function(add_halide_library TARGET) # Determine which outputs the generator call will emit. 
## - _Halide_get_platform_details( - is_crosscompiling - object_suffix - static_library_suffix - ${ARG_TARGETS}) - - # Always emit a C header - set(generator_outputs c_header) - set(generator_output_files "${TARGET}.h") - if (ARG_HEADER) - set(${ARG_HEADER} "${TARGET}.h" PARENT_SCOPE) - endif () - - # Then either a C source, a set of object files, or a cross-compiled static library. if (ARG_C_BACKEND) - list(APPEND generator_outputs c_source) - set(generator_sources "${TARGET}.halide_generated.cpp") + set(library_type c_source) elseif (is_crosscompiling) - # When cross-compiling, we need to use a static, imported library - list(APPEND generator_outputs static_library) - set(generator_sources "${TARGET}${static_library_suffix}") + set(library_type static_library) else () - # When compiling for the current CMake toolchain, create a native - list(APPEND generator_outputs object) - list(LENGTH ARG_TARGETS len) - if (len EQUAL 1) - set(generator_sources "${TARGET}${object_suffix}") - else () - set(generator_sources ${ARG_TARGETS}) - list(TRANSFORM generator_sources PREPEND "${TARGET}-") - list(TRANSFORM generator_sources APPEND "${object_suffix}") - list(APPEND generator_sources "${TARGET}_wrapper${object_suffix}") - endif () + set(library_type object) endif () - list(APPEND generator_output_files ${generator_sources}) # Add in extra outputs using the table defined at the start of this function + set(extra_outputs "") foreach (out IN LISTS extra_output_names) if (ARG_${out}) - set(${ARG_${out}} "${TARGET}${${out}_extension}" PARENT_SCOPE) - list(APPEND generator_output_files "${TARGET}${${out}_extension}") string(TOLOWER "${out}" out) - list(APPEND generator_outputs ${out}) + list(APPEND extra_outputs ${out}) endif () endforeach () @@ -418,63 +602,140 @@ function(add_halide_library TARGET) # Main library target for filter. 
## - if (is_crosscompiling) - add_library("${TARGET}" STATIC IMPORTED GLOBAL) - set_target_properties("${TARGET}" PROPERTIES - IMPORTED_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/${generator_sources}") - else () - add_library("${TARGET}" STATIC ${generator_sources}) - set_target_properties("${TARGET}" PROPERTIES - POSITION_INDEPENDENT_CODE ON - LINKER_LANGUAGE CXX) - if (NOT Halide_NO_DEFAULT_FLAGS) - # Silence many useless warnings in generated C++ code compilation - target_compile_options( - "${TARGET}" PRIVATE - $<$:-Wno-psabi>) - endif () - _Halide_fix_xcode("${TARGET}") - endif () + _Halide_compute_generator_cmd( + FROM "${ARG_FROM}" + OUT_COMMAND generator_cmd + OUT_DEPENDS generator_cmd_deps + ) - # Load the plugins and setup dependencies - set(generator_plugins "") - if (ARG_PLUGINS) - foreach (p IN LISTS ARG_PLUGINS) - list(APPEND generator_plugins "$") - endforeach () - # $ gets confused about quoting. Just use list(JOIN) here instead. - list(JOIN generator_plugins $ generator_plugins_list) - set(generator_plugins -p ${generator_plugins_list}) - endif () - - add_custom_command(OUTPUT ${generator_output_files} - COMMAND ${GENERATOR_CMD} - -n "${TARGET}" - -d "${gradient_descent}" - -g "${ARG_GENERATOR}" - -f "${ARG_FUNCTION_NAME}" - -e "$>" - ${generator_plugins} - -o . 
- "target=$>" - ${ARG_PARAMS} - DEPENDS ${GENERATOR_CMD_DEPS} ${ARG_PLUGINS} - VERBATIM) + set(generator_args + COMMAND ${generator_cmd} + DEPENDS ${generator_cmd_deps} + EXTRA_OUTPUTS ${extra_outputs} + FUNCTION_NAME "${ARG_FUNCTION_NAME}" + GENERATOR "${ARG_GENERATOR}" + GRADIENT_DESCENT "${ARG_GRADIENT_DESCENT}" + PARAMS ${ARG_PARAMS} + PLUGINS ${ARG_PLUGINS} + TYPE "${library_type}" + USE_RUNTIME "${ARG_USE_RUNTIME}" + ) - list(TRANSFORM generator_output_files PREPEND "${CMAKE_CURRENT_BINARY_DIR}/") - add_custom_target("${TARGET}.update" ALL DEPENDS ${generator_output_files}) + list(JOIN ARG_FEATURES "-" ARG_FEATURES) - add_dependencies("${TARGET}" "${TARGET}.update") + # Clear output lists + foreach (output IN LISTS extra_outputs) + set(OUT_${output} "") + endforeach () - target_include_directories("${TARGET}" INTERFACE "$") - target_link_libraries("${TARGET}" INTERFACE "${ARG_USE_RUNTIME}") + list(LENGTH Halide_CMAKE_TARGET num_platforms) + if (common_triple STREQUAL "cmake" AND num_platforms GREATER 1) + set(merged_base "") + set(merged_libs "") + + foreach (triple IN LISTS Halide_CMAKE_TARGET) + set(features_arch "ARG_FEATURES[${triple}]") + set(features_arch "${${features_arch}}") + if (features_arch) + list(TRANSFORM features_arch PREPEND "${triple}-" + OUTPUT_VARIABLE targets_arch) + else () + set(targets_arch "${triple}") + endif () + + list(TRANSFORM targets_arch APPEND "-${ARG_FEATURES}") + list(TRANSFORM targets_arch REPLACE "-$" "") + + if (NOT merged_base AND NOT ARG_C_BACKEND) + set(this_lib "${TARGET}") + set(merged_base "${this_lib}") + else () + set(this_lib "${TARGET}-${triple}") + list(APPEND merged_libs "${this_lib}") + endif () + + # Appends to OUT_c_header, OUT_, etc. 
+ _Halide_library_from_generator( + "${this_lib}" ${generator_args} TARGETS ${targets_arch}) + _Halide_set_osx_arch("${this_lib}" "${triple}") + endforeach () + + if (ARG_C_BACKEND) + add_library("${TARGET}" STATIC) + _Halide_lipo(TARGET "${TARGET}" INPUTS ${merged_libs}) + else () + _Halide_lipo(TARGET "${merged_base}" INPUTS ${merged_libs} OVERWRITE) + endif () + else () + list(TRANSFORM ARG_TARGETS REPLACE "cmake" "${Halide_CMAKE_TARGET}") + if (ARG_FEATURES) + list(TRANSFORM ARG_TARGETS APPEND "-${ARG_FEATURES}") + endif () + + # Appends to OUT_c_header, OUT_, etc. + _Halide_library_from_generator( + "${TARGET}" ${generator_args} TARGETS ${ARG_TARGETS}) + endif () # Save some info for add_halide_python_extension_library() in case it is used for this target. set_property(TARGET "${TARGET}" PROPERTY Halide_LIBRARY_RUNTIME_TARGET "${ARG_USE_RUNTIME}") set_property(TARGET "${TARGET}" PROPERTY Halide_LIBRARY_FUNCTION_NAME "${ARG_FUNCTION_NAME}") - if ("python_extension" IN_LIST generator_outputs) - set_property(TARGET "${TARGET}" PROPERTY Halide_LIBRARY_PYTHON_EXTENSION_CPP "${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.py.cpp") + if ("python_extension" IN_LIST extra_outputs) + list(GET OUT_python_extension 0 py_ext_cpp) # These files should always be identical + set_property(TARGET "${TARGET}" PROPERTY Halide_LIBRARY_PYTHON_EXTENSION_CPP "${py_ext_cpp}") + endif () + + # Propagate outputs + if (ARG_HEADER) + set(${ARG_HEADER} "${OUT_c_header}") + endif () + + foreach (output IN LISTS extra_outputs) + string(TOUPPER "ARG_${output}" outvar_arg) + if (${outvar_arg}) + set("${${outvar_arg}}" ${OUT_${output}} PARENT_SCOPE) + endif () + endforeach () +endfunction() + +function(_Halide_validate_multitarget OUT_TRIPLE) + list(LENGTH ARGN len) + if (len LESS 1) + message(FATAL_ERROR "Must supply at least one target") + endif () + + set(triple "") + set(all_features "") + foreach (target IN LISTS ARGN) + if (target MATCHES "^(host|cmake|[^-]+-[^-]+-[^-]+)(-[^-]+)*$") + 
set(this_triple "${CMAKE_MATCH_1}") + list(APPEND all_features ${CMAKE_MATCH_2}) + if (NOT triple) + set(triple "${this_triple}") + elseif (NOT this_triple STREQUAL triple) + message(FATAL_ERROR "Multi-target entry `${target}` does not match earlier triple `${triple}`") + endif () + else () + message(FATAL_ERROR "TARGET `${target}` is malformed") + endif () + endforeach () + + list(LENGTH Halide_CMAKE_TARGET num_platforms) + if (num_platforms GREATER 1) + if (NOT all_features STREQUAL "") + message( + FATAL_ERROR + "Multiarch builds cannot include features in the target list. " + "Use FEATURES[arch] instead. " + "Halide_CMAKE_TARGET=${Halide_CMAKE_TARGET} and saw TARGETS ${ARGN}." + ) + endif () + if (triple STREQUAL "host" AND "${Halide_HOST_TARGET}" IN_LIST Halide_CMAKE_TARGET) + set(triple "cmake") + endif () endif () + + set(${OUT_TRIPLE} "${triple}" PARENT_SCOPE) endfunction() function(add_halide_python_extension_library TARGET) @@ -528,13 +789,13 @@ function(add_halide_python_extension_library TARGET) set(pyext_runtime_name ${TARGET}_module_definition) set(pyext_module_definition_src "${CMAKE_CURRENT_BINARY_DIR}/${pyext_runtime_name}.py.cpp") - _Halide_gengen_ensure() + add_custom_command(OUTPUT ${pyext_module_definition_src} - COMMAND _Halide_gengen -r "${pyext_runtime_name}" -e python_extension -o "${CMAKE_CURRENT_BINARY_DIR}" target=host - DEPENDS _Halide_gengen + COMMAND Halide::GenRT -r "${pyext_runtime_name}" -e python_extension -o "${CMAKE_CURRENT_BINARY_DIR}" target=host + DEPENDS Halide::GenRT VERBATIM) - Python3_add_library(${TARGET} MODULE WITH_SOABI ${pycpps} ${pyext_module_definition_src}) + Python_add_library(${TARGET} MODULE WITH_SOABI ${pycpps} ${pyext_module_definition_src}) target_link_libraries(${TARGET} PRIVATE ${ARG_HALIDE_LIBRARIES}) target_compile_definitions(${TARGET} PRIVATE # Skip the default module-definition code in each file @@ -542,6 +803,7 @@ function(add_halide_python_extension_library TARGET) # Gotta explicitly specify the 
module name and function(s) for this mode HALIDE_PYTHON_EXTENSION_MODULE_NAME=${ARG_MODULE_NAME} "HALIDE_PYTHON_EXTENSION_FUNCTIONS=${function_names}") + target_compile_features(${TARGET} PRIVATE cxx_std_17) set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME ${ARG_MODULE_NAME}) _Halide_target_export_single_symbol(${TARGET} "PyInit_${ARG_MODULE_NAME}") endfunction() @@ -589,33 +851,16 @@ endfunction() ## function(add_halide_runtime RT) - set(options "") - set(oneValueArgs FROM NO_THREADS NO_DL_LIBS) + set(options NO_DEFAULT_TARGETS) + set(oneValueArgs NO_THREADS NO_DL_LIBS) set(multiValueArgs TARGETS) cmake_parse_arguments(ARG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - # If no TARGETS argument, use Halide_TARGET instead if (NOT ARG_TARGETS) set(ARG_TARGETS "${Halide_TARGET}") endif () - # Ensure all targets are tagged with "no_runtime", - # so that GCD calculation doesn't get confused. - list(TRANSFORM ARG_TARGETS APPEND "-no_runtime") - - if (ARG_FROM) - # Try to use generator which is available. This is essential for cross-compilation - # where we cannot use host compiler to build generator only for runtime. 
- - # Need to check if the ones for python extension, which is not actually an executable - get_target_property(target_type ${ARG_FROM} TYPE) - get_target_property(aliased ${ARG_FROM} ALIASED_TARGET) - if (target_type STREQUAL "EXECUTABLE" AND NOT aliased) - add_executable(_Halide_gengen ALIAS ${ARG_FROM}) - endif () - endif () - # The default of NO_THREADS/NO_DL_LIBS is OFF unless Halide_RUNTIME_NO_THREADS/NO_DL_LIBS is defined globally if (NOT DEFINED ARG_NO_THREADS) set(ARG_NO_THREADS ${Halide_RUNTIME_NO_THREADS}) @@ -624,32 +869,31 @@ function(add_halide_runtime RT) set(ARG_NO_DL_LIBS ${Halide_RUNTIME_NO_DL_LIBS}) endif () - # Ensure _Halide_gengen is defined - _Halide_gengen_ensure() + _Halide_validate_multitarget(common_triple ${ARG_TARGETS}) - _Halide_get_platform_details( - is_crosscompiling - object_suffix - static_library_suffix - ${ARG_TARGETS}) + _Halide_is_crosscompiling(is_crosscompiling "${common_triple}") + _Halide_get_platform_extensions( + object_extension + static_library_extension + "${common_triple}") - if (is_crosscompiling) - set(GEN_OUTS "${RT}${static_library_suffix}") - set(GEN_ARGS "") - else () - set(GEN_OUTS "${RT}${object_suffix}") - set(GEN_ARGS -e object) - endif () + # We defer reading the list of targets for which to generate a common + # runtime to CMake _generation_ time. This prevents issues where a lower + # GCD is required by a later Halide library linking to this runtime. + set(target_list "$>") - add_custom_command(OUTPUT ${GEN_OUTS} - COMMAND _Halide_gengen -r "${RT}" -o . ${GEN_ARGS} - # Defers reading the list of targets for which to generate a common runtime to CMake _generation_ time. - # This prevents issues where a lower GCD is required by a later Halide library linking to this runtime. - target=$,$> - DEPENDS _Halide_gengen - VERBATIM) + # Remove features that should not be attached to a runtime + # TODO: The fact that removing profile fixes a duplicate symbol linker error on Windows smells like a bug. 
+ set(target_list "$") if (is_crosscompiling) + set(GEN_OUTS "${RT}${static_library_extension}") + add_custom_command( + OUTPUT "${GEN_OUTS}" + COMMAND Halide::GenRT -r "${RT}" -o . + "target=$,$>" + DEPENDS Halide::GenRT + VERBATIM) add_custom_target("${RT}.update" DEPENDS "${GEN_OUTS}") add_library("${RT}" STATIC IMPORTED GLOBAL) @@ -658,48 +902,93 @@ function(add_halide_runtime RT) set_target_properties("${RT}" PROPERTIES IMPORTED_LOCATION ${CMAKE_CURRENT_BINARY_DIR}/${GEN_OUTS}) else () - add_library("${RT}" STATIC ${GEN_OUTS}) - set_target_properties("${RT}" PROPERTIES LINKER_LANGUAGE CXX) - _Halide_fix_xcode("${RT}") + list(LENGTH Halide_CMAKE_TARGET num_platforms) + if (common_triple STREQUAL "cmake" AND num_platforms GREATER 1) + set(base_rt "") + set(arch_rt "") + foreach (triple IN LISTS Halide_CMAKE_TARGET) + set(arch_target_list "$") + set(arch_target_list "$") + + if (NOT base_rt) + set(this_rt "${RT}") + set(base_rt "${this_rt}") + else () + set(this_rt "${RT}-${triple}") + list(APPEND arch_rt "${this_rt}") + endif () + + add_custom_command( + OUTPUT "${this_rt}${object_extension}" + COMMAND Halide::GenRT -r "${this_rt}" -o . -e object + "target=$,$>" + DEPENDS Halide::GenRT + VERBATIM) + + add_library("${this_rt}" STATIC "${this_rt}${object_extension}") + set_target_properties("${this_rt}" PROPERTIES LINKER_LANGUAGE CXX) + _Halide_fix_xcode("${this_rt}") + _Halide_set_osx_arch("${this_rt}" "${triple}") + endforeach () + + _Halide_lipo(TARGET "${base_rt}" INPUTS ${arch_rt} OVERWRITE) + else () + set(target_list "$") + add_custom_command( + OUTPUT "${RT}${object_extension}" + COMMAND Halide::GenRT -r "${RT}" -o . 
-e object + "target=$,$>" + DEPENDS Halide::GenRT + VERBATIM) + add_library("${RT}" STATIC "${RT}${object_extension}") + set_target_properties("${RT}" PROPERTIES LINKER_LANGUAGE CXX) + _Halide_fix_xcode("${RT}") + endif () endif () # Take care of the runtime/toolchain which doesn't have Threads or DL libs - if (NOT ARG_NO_THREADS AND NOT TARGET Threads::Threads) + target_link_libraries("${RT}" INTERFACE Halide::Runtime) + if (NOT ARG_NO_THREADS) find_package(Threads REQUIRED) + target_link_libraries("${RT}" INTERFACE Threads::Threads) + endif () + if (NOT ARG_NO_DL_LIBS) + target_link_libraries("${RT}" INTERFACE ${CMAKE_DL_LIBS}) + endif () + + if (NOT ARG_NO_DEFAULT_TARGETS) + _Halide_add_targets_to_runtime("${RT}" TARGETS ${ARG_TARGETS}) endif () - target_link_libraries("${RT}" INTERFACE - Halide::Runtime - $<$>:Threads::Threads> - $<$>:${CMAKE_DL_LIBS}>) - _Halide_add_targets_to_runtime("${RT}" TARGETS ${ARG_TARGETS}) endfunction() -function(_Halide_get_platform_details OUT_XC OUT_OBJ OUT_STATIC) - if ("${ARGN}" MATCHES "host") - set(ARGN "${Halide_HOST_TARGET}") +function(_Halide_is_crosscompiling OUT_XC triple) + if (triple MATCHES "host") + set(triple "${Halide_HOST_TARGET}") endif () - if ("${ARGN}" MATCHES "windows") - # Otherwise, all targets are windows, so Halide emits .obj files - set(${OUT_OBJ} ".obj" PARENT_SCOPE) - set(${OUT_STATIC} ".lib" PARENT_SCOPE) + if (triple MATCHES "cmake" OR triple IN_LIST Halide_CMAKE_TARGET) + set("${OUT_XC}" 0 PARENT_SCOPE) else () - # All other targets use .a - set(${OUT_OBJ} ".o" PARENT_SCOPE) - set(${OUT_STATIC} ".a" PARENT_SCOPE) + set("${OUT_XC}" 1 PARENT_SCOPE) endif () +endfunction() - # Well-formed targets must either start with "host" or a target triple. 
- if ("${ARGN}" MATCHES "host") - set(halide_triple ${Halide_HOST_TARGET}) - else () - string(REGEX REPLACE "^([^-]+-[^-]+-[^-]+).*$" "\\1" halide_triple "${ARGN}") +function(_Halide_get_platform_extensions OUT_OBJ OUT_STATIC triple) + if (triple MATCHES "host") + set(triple "${Halide_HOST_TARGET}") endif () - if (NOT Halide_CMAKE_TARGET STREQUAL halide_triple) - set("${OUT_XC}" 1 PARENT_SCOPE) + if (triple MATCHES "cmake") + set(triple "${Halide_CMAKE_TARGET}") + endif () + + if (triple MATCHES "windows") + set("${OUT_OBJ}" ".obj" PARENT_SCOPE) + set("${OUT_STATIC}" ".lib" PARENT_SCOPE) else () - set("${OUT_XC}" 0 PARENT_SCOPE) + # All other OSes use .a + set("${OUT_OBJ}" ".o" PARENT_SCOPE) + set("${OUT_STATIC}" ".a" PARENT_SCOPE) endif () endfunction() @@ -709,13 +998,18 @@ endfunction() ## function(_Halide_add_targets_to_runtime TARGET) - cmake_parse_arguments(ARG "" "" "TARGETS" ${ARGN}) + cmake_parse_arguments(PARSE_ARGV 1 ARG "" "" "TARGETS") - # Remove features that should not be attached to a runtime - # TODO: The fact that removing profile fixes a duplicate symbol linker error on Windows smells like a bug. - list(TRANSFORM ARG_TARGETS REPLACE "-(user_context|no_asserts|no_bounds_query|no_runtime|profile)" "") - set_property(TARGET "${TARGET}" APPEND PROPERTY Halide_RT_TARGETS "${ARG_TARGETS}") + if (NOT TARGET "${TARGET}") + message(FATAL_ERROR "not a target: ${TARGET}") + endif () + + get_property(aliased TARGET "${TARGET}" PROPERTY ALIASED_TARGET) + if (aliased) + set(TARGET "${aliased}") + endif () + set_property(TARGET "${TARGET}" APPEND PROPERTY Halide_RT_TARGETS "${ARG_TARGETS}") _Halide_target_link_gpu_libs(${TARGET} INTERFACE ${ARG_TARGETS}) endfunction() @@ -739,48 +1033,24 @@ endfunction() function(_Halide_fix_xcode TARGET) if (CMAKE_GENERATOR STREQUAL "Xcode") # Xcode generator requires at least one source file to work correctly. 
- # Touching the empty file unconditionally would cause the archiver to - # re-run every time CMake re-runs, even if nothing actually changed. set(empty_file "${CMAKE_CURRENT_BINARY_DIR}/Halide_${TARGET}_empty.cpp") - if (NOT EXISTS "${empty_file}") - file(TOUCH "${empty_file}") - endif () + file(CONFIGURE OUTPUT "${empty_file}" CONTENT "") target_sources("${TARGET}" PRIVATE "${empty_file}") endif () endfunction() function(_Halide_target_export_single_symbol TARGET SYMBOL) - if (NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.${SYMBOL}.ldscript.apple") - file(WRITE - "${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.${SYMBOL}.ldscript.apple" - "_${SYMBOL}\n") - endif () - if (NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.${SYMBOL}.ldscript") - file(WRITE - "${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.${SYMBOL}.ldscript" - "{ global: ${SYMBOL}; local: *; };\n") - endif () + file(CONFIGURE + OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.${SYMBOL}.ldscript.apple" + CONTENT "_${SYMBOL}\n") + + file(CONFIGURE + OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.${SYMBOL}.ldscript" + CONTENT "{ global: ${SYMBOL}; local: *; };\n") + target_export_script( ${TARGET} APPLE_LD "${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.${SYMBOL}.ldscript.apple" GNU_LD "${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.${SYMBOL}.ldscript" ) endfunction() - -function(_Halide_gengen_ensure) - # Create a Generator that is GenGen.cpp and nothing else; all it can do is generate a runtime. - if (NOT TARGET _Halide_gengen) - - # add_executable requires at least one source file for some - # configs (e.g. 
Xcode), because, uh, reasons, so we'll create - # an empty one here to satisfy it - set(empty "${CMAKE_CURRENT_BINARY_DIR}/_Halide_gengen.empty.cpp") - if (NOT EXISTS "${empty}") - file(WRITE "${empty}" "/* nothing */\n") - endif () - - add_executable(_Halide_gengen "${empty}") - target_link_libraries(_Halide_gengen PRIVATE Halide::Generator) - _Halide_place_dll(_Halide_gengen) - endif () -endfunction() diff --git a/cmake/HalideTargetHelpers.cmake b/cmake/HalideTargetHelpers.cmake index ca0477b7b729..405caa5ed739 100644 --- a/cmake/HalideTargetHelpers.cmake +++ b/cmake/HalideTargetHelpers.cmake @@ -4,20 +4,15 @@ cmake_minimum_required(VERSION 3.28) # Utilities for manipulating Halide target triples ## -function(_Halide_cmake_target OUTVAR) - # Get arch from CMake - string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" arch) +macro(_Halide_target_arch_os arch os) + string(TOLOWER "${arch}" arch) list(TRANSFORM arch REPLACE "^.*(x86|arm|powerpc|hexagon|wasm|riscv).*$" "\\1") list(TRANSFORM arch REPLACE "^i.?86.*$" "x86") list(TRANSFORM arch REPLACE "^(amd|ia|em)64t?$" "x86") list(TRANSFORM arch REPLACE "^ppc(64(le)?)?$" "powerpc") list(TRANSFORM arch REPLACE "^aarch(64)?$" "arm") - # Get bits from CMake - math(EXPR bits "8 * ${CMAKE_SIZEOF_VOID_P}") - - # Get OS from CMake - string(TOLOWER "${CMAKE_SYSTEM_NAME}" os) + string(TOLOWER "${os}" os) list(TRANSFORM os REPLACE "^darwin$" "osx") list(TRANSFORM os REPLACE "^emscripten$" "wasmrt") @@ -25,40 +20,72 @@ function(_Halide_cmake_target OUTVAR) if (os STREQUAL "wasmrt" AND arch STREQUAL "x86") set(arch "wasm") endif () +endmacro() + +function(_Halide_host_target OUTVAR) + _Halide_target_arch_os("${CMAKE_HOST_SYSTEM_PROCESSOR}" "${CMAKE_HOST_SYSTEM_NAME}") + + cmake_host_system_information(RESULT is_64bit QUERY IS_64BIT) + if (is_64bit) + set(bits 64) + else () + set(bits 32) + endif () set(${OUTVAR} "${arch}-${bits}-${os}" PARENT_SCOPE) endfunction() +function(_Halide_cmake_target OUTVAR) + math(EXPR bits "8 * 
${CMAKE_SIZEOF_VOID_P}") + if (CMAKE_OSX_ARCHITECTURES) + set(${OUTVAR} "") + foreach (processor IN LISTS CMAKE_OSX_ARCHITECTURES) + _Halide_target_arch_os("${processor}" "${CMAKE_SYSTEM_NAME}") + list(APPEND ${OUTVAR} "${arch}-${bits}-${os}") + endforeach () + list(REMOVE_DUPLICATES ${OUTVAR}) # defensive + else () + _Halide_target_arch_os("${CMAKE_SYSTEM_PROCESSOR}" "${CMAKE_SYSTEM_NAME}") + set(${OUTVAR} "${arch}-${bits}-${os}") + endif () + set(${OUTVAR} "${${OUTVAR}}" PARENT_SCOPE) +endfunction() + ## # Set Halide `host` and `cmake` meta-target values ## -_Halide_cmake_target(_active_triple) +if (NOT DEFINED Halide_HOST_TARGET) + _Halide_host_target(Halide_HOST_TARGET) +endif () -set(Halide_HOST_TARGET "${_active_triple}" +set(Halide_HOST_TARGET "${Halide_HOST_TARGET}" CACHE STRING "Halide target triple matching the Halide library") -set(Halide_CMAKE_TARGET "${_active_triple}" - CACHE STRING "Halide target triple matching the CMake target") -unset(_active_triple) +if (NOT DEFINED Halide_CMAKE_TARGET) + _Halide_cmake_target(Halide_CMAKE_TARGET) +endif () + +set(Halide_CMAKE_TARGET "${Halide_CMAKE_TARGET}" + CACHE STRING "Halide target triple matching the CMake target") ## # Cache variable to control the global target for add_halide_library. ## -if (NOT "$ENV{HL_TARGET}" STREQUAL "") - set(_default_target "$ENV{HL_TARGET}") -elseif (Halide_HOST_TARGET STREQUAL Halide_CMAKE_TARGET) - set(_default_target "host") -else () - set(_default_target "${Halide_CMAKE_TARGET}") +if (NOT DEFINED Halide_TARGET) + if (NOT "$ENV{HL_TARGET}" STREQUAL "") + set(Halide_TARGET "$ENV{HL_TARGET}") + elseif (Halide_HOST_TARGET STREQUAL Halide_CMAKE_TARGET) + set(Halide_TARGET "host") + else () + set(Halide_TARGET "cmake") + endif () endif () -set(Halide_TARGET "${_default_target}" +set(Halide_TARGET "${Halide_TARGET}" CACHE STRING "The default target to use when AOT compiling") -unset(_default_target) - ## # Print the active values of all special target triples. 
## diff --git a/cmake/TargetExportScript.cmake b/cmake/TargetExportScript.cmake index bc386ba32921..08c1f2d14c48 100644 --- a/cmake/TargetExportScript.cmake +++ b/cmake/TargetExportScript.cmake @@ -42,10 +42,11 @@ function(target_export_script TARGET) endif () ## The Apple linker expects a different flag. - set(EXPORTED_SYMBOLS_FLAG "LINKER:-exported_symbols_list,${ARG_APPLE_LD}") + file(CONFIGURE OUTPUT _target_export_script.apple.ldscript CONTENT [[]]) + set(EXPORTED_SYMBOLS_FLAG "LINKER:-exported_symbols_list,${CMAKE_CURRENT_BINARY_DIR}/_target_export_script.apple.ldscript") check_linker_flag(CXX "${EXPORTED_SYMBOLS_FLAG}" LINKER_HAS_FLAG_EXPORTED_SYMBOLS_LIST) if (LINKER_HAS_FLAG_EXPORTED_SYMBOLS_LIST) - target_link_options(${TARGET} PRIVATE "${EXPORTED_SYMBOLS_FLAG}") + target_link_options(${TARGET} PRIVATE "LINKER:-exported_symbols_list,${ARG_APPLE_LD}") set_property(TARGET ${TARGET} APPEND PROPERTY LINK_DEPENDS "${ARG_APPLE_LD}") return() endif () diff --git a/doc/BuildingHalideWithCMake.md b/doc/BuildingHalideWithCMake.md new file mode 100644 index 000000000000..8fa1dcaa554d --- /dev/null +++ b/doc/BuildingHalideWithCMake.md @@ -0,0 +1,626 @@ +# Building Halide with CMake + +This is a detailed guide to building Halide with CMake. If you want to learn how +to use Halide in your own CMake projects, see [HalideCMakePackage.md]. If you +are looking for Halide's CMake coding guidelines, see [CodeStyleCMake.md]. 
+ + +* [Building Halide with CMake](#building-halide-with-cmake) +* [Installing CMake](#installing-cmake) + * [Cross-platform](#cross-platform) + * [Windows](#windows) + * [macOS](#macos) + * [Ubuntu Linux](#ubuntu-linux) + * [Optional: Install Ninja](#optional-install-ninja) +* [Dependencies](#dependencies) + * [Summary](#summary) + * [Installing dependencies](#installing-dependencies) + * [vcpkg](#vcpkg) + * [Windows](#windows-1) + * [Homebrew](#homebrew) + * [Ubuntu / Debian](#ubuntu--debian) + * [Python](#python) +* [Building Halide](#building-halide) + * [Basic build](#basic-build) + * [Windows](#windows-2) + * [macOS and Linux](#macos-and-linux) + * [CMake Presets](#cmake-presets) + * [Common presets](#common-presets) + * [Vcpkg presets](#vcpkg-presets) + * [Sanitizer presets](#sanitizer-presets) + * [Build options](#build-options) + * [Installing](#installing) +* [Building Halide with pip](#building-halide-with-pip) + + +# Installing CMake + +This section covers installing a recent version of CMake and the correct +dependencies for building and using Halide. If you have not used CMake before, +we strongly suggest reading through the [CMake documentation][cmake-docs] first. + +Halide requires at least version 3.28. Fortunately, getting a recent version of +CMake couldn't be easier, and there are multiple good options on any system to +do so. Generally, one should always have the most recent version of CMake +installed system-wide. CMake is committed to backwards compatibility and even +the most recent release can build projects over a decade old. + +## Cross-platform + +Kitware provides packages for CMake on [PyPI][pypi-cmake] which can be installed +via `pip` into a [virtual environment][venv]. 
There are binary wheels available +for nearly all relevant platforms, including: + +| OS | x86-32 | x86-64 | ARM64 | +|-------------------|--------------------|--------------------|----------------------------| +| Windows | :white_check_mark: | :white_check_mark: | :white_check_mark: | +| macOS | :x: | 10.10+ | 11.0+ (incl. `universal2`) | +| Linux (musl 1.1+) | :white_check_mark: | :white_check_mark: | :white_check_mark: | +| Linux (glibc) | glibc 2.12+ | glibc 2.12+ | glibc 2.17+ | + +We recommend installing CMake using [pipx] to avoid package conflicts and +redundant installations. After installing pipx, run: + +```shell +$ pipx install cmake +``` + +Alternatively, you can use a normal virtual environment: + +```shell +$ python -m pip install cmake +``` + +If you don't want Python to manage your CMake installation, you can either +follow the platform-specific instructions below or install CMake +from [Kitware's binary releases][cmake-download]. If all else fails, you might +need to build CMake from source (e.g. on 32-bit ARM). In that case, follow the +directions posted on [Kitware's website][cmake-from-source]. + +## Windows + +On Windows, there are two primary methods for installing an up-to-date CMake: + +1. You can get CMake through the Visual Studio 2022 installer. +2. You can use Windows's built-in package manager, [winget][winget]: + ```shell + winget install Kitware.CMake + ``` + +We prefer the first option for its simplicity. See +Microsoft's [documentation][vs-cmake-docs] for more details. + +## macOS + +[Homebrew] keeps its [CMake package][brew-cmake] up to date. Simply run: + +```shell +$ brew install cmake +``` + +## Ubuntu Linux + +There are a few good ways to install CMake on Ubuntu: + +1. If you're running 24.04 LTS, then simply running + `sudo apt install cmake` will install CMake 3.28. +2. If you're running an older LTS or would like to use the newest CMake, try + installing via the [snap store][snap store]: `snap install cmake`. 
Note this + will conflict with an APT-provided CMake. +3. Kitware also provides an [APT repository][cmake-apt] with up-to-date + releases. Compatible with 20.04 LTS+ and is the best option for 32-bit ARM. + +For other Linux distributions, check with your distribution's package manager. + +**Note:** On WSL 1, snap is not available; in this case, prefer to use APT. On +WSL 2, all methods are available. + +## Optional: Install Ninja + +We strongly recommend using [Ninja] as your go-to CMake generator for working +with Halide. It has a much richer dependency structure than the alternatives, +and it is the only generator capable of producing accurate incremental builds. + +It is available in most package repositories: + +* Python: `pipx install ninja` +* Visual Studio Installer: alongside CMake +* winget: `winget install Ninja-build.Ninja` +* Homebrew: `brew install ninja` +* APT: `apt install ninja-build` + +You can also place a [pre-built binary][ninja-download] from their website in +the PATH. + +# Dependencies + +## Summary + +The following is a complete list of required and optional dependencies for +building the core pieces of Halide. + +| Dependency | Version | Required when... | Notes | +|---------------|--------------------|----------------------------|-----------------------------------------------------| +| [LLVM] | _see policy below_ | _always_ | WebAssembly and X86 targets are required. | +| [Clang] | `==LLVM` | _always_ | | +| [LLD] | `==LLVM` | _always_ | | +| [flatbuffers] | `~=23.5.26` | `WITH_SERIALIZATION=ON` | | +| [wabt] | `==1.0.36` | `Halide_WASM_BACKEND=wabt` | Does not have a stable API; exact version required. | +| [V8] | trunk | `Halide_WASM_BACKEND=V8` | Difficult to build. See [WebAssembly.md] | +| [Python] | `>=3.8` | `WITH_PYTHON_BINDINGS=ON` | | +| [pybind11] | `~=2.10.4` | `WITH_PYTHON_BINDINGS=ON` | | + +Halide maintains the following compatibility policy with LLVM: Halide version +`N` supports LLVM versions `N`, `N-1`, and `N-2`. 
Our binary distributions +always include the latest `N` patch at time of release. For most users, we +recommend using a pre-packaged binary release of LLVM rather than trying to +build it yourself. + +To build the apps, documentation, and tests, an extended set is needed. + +| Dependency | Required when... | Notes | +|---------------------------------|-----------------------------------|-----------------------------------------------------------------------------| +| [CUDA Toolkit][FindCUDAToolkit] | building `apps/cuda_mat_mul` | When compiling Halide pipelines that use CUDA, only the drivers are needed. | +| [Doxygen][FindDoxygen] | `WITH_DOCS=ON` | | +| [Eigen3][Eigen3CMake] | building `apps/linear_algebra` | | +| [libjpeg][FindJPEG] | `WITH_TESTS=ON` | Optionally used by `halide_image_io.h` and `Halide::ImageIO` in CMake. | +| [libpng][FindPNG] | `WITH_TESTS=ON` | (same as libjpeg) | +| [BLAS][FindBLAS] | building `apps/linear_algebra` | [ATLAS] and [OpenBLAS] are supported implementations | +| [OpenCL][FindOpenCL] | compiling pipelines with `opencl` | | + +It is best practice to configure your environment so that CMake can find +dependencies without package-specific hints. For instance, if you want CMake to +use a particular version of Python, create a virtual environment and activate it +_before_ configuring Halide. Similarly, the `CMAKE_PREFIX_PATH` variable can be +set to a local directory where from-source dependencies have been installed. +Carefully consult the [find_package] documentation to learn how the search +procedure works. + +If the build still fails to find a dependency, each package provides a bespoke +interface for providing hints and overriding incorrect results. Documentation +for these packages is linked in the table above. + +## Installing dependencies + +### vcpkg + +Halide has first-class support for using [vcpkg] to manage dependencies. 
The +list of dependencies and features is contained inside `vcpkg.json` at the root +of the repository. + +By default, a minimum set of LLVM backends will be enabled to compile JIT code +for the host and the serialization feature will be enabled. When using the vcpkg +toolchain file, you can set `-DVCPKG_MANIFEST_FEATURES=developer` +to enable building all dependencies (except Doxygen, which is not available on +vcpkg). + +By default, running `vcpkg install` will try to build all of LLVM. This is often +undesirable as it takes very long to do and consumes a lot of disk space, +especially as `vcpkg` requires special configuration to disable the debug build. +It will _also_ attempt to build Python 3 as a dependency of pybind11. + +To mitigate this issue, we provide a [vcpkg-overlay] that disables building LLVM +and Python. When using the vcpkg toolchain, you can enable it by setting +`-DVCPKG_OVERLAY_PORTS=cmake/vcpkg`. + +If you do choose to use vcpkg to build LLVM (the easiest way on Windows), note +that it is safe to delete the intermediate build files and caches in +`D:\vcpkg\buildtrees` and `%APPDATA%\local\vcpkg`. + +For convenience, we provide [CMake presets](#cmake-presets) that set these flags +appropriately per-platform. They are documented further below. + +### Windows + +On Windows, we recommend using `vcpkg` to install library dependencies. + +To build the documentation, you will need to install [Doxygen]. This can be done +either from the [Doxygen website][doxygen-download] or through [winget][winget]: + +```shell +$ winget install DimitriVanHeesch.Doxygen +``` + +To build the Python bindings, you will need to install Python 3. This should be +done by running the official installer from the [Python website][python]. Be +sure to download the debugging symbols through the installer. This will require +using the "Advanced Installation" workflow. Although it is not strictly +necessary, it is convenient to install Python system-wide on Windows (i.e. 
+`C:\Program Files`) because CMake looks at standard paths and registry keys. +This removes the need to manually set the `PATH`. + +Once Python is installed, you can install the Python module dependencies either +globally or in a [virtual environment][venv] by running + +```shell +$ python -m pip install -r requirements.txt +``` + +from the root of the repository. + +### Homebrew + +On macOS, it is possible to install all dependencies via [Homebrew][homebrew]: + +```shell +$ brew install llvm flatbuffers wabt python pybind11 doxygen eigen libpng libjpeg openblas +``` + +The `llvm` package includes `clang`, `clang-format`, and `lld`, too. To ensure +CMake can find the keg-only dependencies, set the following: + +```shell +$ export CMAKE_PREFIX_PATH="/opt/homebrew:/opt/homebrew/opt/llvm:/opt/homebrew/opt/jpeg" +``` + +### Ubuntu / Debian + +On Ubuntu you should install the following packages (this includes the Python +module dependencies): + +``` +$ sudo apt install clang-tools lld llvm-dev libclang-dev liblld-dev \ + libpng-dev libjpeg-dev libgl-dev python3-dev python3-numpy python3-scipy \ + python3-imageio python3-pybind11 libopenblas-dev libeigen3-dev \ + libatlas-base-dev doxygen +``` + +### Python + +When running the Python package, you will need to install additional +dependencies. These are tabulated in `requirements.txt` and may be installed +with: + +```shell +$ python -m pip install -U pip "setuptools[core]" wheel +$ python -m pip install -r requirements.txt +``` + +# Building Halide + +## Basic build + +These instructions assume that your working directory is the Halide repository +root. + +### Windows + +If you plan to use the Ninja generator, be sure to launch the developer command +prompt corresponding to your intended environment. Note that whatever your +intended target system (x86, x64, or ARM), you must use the 64-bit _host tools_ +because the 32-bit tools run out of memory during the linking step with LLVM. 
+More information is available from [Microsoft's documentation][msvc-cmd]. + +You should either open the correct Developer Command Prompt directly or run the +[`vcvarsall.bat`][vcvarsall] script with the correct argument, i.e. one of the +following: + +```shell +$ "C:\Program Files (x86)\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvarsall.bat" x64 +$ "C:\Program Files (x86)\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvarsall.bat" x64_x86 +$ "C:\Program Files (x86)\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvarsall.bat" x64_arm +``` + +Then, assuming that vcpkg is installed to `D:\vcpkg`, simply run: + +```shell +$ cmake -G Ninja -S . -B build --toolchain D:\vcpkg\scripts\buildsystems\vcpkg.cmake -DCMAKE_BUILD_TYPE=Release +$ cmake --build .\build +``` + +Valid values of [`CMAKE_BUILD_TYPE`][cmake_build_type] are `Debug`, +`RelWithDebInfo`, `MinSizeRel`, and `Release`. When using a single-configuration +generator (like Ninja) you must specify a build type in the configuration step. + +Otherwise, if you wish to create a Visual Studio based build system, you can +configure with: + +```shell +$ cmake -G "Visual Studio 17 2022" -Thost=x64 -A x64 -S . -B build ^ + --toolchain D:\vcpkg\scripts\buildsystems\vcpkg.cmake +$ cmake --build .\build --config Release -j %NUMBER_OF_PROCESSORS% +``` + +Because the Visual Studio generator is a _multi-config generator_, you don't set +`CMAKE_BUILD_TYPE` at configure-time, but instead pass the configuration to the +build (and test/install) commands with the `--config` flag. More documentation +is available in the [CMake User Interaction Guide][cmake-user-interaction]. + +The process is similar for 32-bit: + +``` +> cmake -G "Visual Studio 17 2022" -Thost=x64 -A Win32 -S . 
-B build ^ + --toolchain D:\vcpkg\scripts\buildsystems\vcpkg.cmake +> cmake --build .\build --config Release -j %NUMBER_OF_PROCESSORS% +``` + +In both cases, the `-Thost=x64` flag ensures that the correct host tools are +used. + +**Note:** due to limitations in MSBuild, incremental builds using the VS +generators will miss dependencies (including changes to headers in the +`src/runtime` folder). We recommend using Ninja for day-to-day development and +use Visual Studio only if you need it for packaging. + +### macOS and Linux + +The instructions here are straightforward. Assuming your environment is set up +correctly, just run: + +```shell +$ cmake -G Ninja -S . -B build -DCMAKE_BUILD_TYPE=Release +$ cmake --build build +``` + +If you omit `-G Ninja`, a Makefile-based generator will likely be used instead. +In either case, [`CMAKE_BUILD_TYPE`][cmake_build_type] must be set to one of the +standard types: `Debug`, `RelWithDebInfo`, `MinSizeRel`, or `Release`. + +## CMake Presets + +### Common presets + +Halide provides several [presets][cmake_presets] to make the above commands more +convenient. The following CMake preset commands correspond to the longer ones +above. + +```shell +$ cmake --preset=win64 # VS 2022 generator, 64-bit build, vcpkg deps +$ cmake --preset=win32 # VS 2022 generator, 32-bit build, vcpkg deps +$ cmake --preset=macOS # Ninja generator, macOS host build, Homebrew deps +$ cmake --preset=debug # Debug mode, any single-config generator / compiler +$ cmake --preset=release # Release mode, any single-config generator / compiler +``` + +### Vcpkg presets + +Halide provides two sets of corresponding vcpkg-enabled presets: _base_ and +_full_. 
+ +| Base preset | Full preset | +|-----------------|----------------------| +| `win32` | `win32-vcpkg-full` | +| `win64` | `win64-vcpkg-full` | +| `macOS-vcpkg` | `macOS-vcpkg-full` | +| `debug-vcpkg` | `debug-vcpkg-full` | +| `release-vcpkg` | `release-vcpkg-full` | + +In simple terms, the base presets rely on the system to provide LLVM and Python, +while the full presets delegate this to vcpkg (which consumes a large amount of +hard disk space and time). + +The `macOS-vcpkg` preset adds `/opt/homebrew/opt/llvm` to +`CMAKE_PREFIX_PATH`. + +### Sanitizer presets + +There are also presets to use some Clang sanitizers with the CMake build; at +present, only Fuzzer and ASAN (Address Sanitizer) are supported, and only on +linux-x86-64. + +* `linux-x64-asan`: Use the Address Sanitizer +* `linux-x64-fuzzer`: Use the Clang fuzzer plugin + +To use these, you must build LLVM with additional options: + +``` +-DLLVM_ENABLE_PROJECTS="clang;lld;clang-tools-extra" +-DLLVM_ENABLE_RUNTIMES="compiler-rt;libcxx;libcxxabi;libunwind" +``` + +## Build options + +Halide reads and understands several options that can configure the build. The +following are the most consequential and control how Halide is actually +compiled. + +| Option | Default | Description | +|------------------------------------------|-----------------------|---------------------------------------------------------------------------------------------------| +| [`BUILD_SHARED_LIBS`][build_shared_libs] | `ON` | Standard CMake variable that chooses whether to build as a static or shared library. | +| `Halide_BUNDLE_STATIC` | `OFF` | When building Halide as a static library, merge static library dependencies into libHalide.a. | +| `Halide_LLVM_SHARED_LIBS` | `OFF` | Link to the shared version of LLVM. Not available on Windows. | +| `Halide_ENABLE_RTTI` | _inherited from LLVM_ | Enable RTTI when building Halide. 
Recommended to be set to `ON` | +| `Halide_ENABLE_EXCEPTIONS` | `ON` | Enable exceptions when building Halide | +| `Halide_TARGET` | _empty_ | The default target triple to use for `add_halide_library` (and the generator tests, by extension) | +| `WITH_AUTOSCHEDULERS` | `ON` | Enable building the autoschedulers. Requires `BUILD_SHARED_LIBS`. | +| `WITH_SERIALIZATION` | `ON` | Include experimental Serialization/Deserialization features | + +The following options are disabled by default when building Halide through the [ +`add_subdirectory`][add_subdirectory] +or [`FetchContent`][fetchcontent] mechanisms. They control whether non-essential +targets (like tests and documentation) are built. + +| Option | Default | Description | +|------------------------|---------|------------------------------------------------------------------| +| `WITH_DOCS` | `OFF` | Enable building the documentation via Doxygen | +| `WITH_PACKAGING` | `ON` | Include the `install()` rules for Halide. | +| `WITH_PYTHON_BINDINGS` | `ON` | Enable building Python 3 bindings | +| `WITH_TESTS` | `ON` | Enable building unit and integration tests | +| `WITH_TUTORIALS` | `ON` | Enable building the tutorials | +| `WITH_UTILS` | `ON` | Enable building various utilities including the trace visualizer | + +The following options are _advanced_ and should not be required in typical +workflows. Generally, these are used by Halide's own CI infrastructure, or as +escape hatches for third-party packagers. + +| Option | Default | Description | +|-----------------------------|--------------------------------------------------------------------|------------------------------------------------------------------------------------------| +| `Halide_CLANG_TIDY_BUILD` | `OFF` | Used internally to generate fake compile jobs for runtime files when running clang-tidy. | +| `Halide_CCACHE_BUILD` | `OFF` | Use ccache with Halide-recommended settings to accelerate rebuilds. 
| +| `Halide_CCACHE_PARAMS` | `CCACHE_CPP2=yes CCACHE_HASHDIR=yes CCACHE_SLOPPINESS=pch_defines` | Options to pass to `ccache` when using `Halide_CCACHE_BUILD`. | +| `Halide_VERSION_OVERRIDE` | `${Halide_VERSION}` | Override the VERSION for libHalide. | +| `Halide_SOVERSION_OVERRIDE` | `${Halide_VERSION_MAJOR}` | Override the SOVERSION for libHalide. Expects a positive integer (i.e. not a version). | + +The following options control whether to build certain test subsets. They only +apply when `WITH_TESTS=ON`: + +| Option | Default | Description | +|---------------------------|------------|---------------------------------------| +| `WITH_TEST_AUTO_SCHEDULE` | `ON` | enable the auto-scheduling tests | +| `WITH_TEST_CORRECTNESS` | `ON` | enable the correctness tests | +| `WITH_TEST_ERROR` | `ON` | enable the expected-error tests | +| `WITH_TEST_FUZZ` | _detected_ | enable the libfuzzer-based fuzz tests | +| `WITH_TEST_GENERATOR` | `ON` | enable the AOT generator tests | +| `WITH_TEST_PERFORMANCE` | `ON` | enable performance testing | +| `WITH_TEST_RUNTIME` | `ON` | enable testing the runtime modules | +| `WITH_TEST_WARNING` | `ON` | enable the expected-warning tests | + +The following option selects the execution engine for in-process WASM testing: + +| Option | Default | Description | +|-----------------------|---------|------------------------------------------------------------------------------------------| +| `Halide_WASM_BACKEND` | `wabt` | Select the backend for WASM testing. Can be `wabt`, `V8` or a false value such as `OFF`. | + +## Installing + +Once built, Halide will need to be installed somewhere before using it in a +separate project. On any platform, this means running the +[`cmake --install`][cmake-install] command in one of two ways. 
For a +single-configuration generator (like Ninja), run either: + +```shell +$ cmake --install ./build --prefix /path/to/Halide-install +$ cmake --install .\build --prefix X:\path\to\Halide-install +``` + +For a multi-configuration generator (like Visual Studio) run: + +```shell +$ cmake --install ./build --prefix /path/to/Halide-install --config Release +$ cmake --install .\build --prefix X:\path\to\Halide-install --config Release +``` + +Of course, make sure that you build the corresponding config before attempting +to install it. + +# Building Halide with pip + +Halide also supports installation via the standard Python packaging workflow. +Running `pip install .` at the root of the repository will build a wheel and +install it into the currently active Python environment. + +However, this comes with a few caveats: + +1. `Halide_USE_FETCHCONTENT` is disabled, so the environment must be prepared + for CMake to find its dependencies. This is easiest to do by setting either + `CMAKE_PREFIX_PATH` to pre-built dependencies or by setting + `CMAKE_TOOLCHAIN_FILE` to vcpkg. +2. The build settings are fixed, meaning that `wabt` is required on non-Windows + systems, `flatbuffers` is always required, and the Python bindings must be + built. +3. The generated wheel will likely only work on your system. In particular, it + will not be repaired with `auditwheel` or `delocate`. + +Even so, this is a very good method of installing Halide. It supports both +Python and C++ `find_package` workflows. 
+ + +[ATLAS]: http://math-atlas.sourceforge.net/ + +[BuildingHalideWithCMake.md]: ./BuildingHalideWithCMake.md + +[Clang]: https://clang.llvm.org + +[CodeStyleCMake.md]: ./CodeStyleCMake.md + +[Eigen3CMake]: https://eigen.tuxfamily.org/dox/TopicCMakeGuide.html + +[Eigen3]: http://eigen.tuxfamily.org/index.php?title=Main_Page + +[FindBLAS]: https://cmake.org/cmake/help/latest/module/FindBLAS.html + +[FindCUDAToolkit]: https://cmake.org/cmake/help/latest/module/FindCUDAToolkit.html + +[FindCUDA]: https://cmake.org/cmake/help/latest/module/FindCUDA.html + +[FindDoxygen]: https://cmake.org/cmake/help/latest/module/FindDoxygen.html + +[FindJPEG]: https://cmake.org/cmake/help/latest/module/FindJPEG.html + +[FindOpenCL]: https://cmake.org/cmake/help/latest/module/FindOpenCL.html + +[FindPNG]: https://cmake.org/cmake/help/latest/module/FindPNG.html + +[FindPython]: https://cmake.org/cmake/help/latest/module/FindPython.html + +[HalideCMakePackage.md]: ./HalideCMakePackage.md + +[LLVM]: https://github.com/llvm/llvm-project + +[Ninja]: https://ninja-build.org/ + +[OpenBLAS]: https://www.openblas.net/ + +[V8]: https://v8.dev + +[WebAssembly.md]: ./WebAssembly.md + +[add_subdirectory]: https://cmake.org/cmake/help/latest/command/add_subdirectory.html + +[brew-cmake]: https://formulae.brew.sh/cask/cmake#default + +[build_shared_libs]: https://cmake.org/cmake/help/latest/variable/BUILD_SHARED_LIBS.html + +[cmake-apt]: https://apt.kitware.com/ + +[cmake-docs]: https://cmake.org/cmake/help/latest/ + +[cmake-download]: https://cmake.org/download/ + +[cmake-from-source]: https://cmake.org/install/ + +[cmake-install]: https://cmake.org/cmake/help/latest/manual/cmake.1.html#install-a-project + +[cmake-user-interaction]: https://cmake.org/cmake/help/latest/guide/user-interaction/index.html#setting-build-variables + +[cmake_build_type]: https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html + +[cmake_presets]: https://cmake.org/cmake/help/latest/manual/cmake-presets.7.html + 
+[doxygen-download]: https://www.doxygen.nl/download.html + +[doxygen]: https://www.doxygen.nl/index.html + +[enable_testing]: https://cmake.org/cmake/help/latest/command/enable_testing.html + +[fetchcontent]: https://cmake.org/cmake/help/latest/module/FetchContent.html + +[find_package]: https://cmake.org/cmake/help/latest/command/find_package.html + +[flatbuffers]: https://github.com/google/flatbuffers + +[homebrew]: https://brew.sh + +[libjpeg]: https://www.libjpeg-turbo.org/ + +[libpng]: http://www.libpng.org/pub/png/libpng.html + +[lld]: https://lld.llvm.org/ + +[msvc-cmd]: https://learn.microsoft.com/en-us/cpp/build/building-on-the-command-line + +[ninja-download]: https://github.com/ninja-build/ninja/releases + +[pipx]: https://pipx.pypa.io/stable/ + +[pybind11]: https://github.com/pybind/pybind11 + +[pypi-cmake]: https://pypi.org/project/cmake/ + +[python]: https://www.python.org/downloads/ + +[snap store]: https://snapcraft.io/cmake + +[vcpkg-overlay]: https://learn.microsoft.com/en-us/vcpkg/concepts/overlay-ports + +[vcpkg]: https://github.com/Microsoft/vcpkg + +[vcvarsall]: https://docs.microsoft.com/en-us/cpp/build/building-on-the-command-line#developer_command_file_locations + +[venv]: https://docs.python.org/3/tutorial/venv.html + +[vs-cmake-docs]: https://docs.microsoft.com/en-us/cpp/build/cmake-projects-in-visual-studio + +[wabt]: https://github.com/WebAssembly/wabt + +[winget]: https://learn.microsoft.com/en-us/windows/package-manager/winget/ diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt index 7b549bd84ed5..9e26ba175bde 100644 --- a/doc/CMakeLists.txt +++ b/doc/CMakeLists.txt @@ -8,15 +8,17 @@ set(DOXYGEN_CLASS_DIAGRAMS NO) set(DOXYGEN_DISTRIBUTE_GROUP_DOC YES) set(DOXYGEN_EXAMPLE_PATH "${Halide_SOURCE_DIR}/tutorial") set(DOXYGEN_EXCLUDE bin) +set(DOXYGEN_EXCLUDE_PATTERNS README.md) set(DOXYGEN_EXTRACT_ALL YES) set(DOXYGEN_EXTRACT_LOCAL_CLASSES NO) -set(DOXYGEN_FILE_PATTERNS *.h) +set(DOXYGEN_FILE_PATTERNS *.h *.md) 
set(DOXYGEN_GENERATE_TREEVIEW YES) set(DOXYGEN_HIDE_FRIEND_COMPOUNDS YES) set(DOXYGEN_HIDE_IN_BODY_DOCS YES) set(DOXYGEN_HIDE_UNDOC_CLASSES YES) set(DOXYGEN_HIDE_UNDOC_MEMBERS YES) set(DOXYGEN_JAVADOC_AUTOBRIEF YES) +set(DOXYGEN_MARKDOWN_ID_STYLE GITHUB) set(DOXYGEN_QT_AUTOBRIEF YES) set(DOXYGEN_QUIET YES) set(DOXYGEN_RECURSIVE YES) @@ -27,8 +29,6 @@ set(DOXYGEN_SORT_MEMBER_DOCS NO) set(DOXYGEN_SOURCE_BROWSER YES) set(DOXYGEN_STRIP_CODE_COMMENTS NO) -doxygen_add_docs(doc - "${Halide_SOURCE_DIR}/src" - "${Halide_SOURCE_DIR}/test" - ALL +doxygen_add_docs(doc "doc/" "src/" "test/" ALL + WORKING_DIRECTORY "${Halide_SOURCE_DIR}" COMMENT "Generate HTML documentation") diff --git a/doc/CodeStyleCMake.md b/doc/CodeStyleCMake.md new file mode 100644 index 000000000000..6a08d1b77317 --- /dev/null +++ b/doc/CodeStyleCMake.md @@ -0,0 +1,393 @@ +# Contributing CMake code to Halide + +This document specifies the coding standards we adhere to when authoring new +CMake code. If you need directions for building Halide, +see [BuildingHalideWithCMake.md]. If you are looking for Halide's CMake package +documentation, see [HalideCMakePackage.md]. + +This document is necessary for two major reasons. First, due to its long +history, size, and dedication to backwards compatibility, CMake is _incredibly_ +difficult to learn and full of traps. Second, Halide bundles its own LLVM-based +native code generator, which CMake deeply does not expect. This means we +routinely push CMake's build model to its limit. + +Therefore, we must be careful to write high-quality CMake code so that it is +clear when CMake's limitations are being tested. While not comprehensive, the +guide outlines the code quality expectations we have as they apply to CMake. + +When contributing new CMake code to Halide, keep in mind that the minimum +version is 3.28. Therefore, it is not only possible, but _required_, to use +modern CMake best practices. 
+ + +* [Contributing CMake code to Halide](#contributing-cmake-code-to-halide) +* [General guidelines and best practices](#general-guidelines-and-best-practices) + * [Prohibited commands list](#prohibited-commands-list) + * [Prohibited variables list](#prohibited-variables-list) +* [Adding tests](#adding-tests) +* [Adding apps](#adding-apps) + + +# General guidelines and best practices + +The following are some common mistakes that lead to subtly broken builds. + +- **Reading the build directory.** While setting up the build, the build + directory should be considered _write only_. Using the build directory as a + read/write temporary directory is acceptable as long as all temp files are + cleaned up by the end of configuration. +- **Not using [generator expressions][cmake-genex].** Declarative is better than + imperative and this is no exception. Conditionally adding to a target property + can leak unwanted details about the build environment into packages. Some + information is not accurate or available except via generator expressions, + e.g. the build configuration. +- **Using the wrong variable.** `CMAKE_SOURCE_DIR` doesn't always point to the + Halide source root. When someone uses Halide via + [`FetchContent`][FetchContent], it will point to _their_ source root instead. + The correct variable is [`Halide_SOURCE_DIR`][project-name_source_dir]. If you + want to know if the compiler is MSVC, check it directly with the + [`MSVC`][msvc] variable; don't use [`WIN32`][win32]. That will be wrong when + compiling with clang on Windows. In most cases, however, a generator + expression will be more appropriate. +- **Using directory properties.** Directory properties have vexing behavior and + are essentially deprecated from CMake 3.0+. Propagating target properties is + the way of the future. +- **Using the wrong visibility.** Target properties can be `PRIVATE`, + `INTERFACE`, or both (aka `PUBLIC`). Pick the most conservative one for each + scenario. 
Refer to the [transitive usage requirements][cmake-propagation] docs + for more information. +- **Needlessly expanding variables** The [`if`][cmake_if] and + [`foreach`][cmake_foreach] commands generally expand variables when provided + by name. Expanding such variables manually can unintentionally change the + behavior of the command. Use `foreach (item IN LISTS list)` instead of + `foreach (item ${list})`. Similarly, use `if (varA STREQUAL varB)` instead of + `if ("${varA}" STREQUAL "${varB}")` and _definitely_ don't use + `if (${varA} STREQUAL ${varB})` since that will fail (in the best case) if + either variable's value contains a semicolon (due to argument expansion). + +## Prohibited commands list + +As mentioned above, using directory properties is brittle, and they are +therefore _not allowed_. The following functions may not appear in any new CMake +code. + +| Command | Alternative | +|-------------------------------------|----------------------------------------------------------------------------------------------------| +| `add_compile_definitions` | Use [`target_compile_definitions`][target_compile_definitions] | +| `add_compile_options` | Use [`target_compile_options`][target_compile_options] | +| `add_definitions` | Use [`target_compile_definitions`][target_compile_definitions] | +| `add_link_options` | Use [`target_link_options`][target_link_options], but prefer not to use either | +| `include_directories` | Use [`target_include_directories`][target_include_directories] | +| `link_directories` | Use [`target_link_libraries`][target_link_libraries] | +| `link_libraries` | Use [`target_link_libraries`][target_link_libraries] | +| `remove_definitions` | [Generator expressions][cmake-genex] in [`target_compile_definitions`][target_compile_definitions] | +| `set_directory_properties` | Use (cache) variables or target properties | +| `set_property(DIRECTORY)` | Use (cache) variables or target properties (custom properties excluded, but require 
justification) | +| `target_link_libraries(target lib)` | Use [`target_link_libraries`][target_link_libraries] _with a visibility specifier_ (eg. `PRIVATE`) | + +As an example, it was once common practice to write code similar to this: + +```cmake +# WRONG: do not do this +include_directories(include) +add_library(my_lib source1.cpp ..) +``` + +However, this has two major pitfalls. First, it applies to _all_ targets created +in that directory, even those before the call to `include_directories` and those +created in [`include()`][include]-ed CMake files. As CMake files get larger and +more complex, this behavior gets harder to pinpoint. This is particularly vexing +when using the `link_libraries` or `add_definitions` commands. Second, this form +does not provide a way to _propagate_ the include directory to consumers of +`my_lib`. The correct way to do this is: + +```cmake +# CORRECT +add_library(my_lib source1.cpp ...) +target_sources( + my_lib + PUBLIC + FILE_SET HEADERS + BASE_DIRS include + FILES include/header1.h +) +``` + +This is better in many ways. It only affects the target in question. It +propagates the include path to the targets linking to it (via `PUBLIC`). It also +correctly exports the host-filesystem-specific include path when installing or +packaging the target and installs the headers themselves, too. + +If common properties need to be grouped together, use an INTERFACE target +(better) or write a function (worse). 
+ +There are also several functions that are disallowed for other reasons: + +| Command | Reason | Alternative | +|---------------------------------|-----------------------------------------------------------------------------------|--------------------------------------------------------------------------------| +| `aux_source_directory` | Interacts poorly with incremental builds and Git | List source files explicitly | +| `build_command` | CTest internal function | Use CTest build-and-test mode via [`CMAKE_CTEST_COMMAND`][cmake_ctest_command] | +| `cmake_host_system_information` | Usually misleading information. | Inspect [toolchain][cmake-toolchains] variables and use generator expressions. | +| `cmake_policy(... OLD)` | OLD policies are deprecated by definition. | Instead, fix the code to work with the new policy. | +| `create_test_sourcelist` | We use our own unit testing solution | See the [adding tests](#adding-tests) section. | +| `define_property` | Adds unnecessary complexity | Use a cache variable. Exceptions under special circumstances. | +| `enable_language` | Halide is C/C++ only | [`FindCUDAToolkit`][FindCUDAToolkit], appropriately guarded. | +| `file(GLOB ...)` | Interacts poorly with incremental builds and Git | List source files explicitly. Allowed if not globbing for source files. | +| `fltk_wrap_ui` | Halide does not use FLTK | None | +| `include_external_msproject` | Halide must remain portable | Write a CMake package config file or find module. | +| `include_guard` | Use of recursive inclusion is not allowed | Write (recursive) functions. | +| `include_regular_expression` | Changes default dependency checking behavior | None | +| `load_cache` | Superseded by [`FetchContent`][FetchContent]/[`ExternalProject`][ExternalProject] | Use aforementioned modules | +| `macro` | CMake macros are not hygienic and are therefore error-prone | Use functions instead. 
| +| `site_name` | Privacy: do not want leak host name information | Provide a cache variable, generate a unique name. | +| `variable_watch` | Debugging helper | None. Not needed in production. | + +Do not introduce any dependencies via [`find_package`][find_package] +without broader approval. Importantly, never introduce a new use of +`FetchContent`; prefer to add dependencies to `vcpkg.json`. + +## Prohibited variables list + +Any variables that are specific to languages that are not enabled should, of +course, be avoided. But of greater concern are variables that are easy to misuse +or should not be overridden for our end-users. The following (non-exhaustive) +list of variables shall not be used in code merged into main. + +| Variable | Reason | Alternative | +|---------------------------------|-----------------------------------------------|---------------------------------------------------------------------------------------------------------| +| `CMAKE_ROOT` | Code smell | Rely on `find_package` search options; include `HINTS` if necessary | +| `CMAKE_DEBUG_TARGET_PROPERTIES` | Debugging helper | None | +| `CMAKE_FIND_DEBUG_MODE` | Debugging helper | None | +| `CMAKE_RULE_MESSAGES` | Debugging helper | None | +| `CMAKE_VERBOSE_MAKEFILE` | Debugging helper | None | +| `CMAKE_BACKWARDS_COMPATIBILITY` | Deprecated | None | +| `CMAKE_BUILD_TOOL` | Deprecated | `${CMAKE_COMMAND} --build` or [`CMAKE_MAKE_PROGRAM`][cmake_make_program] (but see below) | +| `CMAKE_CACHEFILE_DIR` | Deprecated | [`CMAKE_BINARY_DIR`][cmake_binary_dir], but see below | +| `CMAKE_CFG_INTDIR` | Deprecated | `$`, `$`, target resolution of [`add_custom_command`][add_custom_command], etc. 
| +| `CMAKE_CL_64` | Deprecated | [`CMAKE_SIZEOF_VOID_P`][cmake_sizeof_void_p] | +| `CMAKE_COMPILER_IS_*` | Deprecated | [`CMAKE_<LANG>_COMPILER_ID`][cmake_lang_compiler_id] | +| `CMAKE_HOME_DIRECTORY` | Deprecated | [`CMAKE_SOURCE_DIR`][cmake_source_dir], but see below | +| `CMAKE_DIRECTORY_LABELS` | Directory property | None | +| `CMAKE_BUILD_TYPE` | Only applies to single-config generators. | `$<CONFIG>` | +| `CMAKE_*_FLAGS*` (w/o `_INIT`) | User-only | Write a [toolchain][cmake-toolchains] file with the corresponding `_INIT` variable | +| `CMAKE_COLOR_MAKEFILE` | User-only | None | +| `CMAKE_ERROR_DEPRECATED` | User-only | None | +| `CMAKE_CONFIGURATION_TYPES` | We only support the four standard build types | None | + +Of course, feel free to insert debugging helpers _while developing_ but please +remove them before review. Finally, the following variables are allowed, but +their use must be motivated: + +| Variable | Reason | Alternative | +|------------------------------------------------|-----------------------------------------------------|----------------------------------------------------------------------------------------------| +| [`CMAKE_SOURCE_DIR`][cmake_source_dir] | Points to global source root, not Halide's. | [`Halide_SOURCE_DIR`][project-name_source_dir] or [`PROJECT_SOURCE_DIR`][project_source_dir] | +| [`CMAKE_BINARY_DIR`][cmake_binary_dir] | Points to global build root, not Halide's | [`Halide_BINARY_DIR`][project-name_binary_dir] or [`PROJECT_BINARY_DIR`][project_binary_dir] | +| [`CMAKE_MAKE_PROGRAM`][cmake_make_program] | CMake abstracts over differences in the build tool. | Prefer CTest's build and test mode or CMake's `--build` mode | +| [`CMAKE_CROSSCOMPILING`][cmake_crosscompiling] | Often misleading. | Inspect relevant variables directly, e.g. 
[`CMAKE_SYSTEM_NAME`][cmake_system_name] | +| [`BUILD_SHARED_LIBS`][build_shared_libs] | Could override user setting | None, but be careful to restore value when overriding for a dependency | + +Any use of these functions or variables will block a PR. + +# Adding tests + +When adding a file to any of the folders under `test`, be aware that CI expects +that every `.c` and `.cpp` appears in the `CMakeLists.txt` file _on its own +line_, possibly as a comment. This is to avoid globbing and also to ensure that +added files are not missed. + +For most test types, it should be as simple as adding to the existing lists, +which must remain in alphabetical order. Generator tests are trickier, but +following the existing examples is a safe way to go. + +# Adding apps + +If you're contributing a new app to Halide: great! Thank you! There are a few +guidelines you should follow when writing a new app. + +- Write the app as if it were a top-level project. You should call + `find_package(Halide)` and set the C++ version to 11. +- Call [`enable_testing()`][enable_testing] and add a small test that runs the + app. +- Don't assume your app will have access to a GPU. Write your schedules to be + robust to varying buildbot hardware. +- Don't assume your app will be run on a specific OS, architecture, or bitness. + Write your apps to be robust (ideally efficient) on all supported platforms. +- If you rely on any additional packages, don't include them as `REQUIRED`, + instead test to see if their targets are available and, if not, call + `return()` before creating any targets. In this case, print a + `message(STATUS "[SKIP] ...")`, too. +- Look at the existing apps for examples. +- Test your app with ctest before opening a PR. Apps are built as part of the + test, rather than the main build. 
+ +[BuildingHalideWithCMake.md]: ./BuildingHalideWithCMake.md + +[CodeStyleCMake.md]: ./CodeStyleCMake.md + +[ExternalProject]: https://cmake.org/cmake/help/latest/module/ExternalProject.html + +[FetchContent]: https://cmake.org/cmake/help/latest/module/FetchContent.html + +[FindCUDAToolkit]: https://cmake.org/cmake/help/latest/module/FindCUDAToolkit.html + +[HalideCMakePackage.md]: ./HalideCMakePackage.md + +[add_custom_command]: https://cmake.org/cmake/help/latest/command/add_custom_command.html + +[add_library]: https://cmake.org/cmake/help/latest/command/add_library.html + +[add_subdirectory]: https://cmake.org/cmake/help/latest/command/add_subdirectory.html + +[atlas]: http://math-atlas.sourceforge.net/ + +[brew-cmake]: https://formulae.brew.sh/cask/cmake#default + +[build_shared_libs]: https://cmake.org/cmake/help/latest/variable/BUILD_SHARED_LIBS.html + +[cmake-apt]: https://apt.kitware.com/ + +[cmake-discourse]: https://discourse.cmake.org/ + +[cmake-docs]: https://cmake.org/cmake/help/latest/ + +[cmake-download]: https://cmake.org/download/ + +[cmake-from-source]: https://cmake.org/install/ + +[cmake-genex]: https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html + +[cmake-install]: https://cmake.org/cmake/help/latest/manual/cmake.1.html#install-a-project + +[cmake-propagation]: https://cmake.org/cmake/help/latest/manual/cmake-buildsystem.7.html#transitive-usage-requirements + +[cmake-toolchains]: https://cmake.org/cmake/help/latest/manual/cmake-toolchains.7.html + +[cmake-user-interaction]: https://cmake.org/cmake/help/latest/guide/user-interaction/index.html#setting-build-variables + +[cmake_binary_dir]: https://cmake.org/cmake/help/latest/variable/CMAKE_BINARY_DIR.html + +[cmake_build_type]: https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html + +[cmake_crosscompiling]: https://cmake.org/cmake/help/latest/variable/CMAKE_CROSSCOMPILING.html + +[cmake_crosscompiling_emulator]: 
https://cmake.org/cmake/help/latest/variable/CMAKE_CROSSCOMPILING_EMULATOR.html + +[cmake_ctest_command]: https://cmake.org/cmake/help/latest/variable/CMAKE_CTEST_COMMAND.html + +[cmake_current_binary_dir]: https://cmake.org/cmake/help/latest/variable/CMAKE_CURRENT_BINARY_DIR.html + +[cmake_cxx_extensions]: https://cmake.org/cmake/help/latest/variable/CMAKE_CXX_EXTENSIONS.html + +[cmake_cxx_standard]: https://cmake.org/cmake/help/latest/variable/CMAKE_CXX_STANDARD.html + +[cmake_cxx_standard_required]: https://cmake.org/cmake/help/latest/variable/CMAKE_CXX_STANDARD_REQUIRED.html + +[cmake_foreach]: https://cmake.org/cmake/help/latest/command/foreach.html + +[cmake_if]: https://cmake.org/cmake/help/latest/command/if.html + +[cmake_lang_compiler_id]: https://cmake.org/cmake/help/latest/variable/CMAKE_LANG_COMPILER_ID.html + +[cmake_make_program]: https://cmake.org/cmake/help/latest/variable/CMAKE_MAKE_PROGRAM.html + +[cmake_minimum_required]: https://cmake.org/cmake/help/latest/command/cmake_minimum_required.html + +[cmake_prefix_path]: https://cmake.org/cmake/help/latest/variable/CMAKE_PREFIX_PATH.html + +[cmake_presets]: https://cmake.org/cmake/help/latest/manual/cmake-presets.7.html + +[cmake_sizeof_void_p]: https://cmake.org/cmake/help/latest/variable/CMAKE_SIZEOF_VOID_P.html + +[cmake_source_dir]: https://cmake.org/cmake/help/latest/variable/CMAKE_SOURCE_DIR.html + +[cmake_system_name]: https://cmake.org/cmake/help/latest/variable/CMAKE_SYSTEM_NAME.html + +[doxygen-download]: https://www.doxygen.nl/download.html + +[doxygen]: https://www.doxygen.nl/index.html + +[eigen]: http://eigen.tuxfamily.org/index.php?title=Main_Page + +[enable_testing]: https://cmake.org/cmake/help/latest/command/enable_testing.html + +[find_package]: https://cmake.org/cmake/help/latest/command/find_package.html + +[findcuda]: https://cmake.org/cmake/help/latest/module/FindCUDA.html + +[finddoxygen]: https://cmake.org/cmake/help/latest/module/FindDoxygen.html + +[findjpeg]: 
https://cmake.org/cmake/help/latest/module/FindJPEG.html + +[findopencl]: https://cmake.org/cmake/help/latest/module/FindOpenCL.html + +[findpng]: https://cmake.org/cmake/help/latest/module/FindPNG.html + +[findpython3]: https://cmake.org/cmake/help/latest/module/FindPython3.html + +[findx11]: https://cmake.org/cmake/help/latest/module/FindX11.html + +[halide-generator-tutorial]: https://halide-lang.org/tutorials/tutorial_lesson_15_generators.html + +[halide-tutorials]: https://halide-lang.org/tutorials/tutorial_introduction.html + +[homebrew]: https://brew.sh + +[imported-executable]: https://cmake.org/cmake/help/latest/command/add_executable.html#imported-executables + +[imported-target]: https://cmake.org/cmake/help/latest/manual/cmake-buildsystem.7.html#imported-targets + +[include]: https://cmake.org/cmake/help/latest/command/include.html + +[install-files]: https://cmake.org/cmake/help/latest/command/install.html#files + +[install-targets]: https://cmake.org/cmake/help/latest/command/install.html#targets + +[libjpeg]: https://www.libjpeg-turbo.org/ + +[libpng]: http://www.libpng.org/pub/png/libpng.html + +[lld]: https://lld.llvm.org/ + +[msvc-cmd]: https://docs.microsoft.com/en-us/cpp/build/building-on-the-command-line + +[msvc]: https://cmake.org/cmake/help/latest/variable/MSVC.html + +[ninja-download]: https://github.com/ninja-build/ninja/releases + +[ninja]: https://ninja-build.org/ + +[openblas]: https://www.openblas.net/ + +[project-name_binary_dir]: https://cmake.org/cmake/help/latest/variable/PROJECT-NAME_BINARY_DIR.html + +[project-name_source_dir]: https://cmake.org/cmake/help/latest/variable/PROJECT-NAME_SOURCE_DIR.html + +[project]: https://cmake.org/cmake/help/latest/command/project.html + +[project_binary_dir]: https://cmake.org/cmake/help/latest/variable/PROJECT_BINARY_DIR.html + +[project_source_dir]: https://cmake.org/cmake/help/latest/variable/PROJECT_SOURCE_DIR.html + +[pypi-cmake]: https://pypi.org/project/cmake/ + +[python]: 
https://www.python.org/downloads/ + +[target-file]: https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html#target-dependent-queries + +[target_compile_definitions]: https://cmake.org/cmake/help/latest/command/target_compile_definitions.html + +[target_compile_options]: https://cmake.org/cmake/help/latest/command/target_compile_options.html + +[target_include_directories]: https://cmake.org/cmake/help/latest/command/target_include_directories.html + +[target_link_libraries]: https://cmake.org/cmake/help/latest/command/target_link_libraries.html + +[target_link_options]: https://cmake.org/cmake/help/latest/command/target_link_options.html + +[vcpkg]: https://github.com/Microsoft/vcpkg + +[vcvarsall]: https://docs.microsoft.com/en-us/cpp/build/building-on-the-command-line#vcvarsall-syntax + +[venv]: https://docs.python.org/3/tutorial/venv.html + +[win32]: https://cmake.org/cmake/help/latest/variable/WIN32.html diff --git a/README_fuzz_testing.md b/doc/FuzzTesting.md similarity index 100% rename from README_fuzz_testing.md rename to doc/FuzzTesting.md diff --git a/doc/HalideCMakePackage.md b/doc/HalideCMakePackage.md new file mode 100644 index 000000000000..ed0a073ab400 --- /dev/null +++ b/doc/HalideCMakePackage.md @@ -0,0 +1,812 @@ +# Using Halide from your CMake build + +This is a detailed guide to building your own Halide programs with the official +CMake package. If you need directions for building Halide, +see [BuildingHalideWithCMake.md]. If you are looking for Halide's CMake coding +guidelines, see [CodeStyleCMake.md]. + +This document assumes some basic familiarity with CMake but tries to be explicit +in all its examples. To learn more about CMake, consult the +[documentation][cmake-docs] and engage with the community on +the [CMake Discourse][cmake-discourse]. 
+ + +* [Using Halide from your CMake build](#using-halide-from-your-cmake-build) +* [A basic CMake project](#a-basic-cmake-project) + * [JIT mode](#jit-mode) + * [AOT mode](#aot-mode) + * [Autoschedulers](#autoschedulers) + * [RunGenMain](#rungenmain) +* [Halide package documentation](#halide-package-documentation) + * [Components](#components) + * [Variables](#variables) + * [Imported targets](#imported-targets) + * [Functions](#functions) + * [`add_halide_generator`](#add_halide_generator) + * [`add_halide_library`](#add_halide_library) + * [`add_halide_python_extension_library`](#add_halide_python_extension_library) + * [`add_halide_runtime`](#add_halide_runtime) +* [Cross compiling](#cross-compiling) + * [Use `add_halide_generator`](#use-add_halide_generator) + * [Use a super-build](#use-a-super-build) + * [Use `ExternalProject` directly](#use-externalproject-directly) + * [Use an emulator or run on device](#use-an-emulator-or-run-on-device) + * [Bypass CMake](#bypass-cmake) + + +# A basic CMake project + +There are two main ways to use Halide in your application: as a **JIT compiler** +for dynamic pipelines or an **ahead-of-time (AOT) compiler** for static +pipelines. CMake provides robust support for both use cases. + +No matter how you intend to use Halide, you will need some basic CMake +boilerplate. + +```cmake +cmake_minimum_required(VERSION 3.28) +project(HalideExample) + +set(CMAKE_CXX_STANDARD 17) # or newer +set(CMAKE_CXX_STANDARD_REQUIRED YES) +set(CMAKE_CXX_EXTENSIONS NO) + +find_package(Halide REQUIRED) +``` + +The [`cmake_minimum_required`][cmake_minimum_required] command is required to be +the first command executed in a CMake program. It disables all the deprecated +behavior ("policies" in CMake lingo) from earlier versions. The +[`project`][project] command sets the name of the project (and accepts arguments +for versioning, language support, etc.) and is required by CMake to be called +immediately after setting the minimum version. 
+ +The next three variables set the project-wide C++ standard. The first, +[`CMAKE_CXX_STANDARD`][cmake_cxx_standard], simply sets the standard version. +Halide requires at least C++17. The second, +[`CMAKE_CXX_STANDARD_REQUIRED`][cmake_cxx_standard_required], tells CMake to +fail if the compiler cannot provide the requested standard version. Lastly, +[`CMAKE_CXX_EXTENSIONS`][cmake_cxx_extensions] tells CMake to disable +vendor-specific extensions to C++. This is not necessary to simply use Halide, +but we do not allow such extensions in the Halide repo. + +Finally, we use [`find_package`][find_package] to locate Halide on your system. +When using the pip package on Linux and macOS, CMake's `find_package` +command should find Halide as long as you're in the same virtual environment you +installed it in. On Windows, you will need to add the virtual environment root +directory to [`CMAKE_PREFIX_PATH`][cmake_prefix_path]: + +```shell +$ cmake -G Ninja -S . -B build -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=%VIRTUAL_ENV% +``` + +If `find_package` cannot find Halide, set `CMAKE_PREFIX_PATH` to the Halide +installation directory. + +## JIT mode + +To use Halide in JIT mode (like the [tutorials][halide-tutorials] do, for +example), you can simply link to `Halide::Halide`. + +```cmake +# ... same project setup as before ... +add_executable(my_halide_app main.cpp) +target_link_libraries(my_halide_app PRIVATE Halide::Halide) +``` + +Then `Halide.h` will be available to your code and everything should just work. +That's it! + +## AOT mode + +Using Halide in AOT mode is more complicated so we'll walk through it step by +step. Note that this only applies to Halide generators, so it might be useful to +re-read the [tutorial on generators][halide-generator-tutorial]. 
Assume (like in +the tutorial) that you have a source file named `my_generators.cpp` and that in +it, you have generator classes `MyFirstGenerator` and `MySecondGenerator` with +registered names `my_first_generator` and `my_second_generator` respectively. + +Then the first step is to add a **generator executable** to your build: + +```cmake +# ... same project setup as before ... +add_halide_generator(my_generators SOURCES my_generators.cpp) +``` + +Using the generator executable, we can add a Halide library corresponding to +`MyFirstGenerator`. + +```cmake +# ... continuing from above +add_halide_library(my_first_generator FROM my_generators) +``` + +This will create a static library target in CMake that corresponds to the output +of running your generator. The second generator in the file requires generator +parameters to be passed to it. These are also easy to handle: + +```cmake +# ... continuing from above +add_halide_library(my_second_generator FROM my_generators + PARAMS parallel=false scale=3.0 rotation=ccw output.type=uint16) +``` + +Adding multiple configurations is easy, too: + +```cmake +# ... continuing from above +add_halide_library(my_second_generator_2 FROM my_generators + GENERATOR my_second_generator + PARAMS scale=9.0 rotation=ccw output.type=float32) + +add_halide_library(my_second_generator_3 FROM my_generators + GENERATOR my_second_generator + PARAMS parallel=false output.type=float64) +``` + +Here, we had to specify which generator to use (`my_second_generator`) since it +uses the target name by default. The functions in these libraries will be named +after the target names, `my_second_generator_2` and `my_second_generator_3`, by +default, but it is possible to control this via the `FUNCTION_NAME` parameter. + +Each one of these targets, `<GEN>`, carries an associated `<GEN>.runtime` +target, which is also a static library containing the Halide runtime. It is +transitively linked through `<GEN>` to targets that link to `<GEN>`. 
On an +operating system like Linux, where weak linking is available, this is not an +issue. However, on Windows, this can fail due to symbol redefinitions. In these +cases, you must declare that two Halide libraries share a runtime, like so: + +```cmake +# ... updating above +add_halide_library(my_second_generator_2 FROM my_generators + GENERATOR my_second_generator + USE_RUNTIME my_first_generator.runtime + PARAMS scale=9.0 rotation=ccw output.type=float32) + +add_halide_library(my_second_generator_3 FROM my_generators + GENERATOR my_second_generator + USE_RUNTIME my_first_generator.runtime + PARAMS parallel=false output.type=float64) +``` + +This will even work correctly when different combinations of targets are +specified for each halide library. A "greatest common denominator" target will +be chosen that is compatible with all of them (or the build will fail). + +### Autoschedulers + +When the autoschedulers are included in the release package, they are very +simple to apply to your own generators. For example, we could update the +definition of the `my_first_generator` library above to use the `Adams2019` +autoscheduler: + +```cmake +add_halide_library(my_second_generator FROM my_generators + AUTOSCHEDULER Halide::Adams2019) +``` + +### RunGenMain + +Halide provides a generic driver for generators to be used during development +for benchmarking and debugging. Suppose you have a generator executable called +`my_gen` and a generator within called `my_filter`. Then you can pass a variable +name to the `REGISTRATION` parameter of `add_halide_library` which will contain +the name of a generated C++ source that should be linked to `Halide::RunGenMain` +and `my_filter`. 
+ +For example: + +```cmake +add_halide_library(my_filter FROM my_gen + REGISTRATION filter_reg_cpp) +add_executable(runner ${filter_reg_cpp}) +target_link_libraries(runner PRIVATE my_filter Halide::RunGenMain) +``` + +Then you can run, debug, and benchmark your generator through the `runner` +executable. Learn how to interact with these executables +in [RunGen.md](./RunGen.md). + +# Halide package documentation + +Halide provides a CMake _package configuration_ module. The intended way to use +the CMake build is to run `find_package(Halide ...)` in your `CMakeLists.txt` +file. Closely read the [`find_package` documentation][find_package] before +proceeding. + +## Components + +The Halide package script understands a handful of optional components when +loading the package. + +First, if you plan to use the Halide Image IO library, you will want to include +the `png` and `jpeg` components when loading Halide. + +Second, Halide releases can contain a variety of configurations: static, shared, +debug, release, etc. CMake handles Debug/Release configurations automatically, +but generally only allows one type of library to be loaded. + +The package understands two components, `static` and `shared`, that specify +which type of library you would like to load. For example, if you want to make +sure that you link against shared Halide, you can write: + +```cmake +find_package(Halide REQUIRED COMPONENTS shared) +``` + +If the shared libraries are not available, this will result in a failure. + +If no component is specified, then the `Halide_SHARED_LIBS` variable is checked. +If it is defined and set to true, then the shared libraries will be loaded or +the package loading will fail. Similarly, if it is defined and set to false, the +static libraries will be loaded. + +If no component is specified and `Halide_SHARED_LIBS` is _not_ defined, then the +[`BUILD_SHARED_LIBS`][build_shared_libs] variable will be inspected. 
If it is +**not defined** or **defined and set to true**, then it will attempt to load the +shared libs and fall back to the static libs if they are not available. +Similarly, if `BUILD_SHARED_LIBS` is **defined and set to false**, then it will +try the static libs first then fall back to the shared libs. + +To ensure that the Python bindings are available, include the `Python` +component. + +## Variables + +Variables that control package loading: + +| Variable | Description | +|-----------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `Halide_SHARED_LIBS` | override `BUILD_SHARED_LIBS` when loading the Halide package via `find_package`. Has no effect when using Halide via `add_subdirectory` as a Git or `FetchContent` submodule. | +| `Halide_RUNTIME_NO_THREADS` | skip linking of Threads library to runtime. Should be set if your toolchain does not support it (e.g. baremetal). | +| `Halide_RUNTIME_NO_DL_LIBS` | skip linking of DL library to runtime. Should be set if your toolchain does not support it (e.g. baremetal). | + +Variables set by the package: + +| Variable | Description | +|----------------------------|--------------------------------------------------------------------| +| `Halide_VERSION` | The full version string of the loaded Halide package | +| `Halide_VERSION_MAJOR` | The major version of the loaded Halide package | +| `Halide_VERSION_MINOR` | The minor version of the loaded Halide package | +| `Halide_VERSION_PATCH` | The patch version of the loaded Halide package | +| `Halide_VERSION_TWEAK` | The tweak version of the loaded Halide package | +| `Halide_HOST_TARGET` | The Halide target triple corresponding to "host" for this build. | +| `Halide_CMAKE_TARGET` | The Halide target triple corresponding to the active CMake target. 
| +| `Halide_ENABLE_EXCEPTIONS` | Whether Halide was compiled with exception support | +| `Halide_ENABLE_RTTI` | Whether Halide was compiled with RTTI | +| `WITH_AUTOSCHEDULERS` | Whether the autoschedulers are available | + +Variables that control package behavior: + +| Variable | Description | +|---------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------| +| `Halide_PYTHON_LAUNCHER` | Semicolon separated list containing a command to launch the Python interpreter. Can be used to set environment variables for Python generators. | +| `Halide_NO_DEFAULT_FLAGS` | Off by default. When enabled, suppresses recommended compiler flags that would be added by `add_halide_generator` | + +## Imported targets + +Halide defines the following targets that are available to users: + +| Imported target | Description | +|----------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `Halide::Halide` | this is the JIT-mode library to use when using Halide from C++. | +| `Halide::Generator` | this is the target to use when manually defining a generator executable. It supplies a `main()` function. | +| `Halide::Runtime` | adds include paths to the Halide runtime headers | +| `Halide::Tools` | adds include paths to the Halide tools, including the benchmarking utility. | +| `Halide::ImageIO` | adds include paths to the Halide image IO utility. Depends on `PNG::PNG` and `JPEG::JPEG` if they exist or were loaded through the corresponding package components. | +| `Halide::ThreadPool` | adds include paths to the Halide _simple_ thread pool utility library. This is not the same as the runtime's thread pool and is intended only for use by tests. Depends on `Threads::Threads`. 
| +| `Halide::RunGenMain` | used with the `REGISTRATION` parameter of `add_halide_library` to create simple runners and benchmarking tools for Halide libraries. | + +The following targets are only guaranteed when requesting the `Python` component +(`Halide_Python_FOUND` will be true): + +| Imported target | Description | +|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `Halide::Python` | this is a Python 3 package that can be referenced as `$<TARGET_FILE_DIR:Halide::Python>/..` when setting up `PYTHONPATH` for Python tests or the like from CMake. | + +The following targets are only guaranteed when `WITH_AUTOSCHEDULERS` is true: + +| Imported target | Description | +|-------------------------|-----------------------------------------------------------------| +| `Halide::Adams2019` | the Adams et al. 2019 autoscheduler (no GPU support) | +| `Halide::Anderson2021` | the Anderson et al. 2021 autoscheduler (full GPU support) | +| `Halide::Li2018` | the Li et al. 2018 gradient autoscheduler (limited GPU support) | +| `Halide::Mullapudi2016` | the Mullapudi et al. 2016 autoscheduler (no GPU support) | + +## Functions + +The Halide package provides several useful functions for dealing with AOT +compilation steps. + +### `add_halide_generator` + +This function aids in creating cross-compilable builds that use Halide +generators. + +``` +add_halide_generator( + target + [PACKAGE_NAME package-name] + [PACKAGE_NAMESPACE namespace] + [EXPORT_FILE export-file] + [PYSTUB generator-name] + [LINK_LIBRARIES lib1 ...] + [[SOURCES] source1 ...] +) +``` + +Every named argument is optional, and the function uses the following default +arguments: + +- If `PACKAGE_NAME` is not provided, it defaults to + `${PROJECT_NAME}-halide_generators`. +- If `PACKAGE_NAMESPACE` is not provided, it defaults to + `${PROJECT_NAME}::halide_generators::`. 
+- If `EXPORT_FILE` is not provided, it defaults to + `${PROJECT_BINARY_DIR}/cmake/${ARG_PACKAGE_NAME}-config.cmake` + +This function guarantees that a Halide generator target named +`<namespace><target>` is available. It will first search for a package named +`<package-name>` using `find_package`; if it is found, it is assumed that it +provides the target. Otherwise, it will create an executable target named +`target` and an `ALIAS` target `<namespace><target>`. This function also creates +a custom target named `<package-name>` if it does not exist and +`<target>` would exist. In this case, `<package-name>` will depend on +`<target>`; this enables easy building of _just_ the Halide generators managed +by this function. + +After the call, `<PACKAGE_NAME>_FOUND` will be set to true if the host +generators were imported (and hence won't be built). Otherwise, it will be set +to false. This variable may be used to conditionally set properties on +`<target>`. + +Please +see [test/integration/xc](https://github.com/halide/Halide/tree/main/test/integration/xc) +for a simple example +and [apps/hannk](https://github.com/halide/Halide/tree/main/apps/hannk) for a +complete app that uses it extensively. + +The `SOURCES` keyword marks the beginning of sources to be used to build +`<target>`, if it is not loaded. All unparsed arguments will be interpreted as +sources. + +The `LINK_LIBRARIES` argument lists libraries that should be linked to +`<target>` when it is being built in the present build system. + +If `PYSTUB` is specified, then a Python Extension will be built that wraps the +Generator with CPython glue to allow use of the Generator from Python 3. The result +will be a shared library of the form +`<target>_pystub.<soabi>.so`, where `<soabi>` describes the specific Python +version and platform (e.g., `cpython-310-darwin` for Python 3.10 on macOS). See +[Python.md](Python.md) for examples of use. + +### `add_halide_library` + +This is the main function for managing generators in AOT compilation. 
The full +signature follows: + +``` +add_halide_library(<target> FROM <generator-target> + [GENERATOR generator-name] + [FUNCTION_NAME function-name] + [NAMESPACE cpp-namespace] + [USE_RUNTIME hl-target] + [PARAMS param1 [param2 ...]] + [TARGETS target1 [target2 ...]] + [FEATURES feature1 [feature2 ...]] + [FEATURES[<triple>] feature1 [feature2 ...]] + [PLUGINS plugin1 [plugin2 ...]] + [AUTOSCHEDULER scheduler-name] + [FUNCTION_INFO_HEADER OUTVAR] + [HEADER OUTVAR] + [REGISTRATION OUTVAR] + [<extra-output> OUTVAR] + [GRADIENT_DESCENT] + [C_BACKEND] + [NO_THREADS] + [NO_DL_LIBS]) + +triple = <arch>-<bits>-<os> +arch = x86 | arm | powerpc | hexagon | wasm | riscv +bits = 32 | 64 +os = linux | windows | osx | android | ios | qurt | noos | fuchsia | wasmrt + +extra-output = ASSEMBLY | BITCODE | COMPILER_LOG | C_SOURCE | FEATURIZATION + | HLPIPE | LLVM_ASSEMBLY | PYTHON_EXTENSION | PYTORCH_WRAPPER + | SCHEDULE | STMT | STMT_HTML +``` + +This function creates a target called `<target>` corresponding to running the +`<generator-target>` (an executable target which links to `Halide::Generator`) +one time, using command line arguments derived from the other parameters. + +The arguments `GENERATOR` and `FUNCTION_NAME` default to `<target>`. They +correspond to the `-g` and `-f` command line flags, respectively. + +`NAMESPACE` is syntactic sugar to specify the C++ namespace (if any) of the +generated function; you can also specify the C++ namespace (if any) directly in +the `FUNCTION_NAME` argument, but for repeated declarations or very long +namespaces, specifying this separately can provide more readable build files. + +If `USE_RUNTIME` is not specified, this function will create another target +called `<target>.runtime` which corresponds to running the generator with `-r` +and a compatible list of targets. This runtime target is an `INTERFACE` +dependency of `<target>`. If multiple runtime targets need to be linked +together, setting `USE_RUNTIME` to another Halide runtime library, `<target2>`, +will prevent the generation of `<target>.runtime` and instead use +`<target2>.runtime`. 
This argument is most commonly used in conjunction with [ +`add_halide_runtime`](#add_halide_runtime). + +Parameters can be passed to a generator via the `PARAMS` argument. Parameters +should be space-separated. Similarly, `TARGETS` is a space-separated list of +targets for which to generate code in a single function. They must all share the +same platform/bits/os triple (e.g. `arm-32-linux`). Features that are in common +among all targets, including device libraries (like `cuda`) should go in +`FEATURES`. If `TARGETS` is not specified, the value of `Halide_TARGET` +specified at configure time will be used. + +Every element of `TARGETS` must begin with the same `arch-bits-os` triple. This +function understands two _meta-triples_, `host` and `cmake`. The meta-triple +`host` is equal to the `arch-bits-os` triple used to compile Halide along with +all the supported instruction set extensions. On platforms that support running +both 32 and 64-bit programs, this will not necessarily equal the platform the +compiler is running on or that CMake is targeting. + +The meta-triple `cmake` is equal to the `arch-bits-os` of the current CMake +target. This is useful if you want to make sure you are not unintentionally +cross-compiling, which would result in an [`IMPORTED` target][imported-target] +being created. When `TARGETS` is empty and the `host` target would not +cross-compile, then `host` will be used. Otherwise, `cmake` will be used and an +author warning will be issued. + +When `CMAKE_OSX_ARCHITECTURES` is set and the `TARGETS` argument resolves to +`cmake`, the generator will be run once for each architecture and the results +will be fused together using `lipo`. This behavior extends to runtime targets. + +To use an autoscheduler, set the `AUTOSCHEDULER` argument to a target named like +`Namespace::Scheduler`, for example `Halide::Adams2019`. 
This will set the +`autoscheduler` GeneratorParam on the generator command line to `Scheduler` +and add the target to the list of plugins. Additional plugins can be loaded by +setting the `PLUGINS` argument. If the argument to `AUTOSCHEDULER` does not +contain `::` or it does not name a target, it will be passed to the `-s` flag +verbatim. + +If `GRADIENT_DESCENT` is set, then the module will be built suitably for +gradient descent calculation in TensorFlow or PyTorch. See +`Generator::build_gradient_module()` for more documentation. This corresponds to +passing `-d 1` at the generator command line. + +If the `C_BACKEND` option is set, this command will invoke the configured C++ +compiler on a generated source. Note that a `.runtime` target is _not_ +created in this case, and the `USE_RUNTIME` option is ignored. Other options +work as expected. + +If `REGISTRATION` is set, the path (relative to `CMAKE_CURRENT_BINARY_DIR`) +to the generated `.registration.cpp` file will be set in `OUTVAR`. This can be +used to generate a runner for a Halide library that is useful for benchmarking +and testing, as documented above. This is equivalent to setting +`-e registration` at the generator command line. + +If `HEADER` is set, the path (relative to `CMAKE_CURRENT_BINARY_DIR`) to the +generated `.h` header file will be set in `OUTVAR`. This can be used with +`install(FILES)` to conveniently deploy the generated header along with your +library. + +If `FUNCTION_INFO_HEADER` is set, the path (relative to +`CMAKE_CURRENT_BINARY_DIR`) to the generated `.function_info.h` header file will +be set in `OUTVAR`. This produces a file that contains `constexpr` +descriptions of information about the generated functions (e.g., argument type +and information). It is generated separately from the normal `HEADER` +file because `HEADER` is intended to work with basic `extern "C"` linkage, while +`FUNCTION_INFO_HEADER` requires C++17 or later to use effectively. 
+(This can be quite useful for advanced usages, such as producing automatic call +wrappers, etc.) Examples of usage can be found in the generated file. + +Each of the `extra-output` arguments directly corresponds to an extra output (via +`-e`) from the generator. The value `OUTVAR` names a variable into which a +path (relative to +[`CMAKE_CURRENT_BINARY_DIR`][cmake_current_binary_dir]) to the extra file will +be written. + +When `NO_THREADS` is passed, the library targets will not depend on +`Threads::Threads`. It is your responsibility to link to an equivalent target. + +When `NO_DL_LIBS` is passed, the library targets will not depend on +`${CMAKE_DL_LIBS}`. It is your responsibility to link to an equivalent library. + +### `add_halide_python_extension_library` + +This function wraps the outputs of one or more `add_halide_library` targets with +glue code to produce a Python Extension library. + +``` +add_halide_python_extension_library( + target + [MODULE_NAME module-name] + HALIDE_LIBRARIES library1 ... +) +``` + +`HALIDE_LIBRARIES` is a list of one or more `add_halide_library` targets. Each +will be added to the extension as a callable method of the module. Note that +every library specified must be built with the `PYTHON_EXTENSION` keyword +specified, and all libraries must use the same Halide runtime. + +The result will be a shared library of the form `<target>.<soabi>.so`, where +`<soabi>` describes the specific Python version and platform (e.g., +`cpython-310-darwin` for Python 3.10 on macOS.) + +### `add_halide_runtime` + +This function generates a library containing a Halide runtime. Most user code +will never need to use this, as `add_halide_library()` will call it for you if +necessary. The most common use case is usually in conjunction with +`add_halide_python_extension_library()`, as a way to ensure that all the Halide +libraries share an identical runtime.
+ +``` +add_halide_runtime( + target + [TARGETS target1 [target2 ...]] + [NO_THREADS] + [NO_DL_LIBS] +) +``` + +The `TARGETS`, `NO_THREADS`, and `NO_DL_LIBS` arguments have identical semantics +to the arguments of the same name for [ +`add_halide_library`](#add_halide_library). + +# Cross compiling + +Cross-compiling in CMake can be tricky, since CMake doesn't easily support +compiling for both the host platform and the cross platform within the same +build. Unfortunately, Halide generator executables are just about always +designed to run on the host platform. Each project will be set up differently +and have different requirements, but here are some suggestions for effective use +of CMake in these scenarios. + +## Use `add_halide_generator` + +If you are writing new programs that use Halide, you might wish to use +`add_halide_generator`. When using this helper, you are expected to build your +project twice: once for your build host and again for your intended target. + +When building the host build, you can use the `<PACKAGE_NAME>` (see the +documentation above) target to build _just_ the generators. Then, in the target +build, set `<PACKAGE_NAME>_ROOT` to the host build directory. + +For example: + +``` +$ cmake -G Ninja -S . -B build-host -DCMAKE_BUILD_TYPE=Release +$ cmake --build build-host --target <PACKAGE_NAME> +$ cmake -G Ninja -S . -B build-target --toolchain /path/to/target-tc.cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -D<PACKAGE_NAME>_ROOT:FILEPATH=$PWD/build-host +$ cmake --build build-target +``` + +## Use a super-build + +A CMake super-build consists of breaking down a project into subprojects that +are isolated by [toolchain][cmake-toolchains]. The basic structure is to have an +outermost project that only coordinates the sub-builds via the +[`ExternalProject`][ExternalProject] module. + +One would then use Halide to build a generator executable in one self-contained +project, then export that target to be used in a separate project.
The second +project would be configured with the target [toolchain][cmake-toolchains] and +would call `add_halide_library` with no `TARGETS` option and set `FROM` equal to +the name of the imported generator executable. Obviously, this is a significant +increase in complexity over a typical CMake project. + +This is very compatible with the `add_halide_generator` strategy above. + +## Use `ExternalProject` directly + +A lighter weight alternative to the above is to use +[`ExternalProject`][ExternalProject] directly in your parent build. Configure +the parent build with the target [toolchain][cmake-toolchains], and configure +the inner project to use the host toolchain. Then, manually create an +[`IMPORTED` target][imported-executable] for your generator executable and call +`add_halide_library` as described above. + +The main drawback of this approach is that creating accurate `IMPORTED` targets +is difficult since predicting the names and locations of your binaries across +all possible platform and CMake project generators is difficult. In particular, +it is hard to predict executable extensions in cross-OS builds. + +## Use an emulator or run on device + +The [`CMAKE_CROSSCOMPILING_EMULATOR`][cmake_crosscompiling_emulator] variable +allows one to specify a command _prefix_ to run a target-system binary on the +host machine. One could set this to a custom shell script that uploads the +generator executable, runs it on the device and copies back the results. + +Another option is to install `qemu-user-static` to transparently emulate the +cross-built generator. + +## Bypass CMake + +The previous two options ensure that the targets generated by +`add_halide_library` will be _normal_ static libraries. This approach does not +use [`ExternalProject`][ExternalProject], but instead produces `IMPORTED` +targets. The main drawback of `IMPORTED` targets is that they are considered +second-class in CMake. 
In particular, they cannot be installed with the typical +[`install(TARGETS)` command][install-targets]. Instead, they must be installed +using [`install(FILES)`][install-files] and the +[`$<TARGET_FILE:tgt>`][target-file] generator expression. + + +[BuildingHalideWithCMake.md]: ./BuildingHalideWithCMake.md + +[CodeStyleCMake.md]: ./CodeStyleCMake.md + +[ExternalProject]: https://cmake.org/cmake/help/latest/module/ExternalProject.html + +[HalideCMakePackage.md]: ./HalideCMakePackage.md + +[add_custom_command]: https://cmake.org/cmake/help/latest/command/add_custom_command.html + +[add_library]: https://cmake.org/cmake/help/latest/command/add_library.html + +[add_subdirectory]: https://cmake.org/cmake/help/latest/command/add_subdirectory.html + +[atlas]: http://math-atlas.sourceforge.net/ + +[brew-cmake]: https://formulae.brew.sh/cask/cmake#default + +[build_shared_libs]: https://cmake.org/cmake/help/latest/variable/BUILD_SHARED_LIBS.html + +[cmake-apt]: https://apt.kitware.com/ + +[cmake-discourse]: https://discourse.cmake.org/ + +[cmake-docs]: https://cmake.org/cmake/help/latest/ + +[cmake-download]: https://cmake.org/download/ + +[cmake-from-source]: https://cmake.org/install/ + +[cmake-genex]: https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html + +[cmake-install]: https://cmake.org/cmake/help/latest/manual/cmake.1.html#install-a-project + +[cmake-propagation]: https://cmake.org/cmake/help/latest/manual/cmake-buildsystem.7.html#transitive-usage-requirements + +[cmake-toolchains]: https://cmake.org/cmake/help/latest/manual/cmake-toolchains.7.html + +[cmake-user-interaction]: https://cmake.org/cmake/help/latest/guide/user-interaction/index.html#setting-build-variables + +[cmake_binary_dir]: https://cmake.org/cmake/help/latest/variable/CMAKE_BINARY_DIR.html + +[cmake_build_type]: https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html + +[cmake_crosscompiling]: https://cmake.org/cmake/help/latest/variable/CMAKE_CROSSCOMPILING.html +
+[cmake_crosscompiling_emulator]: https://cmake.org/cmake/help/latest/variable/CMAKE_CROSSCOMPILING_EMULATOR.html + +[cmake_ctest_command]: https://cmake.org/cmake/help/latest/variable/CMAKE_CTEST_COMMAND.html + +[cmake_current_binary_dir]: https://cmake.org/cmake/help/latest/variable/CMAKE_CURRENT_BINARY_DIR.html + +[cmake_cxx_extensions]: https://cmake.org/cmake/help/latest/variable/CMAKE_CXX_EXTENSIONS.html + +[cmake_cxx_standard]: https://cmake.org/cmake/help/latest/variable/CMAKE_CXX_STANDARD.html + +[cmake_cxx_standard_required]: https://cmake.org/cmake/help/latest/variable/CMAKE_CXX_STANDARD_REQUIRED.html + +[cmake_foreach]: https://cmake.org/cmake/help/latest/command/foreach.html + +[cmake_if]: https://cmake.org/cmake/help/latest/command/if.html + +[cmake_lang_compiler_id]: https://cmake.org/cmake/help/latest/variable/CMAKE_LANG_COMPILER_ID.html + +[cmake_make_program]: https://cmake.org/cmake/help/latest/variable/CMAKE_MAKE_PROGRAM.html + +[cmake_minimum_required]: https://cmake.org/cmake/help/latest/command/cmake_minimum_required.html + +[cmake_prefix_path]: https://cmake.org/cmake/help/latest/variable/CMAKE_PREFIX_PATH.html + +[cmake_presets]: https://cmake.org/cmake/help/latest/manual/cmake-presets.7.html + +[cmake_sizeof_void_p]: https://cmake.org/cmake/help/latest/variable/CMAKE_SIZEOF_VOID_P.html + +[cmake_source_dir]: https://cmake.org/cmake/help/latest/variable/CMAKE_SOURCE_DIR.html + +[cmake_system_name]: https://cmake.org/cmake/help/latest/variable/CMAKE_SYSTEM_NAME.html + +[doxygen-download]: https://www.doxygen.nl/download.html + +[doxygen]: https://www.doxygen.nl/index.html + +[eigen]: http://eigen.tuxfamily.org/index.php?title=Main_Page + +[enable_testing]: https://cmake.org/cmake/help/latest/command/enable_testing.html + +[fetchcontent]: https://cmake.org/cmake/help/latest/module/FetchContent.html + +[find_package]: https://cmake.org/cmake/help/latest/command/find_package.html + +[findcuda]: 
https://cmake.org/cmake/help/latest/module/FindCUDA.html + +[findcudatoolkit]: https://cmake.org/cmake/help/latest/module/FindCUDAToolkit.html + +[finddoxygen]: https://cmake.org/cmake/help/latest/module/FindDoxygen.html + +[findjpeg]: https://cmake.org/cmake/help/latest/module/FindJPEG.html + +[findopencl]: https://cmake.org/cmake/help/latest/module/FindOpenCL.html + +[findpng]: https://cmake.org/cmake/help/latest/module/FindPNG.html + +[findpython3]: https://cmake.org/cmake/help/latest/module/FindPython3.html + +[findx11]: https://cmake.org/cmake/help/latest/module/FindX11.html + +[halide-generator-tutorial]: https://halide-lang.org/tutorials/tutorial_lesson_15_generators.html + +[halide-tutorials]: https://halide-lang.org/tutorials/tutorial_introduction.html + +[homebrew]: https://brew.sh + +[imported-executable]: https://cmake.org/cmake/help/latest/command/add_executable.html#imported-executables + +[imported-target]: https://cmake.org/cmake/help/latest/manual/cmake-buildsystem.7.html#imported-targets + +[include]: https://cmake.org/cmake/help/latest/command/include.html + +[install-files]: https://cmake.org/cmake/help/latest/command/install.html#files + +[install-targets]: https://cmake.org/cmake/help/latest/command/install.html#targets + +[libjpeg]: https://www.libjpeg-turbo.org/ + +[libpng]: http://www.libpng.org/pub/png/libpng.html + +[lld]: https://lld.llvm.org/ + +[msvc-cmd]: https://docs.microsoft.com/en-us/cpp/build/building-on-the-command-line + +[msvc]: https://cmake.org/cmake/help/latest/variable/MSVC.html + +[ninja-download]: https://github.com/ninja-build/ninja/releases + +[ninja]: https://ninja-build.org/ + +[openblas]: https://www.openblas.net/ + +[project-name_binary_dir]: https://cmake.org/cmake/help/latest/variable/PROJECT-NAME_BINARY_DIR.html + +[project-name_source_dir]: https://cmake.org/cmake/help/latest/variable/PROJECT-NAME_SOURCE_DIR.html + +[project]: https://cmake.org/cmake/help/latest/command/project.html + +[project_binary_dir]: 
https://cmake.org/cmake/help/latest/variable/PROJECT_BINARY_DIR.html + +[project_source_dir]: https://cmake.org/cmake/help/latest/variable/PROJECT_SOURCE_DIR.html + +[pypi-cmake]: https://pypi.org/project/cmake/ + +[python]: https://www.python.org/downloads/ + +[target-file]: https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html#target-dependent-queries + +[target_compile_definitions]: https://cmake.org/cmake/help/latest/command/target_compile_definitions.html + +[target_compile_options]: https://cmake.org/cmake/help/latest/command/target_compile_options.html + +[target_include_directories]: https://cmake.org/cmake/help/latest/command/target_include_directories.html + +[target_link_libraries]: https://cmake.org/cmake/help/latest/command/target_link_libraries.html + +[target_link_options]: https://cmake.org/cmake/help/latest/command/target_link_options.html + +[vcpkg]: https://github.com/Microsoft/vcpkg + +[vcvarsall]: https://docs.microsoft.com/en-us/cpp/build/building-on-the-command-line#vcvarsall-syntax + +[venv]: https://docs.python.org/3/tutorial/venv.html + +[win32]: https://cmake.org/cmake/help/latest/variable/WIN32.html diff --git a/doc/Hexagon.md b/doc/Hexagon.md new file mode 100644 index 000000000000..2bcdc8c99c16 --- /dev/null +++ b/doc/Hexagon.md @@ -0,0 +1,73 @@ +# Halide for Hexagon HVX + +Halide supports offloading work to Qualcomm Hexagon DSP on Qualcomm Snapdragon +845/710 devices or newer. The Hexagon DSP provides a set of 128 byte vector +instruction extensions - the Hexagon Vector eXtensions (HVX). HVX is well suited +for image processing, and Halide for Hexagon HVX will generate the appropriate +HVX vector instructions from a program authored in Halide. + +Halide can be used to compile Hexagon object files directly, by using a target +such as `hexagon-32-qurt-hvx`. + +Halide can also be used to offload parts of a pipeline to Hexagon using the +`hexagon` scheduling directive. 
To enable the `hexagon` scheduling directive, +include the `hvx` target feature in your target. The currently supported +combination of targets is to use the HVX target features with an x86 linux +host (to use the simulator) or with an ARM android target (to use Hexagon DSP +hardware). For examples of using the `hexagon` scheduling directive on both the +simulator and a Hexagon DSP, see the blur example app. + +To build and run an example app using the Hexagon target, + +1. Obtain and build trunk LLVM and Clang. (Earlier versions of LLVM may work but + are not actively tested and thus not recommended.) +2. Download and install the Hexagon SDK and Hexagon Tools. Hexagon SDK 4.3.0 or + later is needed. Hexagon Tools 8.4 or later is needed. +3. Build and run an example for Hexagon HVX + +## 1. Obtain and build trunk LLVM and Clang + +(Follow the instructions given previously, just be sure to check out the `main` +branch.) + +## 2. Download and install the Hexagon SDK and Hexagon Tools + +Go to https://qpm.qualcomm.com/#/main/home + +1. Go to Tools, and download Qualcomm Package Manager 3. Install the package + manager on your machine. +2. Run the installed Qualcomm Package Manager and install the Qualcomm Hexagon + SDK 5.x (or 4.x). The SDK can be selected from the Qualcomm Hexagon SDK + Products. +3. Set an environment variable to point to the SDK installation location + ``` + export SDK_LOC=/location/of/SDK + ``` + +## 3. Build and run an example for Hexagon HVX + +In addition to running Hexagon code on device, Halide also supports running +Hexagon code on the simulator from the Hexagon tools. + +To build and run the blur example in Halide/apps/blur on the simulator: + +``` +cd apps/blur +export HL_HEXAGON_SIM_REMOTE=../../src/runtime/hexagon_remote/bin/v65/hexagon_sim_remote +export HL_HEXAGON_TOOLS=$SDK_LOC/Hexagon_Tools/8.x/Tools/ +LD_LIBRARY_PATH=../../src/runtime/hexagon_remote/bin/host/:$HL_HEXAGON_TOOLS/lib/iss/:. 
HL_TARGET=host-hvx make test +``` + +## To build and run the blur example in Halide/apps/blur on Android: + +To build the example for Android, first ensure that you have Android NDK r19b or +later installed, and the ANDROID_NDK_ROOT environment variable points to it. +(Note that Qualcomm Hexagon SDK v4.3.0 includes Android NDK r19c, which is +fine.) + +Now build and run the blur example using the script to run it on device: + +``` +export HL_HEXAGON_TOOLS=$SDK_LOC/HEXAGON_Tools/8.4.11/Tools/ +HL_TARGET=arm-64-android-hvx ./adb_run_on_device.sh +``` diff --git a/README_python.md b/doc/Python.md similarity index 56% rename from README_python.md rename to doc/Python.md index 0ac7f94abd96..48274fd6368d 100644 --- a/README_python.md +++ b/doc/Python.md @@ -1,38 +1,39 @@ # Halide Bindings for Python - - -- [Python Requirements](#python-requirements) -- [Compilation Instructions](#compilation-instructions) -- [Documentation and Examples](#documentation-and-examples) -- [Differences from C++ API](#differences-from-c-api) -- [Example of Simple Usage](#example-of-simple-usage) -- [Halide Generators In Python](#halide-generators-in-python) - - [Writing a Generator in Python](#writing-a-generator-in-python) - - [@hl.generator\("name"\)](#hlgeneratorname) - - [hl.GeneratorParam](#hlgeneratorparam) - - [hl.InputBuffer, hl.InputScalar](#hlinputbuffer-hlinputscalar) - - [hl.OutputBuffer, hl.OutputScalar](#hloutputbuffer-hloutputscalar) - - [Names](#names) - - [generate\(\) method](#generate-method) - - [Types for Inputs and Outputs](#types-for-inputs-and-outputs) - - [Using a Generator for JIT compilation](#using-a-generator-for-jit-compilation) - - [Using a Generator for AOT compilation](#using-a-generator-for-aot-compilation) - - [Calling Generator-Produced code from Python](#calling-generator-produced-code-from-python) - - [Advanced Generator-Related Topics](#advanced-generator-related-topics) - - [Generator Aliases](#generator-aliases) - - [Dynamic Inputs and 
Outputs](#dynamic-inputs-and-outputs) - - [Calling a Generator Directly](#calling-a-generator-directly) - - [The Lifecycle Of A Generator](#the-lifecycle-of-a-generator) - - [Notable Differences Between C++ and Python Generators](#notable-differences-between-c-and-python-generators) -- [Keeping Up To Date](#keeping-up-to-date) -- [License](#license) - - + +* [Halide Bindings for Python](#halide-bindings-for-python) + * [Acquiring the Python bindings](#acquiring-the-python-bindings) + * [Building the Python bindings](#building-the-python-bindings) + * [Using CMake directly](#using-cmake-directly) + * [Using wheel infrastructure](#using-wheel-infrastructure) + * [Documentation and Examples](#documentation-and-examples) + * [Differences from C++ API](#differences-from-c-api) + * [Example of Simple Usage](#example-of-simple-usage) + * [Halide Generators In Python](#halide-generators-in-python) + * [Writing a Generator in Python](#writing-a-generator-in-python) + * [`hl.generator("name")`](#hlgeneratorname) + * [hl.GeneratorParam](#hlgeneratorparam) + * [hl.InputBuffer, hl.InputScalar](#hlinputbuffer-hlinputscalar) + * [hl.OutputBuffer, hl.OutputScalar](#hloutputbuffer-hloutputscalar) + * [Names](#names) + * [generate() method](#generate-method) + * [Types for Inputs and Outputs](#types-for-inputs-and-outputs) + * [Using a Generator for JIT compilation](#using-a-generator-for-jit-compilation) + * [Using a Generator for AOT compilation](#using-a-generator-for-aot-compilation) + * [Calling Generator-Produced code from Python](#calling-generator-produced-code-from-python) + * [Advanced Generator-Related Topics](#advanced-generator-related-topics) + * [Generator Aliases](#generator-aliases) + * [Dynamic Inputs and Outputs](#dynamic-inputs-and-outputs) + * [Calling a Generator Directly](#calling-a-generator-directly) + * [The Lifecycle Of A Generator](#the-lifecycle-of-a-generator) + * [Notable Differences Between C++ and Python 
Generators](#notable-differences-between-c-and-python-generators) + * [Keeping Up To Date](#keeping-up-to-date) + * [License](#license) + Halide provides Python bindings for most of its public API. Python 3.8 (or -higher) is required. The Python bindings are supported on 64-bit Linux, OSX, -and Windows systems. +higher) is required. The Python bindings are supported on 64-bit Linux, OSX, and +Windows systems. In addition to the ability to write just-in-time Halide code using Python, you can write [Generators](#halide-generators-in-python) using the Python bindings, @@ -42,24 +43,88 @@ is required). You can also use existing Halide Generators (written in either C++ or Python) to produce Python extensions that can be used within Python code. -## Python Requirements +## Acquiring the Python bindings -Before building, you should ensure you have prerequite packages installed in -your local Python environment. The best way to get set up is to use a virtual -environment: +As of Halide 19.0.0, we provide binary wheels on PyPI which include the Python +bindings and the C++/CMake package for native development. Full releases may be +installed with `pip` like so: -```console +```shell +$ pip install halide +``` + +Every commit to `main` is published to Test PyPI as a development version and +these may be installed with a few extra flags: + +```shell +$ pip install halide --pre --extra-index-url https://test.pypi.org/simple +``` + +Currently, we provide wheels for: Windows x86-64, macOS x86-64, macOS arm64, and +Linux x86-64. The Linux wheels are built for manylinux_2_28, which makes them +broadly compatible (Debian 10, Ubuntu 18.10, Fedora 29). + +## Building the Python bindings + +If `pip` isn't enough for your purposes, or you are developing Halide directly, +you have two options for building and using the Python bindings. 
Note that the +bindings require Halide to be built with RTTI and exceptions **enabled**, which +in turn requires LLVM to be built with RTTI, but this is not the default for +LLVM. + +### Using CMake directly + +Before configuring with CMake, you should ensure you have prerequisite packages +installed in your local Python environment. The best way to get set up is to use +a virtual environment: + +```shell $ python3 -m venv venv $ . venv/bin/activate -$ python3 -m pip install -U setuptools wheel +$ python3 -m pip install -U pip "setuptools[core]" wheel $ python3 -m pip install -r requirements.txt ``` -## Compilation Instructions +Then build and install Halide: + +```shell +$ cmake -G Ninja -S . -B build \ + -DCMAKE_BUILD_TYPE=Release \ + -DWITH_PYTHON_BINDINGS=ON +$ cmake --build build +$ cmake --install build --prefix .local +``` + +Now you can set the `PYTHONPATH` variable to point to the freshly built Python +package: + +```shell +$ export PYTHONPATH="$PWD/.local/lib/python3/site-packages" +``` + +### Using wheel infrastructure + +You can also follow the same procedure that we use to build the published +wheels. First, create a virtual environment as before, but omit +`requirements.txt` + +```shell +$ python3 -m venv venv +$ . venv/bin/activate +$ python3 -m pip install -U pip "setuptools[core]" wheel +``` + +Next, ensure you have installed Halide's dependencies to locations where CMake +can find them, given your environment. The variables `Halide_LLVM_ROOT`, +`flatbuffers_ROOT`, and `wabt_ROOT` specify locations for the relevant packages +directly. If they are all installed to a common prefix, you can add it to the +environment variable `CMAKE_PREFIX_PATH`. + +Then it should be as simple as: -Build as part of the CMake build with `-DWITH_PYTHON_BINDINGS=ON` (this is the -default). Note that this requires both Halide and LLVM to be built with RTTI and -exceptions **enabled**, which is not the default for LLVM. +```shell +$ pip install . 
+``` ## Documentation and Examples @@ -84,90 +149,87 @@ from the Halide build directory. The Python bindings attempt to mimic the Halide C++ API as closely as possible, with some differences where the C++ idiom is either inappropriate or impossible: -- Most APIs that take a variadic argument list of ints in C++ take an explicit - list in Python. For instance, the usual version of the `Buffer` ctor in C++ - offers both variadic and list versions: +- Most APIs that take a variadic argument list of ints in C++ take an explicit + list in Python. For instance, the usual version of the `Buffer` ctor in C++ + offers both variadic and list versions: - ``` - Buffer<>(Type t, int extent_dim_0, int extent_dim_1, ...., extent_dim_N, string name = ""); - Buffer<>(Type t, vector<int> extents, string name = ""); - ``` + ```cpp + Buffer<>(Type t, int extent_dim_0, int extent_dim_1, ...., extent_dim_N, string name = ""); + Buffer<>(Type t, vector<int> extents, string name = ""); + ``` - In Python, only the second variant is provided. + In Python, only the second variant is provided. -- `Func` and `Buffer` access is done using `[]` rather than `()` +- `Func` and `Buffer` access is done using `[]` rather than `()` - - For zero-dimensional `Func` and `Buffer`, you must explicitly specify - `[()]` -- that is, use an empty tuple as the index -- because `[]` is - not syntactically acceptable in Python. + - For zero-dimensional `Func` and `Buffer`, you must explicitly specify + `[()]` -- that is, use an empty tuple as the index -- because `[]` is not + syntactically acceptable in Python. -- Some classes in the Halide API aren't provided because standard Python - idioms are a better fit: +- Some classes in the Halide API aren't provided because standard Python idioms + are a better fit: - - `Halide::Tuple` doesn't exist in the Python bindings; an ordinary Python - tuple of `Halide::Expr` is used instead.
- - `Halide::Realization` doesn't exist in the Python bindings; an ordinary - Python tuple of `Halide::Buffer` is used instead. + - `Halide::Tuple` doesn't exist in the Python bindings; an ordinary Python + tuple of `Halide::Expr` is used instead. + - `Halide::Realization` doesn't exist in the Python bindings; an ordinary + Python tuple of `Halide::Buffer` is used instead. -- static and instance method overloads with the same name in the same class - aren't allowed, so some convenience methods are missing from `Halide::Var` +- static and instance method overloads with the same name in the same class + aren't allowed, so some convenience methods are missing from `Halide::Var` -- Templated types (notably `Halide::Buffer<>` and `Halide::Param<>`) aren't - provided, for obvious reasons; only the equivalents of - `Halide::Buffer<void>` and `Halide::Param<void>` are supported. +- Templated types (notably `Halide::Buffer<>` and `Halide::Param<>`) aren't + provided, for obvious reasons; only the equivalents of + `Halide::Buffer<void>` and `Halide::Param<void>` are supported. -- The functions in `Halide::ConciseCasts` are present in the toplevel Halide - module in Python, rather than a submodule: e.g., use `halide.i8_sat()`, not - `halide.ConciseCasts.i8_sat()`. +- The functions in `Halide::ConciseCasts` are present in the toplevel Halide + module in Python, rather than a submodule: e.g., use `halide.i8_sat()`, not + `halide.ConciseCasts.i8_sat()`. -- Only things in the `Halide` namespace are supported; classes and methods - that involve using the `Halide::Internal` namespace are not provided. +- Only things in the `Halide` namespace are supported; classes and methods that + involve using the `Halide::Internal` namespace are not provided. -- No mechanism is provided for overriding any runtime functions from Python - for JIT-compiled code. (Runtime functions for AOT-compiled code can be - overridden by building and linking a custom runtime, but not currently - via any runtime API, e.g.
halide_set_custom_print() does not exist.) +- No mechanism is provided for overriding any runtime functions from Python for + JIT-compiled code. (Runtime functions for AOT-compiled code can be overridden + by building and linking a custom runtime, but not currently via any runtime + API, e.g. halide_set_custom_print() does not exist.) -- No mechanism is provided for supporting `Func::define_extern`. +- No mechanism is provided for supporting `Func::define_extern`. -- `Buffer::for_each_value()` isn't supported yet. +- `Buffer::for_each_value()` isn't supported yet. -- `Func::in` becomes `Func.in_` because `in` is a Python keyword. +- `Func::in` becomes `Func.in_` because `in` is a Python keyword. -- `Func::async` becomes `Func.async_` because `async` is a Python keyword. +- `Func::async` becomes `Func.async_` because `async` is a Python keyword. -- The `not` keyword cannot be used to negate boolean Halide expressions. - Instead, the `logical_not` function can be used and is equivalent to using - `operator!` in C++. +- The `not` keyword cannot be used to negate boolean Halide expressions. + Instead, the `logical_not` function can be used and is equivalent to using + `operator!` in C++. -- There is no way to override the logical `and`/`or` operators in Python to - work with `Expr`: you must use the bitwise `|` and `&` instead. (Note that - incorrectly using using `and`/`or` just short-circuits weirdly, rather than - failing with some helpful error; this is an issue that we have not yet found - any way to improve, unfortunately.) +- There is no way to override the logical `and`/`or` operators in Python to work + with `Expr`: you must use the bitwise `|` and `&` instead. (Note that + incorrectly using `and`/`or` just short-circuits weirdly, rather than failing + with some helpful error; this is an issue that we have not yet found any way + to improve, unfortunately.) -- Some error messages need to be made more informative. 
+- Some error messages need to be made more informative. -- Some exceptions are the "incorrect" type (compared to C++ expectations). +- Some exceptions are the "incorrect" type (compared to C++ expectations). -- Many hooks to override runtime functions (e.g. Func::set_error_handler) - aren't yet implemented. +- Many hooks to override runtime functions (e.g. Func::set_error_handler) + aren't yet implemented. -- The following parts of the Halide public API are currently missing entirely - from the Python bindings (but are all likely to be supported at some point - in the future): +- The following parts of the Halide public API are currently missing entirely + from the Python bindings (but are all likely to be supported at some point in + the future): - - `DeviceInterface` - - `evaluate()` + - `DeviceInterface` + - `evaluate()` ## Example of Simple Usage -The Python bindings for Halide are built as a standard part of the `install` -target, and are present in the Halide install location at -`$HALIDE_INSTALL/lib/python3/site-packages`; adding that to your `PYTHONPATH` -should allow you to simply `import halide`: +Here is a basic example of using Halide to produce a procedural image. -``` +```python # By convention, we import halide as 'hl' for terseness import halide as hl @@ -187,6 +249,7 @@ buf = f.realize([edge, edge, 3]) # Do something with the image. We'll just save it to a PNG. from halide import imageio + imageio.imwrite("/tmp/example.png", buf) ``` @@ -202,13 +265,13 @@ objects without any explicit conversion necessary. In Halide, a "Generator" is a unit of encapsulation for Halide code. 
It is a self-contained piece of code that can: -- Produce a chunk of Halide IR (in the form of an `hl.Pipeline`) that is - appropriate for compilation (via either JIT or AOT) -- Expose itself to the build system in a discoverable way -- Fully describe itself for the build system with metadata for (at least) the - type and number of inputs and outputs expected -- Allow for build-time customization of coder-specified parameters in a way - that doesn't require editing of source code +- Produce a chunk of Halide IR (in the form of an `hl.Pipeline`) that is + appropriate for compilation (via either JIT or AOT) +- Expose itself to the build system in a discoverable way +- Fully describe itself for the build system with metadata for (at least) the + type and number of inputs and outputs expected +- Allow for build-time customization of coder-specified parameters in a way that + doesn't require editing of source code Originally, Halide only supported writing Generators in C++. In this document, we'll use the term "C++ Generator" to mean "Generator written in C++ using the @@ -220,42 +283,43 @@ neutral with respect to the implementation language/API. A Python Generator is a class that: -- has the `@hl.generator` decorator applied to it -- declares zero or more member fields that are initialized with values of - `hl.InputBuffer` or `hl.InputScalar`, which specify the expected input(s) of - the resulting `Pipeline`. -- declares one or more member fields that are initialized with values of - `hl.OutputBuffer` or `hl.OutputScalar`, which specify the expected output(s) - of the resulting `Pipeline`. -- declares zero or more member fields that are initialized with values of - `hl.GeneratorParam`, which can be used to pass arbitrary information from - the build system to the Generator. A GeneratorParam can carry a value of - type `bool`, `int`, `float`, `str`, or `hl.Type`. 
-- declares a `generate()` method that fill in the Halide IR needed to define - all of the Outputs -- optionally declares a `configure()` method to dynamically add Inputs or - Outputs to the pipeline, based on (e.g.) the values of `GeneratorParam` - values or other external inputs +- has the `@hl.generator` decorator applied to it +- declares zero or more member fields that are initialized with values of + `hl.InputBuffer` or `hl.InputScalar`, which specify the expected input(s) of + the resulting `Pipeline`. +- declares one or more member fields that are initialized with values of + `hl.OutputBuffer` or `hl.OutputScalar`, which specify the expected output(s) + of the resulting `Pipeline`. +- declares zero or more member fields that are initialized with values of + `hl.GeneratorParam`, which can be used to pass arbitrary information from the + build system to the Generator. A GeneratorParam can carry a value of type + `bool`, `int`, `float`, `str`, or `hl.Type`. +- declares a `generate()` method that fills in the Halide IR needed to define all + the Outputs +- optionally declares a `configure()` method to dynamically add Inputs or + Outputs to the pipeline, based on (e.g.) the values of `GeneratorParam` + values or other external inputs Let's look at a fairly simple example: > **TODO:** this example is pretty contrived; is there an equally simple > Generator to use here that would demonstrate the basics? -``` +```python import halide as hl x = hl.Var('x') y = hl.Var('y') _operators = { - 'xor': lambda a, b: a ^ b, - 'and': lambda a, b: a & b, - 'or': lambda a, b: a | b + 'xor': lambda a, b: a ^ b, + 'and': lambda a, b: a & b, + 'or': lambda a, b: a | b } + # Apply a mask value to a 2D image using a logical operator that is selected at compile-time.
-@hl.generator(name = "logical_op_generator") +@hl.generator(name="logical_op_generator") class LogicalOpGenerator: op = hl.GeneratorParam("xor") @@ -273,6 +337,7 @@ class LogicalOpGenerator: v = g.natural_vector_size(hl.UInt(8)) g.output.vectorize(x, v) + if __name__ == "__main__": hl.main() ``` @@ -283,16 +348,16 @@ C++ Generators for the following to make sense.) Let's take the details here one at a time. -#### @hl.generator("name") +#### `hl.generator("name")` This decorator adds appropriate "glue" machinery to the class to enforce various invariants. It also serves as the declares a "registered name" for the Generator, which is a unique name that the build system will use to identify the -Generator. If you omit the name, it defaults to defaults to `module.classname`; -if module is `__main__` then we omit it and just use the plain classname. Note -that the registered name need not match the classname. (Inside Halide, we use -the convention of `CamelCase` for class names and `snake_case` for registered -names, but you can use whatever convention you like.) +Generator. If you omit the name, it defaults to `module.classname`; if module is +`__main__` then we omit it and just use the plain classname. Note that the +registered name need not match the classname. (Inside Halide, we use the +convention of `CamelCase` for class names and `snake_case` for registered names, +but you can use whatever convention you like.) #### hl.GeneratorParam @@ -305,9 +370,9 @@ Note that the type of the default value *is* used to define the expected type of the `GeneratorParam`, and trying to set it to an incompatible value will throw an exception. 
The types that are acceptable to use in a `GeneratorParam` are: -- Python's `bool`, `int`, `float`, or `str` -- Halide's `hl.Type` -- ...that's all +- Python's `bool`, `int`, `float`, or `str` +- Halide's `hl.Type` +- ...that's all Note that the value of a `GeneratorParam` is read-only from the point of view of the Generator; they are set at Generator construction time and attempting to @@ -355,8 +420,8 @@ in the build system. #### Names -Note that all of the GeneratorParams, Inputs, and Outputs have names that are -implicitly filled in based on the fieldname of their initial assignment; unlike +Note that all the GeneratorParams, Inputs, and Outputs have names that are +implicitly filled in based on the field name of their initial assignment; unlike in C++ Generators, there isn't a way to "override" this name (i.e., the name in the IR will always exactly match the Python field name). Names have the same constraints as for C++ Generators (essentially, a C identifier, but without an @@ -377,9 +442,9 @@ way required, but is recommended to improve readability.) #### Types for Inputs and Outputs -For all of the Input and Output fields of Generators, you can specify native -Python types (instead of `hl.Type`) for certain cases that are unambiguous. At -present, we allow `bool` as an alias for `hl.Bool()`, `int` as an alias for +For all the Input and Output fields of Generators, you can specify native Python +types (instead of `hl.Type`) for certain cases that are unambiguous. At present, +we allow `bool` as an alias for `hl.Bool()`, `int` as an alias for `hl.Int(32)`, and `float` as an alias for `hl.Float(32)`. ### Using a Generator for JIT compilation @@ -387,7 +452,7 @@ present, we allow `bool` as an alias for `hl.Bool()`, `int` as an alias for You can use the `compile_to_callable()` method to JIT-compile a Generator into a `hl.Callable`, which is (essentially) just a dynamically-created function. 
-``` +```python import LogicalOpGenerator from halide import imageio import numpy as np @@ -421,7 +486,7 @@ value of the `HL_JIT_TARGET` environment variable, if set); you can override this behavior selectively by activating a `GeneratorContext` when the Generator is *created*: -``` +```python import LogicalOpGenerator # Compile with debugging enabled @@ -436,7 +501,7 @@ with hl.GeneratorContext(t): If you are using CMake, the simplest thing is to use `add_halide_library` and `add_halide_python_extension_library()`: -``` +```cmake # Build a Halide library as you usually would, but be sure to include `PYTHON_EXTENSION` add_halide_library(xor_filter FROM logical_op_generator @@ -456,16 +521,18 @@ add_halide_python_extension_library(my_extension (Note that this rule works for both C++ and Python Generators.) This compiles the Generator code in `logical_op_generator.py` with the -registered name `logical_op_generator` to produce the target `xor_filter`, and then wraps -the compiled output with a Python extension. The result will be a shared library of the form -`<target>.<soabi>.so`, where <soabi> describes the specific Python version and -platform (e.g., `cpython-310-darwin` for Python 3.10 on OSX.) +registered name `logical_op_generator` to produce the target `xor_filter`, and +then wraps the compiled output with a Python extension. The result will be a +shared library of the form `<target>.<soabi>.so`, where `<soabi>` describes +the specific Python version and platform (e.g., `cpython-310-darwin` for +Python 3.10 on OSX.) Note that you can combine multiple Halide libraries into a single Python module; -this is convenient for packagaing, but also because all the libraries in a single -extension module share the same Halide runtime (and thus, the same caches, thread pools, etc.). +this is convenient for packaging, but also because all the libraries in a single +extension module share the same Halide runtime (and thus, the same caches, +thread pools, etc.). -``` +```cmake add_halide_library(xor_filter ...)
add_halide_library(and_filter ...) add_halide_library(or_filter ...) @@ -475,11 +542,12 @@ add_halide_python_extension_library(my_extension HALIDE_LIBRARIES xor_filter and_filter or_filter) ``` -Note that you must take care to ensure that all of the `add_halide_library` targets -specified use the same Halide runtime; it may be necessary to use `add_halide_runtime` -to define an explicit runtime that is shared by all of the targets: +Note that you must take care to ensure that all of the `add_halide_library` +targets specified use the same Halide runtime; it may be necessary to use +`add_halide_runtime` +to define an explicit runtime that is shared by all the targets: -``` +```cmake add_halide_runtime(my_runtime) add_halide_library(xor_filter USE_RUNTIME my_runtime ...) @@ -495,7 +563,7 @@ If you're not using CMake, you can "drive" a Generator directly from your build system via command-line flags. The most common, minimal set looks something like this: -``` +```shell python3 /path/to/my/generator.py -g \ -o \ target= \ @@ -505,24 +573,25 @@ python3 /path/to/my/generator.py -g \ The argument to `-g` is the name supplied to the `@hl.generator` decorator. The argument to -o is a directory to use for the output files; by default, we'll produce a static library containing the object code, and a C++ header file with -a forward declaration. `target` specifies a Halide `Target` string decribing the -OS, architecture, features, etc that should be used for compilation. Any other -arguments to the command line that don't begin with `-` are presumed to name +a forward declaration. `target` specifies a Halide `Target` string describing +the OS, architecture, features, etc. that should be used for compilation. Any +other arguments to the command line that don't begin with `-` are presumed to +name `GeneratorParam` values to set. There are other flags and options too, of course; use `python3 /path/to/my/generator.py -help` to see a list with explanations. 
(Unfortunately, there isn't (yet) a way to produce a Python Extension just by -running a Generator; the logic for `add_halide_python_extension_library` is currently all -in the CMake helper files.) +running a Generator; the logic for `add_halide_python_extension_library` is +currently all in the CMake helper files.) ### Calling Generator-Produced code from Python As long as the shared library is in `PYTHONPATH`, it can be imported and used directly. For the example above: -``` +```python from my_module import xor_filter from halide import imageio import numpy as np @@ -548,17 +617,17 @@ Above, we're using common Python utilities (`numpy`) to construct the input/output buffers we want to pass to Halide. **Note**: Getting the memory order correct can be a little confusing for numpy. -By default numpy uses "C-style" +By default, numpy uses "C-style" [row-major](https://docs.scipy.org/doc/numpy-1.13.0/reference/internals.html) order, which sounds like the right option for Halide; however, this nomenclature assumes the matrix-math convention of ordering axes as `[rows, cols]`, whereas Halide (and imaging code in general) generally assumes `[x, y]` (i.e., `[cols, -rows]`). Thus what you usually want in Halide is column-major ordering. This +rows]`). Thus, what you usually want in Halide is column-major ordering. This means numpy arrays, by default, come with the wrong memory layout for Halide. But if you construct the numpy arrays yourself (like above), you can pass `order='F'` to make numpy use the Halide-compatible memory layout. If you're passing in an array constructed somewhere else, the easiest thing to do is to -`.transpose()` it before passing it to your Halide code.) +`.transpose()` it before passing it to your Halide code. ### Advanced Generator-Related Topics @@ -570,7 +639,7 @@ offers a convenient alternative to specifying multiple sets of GeneratorParams via the build system. 
To define alias(es) for a Generator, just add the `@hl.alias` decorator before `@hl.generator` decorator: -``` +```python @hl.alias( xor_generator={"op": "xor"}, and_generator={"op": "and"}, @@ -588,20 +657,21 @@ If you need to build `Input` and/or `Output` dynamically, you can define a are valid, but before `generate()` is called. Let's take our example and add an option to pass an offset to be added after the logical operator is done: -``` +```python import halide as hl x = hl.Var('x') y = hl.Var('y') _operators = { - 'xor': lambda a, b: a ^ b, - 'and': lambda a, b: a & b, - 'or': lambda a, b: a | b + 'xor': lambda a, b: a ^ b, + 'and': lambda a, b: a & b, + 'or': lambda a, b: a | b } + # Apply a mask value to a 2D image using a logical operator that is selected at compile-time. -@hl.generator(name = "logical_op_generator") +@hl.generator(name="logical_op_generator") class LogicalOpGenerator: op = hl.GeneratorParam("xor") with_offset = hl.GeneratorParam(False) @@ -614,7 +684,7 @@ class LogicalOpGenerator: def configure(g): # If with_offset is specified, we if g.with_offset: - g.add_input("offset", hl.InputScalar(hl.Int(32))) + g.add_input("offset", hl.InputScalar(hl.Int(32))) # See note the use of 'g' instead of 'self' here def generate(g): @@ -629,6 +699,7 @@ class LogicalOpGenerator: v = g.natural_vector_size(hl.UInt(8)) g.output.vectorize(x, v) + if __name__ == "__main__": hl.main() ``` @@ -649,7 +720,7 @@ it. This can be especially useful when writing library code, as you can This method is named `call()` and looks like this: -``` +```python @classmethod def call(cls, *args, **kwargs): ... @@ -661,7 +732,7 @@ which is a simple Python dict that allows for overriding `GeneratorParam`s. It returns a tuple of the Output values. 
For the earlier example, usage might be something like: -``` +```python import LogicalOpFilter x, y = hl.Var(), hl.Var() @@ -677,8 +748,8 @@ func_out = LogicalOpFilter.call(mask=mask_value, input=input_buf) # Above again, but with generator_params func_out = LogicalOpFilter.call(input_buf, mask_value, - generator_params = {"op": "and"}) -func_out = LogicalOpFilter.call(generator_params = {"op": and}, + generator_params={"op": "and"}) +func_out = LogicalOpFilter.call(generator_params={"op": "and"}, input=input_buf, mask=mask_value) ``` @@ -688,32 +759,32 @@ Whether being driven by a build system (for AOT use) or by another piece of Python code (typically for JIT use), the lifecycle of a Generator looks something like this: -- An instance of the Generator in question is created. It uses the - currently-active `GeneratorContext` (which contains the `Target` to be used - for code generation), which is stored in a thread-local stack. -- Some (or all) of the default values of the `GeneratorParam` members may be - replaced based on (e.g.) command-line arguments in the build system -- All `GeneratorParam` members are made immutable. -- The `configure()` method is called, allowing the Generator to use - `add_input()` or `add_output()` to dynamically add inputs and/or outputs. -- If any `Input` or `Output` members were defined with unspecified type or - dimensions (e.g. `some_input = hl.InputBuffer(None, 3)`), those types and - dimensions are filled in from `GeneratorParam` values (e.g. - `some_input.type` in this case). If any types or dimensions are left - unspecified after this step, an exception will be thrown. -- If the Generator is being invoked via its `call()` method (see below), the - default values for `Inputs` will be replaced by the values from the argument - list. -- The Generator instance has its `generate()` method called. 
-- The calling code will extract the values of all `Output` values and validate - that they match the type, dimensions, etc of the declarations. -- The calling code will then either call `compile_to_file()` and friends (for - AOT use), or return the output values to the caller (for JIT use). -- Finally, the Generator instance will be discarded, never to be used again. - -Note that almost all of the code doing the hand-wavy bits above is injected by -the `@hl.generator` decorator – the Generator author doesn't need to know or -care about the specific details, only that they happen. +- An instance of the Generator in question is created. It uses the + currently-active `GeneratorContext` (which contains the `Target` to be used + for code generation), which is stored in a thread-local stack. +- Some (or all) of the default values of the `GeneratorParam` members may be + replaced based on (e.g.) command-line arguments in the build system +- All `GeneratorParam` members are made immutable. +- The `configure()` method is called, allowing the Generator to use + `add_input()` or `add_output()` to dynamically add inputs and/or outputs. +- If any `Input` or `Output` members were defined with unspecified type or + dimensions (e.g. `some_input = hl.InputBuffer(None, 3)`), those types and + dimensions are filled in from `GeneratorParam` values (e.g. + `some_input.type` in this case). If any types or dimensions are left + unspecified after this step, an exception will be thrown. +- If the Generator is being invoked via its `call()` method (see below), the + default values for `Inputs` will be replaced by the values from the argument + list. +- The Generator instance has its `generate()` method called. +- The calling code will extract the values of all `Output` values and validate + that they match the type, dimensions, etc. of the declarations. 
+- The calling code will then either call `compile_to_file()` and friends (for + AOT use), or return the output values to the caller (for JIT use). +- Finally, the Generator instance will be discarded, never to be used again. + +Note that almost all the code doing the hand-wavy bits above is injected by the +`@hl.generator` decorator – the Generator author doesn't need to know or care +about the specific details, only that they happen. All Halide Generators are **single-use** instances – that is, any given Generator instance should be used at most once. If a Generator is to be executed @@ -726,35 +797,35 @@ If you have written C++ Generators in Halide in the past, you might notice some features are missing and/or different for Python Generators. Among the differences are: -- In C++, you can create a Generator, then call `set_generatorparam_value()` - to alter the values of GeneratorParams. In Python, there is no public - method to alter a GeneratorParam after the Generator is created; instead, - you must pass a dict of GeneratorParam values to the constructor, after - which the values are immutable for that Generator instance. -- Array Inputs/Outputs: in our experience, they are pretty rarely used, it - complicates the implementation in nontrivial ways, and the majority of use - cases for them can all be reasonably supported by dynamically adding inputs - or outputs (and saving the results in a local array). -- `Input` and `Output`: these were deliberately left out in order - to simplify Python Generators. It's possible that something similar might be - added in the future. -- GeneratorParams with LoopLevel types: these aren't useful without - `Input`/`Output`. -- GeneratorParams with Enum types: using a plain `str` type in Python is - arguably just as easy, if not easier. -- `get_externs_map()`: this allows registering ExternalCode objects to be - appended to the Generator's code. In our experience, this feature is very - rarely used. 
We will consider adding this in the future if necessary. -- Lazy Binding of Unspecified Input/Output Types: for C++ Generators, if you - left an Output's type (or dimensionality) unspecified, you didn't always - have to specify a `GeneratorParam` to make it into a concrete type: if the - type was always fully specified by the contents of the `generate()` method, - that was good enough. In Python Generators, by contrast, **all** types and - dimensions must be **explicitly** specified by either code declaration or by - `GeneratorParam` setting. This simplifies the internal code in nontrivial - ways, and also allows for (arguably) more readable code, since there are no - longer cases that require the reader to execute the code in their head in - order to deduce the output types. +- In C++, you can create a Generator, then call `set_generatorparam_value()` + to alter the values of GeneratorParams. In Python, there is no public method + to alter a GeneratorParam after the Generator is created; instead, you must + pass a dict of GeneratorParam values to the constructor, after which the + values are immutable for that Generator instance. +- Array Inputs/Outputs: in our experience, they are pretty rarely used, it + complicates the implementation in nontrivial ways, and the majority of use + cases for them can all be reasonably supported by dynamically adding inputs or + outputs (and saving the results in a local array). +- `Input<Func>` and `Output<Func>`: these were deliberately left out in order to + simplify Python Generators. It's possible that something similar might be + added in the future. +- GeneratorParams with LoopLevel types: these aren't useful without + `Input<Func>`/`Output<Func>`. +- GeneratorParams with Enum types: using a plain `str` type in Python is + arguably just as easy, if not easier. +- `get_externs_map()`: this allows registering ExternalCode objects to be + appended to the Generator's code. In our experience, this feature is very + rarely used.
We will consider adding this in the future if necessary. +- Lazy Binding of Unspecified Input/Output Types: for C++ Generators, if you + left an Output's type (or dimensionality) unspecified, you didn't always have + to specify a `GeneratorParam` to make it into a concrete type: if the type was + always fully specified by the contents of the `generate()` method, that was + good enough. In Python Generators, by contrast, **all** types and dimensions + must be **explicitly** specified by either code declaration or by + `GeneratorParam` setting. This simplifies the internal code in nontrivial + ways, and also allows for (arguably) more readable code, since there are no + longer cases that require the reader to execute the code in their head in + order to deduce the output types. ## Keeping Up To Date @@ -767,8 +838,7 @@ in future releases. ## License The Python bindings use the same -[MIT license](https://github.com/halide/Halide/blob/main/LICENSE.txt) as -Halide. +[MIT license](https://github.com/halide/Halide/blob/main/LICENSE.txt) as Halide. Python bindings provided by Connelly Barnes (2012-2013), Fred Rotbart (2014), Rodrigo Benenson (2015) and the Halide open-source community. 
diff --git a/README_rungen.md b/doc/RunGen.md similarity index 100% rename from README_rungen.md rename to doc/RunGen.md diff --git a/README_vulkan.md b/doc/Vulkan.md similarity index 100% rename from README_vulkan.md rename to doc/Vulkan.md diff --git a/README_webassembly.md b/doc/WebAssembly.md similarity index 100% rename from README_webassembly.md rename to doc/WebAssembly.md diff --git a/README_webgpu.md b/doc/WebGPU.md similarity index 100% rename from README_webgpu.md rename to doc/WebGPU.md diff --git a/packaging/CMakeLists.txt b/packaging/CMakeLists.txt index 8c263522f29d..ba6e0597ecc2 100644 --- a/packaging/CMakeLists.txt +++ b/packaging/CMakeLists.txt @@ -17,6 +17,25 @@ set(Halide_INSTALL_PLUGINDIR "${CMAKE_INSTALL_LIBDIR}" set(Halide_INSTALL_TOOLSDIR "${CMAKE_INSTALL_DATADIR}/tools" CACHE STRING "Path to Halide build-time tools and sources") +## +# RPATH patching helper +## + +function(_Halide_compute_rpath) + cmake_parse_arguments(PARSE_ARGV 0 ARG "" "ORIGIN_DIR;LIB_DIR" "TARGETS") + if (APPLE) + set(rbase @loader_path) + else () + set(rbase $ORIGIN) + endif () + + file(RELATIVE_PATH lib_dir + ${CMAKE_CURRENT_BINARY_DIR}/${ARG_ORIGIN_DIR} + ${CMAKE_CURRENT_BINARY_DIR}/${ARG_LIB_DIR}) + + set_target_properties(${ARG_TARGETS} PROPERTIES INSTALL_RPATH "${rbase};${rbase}/${lib_dir}") +endfunction() + ## # Main library exports ## @@ -30,10 +49,20 @@ install(TARGETS Halide Halide_Generator Halide_GenGen FILE_SET HEADERS COMPONENT Halide_Development) if (WITH_AUTOSCHEDULERS) - install(TARGETS Halide_Adams2019 Halide_Li2018 Halide_Mullapudi2016 Halide_Anderson2021 + set(autoschedulers Halide_Adams2019 Halide_Li2018 Halide_Mullapudi2016 Halide_Anderson2021) + + install(TARGETS ${autoschedulers} EXPORT Halide_Interfaces LIBRARY DESTINATION ${Halide_INSTALL_PLUGINDIR} COMPONENT Halide_Runtime NAMELINK_COMPONENT Halide_Development) + + if (NOT CMAKE_INSTALL_RPATH) + _Halide_compute_rpath( + TARGETS ${autoschedulers} + ORIGIN_DIR "${Halide_INSTALL_PLUGINDIR}" + 
LIB_DIR "${CMAKE_INSTALL_LIBDIR}" + ) + endif () endif () ## @@ -62,18 +91,10 @@ install(TARGETS Halide_Tools Halide_ImageIO Halide_RunGenMain Halide_ThreadPool # Install command-line utils ## -if (WITH_AUTOSCHEDULERS AND WITH_UTILS) - file(RELATIVE_PATH lib_dir - ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_INSTALL_BINDIR} - ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}) +set(utils Halide_GenRT) - if (APPLE) - set(rbase @loader_path) - else () - set(rbase $ORIGIN) - endif () - - set(utils +if (WITH_AUTOSCHEDULERS AND WITH_UTILS) + list(APPEND utils adams2019_retrain_cost_model adams2019_weightsdir_to_weightsfile anderson2021_retrain_cost_model @@ -81,26 +102,33 @@ if (WITH_AUTOSCHEDULERS AND WITH_UTILS) featurization_to_sample get_host_target ) - if (NOT CMAKE_INSTALL_RPATH) - set_target_properties(${utils} PROPERTIES INSTALL_RPATH "${rbase};${rbase}/${lib_dir}") - endif () - install(TARGETS ${utils} EXPORT Halide_Interfaces COMPONENT Halide_Development) endif () +if (NOT CMAKE_INSTALL_RPATH) + _Halide_compute_rpath( + TARGETS ${utils} + ORIGIN_DIR "${CMAKE_INSTALL_BINDIR}" + LIB_DIR "${CMAKE_INSTALL_LIBDIR}" + ) +endif () + +install(TARGETS ${utils} EXPORT Halide_Interfaces COMPONENT Halide_Development) + ## # READMEs and other top-level documentation ## install(FILES - ${Halide_SOURCE_DIR}/README_cmake.md ${Halide_SOURCE_DIR}/README.md - ${Halide_SOURCE_DIR}/README_python.md - ${Halide_SOURCE_DIR}/README_rungen.md - ${Halide_SOURCE_DIR}/README_webassembly.md ${Halide_SOURCE_DIR}/LICENSE.txt COMPONENT Halide_Documentation TYPE DOC) +install(DIRECTORY "${Halide_SOURCE_DIR}/doc" + COMPONENT Halide_Documentation + TYPE DOC + FILES_MATCHING PATTERN "*.md") + ## # Tools ## @@ -163,14 +191,25 @@ write_basic_package_version_file(HalideHelpersConfigVersion.cmake COMPATIBILITY SameMajorVersion ARCH_INDEPENDENT) -# Compute a hint to make it easier to find HalideHelpers from find_package(Halide) -# This is read by configure_file below. 
-file(RELATIVE_PATH HalideHelpers_HINT - "${CMAKE_CURRENT_BINARY_DIR}/${Halide_INSTALL_CMAKEDIR}" - "${CMAKE_CURRENT_BINARY_DIR}/${Halide_INSTALL_HELPERSDIR}") +if (WITH_PYTHON_BINDINGS) + set(extra_paths Halide_Python_INSTALL_CMAKEDIR) +else () + set(extra_paths "") +endif () -configure_file(common/HalideConfig.cmake HalideConfig.cmake @ONLY) -configure_file(common/HalideHelpersConfig.cmake HalideHelpersConfig.cmake @ONLY) +configure_package_config_file( + common/HalideConfig.cmake HalideConfig.cmake + PATH_VARS Halide_INSTALL_HELPERSDIR ${extra_paths} + INSTALL_DESTINATION "${Halide_INSTALL_CMAKEDIR}" + NO_SET_AND_CHECK_MACRO + NO_CHECK_REQUIRED_COMPONENTS_MACRO +) + +configure_package_config_file( + common/HalideHelpersConfig.cmake HalideHelpersConfig.cmake + INSTALL_DESTINATION "${Halide_INSTALL_HELPERSDIR}" + NO_SET_AND_CHECK_MACRO +) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/HalideConfig.cmake @@ -210,6 +249,14 @@ if (WITH_DOCS) COMPONENT Halide_Documentation) endif () +## +# Pip overrides +## + +if (SKBUILD) + add_subdirectory(pip) +endif () + ## # General packaging variables. ## diff --git a/packaging/common/HalideConfig.cmake b/packaging/common/HalideConfig.cmake index 664479f33a29..383371dd7251 100644 --- a/packaging/common/HalideConfig.cmake +++ b/packaging/common/HalideConfig.cmake @@ -1,4 +1,5 @@ cmake_minimum_required(VERSION 3.28) +@PACKAGE_INIT@ macro(Halide_fail message) set(${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE "${message}") @@ -19,7 +20,7 @@ macro(Halide_find_component_dependency comp dep) endif () endmacro() -set(Halide_known_components Halide PNG JPEG static shared) +set(Halide_known_components Halide Python PNG JPEG static shared) set(Halide_components Halide PNG JPEG) foreach (Halide_comp IN LISTS Halide_known_components) @@ -46,6 +47,7 @@ endif () # Inform downstreams of potential compatibility issues. 
For instance, exceptions # and RTTI must both be enabled to build Python bindings and ASAN builds should # not be mixed with non-ASAN builds. +set(WITH_AUTOSCHEDULERS "@WITH_AUTOSCHEDULERS@") set(Halide_ENABLE_EXCEPTIONS "@Halide_ENABLE_EXCEPTIONS@") set(Halide_ENABLE_RTTI "@Halide_ENABLE_RTTI@") set(Halide_ASAN_ENABLED "@Halide_ASAN_ENABLED@") @@ -58,7 +60,7 @@ include(CMakeFindDependencyMacro) find_dependency( HalideHelpers "@Halide_VERSION@" EXACT - HINTS "${CMAKE_CURRENT_LIST_DIR}/@HalideHelpers_HINT@" + HINTS "@PACKAGE_Halide_INSTALL_HELPERSDIR@" ) if (Halide_comp_PNG) @@ -110,11 +112,19 @@ else () endif () endif () +## Load Python component +if (Halide_comp_Python OR "@WITH_PYTHON_BINDINGS@") + Halide_find_component_dependency( + Python Halide_Python + HINTS "@PACKAGE_Halide_Python_INSTALL_CMAKEDIR@" + ) +endif () + ## Hide variables and helper macros that are not part of our API. # Delete internal component tracking foreach (comp IN LISTS Halide_known_components) - unset(Halide_comp_${comp}) + unset(Halide_comp_${comp}) endforeach () unset(Halide_components) diff --git a/packaging/common/HalideHelpersConfig.cmake b/packaging/common/HalideHelpersConfig.cmake index aa98ce7847e2..57462d747f68 100644 --- a/packaging/common/HalideHelpersConfig.cmake +++ b/packaging/common/HalideHelpersConfig.cmake @@ -1,4 +1,5 @@ cmake_minimum_required(VERSION 3.28) +@PACKAGE_INIT@ set(Halide_HOST_TARGET @Halide_HOST_TARGET@) @@ -6,3 +7,5 @@ include(${CMAKE_CURRENT_LIST_DIR}/Halide-Interfaces.cmake) include(${CMAKE_CURRENT_LIST_DIR}/HalideTargetHelpers.cmake) include(${CMAKE_CURRENT_LIST_DIR}/HalideGeneratorHelpers.cmake) include(${CMAKE_CURRENT_LIST_DIR}/TargetExportScript.cmake) + +check_required_components(${CMAKE_FIND_PACKAGE_NAME}) diff --git a/packaging/pip/CMakeLists.txt b/packaging/pip/CMakeLists.txt new file mode 100644 index 000000000000..6f12ca6054e2 --- /dev/null +++ b/packaging/pip/CMakeLists.txt @@ -0,0 +1,48 @@ +## +# Create a trampoline to the real *Config.cmake. 
+# +# The various wheel directories get grafted to one of an unpredictable set +# of paths determined by sysconfig. The trampoline finds platlib via sysconfig +# before jumping to the real *Config.cmake inside our pip package. + +function(configure_trampoline PACKAGE INSTALL_DIR) + configure_file(TrampolineConfig.cmake.in "${PACKAGE}Config.cmake" @ONLY) +endfunction() + +configure_trampoline(Halide "${Halide_INSTALL_CMAKEDIR}") +configure_trampoline(HalideHelpers "${Halide_INSTALL_HELPERSDIR}") + +install( + FILES "${CMAKE_CURRENT_BINARY_DIR}/HalideConfig.cmake" + # It's better to duplicate the version file than to trampoline to it, as + # this would require calling find_package(Python) in the version file. + "${CMAKE_CURRENT_BINARY_DIR}/../HalideConfigVersion.cmake" + # It's okay to hard-code the destination because this code is only + # called by scikit-build-core. Installing to /data ultimately installs + # to the Python root installation directory, whose bin/ directory is + # located on the PATH. On Unix systems, this is directly compatible with + # find_package. Python on Windows unfortunately uses Scripts/ instead of + # bin/, which CMake does not understand. These users can add %VIRTUAL_ENV% + # to their CMAKE_PREFIX_PATH. + DESTINATION "${SKBUILD_DATA_DIR}/share/cmake/Halide" + COMPONENT Halide_Python +) + +# Same thing for HalideHelpers +install( + FILES "${CMAKE_CURRENT_BINARY_DIR}/HalideHelpersConfig.cmake" + "${CMAKE_CURRENT_BINARY_DIR}/../HalideHelpersConfigVersion.cmake" + DESTINATION "${SKBUILD_DATA_DIR}/share/cmake/HalideHelpers" + COMPONENT Halide_Python +) + +## +# Set up RPATH for the Python bindings plugin. 
+ +if (WITH_PYTHON_BINDINGS) + _Halide_compute_rpath( + TARGETS Halide_Python + ORIGIN_DIR "${Halide_INSTALL_PYTHONDIR}" + LIB_DIR "${CMAKE_INSTALL_LIBDIR}" + ) +endif () \ No newline at end of file diff --git a/packaging/pip/README.md b/packaging/pip/README.md new file mode 100644 index 000000000000..0c785e432959 --- /dev/null +++ b/packaging/pip/README.md @@ -0,0 +1,24 @@ +Halide is a programming language designed to make it easier to write +high-performance image and array processing code on modern machines. Halide +currently targets: + +- CPU architectures: X86, ARM, Hexagon, PowerPC, RISC-V +- Operating systems: Linux, Windows, macOS, Android, iOS, Qualcomm QuRT +- GPU Compute APIs: CUDA, OpenCL, Apple Metal, Microsoft DirectX 12, Vulkan + +Rather than being a standalone programming language, Halide is embedded in +Python. This means you write Python code that builds an in-memory representation +of a Halide pipeline using Halide's Python API. You can then compile this +representation to an object file, or JIT-compile it and run it in the same +process. + +## Using Halide from C++ +Halide is also available as a C++ library. This package provides the development +files necessary to use Halide from C++, including a CMake package. On Linux and +macOS, CMake's `find_package` command should find Halide as long as you're in +the same virtual environment you installed it in. On Windows, you will need to +add the virtual environment root directory to `CMAKE_PREFIX_PATH`. This can be +done by running `set CMAKE_PREFIX_PATH=%VIRTUAL_ENV%` in `cmd`. + +Other build systems can find the Halide root path by running `python -c +"import halide; print(halide.install_dir())"`.
\ No newline at end of file diff --git a/packaging/pip/TrampolineConfig.cmake.in b/packaging/pip/TrampolineConfig.cmake.in new file mode 100644 index 000000000000..5b05801c2ba3 --- /dev/null +++ b/packaging/pip/TrampolineConfig.cmake.in @@ -0,0 +1,6 @@ +cmake_minimum_required(VERSION 3.28) + +include(CMakeFindDependencyMacro) +find_dependency(Python 3) + +include("${Python_SITEARCH}/halide/@INSTALL_DIR@/@PACKAGE@Config.cmake") diff --git a/pyproject.toml b/pyproject.toml index b91c918d1d6e..b20a83567cfc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,141 @@ [build-system] requires = [ - "setuptools>=43", - "wheel", - "scikit-build", - "pybind11==2.10.4", - "cmake>=3.22", - "ninja; platform_system!='Windows'" -] -build-backend = "setuptools.build_meta" + "scikit-build-core==0.10.5", + "pybind11==2.10.4", +] +build-backend = "scikit_build_core.build" + +[project] +name = "halide" +authors = [{ name = "The Halide team", email = "halide-dev@lists.csail.mit.edu" }] +maintainers = [{ name = "Alex Reinking", email = "areinking@adobe.com" }] +description = "Halide is a programming language designed to make it easier to write high-performance image and array processing code." 
+license = { file = "LICENSE.txt" } +readme = "./packaging/pip/README.md" +requires-python = ">=3.8" +dependencies = [ + "numpy", + "imageio", +] +dynamic = ['version'] +keywords = [ + "array", + "compiler", + "domain-specific language", + "dsl", + "gpu", + "hexagon", + "image processing", + "machine learning", + "performance", + "programming language", +] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: GPU", + "Environment :: GPU :: NVIDIA CUDA", + "Environment :: WebAssembly", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Natural Language :: English", + "Operating System :: MacOS", + "Operating System :: Microsoft :: Windows", + "Operating System :: POSIX", + "Programming Language :: C++", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: Implementation :: CPython", + "Topic :: Multimedia :: Graphics", + "Topic :: Scientific/Engineering", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Scientific/Engineering :: Image Processing", + "Topic :: Software Development :: Code Generators", + "Topic :: Software Development :: Compilers", + "Topic :: Software Development :: Libraries", +] + +[project.urls] +Homepage = "https://halide-lang.org" +Documentation = "https://github.com/halide/Halide/blob/main/doc/Python.md" +"Documentation (C++)" = "https://halide-lang.org/docs" +Issues = "https://github.com/halide/Halide/issues" +Repository = "https://github.com/halide/Halide.git" + +[tool.scikit-build] +cmake.version = ">=3.28" +wheel.install-dir = "halide" +sdist.include = ["dependencies/"] 
+sdist.exclude = [".github/", "apps/", "test/", "tutorial/"] +metadata.version.provider = "scikit_build_core.metadata.setuptools_scm" + +[tool.scikit-build.cmake.define] +CMAKE_DISABLE_FIND_PACKAGE_JPEG = true +CMAKE_DISABLE_FIND_PACKAGE_PNG = true +Halide_ENABLE_EXCEPTIONS = true +Halide_ENABLE_RTTI = true +Halide_INSTALL_PYTHONDIR = "." +Halide_USE_FETCHCONTENT = false +Halide_WASM_BACKEND = "wabt" +WITH_PYTHON_BINDINGS = true +WITH_TESTS = false +WITH_TUTORIALS = false + +## +# Don't version libHalide.so/dylib -- wheels are zip files that do +# not understand symbolic links. Including version information here +# causes the final wheel to have three copies of our library. Not good. +Halide_VERSION_OVERRIDE = "" +Halide_SOVERSION_OVERRIDE = "" + +[[tool.scikit-build.overrides]] +if.platform-system = "^win32" +inherit.cmake.define = "append" +cmake.define.Halide_WASM_BACKEND = "OFF" + +[tool.tbump] +github_url = "https://github.com/halide/Halide/" + +[tool.tbump.version] +current = "19.0.0" +regex = '(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)' + +[tool.tbump.git] +message_template = "Bump version to {new_version}" +tag_template = "v{new_version}.dev0" + +[[tool.tbump.file]] +src = "CMakeLists.txt" +search = "VERSION {current_version}" + +[[tool.tbump.file]] +src = "python_bindings/CMakeLists.txt" +search = "VERSION {current_version}" + +[[tool.tbump.file]] +src = "vcpkg.json" + +[[tool.tbump.file]] +src = "src/runtime/HalideRuntime.h" +version_template = "{major}" +search = "#define HALIDE_VERSION_MAJOR {current_version}" + +[[tool.tbump.file]] +src = "src/runtime/HalideRuntime.h" +version_template = "{minor}" +search = "#define HALIDE_VERSION_MINOR {current_version}" + +[[tool.tbump.file]] +src = "src/runtime/HalideRuntime.h" +version_template = "{patch}" +search = "#define HALIDE_VERSION_PATCH {current_version}" + +# Must be last table in file since pip.yml appends to it +# See: https://github.com/pypa/setuptools-scm/issues/455 +[tool.setuptools_scm] diff --git
a/python_bindings/CMakeLists.txt b/python_bindings/CMakeLists.txt index c97fe35128ea..2d3a7ac77da9 100644 --- a/python_bindings/CMakeLists.txt +++ b/python_bindings/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.28) -project(Halide_Python) +project(Halide_Python VERSION 19.0.0) if (PROJECT_IS_TOP_LEVEL) enable_testing() @@ -16,17 +16,21 @@ set(CMAKE_CXX_STANDARD 17 CACHE STRING "The minimum C++ standard to use") option(CMAKE_CXX_STANDARD_REQUIRED "Prevent CMake C++ standard selection decay" ON) option(CMAKE_CXX_EXTENSIONS "Enable C++ vendor extensions (e.g. GNU)" OFF) +# Duplicated options from parent project +option(WITH_TESTS "Build tests" "${PROJECT_IS_TOP_LEVEL}") +option(WITH_TUTORIALS "Build tutorials" "${PROJECT_IS_TOP_LEVEL}") +option(WITH_PACKAGING "Include install() rules" "${PROJECT_IS_TOP_LEVEL}") + # Support not actually building the bindings, but using the ones we find # via `find_package(Halide)`. This allows running tests against the # installed Halide package. option(WITH_PYTHON_BINDINGS "Build Python bindings" ON) -# Duplicated options from parent project -option(WITH_TESTS "Build tests" ON) -option(WITH_TUTORIALS "Build tutorials" ON) -option(WITH_PYTHON_STUBS "Build Python stubs" ON) +cmake_dependent_option( + WITH_PYTHON_STUBS "Build Python stubs" ON + WITH_PYTHON_BINDINGS OFF +) -# Enable/disable testing cmake_dependent_option( WITH_TEST_PYTHON "Build Python tests" ON WITH_TESTS OFF @@ -40,7 +44,7 @@ cmake_dependent_option( # Development.Module and Development.Embed. We don't need the Embed # part, so only requesting Module avoids failures when Embed is not # available, as is the case in the manylinux Docker images. 
-find_package(Python3 3.8 REQUIRED Interpreter Development.Module) +find_package(Python 3.8 REQUIRED Interpreter Development.Module) if (WITH_PYTHON_BINDINGS) find_package(pybind11 2.10.4 REQUIRED) @@ -52,34 +56,8 @@ if (NOT Halide_ENABLE_RTTI OR NOT Halide_ENABLE_EXCEPTIONS) message(FATAL_ERROR "Python bindings require RTTI and exceptions to be enabled.") endif () -## -# A helper for creating tests with correct PYTHONPATH and sanitizer preloading -## - -function(add_python_test) - cmake_parse_arguments(ARG "" "FILE;LABEL" "PYTHONPATH;ENVIRONMENT;TEST_ARGS" ${ARGN}) - - list(PREPEND ARG_PYTHONPATH "$/..") - list(TRANSFORM ARG_PYTHONPATH PREPEND "PYTHONPATH=path_list_prepend:") - - list(PREPEND ARG_ENVIRONMENT "HL_TARGET=${Halide_TARGET};HL_JIT_TARGET=${Halide_TARGET}") - - cmake_path(GET ARG_FILE STEM test_name) - set(test_name "${ARG_LABEL}_${test_name}") - - add_test( - NAME "${test_name}" - COMMAND ${Halide_PYTHON_LAUNCHER} "$" "$" ${ARG_TEST_ARGS} - ) - set_tests_properties( - "${test_name}" - PROPERTIES - LABELS "python" - ENVIRONMENT "${ARG_ENVIRONMENT}" - ENVIRONMENT_MODIFICATION "${ARG_PYTHONPATH}" - ) -endfunction() - +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") +include(AddPythonTest) ## # Add our sources to this sub-tree. 
@@ -89,7 +67,7 @@ if (WITH_PYTHON_BINDINGS) add_subdirectory(src) endif () -if (WITH_PYTHON_BINDINGS AND WITH_PYTHON_STUBS) +if (WITH_PYTHON_STUBS) add_subdirectory(stub) endif () @@ -101,3 +79,7 @@ endif () if (WITH_TUTORIALS) add_subdirectory(tutorial) endif () + +if (WITH_PACKAGING) + add_subdirectory(packaging) +endif () diff --git a/python_bindings/apps/CMakeLists.txt b/python_bindings/apps/CMakeLists.txt index e63b38e8f31e..ebaf96b0de40 100644 --- a/python_bindings/apps/CMakeLists.txt +++ b/python_bindings/apps/CMakeLists.txt @@ -1,3 +1,12 @@ +cmake_minimum_required(VERSION 3.28) +project(Halide_Python_apps) + +if (PROJECT_IS_TOP_LEVEL) + enable_testing() +endif () + +find_package(Halide REQUIRED COMPONENTS Python) + if (Halide_TARGET MATCHES "wasm") message(WARNING "Python apps are skipped under WASM.") return() @@ -13,6 +22,9 @@ if (NOT WITH_AUTOSCHEDULERS) return() endif () +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../cmake") +include(AddPythonTest) + set(TEST_TMPDIR "$") set(TEST_IMAGES_DIR "$") @@ -53,7 +65,7 @@ foreach (app IN LISTS APPS) MODULE_NAME ${G} HALIDE_LIBRARIES app_aot_${G}) list(APPEND DEPS app_ext_${G}) - endforeach() + endforeach () set(app_src "${app}_app.py") add_python_test( diff --git a/python_bindings/cmake/AddPythonTest.cmake b/python_bindings/cmake/AddPythonTest.cmake new file mode 100644 index 000000000000..4cf12fc4316e --- /dev/null +++ b/python_bindings/cmake/AddPythonTest.cmake @@ -0,0 +1,27 @@ +## +# A helper for creating tests with correct PYTHONPATH and sanitizer preloading +## + +function(add_python_test) + cmake_parse_arguments(ARG "" "FILE;LABEL" "PYTHONPATH;ENVIRONMENT;TEST_ARGS" ${ARGN}) + + list(PREPEND ARG_PYTHONPATH "$/..>") + list(TRANSFORM ARG_PYTHONPATH PREPEND "PYTHONPATH=path_list_prepend:") + + list(PREPEND ARG_ENVIRONMENT "HL_TARGET=${Halide_TARGET};HL_JIT_TARGET=${Halide_TARGET}") + + cmake_path(GET ARG_FILE STEM test_name) + set(test_name "${ARG_LABEL}_${test_name}") + + add_test( + NAME 
"${test_name}" + COMMAND ${Halide_PYTHON_LAUNCHER} "$" "$" ${ARG_TEST_ARGS} + ) + set_tests_properties( + "${test_name}" + PROPERTIES + LABELS "python" + ENVIRONMENT "${ARG_ENVIRONMENT}" + ENVIRONMENT_MODIFICATION "${ARG_PYTHONPATH}" + ) +endfunction() diff --git a/python_bindings/packaging/CMakeLists.txt b/python_bindings/packaging/CMakeLists.txt new file mode 100644 index 000000000000..a6816af45d7c --- /dev/null +++ b/python_bindings/packaging/CMakeLists.txt @@ -0,0 +1,81 @@ +include(GNUInstallDirs) +include(CMakePackageConfigHelpers) + +set(Halide_INSTALL_PYTHONDIR "${CMAKE_INSTALL_LIBDIR}/python3/site-packages/halide" + CACHE STRING "Path to the Python site-packages folder") + +if (WITH_PYTHON_BINDINGS) + install(DIRECTORY "${Halide_Python_SOURCE_DIR}/src/halide/" + DESTINATION "${Halide_INSTALL_PYTHONDIR}" + COMPONENT Halide_Python + FILES_MATCHING + PATTERN "*.py" + PATTERN "halide_" EXCLUDE) + + install(TARGETS Halide_Python + EXPORT Halide_Python-targets + LIBRARY DESTINATION "${Halide_INSTALL_PYTHONDIR}" + COMPONENT Halide_Python) + + get_property(halide_is_imported TARGET Halide::Halide PROPERTY IMPORTED) + get_property(halide_type TARGET Halide::Halide PROPERTY TYPE) + + if ( + NOT CMAKE_INSTALL_RPATH # Honor user overrides + AND NOT halide_is_imported # Imported Halide means user is responsible for RPATH + AND halide_type STREQUAL "SHARED_LIBRARY" # No need to set RPATH if statically linked + ) + if (APPLE) + set(rbase @loader_path) + else () + set(rbase $ORIGIN) + endif () + + file(RELATIVE_PATH lib_dir + "${CMAKE_CURRENT_BINARY_DIR}/${Halide_INSTALL_PYTHONDIR}" + "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}") + + set_target_properties(Halide_Python PROPERTIES INSTALL_RPATH "${rbase}/${lib_dir}") + endif () +endif () + +if (WITH_PYTHON_STUBS) + install(TARGETS Halide_PyStubs + EXPORT Halide_Python-targets + COMPONENT Halide_Python) +endif () + +if (WITH_TUTORIALS) + install(DIRECTORY ${Halide_Python_SOURCE_DIR}/tutorial/ + DESTINATION 
${CMAKE_INSTALL_DOCDIR}/tutorial-python + COMPONENT Halide_Documentation + FILES_MATCHING PATTERN "*.py") +endif () + +if (WITH_PYTHON_BINDINGS OR WITH_PYTHON_STUBS) + set(Halide_Python_INSTALL_CMAKEDIR "${CMAKE_INSTALL_LIBDIR}/cmake/Halide_Python" + CACHE STRING "Path to Halide_Python CMake files") + + install(EXPORT Halide_Python-targets + DESTINATION ${Halide_Python_INSTALL_CMAKEDIR} + NAMESPACE Halide:: + COMPONENT Halide_Python) + + configure_package_config_file( + Halide_PythonConfig.cmake.in Halide_PythonConfig.cmake + INSTALL_DESTINATION "${Halide_Python_INSTALL_CMAKEDIR}" + NO_SET_AND_CHECK_MACRO + ) + + write_basic_package_version_file( + Halide_PythonConfigVersion.cmake COMPATIBILITY SameMajorVersion + ) + + install( + FILES + "${CMAKE_CURRENT_BINARY_DIR}/Halide_PythonConfig.cmake" + "${CMAKE_CURRENT_BINARY_DIR}/Halide_PythonConfigVersion.cmake" + DESTINATION "${Halide_Python_INSTALL_CMAKEDIR}" + COMPONENT Halide_Python + ) +endif () diff --git a/python_bindings/packaging/Halide_PythonConfig.cmake.in b/python_bindings/packaging/Halide_PythonConfig.cmake.in new file mode 100644 index 000000000000..12a31690805c --- /dev/null +++ b/python_bindings/packaging/Halide_PythonConfig.cmake.in @@ -0,0 +1,9 @@ +cmake_minimum_required(VERSION 3.28) +@PACKAGE_INIT@ + +include(CMakeFindDependencyMacro) +find_dependency(Python 3 COMPONENTS Interpreter Development.Module) + +include("${CMAKE_CURRENT_LIST_DIR}/Halide_Python-targets.cmake") + +check_required_components(${CMAKE_FIND_PACKAGE_NAME}) diff --git a/python_bindings/src/halide/CMakeLists.txt b/python_bindings/src/halide/CMakeLists.txt index 9d2523de9169..d5c48e6a3f68 100644 --- a/python_bindings/src/halide/CMakeLists.txt +++ b/python_bindings/src/halide/CMakeLists.txt @@ -1,64 +1,63 @@ -set(native_sources - PyArgument.cpp - PyBoundaryConditions.cpp - PyBuffer.cpp - PyCallable.cpp - PyConciseCasts.cpp - PyDerivative.cpp - PyEnums.cpp - PyError.cpp - PyExpr.cpp - PyExternFuncArgument.cpp - PyFunc.cpp - PyFuncRef.cpp 
- PyGenerator.cpp - PyHalide.cpp - PyImageParam.cpp - PyInlineReductions.cpp - PyIROperator.cpp - PyLambda.cpp - PyLoopLevel.cpp - PyModule.cpp - PyParam.cpp - PyParameter.cpp - PyPipeline.cpp - PyRDom.cpp - PyStage.cpp - PyTarget.cpp - PyTuple.cpp - PyType.cpp - PyVar.cpp - PyVarOrRVar.cpp - ) -list(TRANSFORM native_sources PREPEND "halide_/") +pybind11_add_module(Halide_Python) +add_library(Halide::Python ALIAS Halide_Python) -set(python_sources - __init__.py - _generator_helpers.py - imageio.py - ) +set_target_properties( + Halide_Python + PROPERTIES + OUTPUT_NAME halide_ + EXPORT_NAME Python +) + +target_sources( + Halide_Python + PRIVATE + halide_/PyArgument.cpp + halide_/PyBoundaryConditions.cpp + halide_/PyBuffer.cpp + halide_/PyCallable.cpp + halide_/PyConciseCasts.cpp + halide_/PyDerivative.cpp + halide_/PyEnums.cpp + halide_/PyError.cpp + halide_/PyExpr.cpp + halide_/PyExternFuncArgument.cpp + halide_/PyFunc.cpp + halide_/PyFuncRef.cpp + halide_/PyGenerator.cpp + halide_/PyHalide.cpp + halide_/PyImageParam.cpp + halide_/PyInlineReductions.cpp + halide_/PyIROperator.cpp + halide_/PyLambda.cpp + halide_/PyLoopLevel.cpp + halide_/PyModule.cpp + halide_/PyParam.cpp + halide_/PyParameter.cpp + halide_/PyPipeline.cpp + halide_/PyRDom.cpp + halide_/PyStage.cpp + halide_/PyTarget.cpp + halide_/PyTuple.cpp + halide_/PyType.cpp + halide_/PyVar.cpp + halide_/PyVarOrRVar.cpp +) # It is technically still possible for a user to override the LIBRARY_OUTPUT_DIRECTORY by setting # CMAKE_LIBRARY_OUTPUT_DIRECTORY_, but they do so at their own peril. If a user needs to # do this, they should use the CMAKE_PROJECT_Halide_Python_INCLUDE_BEFORE variable to override it # just for this project, rather than globally, and they should ensure that the last path component # is `halide`. Otherwise, the tests will break. 
-pybind11_add_module(Halide_Python MODULE ${native_sources}) -add_library(Halide::Python ALIAS Halide_Python) set_target_properties( - Halide_Python - PROPERTIES - LIBRARY_OUTPUT_NAME halide_ - LIBRARY_OUTPUT_DIRECTORY "$/halide" - EXPORT_NAME Python + Halide_Python PROPERTIES LIBRARY_OUTPUT_DIRECTORY "$/halide" ) + if (Halide_ASAN_ENABLED) set_target_properties( - Halide_Python - PROPERTIES - CMAKE_SHARED_LINKER_FLAGS -shared-libasan + Halide_Python PROPERTIES CMAKE_SHARED_LINKER_FLAGS -shared-libasan ) endif () + target_link_libraries(Halide_Python PRIVATE Halide::Halide) # TODO: There's precious little information about why Python only sometimes prevents DLLs from loading from the PATH @@ -74,97 +73,23 @@ add_custom_command( ) # Copy our Python source files over so that we have a valid package in the binary directory. -# TODO: When upgrading to CMake 3.23 or beyond, investigate the FILE_SET feature. -set(build_tree_pys "") -foreach (pysrc IN LISTS python_sources) - # TODO: CMake 3.22 still doesn't allow target-dependent genex in OUTPUT, but we can hack around this using a stamp - # file. Fix this hack up if and when they ever improve this feature. 
- set(stamp_file "${CMAKE_CURRENT_BINARY_DIR}/.${pysrc}.stamp") - add_custom_command( - OUTPUT "${stamp_file}" - COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/${pysrc}" "$/${pysrc}" - COMMAND ${CMAKE_COMMAND} -E touch "${stamp_file}" - DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${pysrc}" - VERBATIM - ) - list(APPEND build_tree_pys "${stamp_file}") -endforeach () -add_custom_target(Halide_Python_sources ALL DEPENDS ${build_tree_pys}) -add_dependencies(Halide_Python Halide_Python_sources) - -## -# Packaging -## - -include(CMakeDependentOption) -include(GNUInstallDirs) - -set(Halide_INSTALL_PYTHONDIR "${CMAKE_INSTALL_LIBDIR}/python3/site-packages" - CACHE STRING "Path to the Python site-packages folder") - -install(DIRECTORY "$/" - DESTINATION "${Halide_INSTALL_PYTHONDIR}/halide" - COMPONENT Halide_Python - FILES_MATCHING - PATTERN "*.py" - PATTERN "*/halide_" EXCLUDE - PATTERN "*/CMakeFiles" EXCLUDE - PATTERN "*/__pycache__" EXCLUDE) - -install(TARGETS Halide_Python - EXPORT Halide_Targets - LIBRARY DESTINATION "${Halide_INSTALL_PYTHONDIR}/halide" - COMPONENT Halide_Python) - -get_property(halide_is_imported TARGET Halide::Halide PROPERTY IMPORTED) -get_property(halide_type TARGET Halide::Halide PROPERTY TYPE) -cmake_dependent_option( - Halide_Python_INSTALL_IMPORTED_DEPS "" OFF - "halide_is_imported;halide_type STREQUAL \"SHARED_LIBRARY\"" OFF -) - -if (Halide_Python_INSTALL_IMPORTED_DEPS) - # The following might be a bit confusing, but installing both libHalide - # and its SONAME symbolic link causes the following bad behavior: - # 1. CMake does the right thing and installs libHalide.so.X.Y.Z - # (TARGET_FILE) as a real file and libHalide.so.X - # (TARGET_SONAME_FILE_NAME) as a symbolic link to the former. - # 2. Setuptools dutifully packs both of these into a Python wheel, which - # is a structured zip file. Zip files do not support symbolic links. - # Thus, two independent copies of libHalide are inserted, bloating the - # package. 
- # The Python module (on Unix systems) links to the SONAME file, and - # installing the symbolic link directly results in a broken link. Hence, - # the renaming dance here. - - if (NOT MSVC) - set(rename_arg RENAME "$") - else () - # DLL systems do not have sonames. - set(rename_arg "") - endif () +set(python_sources + __init__.py + _generator_helpers.py + imageio.py) - # TODO: when we upgrade to CMake 3.22, replace with RUNTIME_DEPENDENCY_SET? - install(FILES "$" - DESTINATION "${Halide_INSTALL_PYTHONDIR}/halide" - COMPONENT Halide_Python - ${rename_arg}) -endif () +list(TRANSFORM python_sources PREPEND "${CMAKE_CURRENT_SOURCE_DIR}/" + OUTPUT_VARIABLE python_sources_source_dir) -if ( - NOT CMAKE_INSTALL_RPATH # Honor user overrides - AND NOT halide_is_imported # Imported Halide means user is responsible for RPATH - AND halide_type STREQUAL "SHARED_LIBRARY" # No need to set RPATH if statically linked +set(stamp_file "$/Halide_Python_sources.stamp") +add_custom_command( + OUTPUT "${stamp_file}" + COMMAND "${CMAKE_COMMAND}" -E make_directory $ + COMMAND "${CMAKE_COMMAND}" -E copy -t $ ${python_sources_source_dir} + COMMAND "${CMAKE_COMMAND}" -E touch ${stamp_file} + DEPENDS ${python_sources_source_dir} + VERBATIM ) - if (APPLE) - set(rbase @loader_path) - else () - set(rbase $ORIGIN) - endif () - - file(RELATIVE_PATH lib_dir - "${CMAKE_CURRENT_BINARY_DIR}/${Halide_INSTALL_PYTHONDIR}/halide" - "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}") - set_target_properties(Halide_Python PROPERTIES INSTALL_RPATH "${rbase}/${lib_dir}") -endif () +add_custom_target(Halide_Python_sources DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/${stamp_file}") +add_dependencies(Halide_Python Halide_Python_sources) diff --git a/python_bindings/src/halide/__init__.py b/python_bindings/src/halide/__init__.py index b2abca11e090..d75844d4460e 100644 --- a/python_bindings/src/halide/__init__.py +++ b/python_bindings/src/halide/__init__.py @@ -1,5 +1,25 @@ +def patch_dll_dirs(): + import os + if 
hasattr(os, 'add_dll_directory'): + from pathlib import Path + bin_dir = Path(__file__).parent / 'bin' + if bin_dir.exists(): + os.add_dll_directory(str(bin_dir)) + + +patch_dll_dirs() +del patch_dll_dirs + from .halide_ import * +# noinspection PyUnresolvedReferences from .halide_ import _, _0, _1, _2, _3, _4, _5, _6, _7, _8, _9 + + +def install_dir(): + import os + return os.path.dirname(__file__) + + from ._generator_helpers import ( _create_python_generator, _generatorcontext_enter, diff --git a/python_bindings/stub/CMakeLists.txt b/python_bindings/stub/CMakeLists.txt index cc5354470466..85edb47b47cd 100644 --- a/python_bindings/stub/CMakeLists.txt +++ b/python_bindings/stub/CMakeLists.txt @@ -13,7 +13,3 @@ set_target_properties(Halide_PyStubs PROPERTIES CXX_VISIBILITY_PRESET hidden VISIBILITY_INLINES_HIDDEN TRUE POSITION_INDEPENDENT_CODE ON) - -install(TARGETS Halide_PyStubs - EXPORT Halide_Targets - COMPONENT Halide_Python) diff --git a/python_bindings/tutorial/CMakeLists.txt b/python_bindings/tutorial/CMakeLists.txt index 9b17c7516f77..285ed1ae3d08 100644 --- a/python_bindings/tutorial/CMakeLists.txt +++ b/python_bindings/tutorial/CMakeLists.txt @@ -19,7 +19,7 @@ set(tests lesson_12_using_the_gpu.py lesson_13_tuples.py lesson_14_types.py - ) +) set(PYPATH_lesson_10_aot_compilation_run "$") @@ -64,9 +64,9 @@ else () # This target allows CMake to build lesson_10_halide.so (or whatever the correct extension is) as part of the tests # later. It is excluded from ALL since it isn't valid to build outside of this context. 
- Python3_add_library(lesson_10_halide MODULE EXCLUDE_FROM_ALL - lesson_10_halide.py.cpp - lesson_10_halide.o) + Python_add_library(lesson_10_halide MODULE EXCLUDE_FROM_ALL + lesson_10_halide.py.cpp + lesson_10_halide.o) target_link_libraries(lesson_10_halide PRIVATE Halide::Runtime) @@ -91,14 +91,3 @@ else () set_tests_properties(python_tutorial_lesson_10_aot_compilation_run PROPERTIES FIXTURES_REQUIRED py_lesson_10) endif () - -## -# Packaging -## - -include(GNUInstallDirs) - -install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/ - DESTINATION ${CMAKE_INSTALL_DOCDIR}/tutorial-python - COMPONENT Halide_Documentation - FILES_MATCHING PATTERN "*.py") diff --git a/requirements.txt b/requirements.txt index 16b9d8ff48b1..129c450c2715 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,12 @@ -cmake>=3.22 +build +cmake>=3.28 imageio -ninja; platform_system!='Windows' +ninja numpy pillow pybind11==2.10.4 -scikit-build +scikit-build-core==0.10.5 scipy setuptools>=43 +tbump==6.11.0 wheel -build diff --git a/run-clang-format.sh b/run-clang-format.sh index 9b5712c5e56a..33a0ac6d7152 100755 --- a/run-clang-format.sh +++ b/run-clang-format.sh @@ -11,13 +11,23 @@ ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" # # sudo apt-get install llvm-17 clang-17 libclang-17-dev clang-tidy-17 # export CLANG_FORMAT_LLVM_INSTALL_DIR=/usr/lib/llvm-17 +# +# On macOS: +# +# brew install llvm@17 +# export CLANG_FORMAT_LLVM_INSTALL_DIR=/opt/homebrew/opt/llvm@17 + +if [ -z "$CLANG_FORMAT_LLVM_INSTALL_DIR" ]; then + echo "CLANG_FORMAT_LLVM_INSTALL_DIR must point to an LLVM installation dir for this script." + exit 1 +fi + +echo "CLANG_FORMAT_LLVM_INSTALL_DIR = ${CLANG_FORMAT_LLVM_INSTALL_DIR}" -[ -z "$CLANG_FORMAT_LLVM_INSTALL_DIR" ] && echo "CLANG_FORMAT_LLVM_INSTALL_DIR must point to an LLVM installation dir for this script." 
&& exit -echo CLANG_FORMAT_LLVM_INSTALL_DIR = ${CLANG_FORMAT_LLVM_INSTALL_DIR} +CLANG_FORMAT="${CLANG_FORMAT_LLVM_INSTALL_DIR}/bin/clang-format" -VERSION=$(${CLANG_FORMAT_LLVM_INSTALL_DIR}/bin/clang-format --version) -if [[ ${VERSION} =~ .*version\ 17.* ]] -then +VERSION=$("${CLANG_FORMAT}" --version) +if [[ ${VERSION} =~ .*version\ 17.* ]]; then echo "clang-format version 17 found." else echo "CLANG_FORMAT_LLVM_INSTALL_DIR must point to an LLVM 17 install!" @@ -33,5 +43,6 @@ find "${ROOT_DIR}/apps" \ "${ROOT_DIR}/util" \ "${ROOT_DIR}/python_bindings" \ -not -path "${ROOT_DIR}/src/runtime/hexagon_remote/bin/src/*" \ - \( -name "*.cpp" -o -name "*.h" -o -name "*.c" \) -and -not -wholename "*/.*" | \ - xargs ${CLANG_FORMAT_LLVM_INSTALL_DIR}/bin/clang-format -i -style=file \ No newline at end of file + \( -name "*.cpp" -o -name "*.h" -o -name "*.c" \) -and -not -wholename "*/.*" \ + -print0 | \ + xargs -0 "${CLANG_FORMAT}" -i -style=file \ No newline at end of file diff --git a/run-clang-tidy.sh b/run-clang-tidy.sh index baf896d3f78e..571b41a65503 100755 --- a/run-clang-tidy.sh +++ b/run-clang-tidy.sh @@ -6,7 +6,18 @@ ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" usage() { echo "Usage: $0 [-j MAX_PROCESS_COUNT] [-f]" 1>&2; exit 1; } -J=$(nproc) +get_thread_count () { + ([ -x "$(command -v nproc)" ] && nproc) || + ([ -x "$(command -v sysctl)" ] && sysctl -n hw.physicalcpu) +} + +if [ "$(uname)" == "Darwin" ]; then + patch_file () { sed -i '' -E "$@"; } +else + patch_file () { sed -i -E "$@"; } +fi + +J=$(get_thread_count) FIX= while getopts ":j:f" o; do @@ -30,18 +41,27 @@ if [ -n "${FIX}" ]; then echo "Operating in -fix mode!" fi -# We are currently standardized on using LLVM/Clang17 for this script. +# We are currently standardized on using LLVM/Clang 17 for this script. # Note that this is totally independent of the version of LLVM that you # are using to build Halide itself. 
If you don't have LLVM17 installed, # you can usually install what you need easily via: # # sudo apt-get install llvm-17 clang-17 libclang-17-dev clang-tidy-17 # export CLANG_TIDY_LLVM_INSTALL_DIR=/usr/lib/llvm-17 +# +# On macOS: +# +# brew install llvm@17 +# export CLANG_TIDY_LLVM_INSTALL_DIR=/opt/homebrew/opt/llvm@17 + +if [ -z "$CLANG_TIDY_LLVM_INSTALL_DIR" ]; then + echo "CLANG_TIDY_LLVM_INSTALL_DIR must point to an LLVM installation dir for this script." + exit 1 +fi -[ -z "$CLANG_TIDY_LLVM_INSTALL_DIR" ] && echo "CLANG_TIDY_LLVM_INSTALL_DIR must point to an LLVM installation dir for this script." && exit -echo CLANG_TIDY_LLVM_INSTALL_DIR = ${CLANG_TIDY_LLVM_INSTALL_DIR} +echo "CLANG_TIDY_LLVM_INSTALL_DIR = ${CLANG_TIDY_LLVM_INSTALL_DIR}" -VERSION=$(${CLANG_TIDY_LLVM_INSTALL_DIR}/bin/clang-tidy --version) +VERSION=$("${CLANG_TIDY_LLVM_INSTALL_DIR}/bin/clang-tidy" --version) if [[ ${VERSION} =~ .*version\ 17.* ]] then echo "clang-tidy version 17 found." @@ -52,28 +72,32 @@ fi # Use a temp folder for the CMake stuff here, so it's fresh & correct every time -CLANG_TIDY_BUILD_DIR=`mktemp -d` -echo CLANG_TIDY_BUILD_DIR = ${CLANG_TIDY_BUILD_DIR} +CLANG_TIDY_BUILD_DIR=$(mktemp -d) +echo "CLANG_TIDY_BUILD_DIR = ${CLANG_TIDY_BUILD_DIR}" # Specify Halide_LLVM_SHARED_LIBS=ON because some installers may provide only that. echo Building compile_commands.json...
-cmake -DCMAKE_BUILD_TYPE=Debug \ +cmake -G Ninja -S "${ROOT_DIR}" -B "${CLANG_TIDY_BUILD_DIR}" -Wno-dev \ + -DCMAKE_C_COMPILER="${CLANG_TIDY_LLVM_INSTALL_DIR}/bin/clang" \ + -DCMAKE_CXX_COMPILER="${CLANG_TIDY_LLVM_INSTALL_DIR}/bin/clang++" \ + -DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ -DHalide_CLANG_TIDY_BUILD=ON \ - -DHalide_LLVM_SHARED_LIBS=ON \ - -DLLVM_DIR=${CLANG_TIDY_LLVM_INSTALL_DIR}/lib/cmake/llvm \ - -S ${ROOT_DIR} \ - -B ${CLANG_TIDY_BUILD_DIR} \ + -DHalide_LLVM_ROOT="${CLANG_TIDY_LLVM_INSTALL_DIR}" \ > /dev/null -[ -a ${CLANG_TIDY_BUILD_DIR}/compile_commands.json ] +[ -a "${CLANG_TIDY_BUILD_DIR}/compile_commands.json" ] + +# We need to remove -arch flags where -target flags also exist. These break our fake runtime compilation steps on macOS +echo Patching compile_commands.json... +patch_file '/-target/ s/-arch *[^ ]+//' "${CLANG_TIDY_BUILD_DIR}/compile_commands.json" # We must populate the includes directory to check things outside of src/ echo Building HalideIncludes... -cmake --build ${CLANG_TIDY_BUILD_DIR} -j $(nproc) --target HalideIncludes +cmake --build "${CLANG_TIDY_BUILD_DIR}" -j "${J}" --target HalideIncludes echo Building flatbuffer stuff... -cmake --build ${CLANG_TIDY_BUILD_DIR} -j $(nproc) --target generate_fb_header +cmake --build "${CLANG_TIDY_BUILD_DIR}" -j "${J}" --target generate_fb_header RUN_CLANG_TIDY=${CLANG_TIDY_LLVM_INSTALL_DIR}/bin/run-clang-tidy @@ -101,19 +125,19 @@ CLANG_TIDY_HEADER_FILTER=".*/src/.*|.*/python_bindings/.*|.*/tools/.*|.*/util/.* echo Running clang-tidy... 
${RUN_CLANG_TIDY} \ ${FIX} \ - -j ${J} \ + -j "${J}" \ -header-filter="${CLANG_TIDY_HEADER_FILTER}" \ -quiet \ - -p ${CLANG_TIDY_BUILD_DIR} \ - -clang-tidy-binary ${CLANG_TIDY_LLVM_INSTALL_DIR}/bin/clang-tidy \ - -clang-apply-replacements-binary ${CLANG_TIDY_LLVM_INSTALL_DIR}/bin/clang-apply-replacements \ + -p "${CLANG_TIDY_BUILD_DIR}" \ + -clang-tidy-binary "${CLANG_TIDY_LLVM_INSTALL_DIR}/bin/clang-tidy" \ + -clang-apply-replacements-binary "${CLANG_TIDY_LLVM_INSTALL_DIR}/bin/clang-apply-replacements" \ ${CLANG_TIDY_TARGETS} \ 2>&1 | grep -v "warnings generated" | sed "s|.*/||" RESULT=${PIPESTATUS[0]} -echo run-clang-tidy finished with status ${RESULT} +echo "run-clang-tidy finished with status ${RESULT}" -rm -rf ${CLANG_TIDY_BUILD_DIR} +rm -rf "${CLANG_TIDY_BUILD_DIR}" -exit $RESULT \ No newline at end of file +exit "${RESULT}" \ No newline at end of file diff --git a/setup.py b/setup.py deleted file mode 100644 index bea2e7f1dd92..000000000000 --- a/setup.py +++ /dev/null @@ -1,34 +0,0 @@ -import pybind11 -from setuptools import find_packages -from skbuild import setup -from pathlib import Path - -this_directory = Path(__file__).parent -long_description = (this_directory / "README_python.md").read_text() - -setup( - name="halide", - version='19.0.0', - author="The Halide team", - author_email="halide-dev@lists.csail.mit.edu", - description="Halide is a programming language designed to make it easier " - "to write high-performance image and array processing code.", - long_description=long_description, - long_description_content_type='text/markdown', - python_requires=">=3.8", - packages=find_packages(where="python_bindings/src"), - package_dir={"": "python_bindings/src"}, - cmake_source_dir="python_bindings", - cmake_args=[ - f"-Dpybind11_ROOT={pybind11.get_cmake_dir()}", - "-DCMAKE_REQUIRE_FIND_PACKAGE_pybind11=YES", - "-DHalide_INSTALL_PYTHONDIR=python_bindings/src", - "-DCMAKE_INSTALL_RPATH=$,@loader_path,$ORIGIN>", - 
"-DHalide_Python_INSTALL_IMPORTED_DEPS=ON", - "-DWITH_TESTS=NO", - "-DWITH_TUTORIALS=NO", - "-DWITH_PYTHON_STUBS=NO", - "-DCMAKE_PREFIX_PATH=$ENV{CMAKE_PREFIX_PATH}", - "--no-warn-unused-cli", - ], -) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 6f0df8f90a65..f5eb3e64f97f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -14,13 +14,23 @@ if (NOT BUILD_SHARED_LIBS) endif () # Set the (shared) library version +set(Halide_VERSION_OVERRIDE "${Halide_VERSION}" + CACHE STRING "VERSION to set for custom Halide packaging") +mark_as_advanced(Halide_VERSION_OVERRIDE) + +if (Halide_VERSION_OVERRIDE) + # Empty is considered a value distinct from not-defined + set_target_properties(Halide PROPERTIES VERSION "${Halide_VERSION_OVERRIDE}") +endif () + set(Halide_SOVERSION_OVERRIDE "${Halide_VERSION_MAJOR}" CACHE STRING "SOVERSION to set for custom Halide packaging") mark_as_advanced(Halide_SOVERSION_OVERRIDE) -set_target_properties(Halide PROPERTIES - VERSION "${Halide_VERSION}" - SOVERSION "${Halide_SOVERSION_OVERRIDE}") +if (Halide_SOVERSION_OVERRIDE) + # Empty is considered a value distinct from not-defined + set_target_properties(Halide PROPERTIES SOVERSION "${Halide_SOVERSION_OVERRIDE}") +endif () # Always build with PIC, even when static set_target_properties(Halide PROPERTIES POSITION_INDEPENDENT_CODE ON) diff --git a/src/CodeGen_D3D12Compute_Dev.cpp b/src/CodeGen_D3D12Compute_Dev.cpp index 15ca77ab56a0..462150aa30be 100644 --- a/src/CodeGen_D3D12Compute_Dev.cpp +++ b/src/CodeGen_D3D12Compute_Dev.cpp @@ -87,6 +87,7 @@ class CodeGen_D3D12Compute_Dev : public CodeGen_GPU_Dev { std::string print_reinterpret_cast(Type type, const std::string &value_expr); std::string print_assignment(Type t, const std::string &rhs) override; + std::string print_parameters(std::vector const& args, bool as_global); using CodeGen_GPU_C::visit; void visit(const Evaluate *op) override; @@ -130,24 +131,61 @@ string 
CodeGen_D3D12Compute_Dev::CodeGen_D3D12Compute_C::print_type_maybe_storag // dispatch, there is no need to complicate things with packoffset. } + // TODO(soufianekhiat): + // Add support for HLSL 6.6: + // Type: + // uint8_t4_packed // 4 packed uint8_t values in a uint32_t + // int8_t4_packed // 4 packed int8_t values in a uint32_t + // Intrinsics: + // int16_t4 unpack_s8s16(int8_t4_packed packedVal); // Sign Extended + // uint16_t4 unpack_u8u16(uint8_t4_packed packedVal); // Non-Sign Extended + // + // int32_t4 unpack_s8s32(int8_t4_packed packedVal); // Sign Extended + // uint32_t4 unpack_u8u32(uint8_t4_packed packedVal); // Non-Sign Extended + // And: + // uint8_t4_packed pack_u8(uint32_t4 unpackedVal); // Pack lower 8 bits, drop unused bits + // int8_t4_packed pack_s8(int32_t4 unpackedVal); // Pack lower 8 bits, drop unused bits + // + // uint8_t4_packed pack_u8(uint16_t4 unpackedVal); // Pack lower 8 bits, drop unused bits + // int8_t4_packed pack_s8(int16_t4 unpackedVal); // Pack lower 8 bits, drop unused bits + // + // uint8_t4_packed pack_clamp_u8(int32_t4 unpackedVal); // Pack and Clamp [0, 255] + // int8_t4_packed pack_clamp_s8(int32_t4 unpackedVal); // Pack and Clamp [-128, 127] + // + // uint8_t4_packed pack_clamp_u8(int16_t4 unpackedVal); // Pack and Clamp [0, 255] + // int8_t4_packed pack_clamp_s8(int16_t4 unpackedVal); // Pack and Clamp [-128, 127] + if (type.is_float()) { switch (type.bits()) { case 16: - // 16-bit floating point value. This data type is provided only for language compatibility. - // Direct3D 10 shader targets map all half data types to float data types. - // A half data type cannot be used on a uniform global variable (use the /Gec flag if this functionality is desired). - oss << "half"; + if (target.get_d3d12_capability_lower_bound() < 62) { + // 16-bit floating point value. This data type is provided only for language compatibility. + // Direct3D 10 shader targets map all half data types to float data types. 
+ // A half data type cannot be used on a uniform global variable (use the /Gec flag if this functionality is desired). + oss << "half"; + } else { + // NOTE(soufianekhiat): need options "-enable-16bit-types -T c_s_6_2" for compilation + oss << "float16_t"; + } break; case 32: - oss << "float"; + if (target.get_d3d12_capability_lower_bound() < 60) { + oss << "float"; + } else { + oss << "float32_t"; + } break; case 64: - // "64-bit floating point value. You cannot use double precision values as inputs and outputs for a stream. - // To pass double precision values between shaders, declare each double as a pair of uint data types. - // Then, use the asdouble function to pack each double into the pair of uints and the asuint function to - // unpack the pair of uints back into the double." - user_error << "HLSL (SM 5.1) does not have transparent support for 'double' types.\n"; - oss << "double"; + if (target.get_d3d12_capability_lower_bound() == 51) { + // "64-bit floating point value. You cannot use double precision values as inputs and outputs for a stream. + // To pass double precision values between shaders, declare each double as a pair of uint data types. + // Then, use the asdouble function to pack each double into the pair of uints and the asuint function to + // unpack the pair of uints back into the double." 
+ user_error << "HLSL (SM 5.1) does not have transparent support for 'double' types.\n"; + oss << "double"; + } else { + oss << "float64_t"; + } break; default: user_error << "Can't represent a float with this many bits in HLSL (SM 5.1): " << type << "\n"; @@ -158,18 +196,39 @@ string CodeGen_D3D12Compute_Dev::CodeGen_D3D12Compute_C::print_type_maybe_storag oss << "bool"; break; case 8: + if (type.is_uint()) { + oss << "u"; + } + oss << "int"; + break; case 16: + if (type.is_uint()) { + oss << "u"; + } + oss << "short"; + break; case 32: if (type.is_uint()) { oss << "u"; } oss << "int"; + if (target.get_d3d12_capability_lower_bound() >= 60) { + oss << "32_t"; + } else { #if DEBUG_TYPES oss << type.bits(); #endif + } break; case 64: - user_error << "HLSL (SM 5.1) does not support 64-bit integers.\n"; + if (target.get_d3d12_capability_lower_bound() == 51) { + user_error << "HLSL (SM 5.1) does not support 64-bit integers.\n"; + } else { + if (type.is_uint()) { + oss << "u"; + } + oss << "int64_t"; + } break; default: user_error << "Can't represent an integer with this many bits in HLSL (SM 5.1): " << type << "\n"; @@ -194,7 +253,7 @@ string CodeGen_D3D12Compute_Dev::CodeGen_D3D12Compute_C::print_type_maybe_storag // TODO(marcos): are there 8-wide and 16-wide types in HLSL? 
// (CodeGen_GLSLBase seems to happily generate invalid vector types) default: - user_error << "Unsupported vector width in HLSL (SM 5.1): " << type << "\n"; + user_error << "Unsupported vector width in HLSL (SM 5.1 or 6.x): " << type << "\n"; } if (space == AppendSpace) { @@ -407,6 +466,8 @@ string hex_literal(T value) { struct StoragePackUnpack { using CodeGen = CodeGen_D3D12Compute_Dev::CodeGen_D3D12Compute_C; + // TODO(soufianekhiat): https://microsoft.github.io/DirectX-Specs/d3d/HLSL_SM_6_6_Pack_Unpack_Intrinsics.html + // Shader Model 5.1: threadgroup shared memory is limited 32KB static const size_t ThreadGroupSharedStorageLimit = 32 * 1024; @@ -429,6 +490,7 @@ struct StoragePackUnpack { std::ostringstream lhs; // NOTE(marcos): 8bit and 16bit word packing -- the smallest integer // type granularity available in HLSL SM 5.1 is 32bit (int/uint): + // TODO(soufianekhiat): Type value_type = op->value.type(); if (value_type.bits() == 32) { // storing a 32bit word? great! just reinterpret value to uint32: @@ -448,21 +510,21 @@ struct StoragePackUnpack { index << i << " / " << divisor; ostringstream word; word << cg.print_name(op->name) - << "[" + index.str() + "]"; + << "[" + index.str() + "]"; // now mask the appropriate bits: ostringstream mask; mask << "(" - << hex_literal((1 << bits) - 1) - << " << " - << "(" << bits << "*(" << i << " % " << divisor << " ))" - << ")"; + << hex_literal((1 << bits) - 1) + << " << " + << "(" << bits << "*(" << i << " % " << divisor << " ))" + << ")"; // apply the mask to the rhs value: ostringstream value; value << "(" - << mask.str() - << " & " - << "(" << cg.print_expr(op->value) << ")" - << ")"; + << mask.str() + << " & " + << "(" << cg.print_expr(op->value) << ")" + << ")"; // the performance impact of atomic operations on shared memory is // not well documented... 
here is something: @@ -813,6 +875,37 @@ string CodeGen_D3D12Compute_Dev::CodeGen_D3D12Compute_C::print_assignment(Type t return CodeGen_GPU_C::print_assignment(type, rhs_modified); } +string CodeGen_D3D12Compute_Dev::CodeGen_D3D12Compute_C::print_parameters(std::vector const &args, bool as_global) { + ostringstream ss; + for (const auto &arg : args) { + if (!as_global) { + ss << ",\n"; + ss << " "; + } + if (arg.is_buffer) { + // NOTE(marcos): Passing all buffers as RWBuffers in order to bind + // all buffers as UAVs since there is no way the runtime can know + // if a given halide_buffer_t is read-only (SRV) or read-write... + ss << "RW" + << "Buffer" + << "<" << print_type(arg.type) << ">" + << " " << print_name(arg.name); + Allocation alloc; + alloc.type = arg.type; + allocations.push(arg.name, alloc); + } else { + ss << "uniform" + << " " << print_type(arg.type) + << " " << print_name(arg.name); + } + if (as_global) { + ss << ";\n"; + } + } + + return ss.str(); +} + string CodeGen_D3D12Compute_Dev::CodeGen_D3D12Compute_C::print_vanilla_cast(Type type, const string &value_expr) { ostringstream ss; ss << print_type(type) << "(" << value_expr << ")"; @@ -884,8 +977,12 @@ string CodeGen_D3D12Compute_Dev::CodeGen_D3D12Compute_C::print_cast(Type target_ // some emulation in code... 
internal_assert(target_type.bits() >= 8); internal_assert(source_type.bits() >= 8); - internal_assert(target_type.bits() <= 32); - internal_assert(source_type.bits() <= 32); + // HLSL 6.0 support 64 bits integers and floats + if (target.get_d3d12_capability_lower_bound() == 51) + { + internal_assert(target_type.bits() <= 32); + internal_assert(source_type.bits() <= 32); + } internal_assert(target_type.bits() % 8 == 0); internal_assert(source_type.bits() % 8 == 0); @@ -1105,7 +1202,7 @@ void CodeGen_D3D12Compute_Dev::CodeGen_D3D12Compute_C::add_kernel(Stmt s, const Allocate *op = sop.as(); internal_assert(op->extents.size() == 1); internal_assert(op->type.lanes() == 1); - // In D3D12/HLSL, only 32bit types (int/uint/float) are suppoerted (even + // In D3D12/HLSL, only 32bit types (int/uint/float) are supported (even // though things are changing with newer shader models). Since there is // no uint8 type, we'll have to emulate it with 32bit types... // This will also require pack/unpack logic with bit-masking and aliased @@ -1180,6 +1277,11 @@ void CodeGen_D3D12Compute_Dev::CodeGen_D3D12Compute_C::add_kernel(Stmt s, }; FindThreadGroupSize ftg; s.accept(&ftg); + + if (target.get_d3d12_capability_lower_bound() >= 60) { + stream << print_parameters(args, true); + } + // for undetermined 'numthreads' dimensions, insert placeholders to the code // such as '__NUM_TREADS_X' that will later be patched when D3DCompile() is // invoked in halide_d3d12compute_run() @@ -1196,25 +1298,8 @@ void CodeGen_D3D12Compute_Dev::CodeGen_D3D12Compute_C::add_kernel(Stmt s, << "uint3 tgroup_index : SV_GroupID,\n" << " " << "uint3 tid_in_tgroup : SV_GroupThreadID"; - for (const auto &arg : args) { - stream << ",\n"; - stream << " "; - if (arg.is_buffer) { - // NOTE(marcos): Passing all buffers as RWBuffers in order to bind - // all buffers as UAVs since there is no way the runtime can know - // if a given halide_buffer_t is read-only (SRV) or read-write... 
- stream << "RW" - << "Buffer" - << "<" << print_type(arg.type) << ">" - << " " << print_name(arg.name); - Allocation alloc; - alloc.type = arg.type; - allocations.push(arg.name, alloc); - } else { - stream << "uniform" - << " " << print_type(arg.type) - << " " << print_name(arg.name); - } + if (target.get_d3d12_capability_lower_bound() < 60) { + stream << print_parameters(args, false); } stream << ")\n"; @@ -1237,6 +1322,8 @@ void CodeGen_D3D12Compute_Dev::CodeGen_D3D12Compute_C::add_kernel(Stmt s, void CodeGen_D3D12Compute_Dev::init_module() { debug(2) << "D3D12Compute device codegen init_module\n"; + Target const& target = d3d12compute_c.get_target(); + // wipe the internal kernel source src_stream.str(""); src_stream.clear(); @@ -1262,14 +1349,18 @@ void CodeGen_D3D12Compute_Dev::init_module() { // warning X4714 : sum of temp registers and indexable temp registers times 256 threads exceeds the recommended total 16384. Performance may be reduced << "#pragma warning( disable : 4714 )" "\n" + //// warning: magnitude of floating-point constant too large for type 'float'; maximum is 3.40282347E+38 [-Wliteral-range] + //<< "#pragma warning( disable : 462 )" // OR 4056 + // "\n" << "\n"; src_stream << "#define halide_maybe_unused(x) (void)(x)\n"; // Write out the Halide math functions. - src_stream + //src_stream //<< "namespace {\n" // HLSL does not support unnamed namespaces... #if DEBUG_TYPES + src_stream << "#define int8 int\n" << "#define int16 int\n" << "#define int32 int\n" @@ -1287,11 +1378,25 @@ void CodeGen_D3D12Compute_Dev::init_module() { << "\n" << "#define asint32 asint\n" << "#define asuint32 asuint\n" - << "\n" + << "\n"; #endif - << "float nan_f32() { return 1.#IND; } \n" // Quiet NaN with minimum fractional value. 
- << "float neg_inf_f32() { return -1.#INF; } \n" - << "float inf_f32() { return +1.#INF; } \n" + if (target.get_d3d12_capability_lower_bound() < 60) { + src_stream + << "float nan_f32() { return 1.#IND; } \n" // Quiet NaN with minimum fractional value. + << "float neg_inf_f32() { return -1.#INF; } \n" + << "float inf_f32() { return +1.#INF; } \n"; + } + else { + // HLSL 6.x: Still a proposal https://microsoft.github.io/hlsl-specs/proposals/0003-numeric-constants.html + src_stream + // NOTE(soufianekhiat): cf. https://github.com/microsoft/hlsl-specs/issues/210 + << "float nan_f32() { return asfloat(0x7FC00000); } \n" + << "float neg_inf_f32() { return -1.#INF; } \n" + << "float inf_f32() { return +1.#INF; } \n"; + //<< "float neg_inf_f32() { return -1.e1000f; } \n" + //<< "float inf_f32() { return +1.e1000f; } \n"; + } + src_stream << "#define is_inf_f32 isinf \n" << "#define is_finite_f32 isfinite \n" << "#define is_nan_f32 isnan \n" diff --git a/src/CodeGen_WebAssembly.cpp b/src/CodeGen_WebAssembly.cpp index 53329ed52172..666d97d7ed93 100644 --- a/src/CodeGen_WebAssembly.cpp +++ b/src/CodeGen_WebAssembly.cpp @@ -65,11 +65,13 @@ const WasmIntrinsic intrinsic_defs[] = { {"llvm.sadd.sat.v16i8", Int(8, 16), "saturating_add", {Int(8, 16), Int(8, 16)}, Target::WasmSimd128}, {"llvm.uadd.sat.v16i8", UInt(8, 16), "saturating_add", {UInt(8, 16), UInt(8, 16)}, Target::WasmSimd128}, +#if LLVM_VERSION < 200 // TODO: Are these really different than the standard llvm.*sub.sat.*?
{"llvm.wasm.sub.sat.signed.v16i8", Int(8, 16), "saturating_sub", {Int(8, 16), Int(8, 16)}, Target::WasmSimd128}, {"llvm.wasm.sub.sat.unsigned.v16i8", UInt(8, 16), "saturating_sub", {UInt(8, 16), UInt(8, 16)}, Target::WasmSimd128}, {"llvm.wasm.sub.sat.signed.v8i16", Int(16, 8), "saturating_sub", {Int(16, 8), Int(16, 8)}, Target::WasmSimd128}, {"llvm.wasm.sub.sat.unsigned.v8i16", UInt(16, 8), "saturating_sub", {UInt(16, 8), UInt(16, 8)}, Target::WasmSimd128}, +#endif {"llvm.wasm.avgr.unsigned.v16i8", UInt(8, 16), "rounding_halving_add", {UInt(8, 16), UInt(8, 16)}, Target::WasmSimd128}, {"llvm.wasm.avgr.unsigned.v8i16", UInt(16, 8), "rounding_halving_add", {UInt(16, 8), UInt(16, 8)}, Target::WasmSimd128}, diff --git a/src/DeviceInterface.cpp b/src/DeviceInterface.cpp index 27f6b549ee7d..8d4917025cdb 100644 --- a/src/DeviceInterface.cpp +++ b/src/DeviceInterface.cpp @@ -70,7 +70,6 @@ bool host_supports_target_device(const Target &t) { const halide_device_interface_t *get_device_interface_for_device_api(DeviceAPI d, const Target &t, const char *error_site) { - if (d == DeviceAPI::Default_GPU) { d = get_default_device_api_for_target(t); if (d == DeviceAPI::Host) { diff --git a/src/LLVM_Headers.h b/src/LLVM_Headers.h index bd76de8baae6..416a62cd7390 100644 --- a/src/LLVM_Headers.h +++ b/src/LLVM_Headers.h @@ -85,7 +85,9 @@ #include #include #include +#if LLVM_VERSION < 200 #include +#endif #include #include #include diff --git a/src/MainPage.h b/src/MainPage.h index db451ed8efaf..1882a667307e 100644 --- a/src/MainPage.h +++ b/src/MainPage.h @@ -25,6 +25,15 @@ * particular check out local_laplacian, bilateral_grid, and * interpolate. * + * If you are looking for a binary release, we suggest using pip to install + * either a stable release or + * a nightly build from + * Test PyPI. + * + * If you plan to build your program with CMake, you might be interested in + * documentation for + * the Halide CMake helpers. 
+ * * Below are links to the documentation for the important classes in Halide. * * For defining, scheduling, and evaluating basic pipelines: diff --git a/src/Serialization.h b/src/Serialization.h index 9eb7f71c33cc..bad42cf0b4f8 100644 --- a/src/Serialization.h +++ b/src/Serialization.h @@ -8,7 +8,6 @@ namespace Halide { /// @brief Serialize a Halide pipeline into the given data buffer. /// @param pipeline The Halide pipeline to serialize. /// @param data The data buffer to store the serialized Halide pipeline into. Any existing contents will be destroyed. -/// @param params Map of named parameters which will get populated during serialization (can be used to bind external parameters to objects in the pipeline by name). void serialize_pipeline(const Pipeline &pipeline, std::vector &data); /// @brief Serialize a Halide pipeline into the given data buffer. diff --git a/src/Target.cpp b/src/Target.cpp index 8d13e1e79677..9fade9a6bfd2 100644 --- a/src/Target.cpp +++ b/src/Target.cpp @@ -483,6 +483,43 @@ Target get_host_target() { namespace { +Target::Feature calculate_host_d3d12_capability(Target t) { + const auto *interface = get_device_interface_for_device_api(DeviceAPI::D3D12Compute, t); + internal_assert(interface->compute_capability); + int major, minor; + int err = interface->compute_capability(nullptr, &major, &minor); + internal_assert(err == 0) << "Failed to query d3d12 compute capability\n"; + int ver = major * 10 + minor; + if (ver < 51) { + return Target::FeatureEnd; + } else if (ver < 60) { + return Target::D3D12Compute; + } else if (ver < 61) { + return Target::D3D12ComputeSM60; + } else if (ver < 62) { + return Target::D3D12ComputeSM61; + } else if (ver < 63) { + return Target::D3D12ComputeSM62; + } else if (ver < 64) { + return Target::D3D12ComputeSM63; + } else if (ver < 65) { + return Target::D3D12ComputeSM64; + } else if (ver < 66) { + return Target::D3D12ComputeSM65; + } else if (ver < 67) { + return Target::D3D12ComputeSM66; + } else if (ver < 68) 
{ + return Target::D3D12ComputeSM67; + } else { + return Target::D3D12ComputeSM68; + } +} + +Target::Feature get_host_d3d12_capability(Target t) { + static Target::Feature cap = calculate_host_d3d12_capability(t); + return cap; +} + Target::Feature calculate_host_cuda_capability(Target t) { const auto *interface = get_device_interface_for_device_api(DeviceAPI::CUDA, t); internal_assert(interface->compute_capability); @@ -541,6 +578,7 @@ Target::Feature get_host_vulkan_capability(Target t) { return cap; } +// Keep this list in sync in HalideGeneratorHelpers.cmake const std::map os_name_map = { {"os_unknown", Target::OSUnknown}, {"linux", Target::Linux}, @@ -562,6 +600,7 @@ bool lookup_os(const std::string &tok, Target::OS &result) { return false; } +// Keep this list in sync in HalideGeneratorHelpers.cmake const std::map arch_name_map = { {"arch_unknown", Target::ArchUnknown}, {"x86", Target::X86}, @@ -670,6 +709,15 @@ const std::map feature_name_map = { {"trace_realizations", Target::TraceRealizations}, {"trace_pipeline", Target::TracePipeline}, {"d3d12compute", Target::D3D12Compute}, + {"d3d12compute_sm60", Target::D3D12ComputeSM60}, + {"d3d12compute_sm61", Target::D3D12ComputeSM61}, + {"d3d12compute_sm62", Target::D3D12ComputeSM62}, + {"d3d12compute_sm63", Target::D3D12ComputeSM63}, + {"d3d12compute_sm64", Target::D3D12ComputeSM64}, + {"d3d12compute_sm65", Target::D3D12ComputeSM65}, + {"d3d12compute_sm66", Target::D3D12ComputeSM66}, + {"d3d12compute_sm67", Target::D3D12ComputeSM67}, + {"d3d12compute_sm68", Target::D3D12ComputeSM68}, {"strict_float", Target::StrictFloat}, {"tsan", Target::TSAN}, {"asan", Target::ASAN}, @@ -847,6 +895,21 @@ bool merge_string(Target &t, const std::string &target) { } } + if (is_host && + t.has_feature(Target::D3D12Compute) && + !t.has_feature(Target::D3D12ComputeSM60) && + !t.has_feature(Target::D3D12ComputeSM61) && + !t.has_feature(Target::D3D12ComputeSM62) && + !t.has_feature(Target::D3D12ComputeSM63) && + 
!t.has_feature(Target::D3D12ComputeSM64) && + !t.has_feature(Target::D3D12ComputeSM65) && + !t.has_feature(Target::D3D12ComputeSM66) && + !t.has_feature(Target::D3D12ComputeSM67) && + !t.has_feature(Target::D3D12ComputeSM68)) { + // Detect host d3d12 capability + t.set_feature(get_host_d3d12_capability(t)); + } + if (is_host && t.has_feature(Target::CUDA) && !t.has_feature(Target::CUDACapability30) && @@ -1217,6 +1280,40 @@ bool Target::has_gpu_feature() const { has_feature(WebGPU)); } +int Target::get_d3d12_capability_lower_bound() const { + if (!has_feature(Target::D3D12Compute)) { + return -1; + } + if (has_feature(Target::D3D12ComputeSM60)) { + return 60; + } + if (has_feature(Target::D3D12ComputeSM61)) { + return 61; + } + if (has_feature(Target::D3D12ComputeSM62)) { + return 62; + } + if (has_feature(Target::D3D12ComputeSM63)) { + return 63; + } + if (has_feature(Target::D3D12ComputeSM64)) { + return 64; + } + if (has_feature(Target::D3D12ComputeSM65)) { + return 65; + } + if (has_feature(Target::D3D12ComputeSM66)) { + return 66; + } + if (has_feature(Target::D3D12ComputeSM67)) { + return 67; + } + if (has_feature(Target::D3D12ComputeSM68)) { + return 68; + } + return 51; +} + int Target::get_cuda_capability_lower_bound() const { if (!has_feature(Target::CUDA)) { return -1; @@ -1305,13 +1402,13 @@ bool Target::supports_type(const Type &t) const { if (t.bits() == 64) { if (t.is_float()) { return (!has_feature(Metal) && - !has_feature(D3D12Compute) && + (!has_feature(D3D12Compute) || get_d3d12_capability_lower_bound() >= 60) && (!has_feature(Target::OpenCL) || has_feature(Target::CLDoubles)) && (!has_feature(Vulkan) || has_feature(Target::VulkanFloat64)) && !has_feature(WebGPU)); } else { return (!has_feature(Metal) && - !has_feature(D3D12Compute) && + (!has_feature(D3D12Compute) || get_d3d12_capability_lower_bound() >= 60) && (!has_feature(Vulkan) || has_feature(Target::VulkanInt64)) && !has_feature(WebGPU)); } @@ -1341,7 +1438,7 @@ bool Target::supports_type(const Type &t, DeviceAPI device) const { } else if (device
== DeviceAPI::D3D12Compute) { // Shader Model 5.x can optionally support double-precision; 64-bit int // types are not supported. - return t.bits() < 64; + return t.bits() < 64 || get_d3d12_capability_lower_bound() >= 60; } else if (device == DeviceAPI::Vulkan) { if (t.is_float() && t.bits() == 64) { return has_feature(Target::VulkanFloat64); @@ -1504,7 +1601,7 @@ bool Target::get_runtime_compatible_target(const Target &other, Target &result) // (c) must match across both targets; it is an error if one target has the feature and the other doesn't // clang-format off - const std::array union_features = {{ + const std::array union_features = {{ // These are true union features. CUDA, D3D12Compute, @@ -1526,6 +1623,16 @@ bool Target::get_runtime_compatible_target(const Target &other, Target &result) CUDACapability80, CUDACapability86, + D3D12ComputeSM60, + D3D12ComputeSM61, + D3D12ComputeSM62, + D3D12ComputeSM63, + D3D12ComputeSM64, + D3D12ComputeSM65, + D3D12ComputeSM66, + D3D12ComputeSM67, + D3D12ComputeSM68, + HVX_v62, HVX_v65, HVX_v66, @@ -1618,6 +1725,42 @@ bool Target::get_runtime_compatible_target(const Target &other, Target &result) Target output = Target{os, arch, bits, processor_tune}; output.features = ((features | other.features) & union_mask) | ((features | other.features) & matching_mask) | ((features & other.features) & intersection_mask); + // Pick tight lower bound for D3D12 capability. Use fall-through to clear redundant features + int d3d12_a = get_d3d12_capability_lower_bound(); + int d3d12_b = other.get_d3d12_capability_lower_bound(); + + // get_d3d12_capability_lower_bound returns -1 when unused. Casting to unsigned makes this + // large, so min selects the true lower bound when one target doesn't specify a capability, + // and the other doesn't use D3D12 at all.
+ int d3d12_capability = std::min((unsigned)d3d12_a, (unsigned)d3d12_b); + if (d3d12_capability < 60) { + output.features.reset(D3D12ComputeSM60); + } + if (d3d12_capability < 61) { + output.features.reset(D3D12ComputeSM61); + } + if (d3d12_capability < 62) { + output.features.reset(D3D12ComputeSM62); + } + if (d3d12_capability < 63) { + output.features.reset(D3D12ComputeSM63); + } + if (d3d12_capability < 64) { + output.features.reset(D3D12ComputeSM64); + } + if (d3d12_capability < 65) { + output.features.reset(D3D12ComputeSM65); + } + if (d3d12_capability < 66) { + output.features.reset(D3D12ComputeSM66); + } + if (d3d12_capability < 67) { + output.features.reset(D3D12ComputeSM67); + } + if (d3d12_capability < 68) { + output.features.reset(D3D12ComputeSM68); + } + // Pick tight lower bound for CUDA capability. Use fall-through to clear redundant features int cuda_a = get_cuda_capability_lower_bound(); int cuda_b = other.get_cuda_capability_lower_bound(); diff --git a/src/Target.h b/src/Target.h index e48fa9ded8de..b3e452ff6196 100644 --- a/src/Target.h +++ b/src/Target.h @@ -137,6 +137,15 @@ struct Target { TraceRealizations = halide_target_feature_trace_realizations, TracePipeline = halide_target_feature_trace_pipeline, D3D12Compute = halide_target_feature_d3d12compute, + D3D12ComputeSM60 = halide_target_feature_d3d12compute_sm_60, + D3D12ComputeSM61 = halide_target_feature_d3d12compute_sm_61, + D3D12ComputeSM62 = halide_target_feature_d3d12compute_sm_62, + D3D12ComputeSM63 = halide_target_feature_d3d12compute_sm_63, + D3D12ComputeSM64 = halide_target_feature_d3d12compute_sm_64, + D3D12ComputeSM65 = halide_target_feature_d3d12compute_sm_65, + D3D12ComputeSM66 = halide_target_feature_d3d12compute_sm_66, + D3D12ComputeSM67 = halide_target_feature_d3d12compute_sm_67, + D3D12ComputeSM68 = halide_target_feature_d3d12compute_sm_68, StrictFloat = halide_target_feature_strict_float, TSAN = halide_target_feature_tsan, ASAN = halide_target_feature_asan, @@ -334,6 +343,11 
@@ struct Target { } } + /** Get the minimum D3D12 shader model found as an integer (e.g. 60 for SM 6.0). + * Returns 51 (SM 5.1, our baseline) if only D3D12Compute is set, and -1 if + * D3D12Compute is not enabled at all. */ + int get_d3d12_capability_lower_bound() const; + /** Get the minimum cuda capability found as an integer. Returns * 20 (our minimum supported cuda compute capability) if no cuda * features are set. */ diff --git a/src/autoschedulers/CMakeLists.txt b/src/autoschedulers/CMakeLists.txt index 6d2cb033ea7b..dc7328399996 100644 --- a/src/autoschedulers/CMakeLists.txt +++ b/src/autoschedulers/CMakeLists.txt @@ -22,6 +22,9 @@ function(add_autoscheduler) OUTPUT_NAME autoschedule_${name_lower}) endfunction() +## Autoschedulers must always be built for the same target as libHalide. +set(Halide_TARGET cmake) + add_subdirectory(common) add_subdirectory(adams2019) diff --git a/src/autoschedulers/adams2019/CMakeLists.txt b/src/autoschedulers/adams2019/CMakeLists.txt index a4db5e4d8910..0d4ae2a8564f 100644 --- a/src/autoschedulers/adams2019/CMakeLists.txt +++ b/src/autoschedulers/adams2019/CMakeLists.txt @@ -22,57 +22,16 @@ target_link_libraries(adams2019_cost_model.generator PRIVATE Halide::Generator) add_halide_library(adams2019_cost_model FROM adams2019_cost_model.generator GENERATOR cost_model FUNCTION_NAME cost_model - TARGETS cmake) + FEATURES[x86-64-osx] avx2 sse41 + FEATURES[arm-64-osx] arm_dot_prod-arm_fp16) + add_halide_library(adams2019_train_cost_model FROM adams2019_cost_model.generator GENERATOR train_cost_model FUNCTION_NAME train_cost_model - TARGETS cmake + FEATURES[x86-64-osx] avx2 sse41 + FEATURES[arm-64-osx] arm_dot_prod-arm_fp16 USE_RUNTIME adams2019_cost_model.runtime) -# TODO: replace when we support "fat" objects in generators -list(LENGTH CMAKE_OSX_ARCHITECTURES num_archs) -if (APPLE AND num_archs GREATER 1) - if (NOT "x86_64" IN_LIST CMAKE_OSX_ARCHITECTURES OR - NOT "arm64" IN_LIST CMAKE_OSX_ARCHITECTURES) - message(FATAL_ERROR "Don't know how
to compile for ${CMAKE_OSX_ARCHITECTURES}") - endif () - - if (Halide_CMAKE_TARGET MATCHES "x86") - set(arch arm) - else () - set(arch x86) - endif () - - add_halide_library(adams2019_cost_model-arch FROM adams2019_cost_model.generator - GENERATOR cost_model - FUNCTION_NAME cost_model - TARGETS osx-${arch}-64) - - add_halide_library(adams2019_train_cost_model-arch FROM adams2019_cost_model.generator - GENERATOR train_cost_model - FUNCTION_NAME train_cost_model - TARGETS osx-${arch}-64 - USE_RUNTIME adams2019_cost_model-arch.runtime) - - add_custom_command( - TARGET adams2019_cost_model POST_BUILD - COMMAND lipo -create $ $ -output $ - VERBATIM - ) - - add_custom_command( - TARGET adams2019_cost_model.runtime POST_BUILD - COMMAND lipo -create $ $ -output $ - VERBATIM - ) - - add_custom_command( - TARGET adams2019_train_cost_model POST_BUILD - COMMAND lipo -create $ $ -output $ - VERBATIM - ) -endif () - # adams2019_retrain_cost_model if (WITH_UTILS) add_executable(adams2019_retrain_cost_model diff --git a/src/autoschedulers/anderson2021/CMakeLists.txt b/src/autoschedulers/anderson2021/CMakeLists.txt index 00fa4f297769..649fa59753e3 100644 --- a/src/autoschedulers/anderson2021/CMakeLists.txt +++ b/src/autoschedulers/anderson2021/CMakeLists.txt @@ -21,11 +21,13 @@ target_link_libraries(anderson2021_cost_model.generator PRIVATE Halide::Generato add_halide_library(anderson2021_cost_model FROM anderson2021_cost_model.generator GENERATOR cost_model FUNCTION_NAME cost_model - TARGETS cmake) + FEATURES[x86-64-osx] avx2 sse41 + FEATURES[arm-64-osx] arm_dot_prod-arm_fp16) add_halide_library(anderson2021_train_cost_model FROM anderson2021_cost_model.generator GENERATOR train_cost_model FUNCTION_NAME train_cost_model - TARGETS cmake + FEATURES[x86-64-osx] avx2 sse41 + FEATURES[arm-64-osx] arm_dot_prod-arm_fp16 USE_RUNTIME anderson2021_cost_model.runtime) ## retrain_cost_model @@ -121,7 +123,7 @@ if (WITH_TESTS) _add_test(anderson2021_test_storage_strides) 
add_executable(anderson2021_test_thread_info test/thread_info.cpp LoopNest.cpp - FunctionDAG.cpp GPULoopInfo.cpp Tiling.cpp) + FunctionDAG.cpp GPULoopInfo.cpp Tiling.cpp) target_include_directories(anderson2021_test_thread_info PRIVATE "${Halide_SOURCE_DIR}/src/autoschedulers/anderson2021") target_link_libraries(anderson2021_test_thread_info PRIVATE Halide::Plugin) @@ -132,4 +134,4 @@ if (WITH_TESTS) target_link_libraries(anderson2021_test_tiling PRIVATE Halide::Plugin) _add_test(anderson2021_test_tiling) -endif() +endif () diff --git a/src/runtime/HalideRuntime.h b/src/runtime/HalideRuntime.h index f9079d5d7cb0..bc2d0dca6e7d 100644 --- a/src/runtime/HalideRuntime.h +++ b/src/runtime/HalideRuntime.h @@ -117,7 +117,7 @@ extern "C" { // For now, we say that if >= v12, and compiling on x86 or arm, // we assume support. This may need revision. #if defined(__GNUC__) && (__GNUC__ >= 12) -#if defined(__x86_64__) || (defined(__i386__) && (__GNUC__ >= 14) && defined(__SSE2__)) || defined(__arm__) || defined(__aarch64__) +#if defined(__x86_64__) || (defined(__i386__) && (__GNUC__ >= 14) && defined(__SSE2__)) || ((defined(__arm__) || defined(__aarch64__)) && (__GNUC__ >= 13)) #define HALIDE_CPP_COMPILER_HAS_FLOAT16 #endif #endif @@ -1417,7 +1417,16 @@ typedef enum halide_target_feature_t { halide_target_feature_strict_float, ///< Turn off all non-IEEE floating-point optimization. Currently applies only to LLVM targets. halide_target_feature_tsan, ///< Enable hooks for TSAN support. halide_target_feature_asan, ///< Enable hooks for ASAN support. - halide_target_feature_d3d12compute, ///< Enable Direct3D 12 Compute runtime. + halide_target_feature_d3d12compute, ///< Enable Direct3D 12 Compute runtime (default compatibility 5.1). + halide_target_feature_d3d12compute_sm_60, ///< Enable Direct3D 12 Compute runtime with HLSL 6.0. + halide_target_feature_d3d12compute_sm_61, ///< Enable Direct3D 12 Compute runtime with HLSL 6.1. 
+ halide_target_feature_d3d12compute_sm_62, ///< Enable Direct3D 12 Compute runtime with HLSL 6.2. + halide_target_feature_d3d12compute_sm_63, ///< Enable Direct3D 12 Compute runtime with HLSL 6.3. + halide_target_feature_d3d12compute_sm_64, ///< Enable Direct3D 12 Compute runtime with HLSL 6.4. + halide_target_feature_d3d12compute_sm_65, ///< Enable Direct3D 12 Compute runtime with HLSL 6.5. + halide_target_feature_d3d12compute_sm_66, ///< Enable Direct3D 12 Compute runtime with HLSL 6.6. + halide_target_feature_d3d12compute_sm_67, ///< Enable Direct3D 12 Compute runtime with HLSL 6.7. + halide_target_feature_d3d12compute_sm_68, ///< Enable Direct3D 12 Compute runtime with HLSL 6.8. halide_target_feature_check_unsafe_promises, ///< Insert assertions for promises. halide_target_feature_hexagon_dma, ///< Enable Hexagon DMA buffers. halide_target_feature_embed_bitcode, ///< Emulate clang -fembed-bitcode flag. diff --git a/src/runtime/HalideRuntimeD3D12Compute.h b/src/runtime/HalideRuntimeD3D12Compute.h index 6a8feba37ee7..dfa24f973c49 100644 --- a/src/runtime/HalideRuntimeD3D12Compute.h +++ b/src/runtime/HalideRuntimeD3D12Compute.h @@ -51,6 +51,10 @@ extern int halide_d3d12compute_wrap_buffer(void *user_context, struct halide_buf */ extern int halide_d3d12compute_detach_buffer(void *user_context, struct halide_buffer_t *buf); +/** Report the highest supported Shader Model through *major and *minor: 5.1 when only the legacy D3DCompiler_47 library is loaded (or the Shader Model 6 feature query fails), 6.0 through 6.7 when dxcompiler is loaded, and -1 / -1 together with an error return code when neither compiler library is available. +*/ +extern int halide_d3d12compute_compute_capability(void *user_context, int *major, int *minor); + /** Return the underlying ID3D12Resource for a halide_buffer_t. This resource * must be valid on an D3D12 device, unless halide_buffer_t has no associated * resource. If there is no device memory (device field is NULL), returns 0.
diff --git a/src/runtime/d3d12compute.cpp b/src/runtime/d3d12compute.cpp index f4f85180a56e..c55a54dd42bb 100644 --- a/src/runtime/d3d12compute.cpp +++ b/src/runtime/d3d12compute.cpp @@ -55,7 +55,10 @@ #if !defined(COBJMACROS) #define COBJMACROS #endif +// For HLSL 6.x #include "mini_d3d12.h" +#include "dxcapi.h" +#include "d3d12shader.h" // For all intents and purposes, we always want to use COMPUTE command lists // (and queues) ... @@ -232,6 +235,12 @@ WEAK void d3d12_free(void *p) { free(p); } +template +WEAK T zero_struct() { + T zero = {}; + return zero; +} + template WEAK T *malloct() { TRACELOG; @@ -244,12 +253,6 @@ WEAK T *malloct() { return p; } -template -WEAK T zero_struct() { - T zero = {}; - return zero; -} - #define hashmap_malloc(user_context, size) d3d12_malloc(size) #define hashmap_free(user_context, memory) d3d12_free(memory) #include "hashmap.h" @@ -434,6 +437,7 @@ WEAK DXGI_FORMAT FindD3D12FormatForHalideType(void *user_context, halide_type_t WEAK void *lib_d3d12 = nullptr; WEAK void *lib_D3DCompiler_47 = nullptr; WEAK void *lib_dxgi = nullptr; +WEAK void *lib_dxcompiler = nullptr; struct LibrarySymbol { template @@ -455,6 +459,8 @@ WEAK PFN_D3D12_SERIALIZE_ROOT_SIGNATURE D3D12SerializeRootSignature = nullptr; WEAK PFN_D3DCOMPILE D3DCompile = nullptr; WEAK PFN_CREATEDXGIFACORY1 CreateDXGIFactory1 = nullptr; +WEAK DxcCreateInstanceProc DxcCreateInstanceFunc = nullptr; + #if defined(__cplusplus) && !defined(_MSC_VER) #if defined(__MINGW32__) #undef __uuidof @@ -672,6 +678,10 @@ WEAK HANDLE hFenceEvent = nullptr; WEAK d3d12_command_allocator *cmd_allocator_main = nullptr; +// NOTE(soufianekhiat): used by HLSL 6+ +WEAK IDxcUtils *dx_hlsl6_utils = nullptr; +WEAK IDxcCompiler3 *dx_hlsl6_compiler = nullptr; + // NOTE(marcos): the term "frame" here is borrowed from graphics to delineate the // lifetime of a kernel dispatch; more specifically, a number of "expensive" API // objects is necessary for each dispatch, and they must remain alive and immutable 
@@ -934,12 +944,14 @@ WEAK void D3D12LoadDependencies(void *user_context) { "d3d12.dll", "D3DCompiler_47.dll", "dxgi.dll", + "dxcompiler.dll" }; static const int num_libs = sizeof(lib_names) / sizeof(lib_names[0]); void **lib_handles[num_libs] = { &lib_d3d12, &lib_D3DCompiler_47, &lib_dxgi, + &lib_dxcompiler }; for (size_t i = 0; i < num_libs; i++) { // Only attempt to load a library if the it has not been loaded already @@ -963,6 +975,13 @@ WEAK void D3D12LoadDependencies(void *user_context) { D3D12SerializeRootSignature = LibrarySymbol::get(user_context, lib_d3d12, "D3D12SerializeRootSignature"); D3DCompile = LibrarySymbol::get(user_context, lib_D3DCompiler_47, "D3DCompile"); CreateDXGIFactory1 = LibrarySymbol::get(user_context, lib_dxgi, "CreateDXGIFactory1"); + DxcCreateInstanceFunc = LibrarySymbol::get(user_context, lib_dxcompiler, "DxcCreateInstance"); + + if (DxcCreateInstanceFunc != nullptr) + { + DxcCreateInstanceFunc(CLSID_DxcUtils, IID_PPV_ARGS(&dx_hlsl6_utils)); + DxcCreateInstanceFunc(CLSID_DxcCompiler, IID_PPV_ARGS(&dx_hlsl6_compiler)); + } // Windows x64 follows the LLP64 integer type convention: // https://msdn.microsoft.com/en-us/library/windows/desktop/aa383751(v=vs.85).aspx @@ -989,11 +1008,8 @@ WEAK void D3D12LoadDependencies(void *user_context) { static_assert(sizeof(UINT16) == (16 / 8)); static_assert(sizeof(UINT32) == (32 / 8)); static_assert(sizeof(UINT64) == (64 / 8)); -#ifdef BITS_64 + // d3d12 is only allowed on 64 bits static_assert(sizeof(SIZE_T) == (64 / 8)); -#else - static_assert(sizeof(SIZE_T) == (32 / 8)); -#endif } #if HALIDE_D3D12_PIX @@ -1861,63 +1877,95 @@ WEAK d3d12_function *d3d12_compile_shader(d3d12_device *device, d3d12_library *l int source_size = library->source_length; using SS16 = StackStringStreamPrinter<16>; SS16 SS[4] = {SS16(nullptr), SS16(nullptr), SS16(nullptr), SS16(nullptr)}; - D3D_SHADER_MACRO pDefines[] = { - {"__GROUPSHARED_SIZE_IN_BYTES", (SS[0] << shared_mem_bytes).str()}, - {"__NUM_TREADS_X", (SS[1] << 
threadsX).str()}, - {"__NUM_TREADS_Y", (SS[2] << threadsY).str()}, - {"__NUM_TREADS_Z", (SS[3] << threadsZ).str()}, - {nullptr, nullptr}}; - const char *shaderName = name; // only used for debug information - ID3DInclude *includeHandler = nullptr; - const char *entryPoint = name; - const char *target = "cs_5_1"; // all d3d12 hardware support SM 5.1 - UINT flags1 = 0; - UINT flags2 = 0; // flags related to effects (.fx files) - ID3DBlob *shaderBlob = nullptr; - ID3DBlob *errorMsgs = nullptr; - - flags1 |= D3DCOMPILE_ENABLE_UNBOUNDED_DESCRIPTOR_TABLES; + + // dump_shader(source); + if (dx_hlsl6_compiler != nullptr) + { + DxcBuffer source_buffer; + source_buffer.Ptr = source; + source_buffer.Size = source_size; + source_buffer.Encoding = DXC_CP_ACP; // Assume BOM says UTF8 or UTF16 or this is ANSI text. + + LPCWSTR pszArgs[] = + { + L"-E", name, // Entry point. + L"-T", "cs_6_0", // Target. + //L"-Zs", // Enable debug information (slim format) + L"-D", (SS[0] << "__GROUPSHARED_SIZE_IN_BYTES=" << shared_mem_bytes).str(), + L"-D", (SS[1] << "__NUM_TREADS_X=" << threadsX).str(), + L"-D", (SS[2] << "__NUM_TREADS_Y=" << threadsY).str(), + L"-D", (SS[3] << "__NUM_TREADS_Z=" << threadsZ).str(), + }; + + IDxcIncludeHandler* include_handler; + dx_hlsl6_utils->CreateDefaultIncludeHandler(&include_handler); + + IDxcResult* pResults; + dx_hlsl6_compiler->Compile( + &source_buffer, // Source buffer. + pszArgs, // Array of pointers to arguments. + 12, // Number of arguments. + nullptr, //include_handler, // User-provided interface to handle #include directives (optional). + IID_PPV_ARGS(&pResults) // Compiler output status, buffer, and errors.
+ ); + } else { + D3D_SHADER_MACRO pDefines[] = { + {"__GROUPSHARED_SIZE_IN_BYTES", (SS[0] << shared_mem_bytes).str()}, + {"__NUM_TREADS_X", (SS[1] << threadsX).str()}, + {"__NUM_TREADS_Y", (SS[2] << threadsY).str()}, + {"__NUM_TREADS_Z", (SS[3] << threadsZ).str()}, + {nullptr, nullptr}}; + + const char *shaderName = name; // only used for debug information + ID3DInclude *includeHandler = nullptr; + const char *entryPoint = name; + const char *target = "cs_5_1"; // all d3d12 hardware support SM 5.1 + UINT flags1 = 0; + UINT flags2 = 0; // flags related to effects (.fx files) + ID3DBlob *shaderBlob = nullptr; + ID3DBlob *errorMsgs = nullptr; + + flags1 |= D3DCOMPILE_ENABLE_UNBOUNDED_DESCRIPTOR_TABLES; #if HALIDE_D3D12_DEBUG_SHADERS - flags1 |= D3DCOMPILE_DEBUG; - flags1 |= D3DCOMPILE_SKIP_OPTIMIZATION; - // flags1 |= D3DCOMPILE_RESOURCES_MAY_ALIAS; - // flags1 |= D3DCOMPILE_ALL_RESOURCES_BOUND; + flags1 |= D3DCOMPILE_DEBUG; + flags1 |= D3DCOMPILE_SKIP_OPTIMIZATION; + // flags1 |= D3DCOMPILE_RESOURCES_MAY_ALIAS; + // flags1 |= D3DCOMPILE_ALL_RESOURCES_BOUND; #endif - // dump_shader(source); + HRESULT result = D3DCompile(source, source_size, shaderName, pDefines, includeHandler, entryPoint, target, flags1, flags2, &shaderBlob, &errorMsgs); - HRESULT result = D3DCompile(source, source_size, shaderName, pDefines, includeHandler, entryPoint, target, flags1, flags2, &shaderBlob, &errorMsgs); + if (FAILED(result) || (shaderBlob == nullptr)) { + TRACEPRINT("Unable to compile D3D12 compute shader (HRESULT=" << (void *)(int64_t)result << ", ShaderBlob=" << shaderBlob << " entry=" << entryPoint << ").\n"); + dump_shader(source, errorMsgs); + Release_ID3D12Object(errorMsgs); + TRACEFATAL("[end-of-shader-dump]"); + return nullptr; + } - if (FAILED(result) || (shaderBlob == nullptr)) { - TRACEPRINT("Unable to compile D3D12 compute shader (HRESULT=" << (void *)(int64_t)result << ", ShaderBlob=" << shaderBlob << " entry=" << entryPoint << ").\n"); - dump_shader(source, errorMsgs);
- Release_ID3D12Object(errorMsgs); - TRACEFATAL("[end-of-shader-dump]"); - return nullptr; - } + TRACEPRINT("SUCCESS while compiling D3D12 compute shader with entry name '" << entryPoint << "'!\n"); - TRACEPRINT("SUCCESS while compiling D3D12 compute shader with entry name '" << entryPoint << "'!\n"); + // even though it was successful, there may have been warning messages emitted by the compiler: + if (errorMsgs != nullptr) { + dump_shader(source, errorMsgs); + Release_ID3D12Object(errorMsgs); + } - // even though it was successful, there may have been warning messages emitted by the compiler: - if (errorMsgs != nullptr) { - dump_shader(source, errorMsgs); - Release_ID3D12Object(errorMsgs); - } + d3d12_function *function = malloct(); + function->shaderBlob = shaderBlob; + function->rootSignature = rootSignature; + rootSignature->AddRef(); - d3d12_function *function = malloct(); - function->shaderBlob = shaderBlob; - function->rootSignature = rootSignature; - rootSignature->AddRef(); + d3d12_compute_pipeline_state *pipeline_state = new_compute_pipeline_state_with_function(device, function); + if (pipeline_state == nullptr) { + TRACEFATAL("D3D12Compute: Could not allocate pipeline state."); + release_object(function); + return nullptr; + } + function->pipeline_state = pipeline_state; - d3d12_compute_pipeline_state *pipeline_state = new_compute_pipeline_state_with_function(device, function); - if (pipeline_state == nullptr) { - TRACEFATAL("D3D12Compute: Could not allocate pipeline state."); - release_object(function); - return nullptr; + return function; } - function->pipeline_state = pipeline_state; - - return function; } WEAK d3d12_function *new_function_with_name(d3d12_device *device, d3d12_library *library, const char *name, size_t name_len, @@ -3507,6 +3555,44 @@ WEAK int halide_d3d12compute_wrap_buffer(void *user_context, struct halide_buffe return halide_error_code_success; } +WEAK int halide_d3d12compute_compute_capability(void* user_context, int* major, int* 
minor) { + TRACELOG; + if (lib_dxcompiler == nullptr && lib_D3DCompiler_47 != nullptr) { + *major = 5; + *minor = 1; + + return halide_error_code_success; + } + else if (lib_dxcompiler != nullptr) { + D3D12_FEATURE_DATA_SHADER_MODEL shaderModel = {}; + + shaderModel.HighestShaderModel = D3D_SHADER_MODEL_6_7; + + ID3D12Device *d3ddevice = (*device); + HRESULT hr = d3ddevice->CheckFeatureSupport(D3D12_FEATURE_SHADER_MODEL, &shaderModel, sizeof(shaderModel)); + while (hr == E_INVALIDARG && shaderModel.HighestShaderModel > D3D_SHADER_MODEL_6_0) { + shaderModel.HighestShaderModel = static_cast(static_cast(shaderModel.HighestShaderModel) - 1); + hr = d3ddevice->CheckFeatureSupport(D3D12_FEATURE_SHADER_MODEL, &shaderModel, sizeof(shaderModel)); + } + + if (FAILED(hr)) { + shaderModel.HighestShaderModel = D3D_SHADER_MODEL_5_1; + *major = 5; + *minor = 1; + } else { + *major = 6; + *minor = shaderModel.HighestShaderModel - 0x60; + } + + return halide_error_code_success; + } + + *major = -1; + *minor = -1; + + return halide_error_code_generic_error; +} + WEAK const struct halide_device_interface_t *halide_d3d12compute_device_interface() { TRACELOG; return &d3d12compute_device_interface; @@ -3560,7 +3646,7 @@ WEAK halide_device_interface_t d3d12compute_device_interface = { halide_device_release_crop, halide_device_wrap_native, halide_device_detach_native, - nullptr, + halide_d3d12compute_compute_capability, &d3d12compute_device_interface_impl}; } // namespace D3D12Compute diff --git a/src/runtime/d3d12shader.h b/src/runtime/d3d12shader.h new file mode 100644 index 000000000000..d305b48a84d5 --- /dev/null +++ b/src/runtime/d3d12shader.h @@ -0,0 +1,487 @@ +////////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+// +// File: D3D12Shader.h +// Content: D3D12 Shader Types and APIs +// +////////////////////////////////////////////////////////////////////////////// + +#ifndef __D3D12SHADER_H__ +#define __D3D12SHADER_H__ + +//#include "d3dcommon.h" + +typedef enum D3D12_SHADER_VERSION_TYPE +{ + D3D12_SHVER_PIXEL_SHADER = 0, + D3D12_SHVER_VERTEX_SHADER = 1, + D3D12_SHVER_GEOMETRY_SHADER = 2, + + // D3D11 Shaders + D3D12_SHVER_HULL_SHADER = 3, + D3D12_SHVER_DOMAIN_SHADER = 4, + D3D12_SHVER_COMPUTE_SHADER = 5, + + // D3D12 Shaders + D3D12_SHVER_LIBRARY = 6, + + D3D12_SHVER_RAY_GENERATION_SHADER = 7, + D3D12_SHVER_INTERSECTION_SHADER = 8, + D3D12_SHVER_ANY_HIT_SHADER = 9, + D3D12_SHVER_CLOSEST_HIT_SHADER = 10, + D3D12_SHVER_MISS_SHADER = 11, + D3D12_SHVER_CALLABLE_SHADER = 12, + + D3D12_SHVER_MESH_SHADER = 13, + D3D12_SHVER_AMPLIFICATION_SHADER = 14, + + D3D12_SHVER_RESERVED0 = 0xFFF0, +} D3D12_SHADER_VERSION_TYPE; + +#define D3D12_SHVER_GET_TYPE(_Version) \ + (((_Version) >> 16) & 0xffff) +#define D3D12_SHVER_GET_MAJOR(_Version) \ + (((_Version) >> 4) & 0xf) +#define D3D12_SHVER_GET_MINOR(_Version) \ + (((_Version) >> 0) & 0xf) + +// Slot ID for library function return +#define D3D_RETURN_PARAMETER_INDEX (-1) + +typedef D3D_RESOURCE_RETURN_TYPE D3D12_RESOURCE_RETURN_TYPE; + +typedef D3D_CBUFFER_TYPE D3D12_CBUFFER_TYPE; + + +typedef struct _D3D12_SIGNATURE_PARAMETER_DESC +{ + LPCSTR SemanticName; // Name of the semantic + UINT SemanticIndex; // Index of the semantic + UINT Register; // Number of member variables + D3D_NAME SystemValueType;// A predefined system value, or D3D_NAME_UNDEFINED if not applicable + D3D_REGISTER_COMPONENT_TYPE ComponentType; // Scalar type (e.g. uint, float, etc.) 
+ BYTE Mask; // Mask to indicate which components of the register + // are used (combination of D3D10_COMPONENT_MASK values) + BYTE ReadWriteMask; // Mask to indicate whether a given component is + // never written (if this is an output signature) or + // always read (if this is an input signature). + // (combination of D3D_MASK_* values) + UINT Stream; // Stream index + D3D_MIN_PRECISION MinPrecision; // Minimum desired interpolation precision +} D3D12_SIGNATURE_PARAMETER_DESC; + +typedef struct _D3D12_SHADER_BUFFER_DESC +{ + LPCSTR Name; // Name of the constant buffer + D3D_CBUFFER_TYPE Type; // Indicates type of buffer content + UINT Variables; // Number of member variables + UINT Size; // Size of CB (in bytes) + UINT uFlags; // Buffer description flags +} D3D12_SHADER_BUFFER_DESC; + +typedef struct _D3D12_SHADER_VARIABLE_DESC +{ + LPCSTR Name; // Name of the variable + UINT StartOffset; // Offset in constant buffer's backing store + UINT Size; // Size of variable (in bytes) + UINT uFlags; // Variable flags + LPVOID DefaultValue; // Raw pointer to default value + UINT StartTexture; // First texture index (or -1 if no textures used) + UINT TextureSize; // Number of texture slots possibly used. + UINT StartSampler; // First sampler index (or -1 if no textures used) + UINT SamplerSize; // Number of sampler slots possibly used. +} D3D12_SHADER_VARIABLE_DESC; + +typedef struct _D3D12_SHADER_TYPE_DESC +{ + D3D_SHADER_VARIABLE_CLASS Class; // Variable class (e.g. object, matrix, etc.) + D3D_SHADER_VARIABLE_TYPE Type; // Variable type (e.g. float, sampler, etc.) 
+ UINT Rows; // Number of rows (for matrices, 1 for other numeric, 0 if not applicable) + UINT Columns; // Number of columns (for vectors & matrices, 1 for other numeric, 0 if not applicable) + UINT Elements; // Number of elements (0 if not an array) + UINT Members; // Number of members (0 if not a structure) + UINT Offset; // Offset from the start of structure (0 if not a structure member) + LPCSTR Name; // Name of type, can be NULL +} D3D12_SHADER_TYPE_DESC; + +typedef D3D_TESSELLATOR_DOMAIN D3D12_TESSELLATOR_DOMAIN; + +typedef D3D_TESSELLATOR_PARTITIONING D3D12_TESSELLATOR_PARTITIONING; + +typedef D3D_TESSELLATOR_OUTPUT_PRIMITIVE D3D12_TESSELLATOR_OUTPUT_PRIMITIVE; + +typedef struct _D3D12_SHADER_DESC +{ + UINT Version; // Shader version + LPCSTR Creator; // Creator string + UINT Flags; // Shader compilation/parse flags + + UINT ConstantBuffers; // Number of constant buffers + UINT BoundResources; // Number of bound resources + UINT InputParameters; // Number of parameters in the input signature + UINT OutputParameters; // Number of parameters in the output signature + + UINT InstructionCount; // Number of emitted instructions + UINT TempRegisterCount; // Number of temporary registers used + UINT TempArrayCount; // Number of temporary arrays used + UINT DefCount; // Number of constant defines + UINT DclCount; // Number of declarations (input + output) + UINT TextureNormalInstructions; // Number of non-categorized texture instructions + UINT TextureLoadInstructions; // Number of texture load instructions + UINT TextureCompInstructions; // Number of texture comparison instructions + UINT TextureBiasInstructions; // Number of texture bias instructions + UINT TextureGradientInstructions; // Number of texture gradient instructions + UINT FloatInstructionCount; // Number of floating point arithmetic instructions used + UINT IntInstructionCount; // Number of signed integer arithmetic instructions used + UINT UintInstructionCount; // Number of unsigned integer 
arithmetic instructions used + UINT StaticFlowControlCount; // Number of static flow control instructions used + UINT DynamicFlowControlCount; // Number of dynamic flow control instructions used + UINT MacroInstructionCount; // Number of macro instructions used + UINT ArrayInstructionCount; // Number of array instructions used + UINT CutInstructionCount; // Number of cut instructions used + UINT EmitInstructionCount; // Number of emit instructions used + D3D_PRIMITIVE_TOPOLOGY GSOutputTopology; // Geometry shader output topology + UINT GSMaxOutputVertexCount; // Geometry shader maximum output vertex count + D3D_PRIMITIVE InputPrimitive; // GS/HS input primitive + UINT PatchConstantParameters; // Number of parameters in the patch constant signature + UINT cGSInstanceCount; // Number of Geometry shader instances + UINT cControlPoints; // Number of control points in the HS->DS stage + D3D_TESSELLATOR_OUTPUT_PRIMITIVE HSOutputPrimitive; // Primitive output by the tessellator + D3D_TESSELLATOR_PARTITIONING HSPartitioning; // Partitioning mode of the tessellator + D3D_TESSELLATOR_DOMAIN TessellatorDomain; // Domain of the tessellator (quad, tri, isoline) + // instruction counts + UINT cBarrierInstructions; // Number of barrier instructions in a compute shader + UINT cInterlockedInstructions; // Number of interlocked instructions + UINT cTextureStoreInstructions; // Number of texture writes +} D3D12_SHADER_DESC; + +typedef struct _D3D12_SHADER_INPUT_BIND_DESC +{ + LPCSTR Name; // Name of the resource + D3D_SHADER_INPUT_TYPE Type; // Type of resource (e.g. texture, cbuffer, etc.) 
+ UINT BindPoint; // Starting bind point + UINT BindCount; // Number of contiguous bind points (for arrays) + + UINT uFlags; // Input binding flags + D3D_RESOURCE_RETURN_TYPE ReturnType; // Return type (if texture) + D3D_SRV_DIMENSION Dimension; // Dimension (if texture) + UINT NumSamples; // Number of samples (0 if not MS texture) + UINT Space; // Register space + UINT uID; // Range ID in the bytecode +} D3D12_SHADER_INPUT_BIND_DESC; + +#define D3D_SHADER_REQUIRES_DOUBLES 0x00000001 +#define D3D_SHADER_REQUIRES_EARLY_DEPTH_STENCIL 0x00000002 +#define D3D_SHADER_REQUIRES_UAVS_AT_EVERY_STAGE 0x00000004 +#define D3D_SHADER_REQUIRES_64_UAVS 0x00000008 +#define D3D_SHADER_REQUIRES_MINIMUM_PRECISION 0x00000010 +#define D3D_SHADER_REQUIRES_11_1_DOUBLE_EXTENSIONS 0x00000020 +#define D3D_SHADER_REQUIRES_11_1_SHADER_EXTENSIONS 0x00000040 +#define D3D_SHADER_REQUIRES_LEVEL_9_COMPARISON_FILTERING 0x00000080 +#define D3D_SHADER_REQUIRES_TILED_RESOURCES 0x00000100 +#define D3D_SHADER_REQUIRES_STENCIL_REF 0x00000200 +#define D3D_SHADER_REQUIRES_INNER_COVERAGE 0x00000400 +#define D3D_SHADER_REQUIRES_TYPED_UAV_LOAD_ADDITIONAL_FORMATS 0x00000800 +#define D3D_SHADER_REQUIRES_ROVS 0x00001000 +#define D3D_SHADER_REQUIRES_VIEWPORT_AND_RT_ARRAY_INDEX_FROM_ANY_SHADER_FEEDING_RASTERIZER 0x00002000 +#define D3D_SHADER_REQUIRES_WAVE_OPS 0x00004000 +#define D3D_SHADER_REQUIRES_INT64_OPS 0x00008000 +#define D3D_SHADER_REQUIRES_VIEW_ID 0x00010000 +#define D3D_SHADER_REQUIRES_BARYCENTRICS 0x00020000 +#define D3D_SHADER_REQUIRES_NATIVE_16BIT_OPS 0x00040000 +#define D3D_SHADER_REQUIRES_SHADING_RATE 0x00080000 +#define D3D_SHADER_REQUIRES_RAYTRACING_TIER_1_1 0x00100000 +#define D3D_SHADER_REQUIRES_SAMPLER_FEEDBACK 0x00200000 +#define D3D_SHADER_REQUIRES_ATOMIC_INT64_ON_TYPED_RESOURCE 0x00400000 +#define D3D_SHADER_REQUIRES_ATOMIC_INT64_ON_GROUP_SHARED 0x00800000 +#define D3D_SHADER_REQUIRES_DERIVATIVES_IN_MESH_AND_AMPLIFICATION_SHADERS 0x01000000 +#define 
D3D_SHADER_REQUIRES_RESOURCE_DESCRIPTOR_HEAP_INDEXING 0x02000000 +#define D3D_SHADER_REQUIRES_SAMPLER_DESCRIPTOR_HEAP_INDEXING 0x04000000 +#define D3D_SHADER_REQUIRES_WAVE_MMA 0x08000000 +#define D3D_SHADER_REQUIRES_ATOMIC_INT64_ON_DESCRIPTOR_HEAP_RESOURCE 0x10000000 + +typedef struct _D3D12_LIBRARY_DESC +{ + LPCSTR Creator; // The name of the originator of the library. + UINT Flags; // Compilation flags. + UINT FunctionCount; // Number of functions exported from the library. +} D3D12_LIBRARY_DESC; + +typedef struct _D3D12_FUNCTION_DESC +{ + UINT Version; // Shader version + LPCSTR Creator; // Creator string + UINT Flags; // Shader compilation/parse flags + + UINT ConstantBuffers; // Number of constant buffers + UINT BoundResources; // Number of bound resources + + UINT InstructionCount; // Number of emitted instructions + UINT TempRegisterCount; // Number of temporary registers used + UINT TempArrayCount; // Number of temporary arrays used + UINT DefCount; // Number of constant defines + UINT DclCount; // Number of declarations (input + output) + UINT TextureNormalInstructions; // Number of non-categorized texture instructions + UINT TextureLoadInstructions; // Number of texture load instructions + UINT TextureCompInstructions; // Number of texture comparison instructions + UINT TextureBiasInstructions; // Number of texture bias instructions + UINT TextureGradientInstructions; // Number of texture gradient instructions + UINT FloatInstructionCount; // Number of floating point arithmetic instructions used + UINT IntInstructionCount; // Number of signed integer arithmetic instructions used + UINT UintInstructionCount; // Number of unsigned integer arithmetic instructions used + UINT StaticFlowControlCount; // Number of static flow control instructions used + UINT DynamicFlowControlCount; // Number of dynamic flow control instructions used + UINT MacroInstructionCount; // Number of macro instructions used + UINT ArrayInstructionCount; // Number of array instructions 
used + UINT MovInstructionCount; // Number of mov instructions used + UINT MovcInstructionCount; // Number of movc instructions used + UINT ConversionInstructionCount; // Number of type conversion instructions used + UINT BitwiseInstructionCount; // Number of bitwise arithmetic instructions used + D3D_FEATURE_LEVEL MinFeatureLevel; // Min target of the function byte code + UINT64 RequiredFeatureFlags; // Required feature flags + + LPCSTR Name; // Function name + INT FunctionParameterCount; // Number of logical parameters in the function signature (not including return) + BOOL HasReturn; // TRUE, if function returns a value, false - it is a subroutine + BOOL Has10Level9VertexShader; // TRUE, if there is a 10L9 VS blob + BOOL Has10Level9PixelShader; // TRUE, if there is a 10L9 PS blob +} D3D12_FUNCTION_DESC; + +typedef struct _D3D12_PARAMETER_DESC +{ + LPCSTR Name; // Parameter name. + LPCSTR SemanticName; // Parameter semantic name (+index). + D3D_SHADER_VARIABLE_TYPE Type; // Element type. + D3D_SHADER_VARIABLE_CLASS Class; // Scalar/Vector/Matrix. + UINT Rows; // Rows are for matrix parameters. + UINT Columns; // Components or Columns in matrix. + D3D_INTERPOLATION_MODE InterpolationMode; // Interpolation mode. + D3D_PARAMETER_FLAGS Flags; // Parameter modifiers. + + UINT FirstInRegister; // The first input register for this parameter. + UINT FirstInComponent; // The first input register component for this parameter. + UINT FirstOutRegister; // The first output register for this parameter. + UINT FirstOutComponent; // The first output register component for this parameter. 
+} D3D12_PARAMETER_DESC; + + +////////////////////////////////////////////////////////////////////////////// +// Interfaces //////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////// + +typedef interface ID3D12ShaderReflectionType ID3D12ShaderReflectionType; +typedef interface ID3D12ShaderReflectionType *LPD3D12SHADERREFLECTIONTYPE; + +typedef interface ID3D12ShaderReflectionVariable ID3D12ShaderReflectionVariable; +typedef interface ID3D12ShaderReflectionVariable *LPD3D12SHADERREFLECTIONVARIABLE; + +typedef interface ID3D12ShaderReflectionConstantBuffer ID3D12ShaderReflectionConstantBuffer; +typedef interface ID3D12ShaderReflectionConstantBuffer *LPD3D12SHADERREFLECTIONCONSTANTBUFFER; + +typedef interface ID3D12ShaderReflection ID3D12ShaderReflection; +typedef interface ID3D12ShaderReflection *LPD3D12SHADERREFLECTION; + +typedef interface ID3D12LibraryReflection ID3D12LibraryReflection; +typedef interface ID3D12LibraryReflection *LPD3D12LIBRARYREFLECTION; + +typedef interface ID3D12FunctionReflection ID3D12FunctionReflection; +typedef interface ID3D12FunctionReflection *LPD3D12FUNCTIONREFLECTION; + +typedef interface ID3D12FunctionParameterReflection ID3D12FunctionParameterReflection; +typedef interface ID3D12FunctionParameterReflection *LPD3D12FUNCTIONPARAMETERREFLECTION; + + +// {E913C351-783D-48CA-A1D1-4F306284AD56} +interface DECLSPEC_UUID("E913C351-783D-48CA-A1D1-4F306284AD56") ID3D12ShaderReflectionType; +DEFINE_GUID(IID_ID3D12ShaderReflectionType, +0xe913c351, 0x783d, 0x48ca, 0xa1, 0xd1, 0x4f, 0x30, 0x62, 0x84, 0xad, 0x56); + +#undef INTERFACE +#define INTERFACE ID3D12ShaderReflectionType + +DECLARE_INTERFACE(ID3D12ShaderReflectionType) +{ + STDMETHOD(GetDesc)(THIS_ _Out_ D3D12_SHADER_TYPE_DESC *pDesc) PURE; + + STDMETHOD_(ID3D12ShaderReflectionType*, GetMemberTypeByIndex)(THIS_ _In_ UINT Index) PURE; + STDMETHOD_(ID3D12ShaderReflectionType*, GetMemberTypeByName)(THIS_ 
_In_ LPCSTR Name) PURE; + STDMETHOD_(LPCSTR, GetMemberTypeName)(THIS_ _In_ UINT Index) PURE; + + STDMETHOD(IsEqual)(THIS_ _In_ ID3D12ShaderReflectionType* pType) PURE; + STDMETHOD_(ID3D12ShaderReflectionType*, GetSubType)(THIS) PURE; + STDMETHOD_(ID3D12ShaderReflectionType*, GetBaseClass)(THIS) PURE; + STDMETHOD_(UINT, GetNumInterfaces)(THIS) PURE; + STDMETHOD_(ID3D12ShaderReflectionType*, GetInterfaceByIndex)(THIS_ _In_ UINT uIndex) PURE; + STDMETHOD(IsOfType)(THIS_ _In_ ID3D12ShaderReflectionType* pType) PURE; + STDMETHOD(ImplementsInterface)(THIS_ _In_ ID3D12ShaderReflectionType* pBase) PURE; +}; + +// {8337A8A6-A216-444A-B2F4-314733A73AEA} +interface DECLSPEC_UUID("8337A8A6-A216-444A-B2F4-314733A73AEA") ID3D12ShaderReflectionVariable; +DEFINE_GUID(IID_ID3D12ShaderReflectionVariable, +0x8337a8a6, 0xa216, 0x444a, 0xb2, 0xf4, 0x31, 0x47, 0x33, 0xa7, 0x3a, 0xea); + +#undef INTERFACE +#define INTERFACE ID3D12ShaderReflectionVariable + +DECLARE_INTERFACE(ID3D12ShaderReflectionVariable) +{ + STDMETHOD(GetDesc)(THIS_ _Out_ D3D12_SHADER_VARIABLE_DESC *pDesc) PURE; + + STDMETHOD_(ID3D12ShaderReflectionType*, GetType)(THIS) PURE; + STDMETHOD_(ID3D12ShaderReflectionConstantBuffer*, GetBuffer)(THIS) PURE; + + STDMETHOD_(UINT, GetInterfaceSlot)(THIS_ _In_ UINT uArrayIndex) PURE; +}; + +// {C59598B4-48B3-4869-B9B1-B1618B14A8B7} +interface DECLSPEC_UUID("C59598B4-48B3-4869-B9B1-B1618B14A8B7") ID3D12ShaderReflectionConstantBuffer; +DEFINE_GUID(IID_ID3D12ShaderReflectionConstantBuffer, +0xc59598b4, 0x48b3, 0x4869, 0xb9, 0xb1, 0xb1, 0x61, 0x8b, 0x14, 0xa8, 0xb7); + +#undef INTERFACE +#define INTERFACE ID3D12ShaderReflectionConstantBuffer + +DECLARE_INTERFACE(ID3D12ShaderReflectionConstantBuffer) +{ + STDMETHOD(GetDesc)(THIS_ D3D12_SHADER_BUFFER_DESC *pDesc) PURE; + + STDMETHOD_(ID3D12ShaderReflectionVariable*, GetVariableByIndex)(THIS_ _In_ UINT Index) PURE; + STDMETHOD_(ID3D12ShaderReflectionVariable*, GetVariableByName)(THIS_ _In_ LPCSTR Name) PURE; +}; + +// The 
ID3D12ShaderReflection IID may change from SDK version to SDK version +// if the reflection API changes. This prevents new code with the new API +// from working with an old binary. Recompiling with the new header +// will pick up the new IID. + +// {5A58797D-A72C-478D-8BA2-EFC6B0EFE88E} +interface DECLSPEC_UUID("5A58797D-A72C-478D-8BA2-EFC6B0EFE88E") ID3D12ShaderReflection; +DEFINE_GUID(IID_ID3D12ShaderReflection, +0x5a58797d, 0xa72c, 0x478d, 0x8b, 0xa2, 0xef, 0xc6, 0xb0, 0xef, 0xe8, 0x8e); + +#undef INTERFACE +#define INTERFACE ID3D12ShaderReflection + +DECLARE_INTERFACE_(ID3D12ShaderReflection, IUnknown) +{ + STDMETHOD(QueryInterface)(THIS_ _In_ REFIID iid, + _Out_ LPVOID *ppv) PURE; + STDMETHOD_(ULONG, AddRef)(THIS) PURE; + STDMETHOD_(ULONG, Release)(THIS) PURE; + + STDMETHOD(GetDesc)(THIS_ _Out_ D3D12_SHADER_DESC *pDesc) PURE; + + STDMETHOD_(ID3D12ShaderReflectionConstantBuffer*, GetConstantBufferByIndex)(THIS_ _In_ UINT Index) PURE; + STDMETHOD_(ID3D12ShaderReflectionConstantBuffer*, GetConstantBufferByName)(THIS_ _In_ LPCSTR Name) PURE; + + STDMETHOD(GetResourceBindingDesc)(THIS_ _In_ UINT ResourceIndex, + _Out_ D3D12_SHADER_INPUT_BIND_DESC *pDesc) PURE; + + STDMETHOD(GetInputParameterDesc)(THIS_ _In_ UINT ParameterIndex, + _Out_ D3D12_SIGNATURE_PARAMETER_DESC *pDesc) PURE; + STDMETHOD(GetOutputParameterDesc)(THIS_ _In_ UINT ParameterIndex, + _Out_ D3D12_SIGNATURE_PARAMETER_DESC *pDesc) PURE; + STDMETHOD(GetPatchConstantParameterDesc)(THIS_ _In_ UINT ParameterIndex, + _Out_ D3D12_SIGNATURE_PARAMETER_DESC *pDesc) PURE; + + STDMETHOD_(ID3D12ShaderReflectionVariable*, GetVariableByName)(THIS_ _In_ LPCSTR Name) PURE; + + STDMETHOD(GetResourceBindingDescByName)(THIS_ _In_ LPCSTR Name, + _Out_ D3D12_SHADER_INPUT_BIND_DESC *pDesc) PURE; + + STDMETHOD_(UINT, GetMovInstructionCount)(THIS) PURE; + STDMETHOD_(UINT, GetMovcInstructionCount)(THIS) PURE; + STDMETHOD_(UINT, GetConversionInstructionCount)(THIS) PURE; + STDMETHOD_(UINT, GetBitwiseInstructionCount)(THIS) 
PURE; + + STDMETHOD_(D3D_PRIMITIVE, GetGSInputPrimitive)(THIS) PURE; + STDMETHOD_(BOOL, IsSampleFrequencyShader)(THIS) PURE; + + STDMETHOD_(UINT, GetNumInterfaceSlots)(THIS) PURE; + STDMETHOD(GetMinFeatureLevel)(THIS_ _Out_ enum D3D_FEATURE_LEVEL* pLevel) PURE; + + STDMETHOD_(UINT, GetThreadGroupSize)(THIS_ + _Out_opt_ UINT* pSizeX, + _Out_opt_ UINT* pSizeY, + _Out_opt_ UINT* pSizeZ) PURE; + + STDMETHOD_(UINT64, GetRequiresFlags)(THIS) PURE; +}; + +// {8E349D19-54DB-4A56-9DC9-119D87BDB804} +interface DECLSPEC_UUID("8E349D19-54DB-4A56-9DC9-119D87BDB804") ID3D12LibraryReflection; +DEFINE_GUID(IID_ID3D12LibraryReflection, +0x8e349d19, 0x54db, 0x4a56, 0x9d, 0xc9, 0x11, 0x9d, 0x87, 0xbd, 0xb8, 0x4); + +#undef INTERFACE +#define INTERFACE ID3D12LibraryReflection + +DECLARE_INTERFACE_(ID3D12LibraryReflection, IUnknown) +{ + STDMETHOD(QueryInterface)(THIS_ _In_ REFIID iid, _Out_ LPVOID * ppv) PURE; + STDMETHOD_(ULONG, AddRef)(THIS) PURE; + STDMETHOD_(ULONG, Release)(THIS) PURE; + + STDMETHOD(GetDesc)(THIS_ _Out_ D3D12_LIBRARY_DESC * pDesc) PURE; + + STDMETHOD_(ID3D12FunctionReflection *, GetFunctionByIndex)(THIS_ _In_ INT FunctionIndex) PURE; +}; + +// {1108795C-2772-4BA9-B2A8-D464DC7E2799} +interface DECLSPEC_UUID("1108795C-2772-4BA9-B2A8-D464DC7E2799") ID3D12FunctionReflection; +DEFINE_GUID(IID_ID3D12FunctionReflection, +0x1108795c, 0x2772, 0x4ba9, 0xb2, 0xa8, 0xd4, 0x64, 0xdc, 0x7e, 0x27, 0x99); + +#undef INTERFACE +#define INTERFACE ID3D12FunctionReflection + +DECLARE_INTERFACE(ID3D12FunctionReflection) +{ + STDMETHOD(GetDesc)(THIS_ _Out_ D3D12_FUNCTION_DESC * pDesc) PURE; + + STDMETHOD_(ID3D12ShaderReflectionConstantBuffer *, GetConstantBufferByIndex)(THIS_ _In_ UINT BufferIndex) PURE; + STDMETHOD_(ID3D12ShaderReflectionConstantBuffer *, GetConstantBufferByName)(THIS_ _In_ LPCSTR Name) PURE; + + STDMETHOD(GetResourceBindingDesc)(THIS_ _In_ UINT ResourceIndex, + _Out_ D3D12_SHADER_INPUT_BIND_DESC * pDesc) PURE; + + STDMETHOD_(ID3D12ShaderReflectionVariable *, 
GetVariableByName)(THIS_ _In_ LPCSTR Name) PURE; + + STDMETHOD(GetResourceBindingDescByName)(THIS_ _In_ LPCSTR Name, + _Out_ D3D12_SHADER_INPUT_BIND_DESC * pDesc) PURE; + + // Use D3D_RETURN_PARAMETER_INDEX to get description of the return value. + STDMETHOD_(ID3D12FunctionParameterReflection *, GetFunctionParameter)(THIS_ _In_ INT ParameterIndex) PURE; +}; + +// {EC25F42D-7006-4F2B-B33E-02CC3375733F} +interface DECLSPEC_UUID("EC25F42D-7006-4F2B-B33E-02CC3375733F") ID3D12FunctionParameterReflection; +DEFINE_GUID(IID_ID3D12FunctionParameterReflection, +0xec25f42d, 0x7006, 0x4f2b, 0xb3, 0x3e, 0x2, 0xcc, 0x33, 0x75, 0x73, 0x3f); + +#undef INTERFACE +#define INTERFACE ID3D12FunctionParameterReflection + +DECLARE_INTERFACE(ID3D12FunctionParameterReflection) +{ + STDMETHOD(GetDesc)(THIS_ _Out_ D3D12_PARAMETER_DESC * pDesc) PURE; +}; + + +////////////////////////////////////////////////////////////////////////////// +// APIs ////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////// + +#ifdef __cplusplus +extern "C" { +#endif //__cplusplus + +#ifdef __cplusplus +} +#endif //__cplusplus + +#endif //__D3D12SHADER_H__ + diff --git a/src/runtime/dxcapi.h b/src/runtime/dxcapi.h new file mode 100644 index 000000000000..d5e235195b21 --- /dev/null +++ b/src/runtime/dxcapi.h @@ -0,0 +1,1309 @@ + +/////////////////////////////////////////////////////////////////////////////// +// // +// dxcapi.h // +// Copyright (C) Microsoft Corporation. All rights reserved. // +// This file is distributed under the University of Illinois Open Source // +// License. See LICENSE.TXT for details. // +// // +// Provides declarations for the DirectX Compiler API entry point. 
// +// // +/////////////////////////////////////////////////////////////////////////////// + +#ifndef __DXC_API__ +#define __DXC_API__ + +#ifdef _WIN32 +#ifndef DXC_API_IMPORT +#define DXC_API_IMPORT __declspec(dllimport) +#endif +#else +#ifndef DXC_API_IMPORT +#define DXC_API_IMPORT __attribute__((visibility("default"))) +#endif +#endif + +#ifdef _WIN32 + +#ifndef CROSS_PLATFORM_UUIDOF +// Warning: This macro exists in WinAdapter.h as well +#define CROSS_PLATFORM_UUIDOF(interface, spec) \ + struct __declspec(uuid(spec)) interface; +#endif + +#else + +#include "WinAdapter.h" +#include +#endif + +struct IMalloc; + +struct IDxcIncludeHandler; + +/// \brief Typedef for DxcCreateInstance function pointer. +/// +/// This can be used with GetProcAddress to get the DxcCreateInstance function. +typedef HRESULT(__stdcall *DxcCreateInstanceProc)(_In_ REFCLSID rclsid, + _In_ REFIID riid, + _Out_ LPVOID *ppv); + +/// \brief Typedef for DxcCreateInstance2 function pointer. +/// +/// This can be used with GetProcAddress to get the DxcCreateInstance2 function. +typedef HRESULT(__stdcall *DxcCreateInstance2Proc)(_In_ IMalloc *pMalloc, + _In_ REFCLSID rclsid, + _In_ REFIID riid, + _Out_ LPVOID *ppv); + +/// \brief Creates a single uninitialized object of the class associated with a +/// specified CLSID. +/// +/// \param rclsid The CLSID associated with the data and code that will be used +/// to create the object. +/// +/// \param riid A reference to the identifier of the interface to be used to +/// communicate with the object. +/// +/// \param ppv Address of pointer variable that receives the interface pointer +/// requested in riid. Upon successful return, *ppv contains the requested +/// interface pointer. Upon failure, *ppv contains NULL. +/// +/// While this function is similar to CoCreateInstance, there is no COM +/// involvement. 
+extern "C" DXC_API_IMPORT + HRESULT __stdcall DxcCreateInstance(_In_ REFCLSID rclsid, _In_ REFIID riid, + _Out_ LPVOID *ppv); + +/// \brief Version of DxcCreateInstance that takes an IMalloc interface. +/// +/// This can be used to create an instance of the compiler with a custom memory +/// allocator. +extern "C" DXC_API_IMPORT + HRESULT __stdcall DxcCreateInstance2(_In_ IMalloc *pMalloc, + _In_ REFCLSID rclsid, _In_ REFIID riid, + _Out_ LPVOID *ppv); + +// For convenience, equivalent definitions to CP_UTF8 and CP_UTF16. +#define DXC_CP_UTF8 65001 +#define DXC_CP_UTF16 1200 +#define DXC_CP_UTF32 12000 +// Use DXC_CP_ACP for: Binary; ANSI Text; Autodetect UTF with BOM +#define DXC_CP_ACP 0 + +/// Codepage for "wide" characters - UTF16 on Windows, UTF32 on other platforms. +#ifdef _WIN32 +#define DXC_CP_WIDE DXC_CP_UTF16 +#else +#define DXC_CP_WIDE DXC_CP_UTF32 +#endif + +/// Indicates that the shader hash was computed taking into account source +/// information (-Zss). +#define DXC_HASHFLAG_INCLUDES_SOURCE 1 + +/// Hash digest type for ShaderHash. 
+typedef struct DxcShaderHash { + UINT32 Flags; ///< DXC_HASHFLAG_* + BYTE HashDigest[16]; ///< The hash digest +} DxcShaderHash; + +#define DXC_FOURCC(ch0, ch1, ch2, ch3) \ + ((UINT32)(UINT8)(ch0) | (UINT32)(UINT8)(ch1) << 8 | \ + (UINT32)(UINT8)(ch2) << 16 | (UINT32)(UINT8)(ch3) << 24) +#define DXC_PART_PDB DXC_FOURCC('I', 'L', 'D', 'B') +#define DXC_PART_PDB_NAME DXC_FOURCC('I', 'L', 'D', 'N') +#define DXC_PART_PRIVATE_DATA DXC_FOURCC('P', 'R', 'I', 'V') +#define DXC_PART_ROOT_SIGNATURE DXC_FOURCC('R', 'T', 'S', '0') +#define DXC_PART_DXIL DXC_FOURCC('D', 'X', 'I', 'L') +#define DXC_PART_REFLECTION_DATA DXC_FOURCC('S', 'T', 'A', 'T') +#define DXC_PART_SHADER_HASH DXC_FOURCC('H', 'A', 'S', 'H') +#define DXC_PART_INPUT_SIGNATURE DXC_FOURCC('I', 'S', 'G', '1') +#define DXC_PART_OUTPUT_SIGNATURE DXC_FOURCC('O', 'S', 'G', '1') +#define DXC_PART_PATCH_CONSTANT_SIGNATURE DXC_FOURCC('P', 'S', 'G', '1') + +// Some option arguments are defined here for continuity with D3DCompile +// interface. 
+#define DXC_ARG_DEBUG L"-Zi" +#define DXC_ARG_SKIP_VALIDATION L"-Vd" +#define DXC_ARG_SKIP_OPTIMIZATIONS L"-Od" +#define DXC_ARG_PACK_MATRIX_ROW_MAJOR L"-Zpr" +#define DXC_ARG_PACK_MATRIX_COLUMN_MAJOR L"-Zpc" +#define DXC_ARG_AVOID_FLOW_CONTROL L"-Gfa" +#define DXC_ARG_PREFER_FLOW_CONTROL L"-Gfp" +#define DXC_ARG_ENABLE_STRICTNESS L"-Ges" +#define DXC_ARG_ENABLE_BACKWARDS_COMPATIBILITY L"-Gec" +#define DXC_ARG_IEEE_STRICTNESS L"-Gis" +#define DXC_ARG_OPTIMIZATION_LEVEL0 L"-O0" +#define DXC_ARG_OPTIMIZATION_LEVEL1 L"-O1" +#define DXC_ARG_OPTIMIZATION_LEVEL2 L"-O2" +#define DXC_ARG_OPTIMIZATION_LEVEL3 L"-O3" +#define DXC_ARG_WARNINGS_ARE_ERRORS L"-WX" +#define DXC_ARG_RESOURCES_MAY_ALIAS L"-res_may_alias" +#define DXC_ARG_ALL_RESOURCES_BOUND L"-all_resources_bound" +#define DXC_ARG_DEBUG_NAME_FOR_SOURCE L"-Zss" +#define DXC_ARG_DEBUG_NAME_FOR_BINARY L"-Zsb" + +CROSS_PLATFORM_UUIDOF(IDxcBlob, "8BA5FB08-5195-40e2-AC58-0D989C3A0102") +/// \brief A sized buffer that can be passed in and out of DXC APIs. +/// +/// This is an alias of ID3D10Blob and ID3DBlob. +struct IDxcBlob : public IUnknown { +public: + /// \brief Retrieves a pointer to the blob's data. + virtual LPVOID STDMETHODCALLTYPE GetBufferPointer(void) = 0; + + /// \brief Retrieves the size, in bytes, of the blob's data. + virtual SIZE_T STDMETHODCALLTYPE GetBufferSize(void) = 0; +}; + +CROSS_PLATFORM_UUIDOF(IDxcBlobEncoding, "7241d424-2646-4191-97c0-98e96e42fc68") +/// \brief A blob that might have a known encoding. +struct IDxcBlobEncoding : public IDxcBlob { +public: + /// \brief Retrieve the encoding for this blob. + /// + /// \param pKnown Pointer to a variable that will be set to TRUE if the + /// encoding is known. + /// + /// \param pCodePage Pointer to variable that will be set to the encoding used + /// for this blob. + /// + /// If the encoding is not known then pCodePage will be set to CP_ACP. 
+ virtual HRESULT STDMETHODCALLTYPE GetEncoding(_Out_ BOOL *pKnown, + _Out_ UINT32 *pCodePage) = 0; +}; + +CROSS_PLATFORM_UUIDOF(IDxcBlobWide, "A3F84EAB-0FAA-497E-A39C-EE6ED60B2D84") +/// \brief A blob containing a null-terminated wide string. +/// +/// This uses the native wide character encoding (utf16 on Windows, utf32 on +/// Linux). +/// +/// The value returned by GetBufferSize() is the size of the buffer, in bytes, +/// including the null-terminator. +/// +/// This interface is used to return output name strings DXC. Other string +/// output blobs, such as errors/warnings, preprocessed HLSL, or other text are +/// returned using encodings based on the -encoding option passed to the +/// compiler. +struct IDxcBlobWide : public IDxcBlobEncoding { +public: + /// \brief Retrieves a pointer to the string stored in this blob. + virtual LPCWSTR STDMETHODCALLTYPE GetStringPointer(void) = 0; + + /// \brief Retrieves the length of the string stored in this blob, in + /// characters, excluding the null-terminator. + virtual SIZE_T STDMETHODCALLTYPE GetStringLength(void) = 0; +}; + +CROSS_PLATFORM_UUIDOF(IDxcBlobUtf8, "3DA636C9-BA71-4024-A301-30CBF125305B") +/// \brief A blob containing a UTF-8 encoded string. +/// +/// The value returned by GetBufferSize() is the size of the buffer, in bytes, +/// including the null-terminator. +/// +/// Depending on the -encoding option passed to the compiler, this interface is +/// used to return string output blobs, such as errors/warnings, preprocessed +/// HLSL, or other text. Output name strings always use IDxcBlobWide. +struct IDxcBlobUtf8 : public IDxcBlobEncoding { +public: + /// \brief Retrieves a pointer to the string stored in this blob. + virtual LPCSTR STDMETHODCALLTYPE GetStringPointer(void) = 0; + + /// \brief Retrieves the length of the string stored in this blob, in + /// characters, excluding the null-terminator. 
+ virtual SIZE_T STDMETHODCALLTYPE GetStringLength(void) = 0; +}; + +#ifdef _WIN32 +/// IDxcBlobUtf16 is a legacy alias for IDxcBlobWide on Win32. +typedef IDxcBlobWide IDxcBlobUtf16; +#endif + +CROSS_PLATFORM_UUIDOF(IDxcIncludeHandler, + "7f61fc7d-950d-467f-b3e3-3c02fb49187c") +/// \brief Interface for handling include directives. +/// +/// This interface can be implemented to customize handling of include +/// directives. +/// +/// Use IDxcUtils::CreateDefaultIncludeHandler to create a default +/// implementation that reads include files from the filesystem. +/// +struct IDxcIncludeHandler : public IUnknown { + /// \brief Load a source file to be included by the compiler. + /// + /// \param pFilename Candidate filename. + /// + /// \param ppIncludeSource Resultant source object for included file, nullptr + /// if not found. + virtual HRESULT STDMETHODCALLTYPE + LoadSource(_In_z_ LPCWSTR pFilename, + _COM_Outptr_result_maybenull_ IDxcBlob **ppIncludeSource) = 0; +}; + +/// \brief Structure for supplying bytes or text input to Dxc APIs. +typedef struct DxcBuffer { + /// \brief Pointer to the start of the buffer. + LPCVOID Ptr; + + /// \brief Size of the buffer in bytes. + SIZE_T Size; + + /// \brief Encoding of the buffer. + /// + /// Use Encoding = 0 for non-text bytes, ANSI text, or unknown with BOM. + UINT Encoding; +} DxcText; + +/// \brief Structure for supplying defines to Dxc APIs. +struct DxcDefine { + LPCWSTR Name; ///< The define name. + _Maybenull_ LPCWSTR Value; ///< Optional value for the define. +}; + +CROSS_PLATFORM_UUIDOF(IDxcCompilerArgs, "73EFFE2A-70DC-45F8-9690-EFF64C02429D") +/// \brief Interface for managing arguments passed to DXC. +/// +/// Use IDxcUtils::BuildArguments to create an instance of this interface. +struct IDxcCompilerArgs : public IUnknown { + /// \brief Retrieve the array of arguments. + /// + /// This can be passed directly to the pArguments parameter of the Compile() + /// method. 
+ virtual LPCWSTR *STDMETHODCALLTYPE GetArguments() = 0; + + /// \brief Retrieve the number of arguments. + /// + /// This can be passed directly to the argCount parameter of the Compile() + /// method. + virtual UINT32 STDMETHODCALLTYPE GetCount() = 0; + + /// \brief Add additional arguments to this list of compiler arguments. + virtual HRESULT STDMETHODCALLTYPE AddArguments( + _In_opt_count_(argCount) + LPCWSTR *pArguments, ///< Array of pointers to arguments to add. + _In_ UINT32 argCount ///< Number of arguments to add. + ) = 0; + + /// \brief Add additional UTF-8 encoded arguments to this list of compiler + /// arguments. + virtual HRESULT STDMETHODCALLTYPE AddArgumentsUTF8( + _In_opt_count_(argCount) + LPCSTR *pArguments, ///< Array of pointers to UTF-8 arguments to add. + _In_ UINT32 argCount ///< Number of arguments to add. + ) = 0; + + /// \brief Add additional defines to this list of compiler arguments. + virtual HRESULT STDMETHODCALLTYPE AddDefines( + _In_count_(defineCount) const DxcDefine *pDefines, ///< Array of defines. + _In_ UINT32 defineCount ///< Number of defines. + ) = 0; +}; + +////////////////////////// +// Legacy Interfaces +///////////////////////// + +CROSS_PLATFORM_UUIDOF(IDxcLibrary, "e5204dc7-d18c-4c3c-bdfb-851673980fe7") +/// \deprecated IDxcUtils replaces IDxcLibrary; please use IDxcUtils instead. 
+struct IDxcLibrary : public IUnknown { + /// \deprecated + virtual HRESULT STDMETHODCALLTYPE SetMalloc(_In_opt_ IMalloc *pMalloc) = 0; + + /// \deprecated + virtual HRESULT STDMETHODCALLTYPE + CreateBlobFromBlob(_In_ IDxcBlob *pBlob, UINT32 offset, UINT32 length, + _COM_Outptr_ IDxcBlob **ppResult) = 0; + + /// \deprecated + virtual HRESULT STDMETHODCALLTYPE + CreateBlobFromFile(_In_z_ LPCWSTR pFileName, _In_opt_ UINT32 *codePage, + _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0; + + /// \deprecated + virtual HRESULT STDMETHODCALLTYPE CreateBlobWithEncodingFromPinned( + _In_bytecount_(size) LPCVOID pText, UINT32 size, UINT32 codePage, + _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0; + + /// \deprecated + virtual HRESULT STDMETHODCALLTYPE CreateBlobWithEncodingOnHeapCopy( + _In_bytecount_(size) LPCVOID pText, UINT32 size, UINT32 codePage, + _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0; + + /// \deprecated + virtual HRESULT STDMETHODCALLTYPE CreateBlobWithEncodingOnMalloc( + _In_bytecount_(size) LPCVOID pText, IMalloc *pIMalloc, UINT32 size, + UINT32 codePage, _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0; + + /// \deprecated + virtual HRESULT STDMETHODCALLTYPE + CreateIncludeHandler(_COM_Outptr_ IDxcIncludeHandler **ppResult) = 0; + + /// \deprecated + virtual HRESULT STDMETHODCALLTYPE CreateStreamFromBlobReadOnly( + _In_ IDxcBlob *pBlob, _COM_Outptr_ IStream **ppStream) = 0; + + /// \deprecated + virtual HRESULT STDMETHODCALLTYPE GetBlobAsUtf8( + _In_ IDxcBlob *pBlob, _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0; + + // Renamed from GetBlobAsUtf16 to GetBlobAsWide + /// \deprecated + virtual HRESULT STDMETHODCALLTYPE GetBlobAsWide( + _In_ IDxcBlob *pBlob, _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0; + +#ifdef _WIN32 + // Alias to GetBlobAsWide on Win32 + /// \deprecated + inline HRESULT GetBlobAsUtf16(_In_ IDxcBlob *pBlob, + _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) { + return this->GetBlobAsWide(pBlob, pBlobEncoding); + } 
+#endif +}; + +CROSS_PLATFORM_UUIDOF(IDxcOperationResult, + "CEDB484A-D4E9-445A-B991-CA21CA157DC2") +/// \brief The results of a DXC operation. +/// +/// Note: IDxcResult replaces IDxcOperationResult and should be used wherever +/// possible. +struct IDxcOperationResult : public IUnknown { + /// \brief Retrieve the overall status of the operation. + virtual HRESULT STDMETHODCALLTYPE GetStatus(_Out_ HRESULT *pStatus) = 0; + + /// \brief Retrieve the primary output of the operation. + /// + /// This corresponds to: + /// * DXC_OUT_OBJECT - Compile() with shader or library target + /// * DXC_OUT_DISASSEMBLY - Disassemble() + /// * DXC_OUT_HLSL - Compile() with -P + /// * DXC_OUT_ROOT_SIGNATURE - Compile() with rootsig_* target + virtual HRESULT STDMETHODCALLTYPE + GetResult(_COM_Outptr_result_maybenull_ IDxcBlob **ppResult) = 0; + + /// \brief Retrieves the error buffer from the operation, if there is one. + /// + // This corresponds to calling IDxcResult::GetOutput() with DXC_OUT_ERRORS. + virtual HRESULT STDMETHODCALLTYPE + GetErrorBuffer(_COM_Outptr_result_maybenull_ IDxcBlobEncoding **ppErrors) = 0; +}; + +CROSS_PLATFORM_UUIDOF(IDxcCompiler, "8c210bf3-011f-4422-8d70-6f9acb8db617") +/// \deprecated Please use IDxcCompiler3 instead. +struct IDxcCompiler : public IUnknown { + /// \brief Compile a single entry point to the target shader model. + /// + /// \deprecated Please use IDxcCompiler3::Compile() instead. + virtual HRESULT STDMETHODCALLTYPE Compile( + _In_ IDxcBlob *pSource, // Source text to compile. + _In_opt_z_ LPCWSTR pSourceName, // Optional file name for pSource. Used in + // errors and include handlers. + _In_opt_z_ LPCWSTR pEntryPoint, // Entry point name. + _In_z_ LPCWSTR pTargetProfile, // Shader profile to compile. + _In_opt_count_(argCount) + LPCWSTR *pArguments, // Array of pointers to arguments. + _In_ UINT32 argCount, // Number of arguments. + _In_count_(defineCount) const DxcDefine *pDefines, // Array of defines. 
+ _In_ UINT32 defineCount, // Number of defines. + _In_opt_ IDxcIncludeHandler + *pIncludeHandler, // User-provided interface to handle #include + // directives (optional). + _COM_Outptr_ IDxcOperationResult * + *ppResult // Compiler output status, buffer, and errors. + ) = 0; + + /// \brief Preprocess source text. + /// + /// \deprecated Please use IDxcCompiler3::Compile() with the "-P" argument + /// instead. + virtual HRESULT STDMETHODCALLTYPE Preprocess( + _In_ IDxcBlob *pSource, // Source text to preprocess. + _In_opt_z_ LPCWSTR pSourceName, // Optional file name for pSource. Used in + // errors and include handlers. + _In_opt_count_(argCount) + LPCWSTR *pArguments, // Array of pointers to arguments. + _In_ UINT32 argCount, // Number of arguments. + _In_count_(defineCount) const DxcDefine *pDefines, // Array of defines. + _In_ UINT32 defineCount, // Number of defines. + _In_opt_ IDxcIncludeHandler + *pIncludeHandler, // user-provided interface to handle #include + // directives (optional). + _COM_Outptr_ IDxcOperationResult * + *ppResult // Preprocessor output status, buffer, and errors. + ) = 0; + + /// \brief Disassemble a program. + /// + /// \deprecated Please use IDxcCompiler3::Disassemble() instead. + virtual HRESULT STDMETHODCALLTYPE Disassemble( + _In_ IDxcBlob *pSource, // Program to disassemble. + _COM_Outptr_ IDxcBlobEncoding **ppDisassembly // Disassembly text. + ) = 0; +}; + +CROSS_PLATFORM_UUIDOF(IDxcCompiler2, "A005A9D9-B8BB-4594-B5C9-0E633BEC4D37") +/// \deprecated Please use IDxcCompiler3 instead. +struct IDxcCompiler2 : public IDxcCompiler { + /// \brief Compile a single entry point to the target shader model with debug + /// information. + /// + /// \deprecated Please use IDxcCompiler3::Compile() instead. + virtual HRESULT STDMETHODCALLTYPE CompileWithDebug( + _In_ IDxcBlob *pSource, // Source text to compile. + _In_opt_z_ LPCWSTR pSourceName, // Optional file name for pSource. Used in + // errors and include handlers. 
+ _In_opt_z_ LPCWSTR pEntryPoint, // Entry point name. + _In_z_ LPCWSTR pTargetProfile, // Shader profile to compile. + _In_opt_count_(argCount) + LPCWSTR *pArguments, // Array of pointers to arguments. + _In_ UINT32 argCount, // Number of arguments. + _In_count_(defineCount) const DxcDefine *pDefines, // Array of defines. + _In_ UINT32 defineCount, // Number of defines. + _In_opt_ IDxcIncludeHandler + *pIncludeHandler, // user-provided interface to handle #include + // directives (optional). + _COM_Outptr_ IDxcOperationResult * + *ppResult, // Compiler output status, buffer, and errors. + _Outptr_opt_result_z_ LPWSTR + *ppDebugBlobName, // Suggested file name for debug blob. Must be + // CoTaskMemFree()'d. + _COM_Outptr_opt_ IDxcBlob **ppDebugBlob // Debug blob. + ) = 0; +}; + +CROSS_PLATFORM_UUIDOF(IDxcLinker, "F1B5BE2A-62DD-4327-A1C2-42AC1E1E78E6") +/// \brief DXC linker interface. +/// +/// Use DxcCreateInstance with CLSID_DxcLinker to obtain an instance of this +/// interface. +struct IDxcLinker : public IUnknown { +public: + /// \brief Register a library with name to reference it later. + virtual HRESULT + RegisterLibrary(_In_opt_ LPCWSTR pLibName, ///< Name of the library. + _In_ IDxcBlob *pLib ///< Library blob. + ) = 0; + + /// \brief Links the shader and produces a shader blob that the Direct3D + /// runtime can use. + virtual HRESULT STDMETHODCALLTYPE Link( + _In_opt_ LPCWSTR pEntryName, ///< Entry point name. + _In_ LPCWSTR pTargetProfile, ///< shader profile to link. + _In_count_(libCount) + const LPCWSTR *pLibNames, ///< Array of library names to link. + _In_ UINT32 libCount, ///< Number of libraries to link. + _In_opt_count_(argCount) + const LPCWSTR *pArguments, ///< Array of pointers to arguments. + _In_ UINT32 argCount, ///< Number of arguments. + _COM_Outptr_ IDxcOperationResult * + *ppResult ///< Linker output status, buffer, and errors. + ) = 0; +}; + +///////////////////////// +// Latest interfaces. Please use these. 
+//////////////////////// + +CROSS_PLATFORM_UUIDOF(IDxcUtils, "4605C4CB-2019-492A-ADA4-65F20BB7D67F") +/// \brief Various utility functions for DXC. +/// +/// Use DxcCreateInstance with CLSID_DxcUtils to obtain an instance of this +/// interface. +/// +/// IDxcUtils replaces IDxcLibrary. +struct IDxcUtils : public IUnknown { + /// \brief Create a sub-blob that holds a reference to the outer blob and + /// points to its memory. + /// + /// \param pBlob The outer blob. + /// + /// \param offset The offset inside the outer blob. + /// + /// \param length The size, in bytes, of the buffer to reference from the + /// output blob. + /// + /// \param ppResult Address of the pointer that receives a pointer to the + /// newly created blob. + virtual HRESULT STDMETHODCALLTYPE + CreateBlobFromBlob(_In_ IDxcBlob *pBlob, UINT32 offset, UINT32 length, + _COM_Outptr_ IDxcBlob **ppResult) = 0; + + // For codePage, use 0 (or DXC_CP_ACP) for raw binary or ANSI code page. + + /// \brief Create a blob referencing existing memory, with no copy. + /// + /// \param pData Pointer to buffer containing the contents of the new blob. + /// + /// \param size The size of the pData buffer, in bytes. + /// + /// \param codePage The code page to use if the blob contains text. Use + /// DXC_CP_ACP for binary or ANSI code page. + /// + /// \param ppBlobEncoding Address of the pointer that receives a pointer to + /// the newly created blob. + /// + /// The user must manage the memory lifetime separately. + /// + /// This replaces IDxcLibrary::CreateBlobWithEncodingFromPinned. + virtual HRESULT STDMETHODCALLTYPE CreateBlobFromPinned( + _In_bytecount_(size) LPCVOID pData, UINT32 size, UINT32 codePage, + _COM_Outptr_ IDxcBlobEncoding **ppBlobEncoding) = 0; + + /// \brief Create a blob, taking ownership of memory allocated with the + /// supplied allocator. + /// + /// \param pData Pointer to buffer containing the contents of the new blob. + /// + /// \param pIMalloc The memory allocator to use. 
+ /// + /// \param size The size of the pData buffer, in bytes. + /// + /// \param codePage The code page to use if the blob contains text. Use + /// DXC_CP_ACP for binary or ANSI code page. + /// + /// \param ppBlobEncoding Address of the pointer that receives a pointer to + /// the newly created blob. + /// + /// This replaces IDxcLibrary::CreateBlobWithEncodingOnMalloc. + virtual HRESULT STDMETHODCALLTYPE MoveToBlob( + _In_bytecount_(size) LPCVOID pData, IMalloc *pIMalloc, UINT32 size, + UINT32 codePage, _COM_Outptr_ IDxcBlobEncoding **ppBlobEncoding) = 0; + + /// \brief Create a blob containing a copy of the existing data. + /// + /// \param pData Pointer to buffer containing the contents of the new blob. + /// + /// \param size The size of the pData buffer, in bytes. + /// + /// \param codePage The code page to use if the blob contains text. Use + /// DXC_CP_ACP for binary or ANSI code page. + /// + /// \param ppBlobEncoding Address of the pointer that receives a pointer to + /// the newly created blob. + /// + /// The new blob and its contents are allocated with the current allocator. + /// This replaces IDxcLibrary::CreateBlobWithEncodingOnHeapCopy. + virtual HRESULT STDMETHODCALLTYPE + CreateBlob(_In_bytecount_(size) LPCVOID pData, UINT32 size, UINT32 codePage, + _COM_Outptr_ IDxcBlobEncoding **ppBlobEncoding) = 0; + + /// \brief Create a blob with data loaded from a file. + /// + /// \param pFileName The name of the file to load from. + /// + /// \param pCodePage Optional code page to use if the blob contains text. Pass + /// NULL for binary data. + /// + /// \param ppBlobEncoding Address of the pointer that receives a pointer to + /// the newly created blob. + /// + /// The new blob and its contents are allocated with the current allocator. + /// This replaces IDxcLibrary::CreateBlobFromFile. 
+ virtual HRESULT STDMETHODCALLTYPE + LoadFile(_In_z_ LPCWSTR pFileName, _In_opt_ UINT32 *pCodePage, + _COM_Outptr_ IDxcBlobEncoding **ppBlobEncoding) = 0; + + /// \brief Create a stream that reads data from a blob. + /// + /// \param pBlob The blob to read from. + /// + /// \param ppStream Address of the pointer that receives a pointer to the + /// newly created stream. + virtual HRESULT STDMETHODCALLTYPE CreateReadOnlyStreamFromBlob( + _In_ IDxcBlob *pBlob, _COM_Outptr_ IStream **ppStream) = 0; + + /// \brief Create default file-based include handler. + /// + /// \param ppResult Address of the pointer that receives a pointer to the + /// newly created include handler. + virtual HRESULT STDMETHODCALLTYPE + CreateDefaultIncludeHandler(_COM_Outptr_ IDxcIncludeHandler **ppResult) = 0; + + /// \brief Convert or return matching encoded text blob as UTF-8. + /// + /// \param pBlob The blob to convert. + /// + /// \param ppBlobEncoding Address of the pointer that receives a pointer to + /// the newly created blob. + virtual HRESULT STDMETHODCALLTYPE GetBlobAsUtf8( + _In_ IDxcBlob *pBlob, _COM_Outptr_ IDxcBlobUtf8 **ppBlobEncoding) = 0; + + /// \brief Convert or return matching encoded text blob as UTF-16. + /// + /// \param pBlob The blob to convert. + /// + /// \param ppBlobEncoding Address of the pointer that receives a pointer to + /// the newly created blob. + virtual HRESULT STDMETHODCALLTYPE GetBlobAsWide( + _In_ IDxcBlob *pBlob, _COM_Outptr_ IDxcBlobWide **ppBlobEncoding) = 0; + +#ifdef _WIN32 + /// \brief Convert or return matching encoded text blob as UTF-16. + /// + /// \param pBlob The blob to convert. + /// + /// \param ppBlobEncoding Address of the pointer that receives a pointer to + /// the newly created blob. + /// + /// Alias to GetBlobAsWide on Win32. 
+ inline HRESULT GetBlobAsUtf16(_In_ IDxcBlob *pBlob, + _COM_Outptr_ IDxcBlobWide **ppBlobEncoding) { + return this->GetBlobAsWide(pBlob, ppBlobEncoding); + } +#endif + + /// \brief Retrieve a single part from a DXIL container. + /// + /// \param pShader The shader to retrieve the part from. + /// + /// \param DxcPart The part to retrieve (eg DXC_PART_ROOT_SIGNATURE). + /// + /// \param ppPartData Address of the pointer that receives a pointer to the + /// part. + /// + /// \param pPartSizeInBytes Address of the pointer that receives the size of + /// the part. + /// + /// The returned pointer points inside the buffer passed in pShader. + virtual HRESULT STDMETHODCALLTYPE + GetDxilContainerPart(_In_ const DxcBuffer *pShader, _In_ UINT32 DxcPart, + _Outptr_result_nullonfailure_ void **ppPartData, + _Out_ UINT32 *pPartSizeInBytes) = 0; + + /// \brief Create reflection interface from serialized DXIL container or the + /// DXC_OUT_REFLECTION blob contents. + /// + /// \param pData The source data. + /// + /// \param iid The interface ID of the reflection interface to create. + /// + /// \param ppvReflection Address of the pointer that receives a pointer to the + /// newly created reflection interface. + /// + /// Use this with interfaces such as ID3D12ShaderReflection. + virtual HRESULT STDMETHODCALLTYPE CreateReflection( + _In_ const DxcBuffer *pData, REFIID iid, void **ppvReflection) = 0; + + /// \brief Build arguments that can be passed to the Compile method. + virtual HRESULT STDMETHODCALLTYPE BuildArguments( + _In_opt_z_ LPCWSTR pSourceName, ///< Optional file name for pSource. Used + ///< in errors and include handlers. + _In_opt_z_ LPCWSTR pEntryPoint, ///< Entry point name (-E). + _In_z_ LPCWSTR pTargetProfile, ///< Shader profile to compile (-T). + _In_opt_count_(argCount) + LPCWSTR *pArguments, ///< Array of pointers to arguments. + _In_ UINT32 argCount, ///< Number of arguments. + _In_count_(defineCount) const DxcDefine *pDefines, ///< Array of defines. 
+ _In_ UINT32 defineCount, ///< Number of defines. + _COM_Outptr_ IDxcCompilerArgs * + *ppArgs ///< Arguments you can use with Compile() method. + ) = 0; + + /// \brief Retrieve the hash and contents of a shader PDB. + /// + /// \param pPDBBlob The blob containing the PDB. + /// + /// \param ppHash Address of the pointer that receives a pointer to the hash + /// blob. + /// + /// \param ppContainer Address of the pointer that receives a pointer to the + /// blob containing the contents of the PDB. + /// + virtual HRESULT STDMETHODCALLTYPE + GetPDBContents(_In_ IDxcBlob *pPDBBlob, _COM_Outptr_ IDxcBlob **ppHash, + _COM_Outptr_ IDxcBlob **ppContainer) = 0; +}; + +/// \brief Specifies the kind of output to retrieve from an IDxcResult. +/// +/// Note: text outputs returned from version 2 APIs are UTF-8 or UTF-16 based on +/// the -encoding option passed to the compiler. +typedef enum DXC_OUT_KIND { + DXC_OUT_NONE = 0, ///< No output. + DXC_OUT_OBJECT = 1, ///< IDxcBlob - Shader or library object. + DXC_OUT_ERRORS = 2, ///< IDxcBlobUtf8 or IDxcBlobWide. + DXC_OUT_PDB = 3, ///< IDxcBlob. + DXC_OUT_SHADER_HASH = 4, ///< IDxcBlob - DxcShaderHash of shader or shader + ///< with source info (-Zsb/-Zss). + DXC_OUT_DISASSEMBLY = 5, ///< IDxcBlobUtf8 or IDxcBlobWide - from Disassemble. + DXC_OUT_HLSL = + 6, ///< IDxcBlobUtf8 or IDxcBlobWide - from Preprocessor or Rewriter. + DXC_OUT_TEXT = 7, ///< IDxcBlobUtf8 or IDxcBlobWide - other text, such as + ///< -ast-dump or -Odump. + DXC_OUT_REFLECTION = 8, ///< IDxcBlob - RDAT part with reflection data. + DXC_OUT_ROOT_SIGNATURE = 9, ///< IDxcBlob - Serialized root signature output. + DXC_OUT_EXTRA_OUTPUTS = 10, ///< IDxcExtraOutputs - Extra outputs. + DXC_OUT_REMARKS = + 11, ///< IDxcBlobUtf8 or IDxcBlobWide - text directed at stdout. + DXC_OUT_TIME_REPORT = + 12, ///< IDxcBlobUtf8 or IDxcBlobWide - text directed at stdout. + DXC_OUT_TIME_TRACE = + 13, ///< IDxcBlobUtf8 or IDxcBlobWide - text directed at stdout. 
+ + DXC_OUT_LAST = DXC_OUT_TIME_TRACE, ///< Last value for a counter. + + DXC_OUT_NUM_ENUMS, + DXC_OUT_FORCE_DWORD = 0xFFFFFFFF +} DXC_OUT_KIND; + +static_assert(DXC_OUT_NUM_ENUMS == DXC_OUT_LAST + 1, + "DXC_OUT_* Enum added and last value not updated."); + +CROSS_PLATFORM_UUIDOF(IDxcResult, "58346CDA-DDE7-4497-9461-6F87AF5E0659") +/// \brief Result of a DXC operation. +/// +/// DXC operations may have multiple outputs, such as a shader object and +/// errors. This interface provides access to the outputs. +struct IDxcResult : public IDxcOperationResult { + /// \brief Determines whether or not this result has the specified output. + /// + /// \param dxcOutKind The kind of output to check for. + virtual BOOL STDMETHODCALLTYPE HasOutput(_In_ DXC_OUT_KIND dxcOutKind) = 0; + + /// \brief Retrieves the specified output. + /// + /// \param dxcOutKind The kind of output to retrieve. + /// + /// \param iid The interface ID of the output interface. + /// + /// \param ppvObject Address of the pointer that receives a pointer to the + /// output. + /// + /// \param ppOutputName Optional address of a pointer to receive the name + /// blob, if there is one. + virtual HRESULT STDMETHODCALLTYPE + GetOutput(_In_ DXC_OUT_KIND dxcOutKind, _In_ REFIID iid, + _COM_Outptr_opt_result_maybenull_ void **ppvObject, + _COM_Outptr_ IDxcBlobWide **ppOutputName) = 0; + + /// \brief Retrieves the number of outputs available in this result. + virtual UINT32 GetNumOutputs() = 0; + + /// \brief Retrieves the output kind at the specified index. + virtual DXC_OUT_KIND GetOutputByIndex(UINT32 Index) = 0; + + /// \brief Retrieves the primary output kind for this result. + /// + /// See IDxcOperationResult::GetResult() for more information on the primary + /// output kinds. + virtual DXC_OUT_KIND PrimaryOutput() = 0; +}; + +// Special names for extra output that should get written to specific streams. 
+#define DXC_EXTRA_OUTPUT_NAME_STDOUT L"*stdout*" +#define DXC_EXTRA_OUTPUT_NAME_STDERR L"*stderr*" + +CROSS_PLATFORM_UUIDOF(IDxcExtraOutputs, "319b37a2-a5c2-494a-a5de-4801b2faf989") +/// \brief Additional outputs from a DXC operation. +/// +/// This can be used to obtain outputs that don't have an explicit DXC_OUT_KIND. +/// Use DXC_OUT_EXTRA_OUTPUTS to obtain instances of this. +struct IDxcExtraOutputs : public IUnknown { + /// \brief Retrieves the number of outputs available + virtual UINT32 STDMETHODCALLTYPE GetOutputCount() = 0; + + /// \brief Retrieves the specified output. + /// + /// \param uIndex The index of the output to retrieve. + /// + /// \param iid The interface ID of the output interface. + /// + /// \param ppvObject Optional address of the pointer that receives a pointer + /// to the output if there is one. + /// + /// \param ppOutputType Optional address of the pointer that receives the + /// output type name blob if there is one. + /// + /// \param ppOutputName Optional address of the pointer that receives the + /// output name blob if there is one. + virtual HRESULT STDMETHODCALLTYPE + GetOutput(_In_ UINT32 uIndex, _In_ REFIID iid, + _COM_Outptr_opt_result_maybenull_ void **ppvObject, + _COM_Outptr_opt_result_maybenull_ IDxcBlobWide **ppOutputType, + _COM_Outptr_opt_result_maybenull_ IDxcBlobWide **ppOutputName) = 0; +}; + +CROSS_PLATFORM_UUIDOF(IDxcCompiler3, "228B4687-5A6A-4730-900C-9702B2203F54") +/// \brief Interface to the DirectX Shader Compiler. +/// +/// Use DxcCreateInstance with CLSID_DxcCompiler to obtain an instance of this +/// interface. +struct IDxcCompiler3 : public IUnknown { + /// \brief Compile a shader. + /// + /// IDxcUtils::BuildArguments can be used to assist building the pArguments + /// and argCount parameters. 
+ /// + /// Depending on the arguments, this method can be used to: + /// + /// * Compile a single entry point to the target shader model, + /// * Compile a library to a library target (-T lib_*) + /// * Compile a root signature (-T rootsig_*), + /// * Preprocess HLSL source (-P). + virtual HRESULT STDMETHODCALLTYPE Compile( + _In_ const DxcBuffer *pSource, ///< Source text to compile. + _In_opt_count_(argCount) + LPCWSTR *pArguments, ///< Array of pointers to arguments. + _In_ UINT32 argCount, ///< Number of arguments. + _In_opt_ IDxcIncludeHandler + *pIncludeHandler, ///< user-provided interface to handle include + ///< directives (optional). + _In_ REFIID riid, ///< Interface ID for the result. + _Out_ LPVOID *ppResult ///< IDxcResult: status, buffer, and errors. + ) = 0; + + /// \brief Disassemble a program. + virtual HRESULT STDMETHODCALLTYPE Disassemble( + _In_ const DxcBuffer + *pObject, ///< Program to disassemble: dxil container or bitcode. + _In_ REFIID riid, ///< Interface ID for the result. + _Out_ LPVOID + *ppResult ///< IDxcResult: status, disassembly text, and errors. + ) = 0; +}; + +static const UINT32 DxcValidatorFlags_Default = 0; +static const UINT32 DxcValidatorFlags_InPlaceEdit = + 1; // Validator is allowed to update shader blob in-place. +static const UINT32 DxcValidatorFlags_RootSignatureOnly = 2; +static const UINT32 DxcValidatorFlags_ModuleOnly = 4; +static const UINT32 DxcValidatorFlags_ValidMask = 0x7; + +CROSS_PLATFORM_UUIDOF(IDxcValidator, "A6E82BD2-1FD7-4826-9811-2857E797F49A") +/// \brief Interface to DXC shader validator. +/// +/// Use DxcCreateInstance with CLSID_DxcValidator to obtain an instance of this. +struct IDxcValidator : public IUnknown { + /// \brief Validate a shader. + virtual HRESULT STDMETHODCALLTYPE Validate( + _In_ IDxcBlob *pShader, ///< Shader to validate. + _In_ UINT32 Flags, ///< Validation flags. + _COM_Outptr_ IDxcOperationResult * + *ppResult ///< Validation output status, buffer, and errors. 
+ ) = 0; +}; + +CROSS_PLATFORM_UUIDOF(IDxcValidator2, "458e1fd1-b1b2-4750-a6e1-9c10f03bed92") +/// \brief Interface to DXC shader validator. +/// +/// Use DxcCreateInstance with CLSID_DxcValidator to obtain an instance of this. +struct IDxcValidator2 : public IDxcValidator { + /// \brief Validate a shader with optional debug bitcode. + virtual HRESULT STDMETHODCALLTYPE ValidateWithDebug( + _In_ IDxcBlob *pShader, ///< Shader to validate. + _In_ UINT32 Flags, ///< Validation flags. + _In_opt_ DxcBuffer *pOptDebugBitcode, ///< Optional debug module bitcode + ///< to provide line numbers. + _COM_Outptr_ IDxcOperationResult * + *ppResult ///< Validation output status, buffer, and errors. + ) = 0; +}; + +CROSS_PLATFORM_UUIDOF(IDxcContainerBuilder, + "334b1f50-2292-4b35-99a1-25588d8c17fe") +/// \brief Interface to DXC container builder. +/// +/// Use DxcCreateInstance with CLSID_DxcContainerBuilder to obtain an instance +/// of this. +struct IDxcContainerBuilder : public IUnknown { + /// \brief Load a DxilContainer to the builder. + virtual HRESULT STDMETHODCALLTYPE + Load(_In_ IDxcBlob *pDxilContainerHeader) = 0; + + /// \brief Add a part to the container. + /// + /// \param fourCC The part identifier (eg DXC_PART_PDB). + /// + /// \param pSource The source blob. + virtual HRESULT STDMETHODCALLTYPE AddPart(_In_ UINT32 fourCC, + _In_ IDxcBlob *pSource) = 0; + + /// \brief Remove a part from the container. + /// + /// \param fourCC The part identifier (eg DXC_PART_PDB). + /// + /// \return S_OK on success, DXC_E_MISSING_PART if the part was not found, or + /// other standard HRESULT error code. + virtual HRESULT STDMETHODCALLTYPE RemovePart(_In_ UINT32 fourCC) = 0; + + /// \brief Build the container. + /// + /// \param ppResult Pointer to variable to receive the result. 
+ virtual HRESULT STDMETHODCALLTYPE + SerializeContainer(_Out_ IDxcOperationResult **ppResult) = 0; +}; + +CROSS_PLATFORM_UUIDOF(IDxcAssembler, "091f7a26-1c1f-4948-904b-e6e3a8a771d5") +/// \brief Interface to DxcAssembler. +/// +/// Use DxcCreateInstance with CLSID_DxcAssembler to obtain an instance of this. +struct IDxcAssembler : public IUnknown { + /// \brief Assemble DXIL in LL or LLVM bitcode to DXIL container. + virtual HRESULT STDMETHODCALLTYPE AssembleToContainer( + _In_ IDxcBlob *pShader, ///< Shader to assemble. + _COM_Outptr_ IDxcOperationResult * + *ppResult ///< Assembly output status, buffer, and errors. + ) = 0; +}; + +CROSS_PLATFORM_UUIDOF(IDxcContainerReflection, + "d2c21b26-8350-4bdc-976a-331ce6f4c54c") +/// \brief Interface to DxcContainerReflection. +/// +/// Use DxcCreateInstance with CLSID_DxcContainerReflection to obtain an +/// instance of this. +struct IDxcContainerReflection : public IUnknown { + /// \brief Choose the container to perform reflection on + /// + /// \param pContainer The container to load. If null is passed then this + /// instance will release any held resources. + virtual HRESULT STDMETHODCALLTYPE Load(_In_ IDxcBlob *pContainer) = 0; + + /// \brief Retrieves the number of parts in the container. + /// + /// \param pResult Pointer to variable to receive the result. + /// + /// \return S_OK on success, E_NOT_VALID_STATE if a container has not been + /// loaded using Load(), or other standard HRESULT error codes. + virtual HRESULT STDMETHODCALLTYPE GetPartCount(_Out_ UINT32 *pResult) = 0; + + /// \brief Retrieve the kind of a specified part. + /// + /// \param idx The index of the part to retrieve the kind of. + /// + /// \param pResult Pointer to variable to receive the result. + /// + /// \return S_OK on success, E_NOT_VALID_STATE if a container has not been + /// loaded using Load(), E_BOUNDS if idx is out of bounds, or other standard + /// HRESULT error codes.
+ virtual HRESULT STDMETHODCALLTYPE GetPartKind(UINT32 idx, + _Out_ UINT32 *pResult) = 0; + + /// \brief Retrieve the content of a specified part. + /// + /// \param idx The index of the part to retrieve. + /// + /// \param ppResult Pointer to variable to receive the result. + /// + /// \return S_OK on success, E_NOT_VALID_STATE if a container has not been + /// loaded using Load(), E_BOUNDS if idx is out of bounds, or other standard + /// HRESULT error codes. + virtual HRESULT STDMETHODCALLTYPE + GetPartContent(UINT32 idx, _COM_Outptr_ IDxcBlob **ppResult) = 0; + + /// \brief Retrieve the index of the first part with the specified kind. + /// + /// \param kind The kind to search for. + /// + /// \param pResult Pointer to variable to receive the index of the matching + /// part. + /// + /// \return S_OK on success, E_NOT_VALID_STATE if a container has not been + /// loaded using Load(), HRESULT_FROM_WIN32(ERROR_NOT_FOUND) if there is no + /// part with the specified kind, or other standard HRESULT error codes. + virtual HRESULT STDMETHODCALLTYPE + FindFirstPartKind(UINT32 kind, _Out_ UINT32 *pResult) = 0; + + /// \brief Retrieve the reflection interface for a specified part. + /// + /// \param idx The index of the part to retrieve the reflection interface of. + /// + /// \param iid The IID of the interface to retrieve. + /// + /// \param ppvObject Pointer to variable to receive the result. + /// + /// Use this with interfaces such as ID3D12ShaderReflection. + /// + /// \return S_OK on success, E_NOT_VALID_STATE if a container has not been + /// loaded using Load(), E_BOUNDS if idx is out of bounds, or other standard + /// HRESULT error codes. + virtual HRESULT STDMETHODCALLTYPE GetPartReflection(UINT32 idx, REFIID iid, + void **ppvObject) = 0; +}; + +CROSS_PLATFORM_UUIDOF(IDxcOptimizerPass, "AE2CD79F-CC22-453F-9B6B-B124E7A5204C") +/// \brief An optimizer pass. +/// +/// Instances of this can be obtained via IDxcOptimizer::GetAvailablePass.
+struct IDxcOptimizerPass : public IUnknown { + virtual HRESULT STDMETHODCALLTYPE + GetOptionName(_COM_Outptr_ LPWSTR *ppResult) = 0; + virtual HRESULT STDMETHODCALLTYPE + GetDescription(_COM_Outptr_ LPWSTR *ppResult) = 0; + virtual HRESULT STDMETHODCALLTYPE GetOptionArgCount(_Out_ UINT32 *pCount) = 0; + virtual HRESULT STDMETHODCALLTYPE + GetOptionArgName(UINT32 argIndex, _COM_Outptr_ LPWSTR *ppResult) = 0; + virtual HRESULT STDMETHODCALLTYPE + GetOptionArgDescription(UINT32 argIndex, _COM_Outptr_ LPWSTR *ppResult) = 0; +}; + +CROSS_PLATFORM_UUIDOF(IDxcOptimizer, "25740E2E-9CBA-401B-9119-4FB42F39F270") +/// \brief Interface to DxcOptimizer. +/// +/// Use DxcCreateInstance with CLSID_DxcOptimizer to obtain an instance of this. +struct IDxcOptimizer : public IUnknown { + virtual HRESULT STDMETHODCALLTYPE + GetAvailablePassCount(_Out_ UINT32 *pCount) = 0; + virtual HRESULT STDMETHODCALLTYPE + GetAvailablePass(UINT32 index, _COM_Outptr_ IDxcOptimizerPass **ppResult) = 0; + virtual HRESULT STDMETHODCALLTYPE + RunOptimizer(IDxcBlob *pBlob, _In_count_(optionCount) LPCWSTR *ppOptions, + UINT32 optionCount, _COM_Outptr_ IDxcBlob **pOutputModule, + _COM_Outptr_opt_ IDxcBlobEncoding **ppOutputText) = 0; +}; + +static const UINT32 DxcVersionInfoFlags_None = 0; +static const UINT32 DxcVersionInfoFlags_Debug = 1; // Matches VS_FF_DEBUG +static const UINT32 DxcVersionInfoFlags_Internal = + 2; // Internal Validator (non-signing) + +CROSS_PLATFORM_UUIDOF(IDxcVersionInfo, "b04f5b50-2059-4f12-a8ff-a1e0cde1cc7e") +/// \brief PDB Version information. +/// +/// Use IDxcPdbUtils2::GetVersionInfo to obtain an instance of this. +struct IDxcVersionInfo : public IUnknown { + virtual HRESULT STDMETHODCALLTYPE GetVersion(_Out_ UINT32 *pMajor, + _Out_ UINT32 *pMinor) = 0; + virtual HRESULT STDMETHODCALLTYPE GetFlags(_Out_ UINT32 *pFlags) = 0; +}; + +CROSS_PLATFORM_UUIDOF(IDxcVersionInfo2, "fb6904c4-42f0-4b62-9c46-983af7da7c83") +/// \brief PDB Version Information. 
+/// +/// Use IDxcPdbUtils2::GetVersionInfo to obtain an IDxcVersionInfo interface, and +/// then use QueryInterface to obtain an instance of this interface from it. +struct IDxcVersionInfo2 : public IDxcVersionInfo { + virtual HRESULT STDMETHODCALLTYPE GetCommitInfo( + _Out_ UINT32 *pCommitCount, ///< The total number of commits. + _Outptr_result_z_ char **pCommitHash ///< The SHA of the latest commit. + ///< Must be CoTaskMemFree()'d. + ) = 0; +}; + +CROSS_PLATFORM_UUIDOF(IDxcVersionInfo3, "5e13e843-9d25-473c-9ad2-03b2d0b44b1e") +/// \brief PDB Version Information. +/// +/// Use IDxcPdbUtils2::GetVersionInfo to obtain an IDxcVersionInfo interface, and +/// then use QueryInterface to obtain an instance of this interface from it. +struct IDxcVersionInfo3 : public IUnknown { + virtual HRESULT STDMETHODCALLTYPE GetCustomVersionString( + _Outptr_result_z_ char * + *pVersionString ///< Custom version string for compiler. Must be + ///< CoTaskMemFree()'d. + ) = 0; +}; + +struct DxcArgPair { + const WCHAR *pName; + const WCHAR *pValue; +}; + +CROSS_PLATFORM_UUIDOF(IDxcPdbUtils, "E6C9647E-9D6A-4C3B-B94C-524B5A6C343D") +/// \deprecated Please use IDxcPdbUtils2 instead.
+struct IDxcPdbUtils : public IUnknown { + virtual HRESULT STDMETHODCALLTYPE Load(_In_ IDxcBlob *pPdbOrDxil) = 0; + + virtual HRESULT STDMETHODCALLTYPE GetSourceCount(_Out_ UINT32 *pCount) = 0; + virtual HRESULT STDMETHODCALLTYPE + GetSource(_In_ UINT32 uIndex, _COM_Outptr_ IDxcBlobEncoding **ppResult) = 0; + virtual HRESULT STDMETHODCALLTYPE + GetSourceName(_In_ UINT32 uIndex, _Outptr_result_z_ BSTR *pResult) = 0; + + virtual HRESULT STDMETHODCALLTYPE GetFlagCount(_Out_ UINT32 *pCount) = 0; + virtual HRESULT STDMETHODCALLTYPE + GetFlag(_In_ UINT32 uIndex, _Outptr_result_z_ BSTR *pResult) = 0; + + virtual HRESULT STDMETHODCALLTYPE GetArgCount(_Out_ UINT32 *pCount) = 0; + virtual HRESULT STDMETHODCALLTYPE GetArg(_In_ UINT32 uIndex, + _Outptr_result_z_ BSTR *pResult) = 0; + + virtual HRESULT STDMETHODCALLTYPE GetArgPairCount(_Out_ UINT32 *pCount) = 0; + virtual HRESULT STDMETHODCALLTYPE + GetArgPair(_In_ UINT32 uIndex, _Outptr_result_z_ BSTR *pName, + _Outptr_result_z_ BSTR *pValue) = 0; + + virtual HRESULT STDMETHODCALLTYPE GetDefineCount(_Out_ UINT32 *pCount) = 0; + virtual HRESULT STDMETHODCALLTYPE + GetDefine(_In_ UINT32 uIndex, _Outptr_result_z_ BSTR *pResult) = 0; + + virtual HRESULT STDMETHODCALLTYPE + GetTargetProfile(_Outptr_result_z_ BSTR *pResult) = 0; + virtual HRESULT STDMETHODCALLTYPE + GetEntryPoint(_Outptr_result_z_ BSTR *pResult) = 0; + virtual HRESULT STDMETHODCALLTYPE + GetMainFileName(_Outptr_result_z_ BSTR *pResult) = 0; + + virtual HRESULT STDMETHODCALLTYPE + GetHash(_COM_Outptr_ IDxcBlob **ppResult) = 0; + virtual HRESULT STDMETHODCALLTYPE + GetName(_Outptr_result_z_ BSTR *pResult) = 0; + + virtual BOOL STDMETHODCALLTYPE IsFullPDB() = 0; + virtual HRESULT STDMETHODCALLTYPE + GetFullPDB(_COM_Outptr_ IDxcBlob **ppFullPDB) = 0; + + virtual HRESULT STDMETHODCALLTYPE + GetVersionInfo(_COM_Outptr_ IDxcVersionInfo **ppVersionInfo) = 0; + + virtual HRESULT STDMETHODCALLTYPE + SetCompiler(_In_ IDxcCompiler3 *pCompiler) = 0; + virtual HRESULT 
STDMETHODCALLTYPE + CompileForFullPDB(_COM_Outptr_ IDxcResult **ppResult) = 0; + virtual HRESULT STDMETHODCALLTYPE OverrideArgs(_In_ DxcArgPair *pArgPairs, + UINT32 uNumArgPairs) = 0; + virtual HRESULT STDMETHODCALLTYPE + OverrideRootSignature(_In_ const WCHAR *pRootSignature) = 0; +}; + +CROSS_PLATFORM_UUIDOF(IDxcPdbUtils2, "4315D938-F369-4F93-95A2-252017CC3807") +/// \brief DxcPdbUtils interface. +/// +/// Use DxcCreateInstance with CLSID_DxcPdbUtils to create an instance of this. +struct IDxcPdbUtils2 : public IUnknown { + virtual HRESULT STDMETHODCALLTYPE Load(_In_ IDxcBlob *pPdbOrDxil) = 0; + + virtual HRESULT STDMETHODCALLTYPE GetSourceCount(_Out_ UINT32 *pCount) = 0; + virtual HRESULT STDMETHODCALLTYPE + GetSource(_In_ UINT32 uIndex, _COM_Outptr_ IDxcBlobEncoding **ppResult) = 0; + virtual HRESULT STDMETHODCALLTYPE + GetSourceName(_In_ UINT32 uIndex, _COM_Outptr_ IDxcBlobWide **ppResult) = 0; + + virtual HRESULT STDMETHODCALLTYPE GetLibraryPDBCount(UINT32 *pCount) = 0; + virtual HRESULT STDMETHODCALLTYPE GetLibraryPDB( + _In_ UINT32 uIndex, _COM_Outptr_ IDxcPdbUtils2 **ppOutPdbUtils, + _COM_Outptr_opt_result_maybenull_ IDxcBlobWide **ppLibraryName) = 0; + + virtual HRESULT STDMETHODCALLTYPE GetFlagCount(_Out_ UINT32 *pCount) = 0; + virtual HRESULT STDMETHODCALLTYPE + GetFlag(_In_ UINT32 uIndex, _COM_Outptr_ IDxcBlobWide **ppResult) = 0; + + virtual HRESULT STDMETHODCALLTYPE GetArgCount(_Out_ UINT32 *pCount) = 0; + virtual HRESULT STDMETHODCALLTYPE + GetArg(_In_ UINT32 uIndex, _COM_Outptr_ IDxcBlobWide **ppResult) = 0; + + virtual HRESULT STDMETHODCALLTYPE GetArgPairCount(_Out_ UINT32 *pCount) = 0; + virtual HRESULT STDMETHODCALLTYPE GetArgPair( + _In_ UINT32 uIndex, _COM_Outptr_result_maybenull_ IDxcBlobWide **ppName, + _COM_Outptr_result_maybenull_ IDxcBlobWide **ppValue) = 0; + + virtual HRESULT STDMETHODCALLTYPE GetDefineCount(_Out_ UINT32 *pCount) = 0; + virtual HRESULT STDMETHODCALLTYPE + GetDefine(_In_ UINT32 uIndex, _COM_Outptr_ IDxcBlobWide 
**ppResult) = 0; + + virtual HRESULT STDMETHODCALLTYPE + GetTargetProfile(_COM_Outptr_result_maybenull_ IDxcBlobWide **ppResult) = 0; + virtual HRESULT STDMETHODCALLTYPE + GetEntryPoint(_COM_Outptr_result_maybenull_ IDxcBlobWide **ppResult) = 0; + virtual HRESULT STDMETHODCALLTYPE + GetMainFileName(_COM_Outptr_result_maybenull_ IDxcBlobWide **ppResult) = 0; + + virtual HRESULT STDMETHODCALLTYPE + GetHash(_COM_Outptr_result_maybenull_ IDxcBlob **ppResult) = 0; + virtual HRESULT STDMETHODCALLTYPE + GetName(_COM_Outptr_result_maybenull_ IDxcBlobWide **ppResult) = 0; + + virtual HRESULT STDMETHODCALLTYPE GetVersionInfo( + _COM_Outptr_result_maybenull_ IDxcVersionInfo **ppVersionInfo) = 0; + + virtual HRESULT STDMETHODCALLTYPE GetCustomToolchainID(_Out_ UINT32 *pID) = 0; + virtual HRESULT STDMETHODCALLTYPE + GetCustomToolchainData(_COM_Outptr_result_maybenull_ IDxcBlob **ppBlob) = 0; + + virtual HRESULT STDMETHODCALLTYPE + GetWholeDxil(_COM_Outptr_result_maybenull_ IDxcBlob **ppResult) = 0; + + virtual BOOL STDMETHODCALLTYPE IsFullPDB() = 0; + virtual BOOL STDMETHODCALLTYPE IsPDBRef() = 0; +}; + +// Note: __declspec(selectany) requires 'extern' +// On Linux __declspec(selectany) is removed and using 'extern' results in link +// error. 
+#ifdef _MSC_VER +#define CLSID_SCOPE __declspec(selectany) extern +#else +#define CLSID_SCOPE +#endif + +CLSID_SCOPE const CLSID CLSID_DxcCompiler = { + 0x73e22d93, + 0xe6ce, + 0x47f3, + {0xb5, 0xbf, 0xf0, 0x66, 0x4f, 0x39, 0xc1, 0xb0}}; + +// {EF6A8087-B0EA-4D56-9E45-D07E1A8B7806} +CLSID_SCOPE const GUID CLSID_DxcLinker = { + 0xef6a8087, + 0xb0ea, + 0x4d56, + {0x9e, 0x45, 0xd0, 0x7e, 0x1a, 0x8b, 0x78, 0x6}}; + +// {CD1F6B73-2AB0-484D-8EDC-EBE7A43CA09F} +CLSID_SCOPE const CLSID CLSID_DxcDiaDataSource = { + 0xcd1f6b73, + 0x2ab0, + 0x484d, + {0x8e, 0xdc, 0xeb, 0xe7, 0xa4, 0x3c, 0xa0, 0x9f}}; + +// {3E56AE82-224D-470F-A1A1-FE3016EE9F9D} +CLSID_SCOPE const CLSID CLSID_DxcCompilerArgs = { + 0x3e56ae82, + 0x224d, + 0x470f, + {0xa1, 0xa1, 0xfe, 0x30, 0x16, 0xee, 0x9f, 0x9d}}; + +// {6245D6AF-66E0-48FD-80B4-4D271796748C} +CLSID_SCOPE const GUID CLSID_DxcLibrary = { + 0x6245d6af, + 0x66e0, + 0x48fd, + {0x80, 0xb4, 0x4d, 0x27, 0x17, 0x96, 0x74, 0x8c}}; + +CLSID_SCOPE const GUID CLSID_DxcUtils = CLSID_DxcLibrary; + +// {8CA3E215-F728-4CF3-8CDD-88AF917587A1} +CLSID_SCOPE const GUID CLSID_DxcValidator = { + 0x8ca3e215, + 0xf728, + 0x4cf3, + {0x8c, 0xdd, 0x88, 0xaf, 0x91, 0x75, 0x87, 0xa1}}; + +// {D728DB68-F903-4F80-94CD-DCCF76EC7151} +CLSID_SCOPE const GUID CLSID_DxcAssembler = { + 0xd728db68, + 0xf903, + 0x4f80, + {0x94, 0xcd, 0xdc, 0xcf, 0x76, 0xec, 0x71, 0x51}}; + +// {b9f54489-55b8-400c-ba3a-1675e4728b91} +CLSID_SCOPE const GUID CLSID_DxcContainerReflection = { + 0xb9f54489, + 0x55b8, + 0x400c, + {0xba, 0x3a, 0x16, 0x75, 0xe4, 0x72, 0x8b, 0x91}}; + +// {AE2CD79F-CC22-453F-9B6B-B124E7A5204C} +CLSID_SCOPE const GUID CLSID_DxcOptimizer = { + 0xae2cd79f, + 0xcc22, + 0x453f, + {0x9b, 0x6b, 0xb1, 0x24, 0xe7, 0xa5, 0x20, 0x4c}}; + +// {94134294-411f-4574-b4d0-8741e25240d2} +CLSID_SCOPE const GUID CLSID_DxcContainerBuilder = { + 0x94134294, + 0x411f, + 0x4574, + {0xb4, 0xd0, 0x87, 0x41, 0xe2, 0x52, 0x40, 0xd2}}; + +// {54621dfb-f2ce-457e-ae8c-ec355faeec7c} +CLSID_SCOPE const GUID 
CLSID_DxcPdbUtils = { + 0x54621dfb, + 0xf2ce, + 0x457e, + {0xae, 0x8c, 0xec, 0x35, 0x5f, 0xae, 0xec, 0x7c}}; + +#endif diff --git a/src/runtime/mini_d3d12.h b/src/runtime/mini_d3d12.h index 7b179fe58760..ce325840de1e 100644 --- a/src/runtime/mini_d3d12.h +++ b/src/runtime/mini_d3d12.h @@ -37,6 +37,10 @@ #define _In_opt_ #endif //_In_opt_ +#ifndef _In_opt_z_ +#define _In_opt_z_ +#endif //_In_opt_z_ + #ifndef _Out_ #define _Out_ #endif //_Out_ @@ -133,6 +137,10 @@ #define _Outptr_opt_result_bytebuffer_(size) #endif //_Outptr_opt_result_bytebuffer_ +#ifndef _Outptr_opt_result_z_ +#define _Outptr_opt_result_z_ +#endif //_Outptr_opt_result_z_ + #ifndef _Must_inspect_result_ #define _Must_inspect_result_ #endif //_Must_inspect_result_ @@ -145,6 +153,46 @@ #define _Always_(annos) #endif //_Always_ +#ifndef _In_opt_count_ +#define _In_opt_count_(annos) +#endif //_In_opt_count_ + +#ifndef _In_bytecount_ +#define _In_bytecount_(annos) +#endif //_In_bytecount_ + +#ifndef _In_count_ +#define _In_count_(annos) +#endif //_In_count_ + +#ifndef _COM_Outptr_result_maybenull_ +#define _COM_Outptr_result_maybenull_ +#endif // _COM_Outptr_result_maybenull_ + +#ifndef _Outptr_result_nullonfailure_ +#define _Outptr_result_nullonfailure_ +#endif // _Outptr_result_nullonfailure_ + +#ifndef _COM_Outptr_opt_result_maybenull_ +#define _COM_Outptr_opt_result_maybenull_ +#endif // _COM_Outptr_opt_result_maybenull_ + +#ifndef _COM_Outptr_opt_result_maybenull_ +#define _COM_Outptr_opt_result_maybenull_ +#endif // _COM_Outptr_opt_result_maybenull_ + +#ifndef _Maybenull_ +#define _Maybenull_ +#endif // _Maybenull_ + +#ifndef _Outptr_result_z_ +#define _Outptr_result_z_ +#endif // _Outptr_result_z_ + +#ifndef _Out_writes_bytes_to_ +#define _Out_writes_bytes_to_(cb, pcbRead) +#endif // _Out_writes_bytes_to_ + /* rpcsal.h */ #ifndef __RPC_string #define __RPC_string @@ -204,6 +252,7 @@ typedef unsigned short WCHAR; // wc, 16-bit UNICODE character typedef _Null_terminated_ CHAR *NPSTR, *LPSTR, *PSTR; 
typedef _Null_terminated_ CONST CHAR *LPCSTR, *PCSTR; typedef _Null_terminated_ CONST WCHAR *LPCWSTR, *PCWSTR; +typedef _Null_terminated_ WCHAR *NWPSTR, *LPWSTR, *PWSTR; #define FAR far #define NEAR near @@ -458,6 +507,7 @@ typedef HINSTANCE HMODULE; /* HMODULEs can be used in place of HINSTANCEs */ /* winerror.h */ #define _HRESULT_TYPEDEF_(_sc) ((HRESULT)(_sc)) #define S_OK _HRESULT_TYPEDEF_(0x0L) +#define E_INVALIDARG _HRESULT_TYPEDEF_(0x80070057L) #define E_UNEXPECTED _HRESULT_TYPEDEF_(0x8000FFFFL) #define DXGI_ERROR_NOT_FOUND _HRESULT_TYPEDEF_(0x887a0002) #define SUCCEEDED(hr) (((HRESULT)(hr)) >= 0) @@ -507,6 +557,45 @@ typedef IID *LPIID; EXTERN_C const GUID FAR name #endif // INITGUID +/* guiddef.h */ +#ifndef _REFGUID_DEFINED +#define _REFGUID_DEFINED +#ifdef __cplusplus +#define REFGUID const GUID & +#else +#define REFGUID const GUID *__MIDL_CONST +#endif +#endif + +#ifndef _REFIID_DEFINED +#define _REFIID_DEFINED +#ifdef __cplusplus +#define REFIID const IID & +#else +#define REFIID const IID *__MIDL_CONST +#endif +#endif + +#ifndef _REFCLSID_DEFINED +#define _REFCLSID_DEFINED +#ifdef __cplusplus +#define REFCLSID const IID & +#else +#define REFCLSID const IID *__MIDL_CONST +#endif +#endif + +#ifndef _REFFMTID_DEFINED +#define _REFFMTID_DEFINED +#ifdef __cplusplus +#define REFFMTID const IID & +#else +#define REFFMTID const IID *__MIDL_CONST +#endif +#endif + +typedef GUID CLSID; + /* rpc.h */ #if defined(__specstrings) typedef _Return_type_success_(return == 0) LONG RPC_STATUS; @@ -942,6 +1031,30 @@ typedef LPCSTR LPCOLESTR; #define OLESTR(str) str #endif +/* wtypesbase.h */ +typedef _Null_terminated_ OLECHAR *BSTR; +typedef BSTR *LPBSTR; + +/* objidlbase.h */ +typedef _Null_terminated_ OLECHAR *BSTR; +typedef BSTR *LPBSTR; + +#ifndef __ISequentialStream_FWD_DEFINED__ +#define __ISequentialStream_FWD_DEFINED__ +typedef interface ISequentialStream ISequentialStream; + +#endif /* __ISequentialStream_FWD_DEFINED__ */ + +#ifndef __IStream_FWD_DEFINED__ 
+#define __IStream_FWD_DEFINED__ +typedef interface IStream IStream; + +#endif /* __IStream_FWD_DEFINED__ */ + +EXTERN_C const IID IID_ISequentialStream; +EXTERN_C const IID IID_IStream; + + /* d3dcommon.h */ #ifdef __cplusplus extern "C" { @@ -3097,6 +3210,24 @@ typedef struct D3D12_VERSIONED_ROOT_SIGNATURE_DESC { }; } D3D12_VERSIONED_ROOT_SIGNATURE_DESC; + +typedef enum D3D_SHADER_MODEL { + D3D_SHADER_MODEL_5_1 = 0x51, + D3D_SHADER_MODEL_6_0 = 0x60, + D3D_SHADER_MODEL_6_1 = 0x61, + D3D_SHADER_MODEL_6_2 = 0x62, + D3D_SHADER_MODEL_6_3 = 0x63, + D3D_SHADER_MODEL_6_4 = 0x64, + D3D_SHADER_MODEL_6_5 = 0x65, + D3D_SHADER_MODEL_6_6 = 0x66, + D3D_SHADER_MODEL_6_7 = 0x67, + D3D_HIGHEST_SHADER_MODEL = D3D_SHADER_MODEL_6_7 +} D3D_SHADER_MODEL; + +typedef struct D3D12_FEATURE_DATA_SHADER_MODEL { + _Inout_ D3D_SHADER_MODEL HighestShaderModel; +} D3D12_FEATURE_DATA_SHADER_MODEL; + #define D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND (0xffffffff) #ifndef __ID3D12Resource_INTERFACE_DEFINED__ @@ -6878,6 +7009,483 @@ WaitForSingleObject( #define CreateEvent CreateEventA #endif // !UNICODE +////////////////////////////////////////////////////////////////////////// +// From: d3dcommon.h + +typedef enum D3D_NAME { + D3D_NAME_UNDEFINED = 0, + D3D_NAME_POSITION = 1, + D3D_NAME_CLIP_DISTANCE = 2, + D3D_NAME_CULL_DISTANCE = 3, + D3D_NAME_RENDER_TARGET_ARRAY_INDEX = 4, + D3D_NAME_VIEWPORT_ARRAY_INDEX = 5, + D3D_NAME_VERTEX_ID = 6, + D3D_NAME_PRIMITIVE_ID = 7, + D3D_NAME_INSTANCE_ID = 8, + D3D_NAME_IS_FRONT_FACE = 9, + D3D_NAME_SAMPLE_INDEX = 10, + D3D_NAME_FINAL_QUAD_EDGE_TESSFACTOR = 11, + D3D_NAME_FINAL_QUAD_INSIDE_TESSFACTOR = 12, + D3D_NAME_FINAL_TRI_EDGE_TESSFACTOR = 13, + D3D_NAME_FINAL_TRI_INSIDE_TESSFACTOR = 14, + D3D_NAME_FINAL_LINE_DETAIL_TESSFACTOR = 15, + D3D_NAME_FINAL_LINE_DENSITY_TESSFACTOR = 16, + D3D_NAME_BARYCENTRICS = 23, + D3D_NAME_SHADINGRATE = 24, + D3D_NAME_CULLPRIMITIVE = 25, + D3D_NAME_TARGET = 64, + D3D_NAME_DEPTH = 65, + D3D_NAME_COVERAGE = 66, + 
D3D_NAME_DEPTH_GREATER_EQUAL = 67, + D3D_NAME_DEPTH_LESS_EQUAL = 68, + D3D_NAME_STENCIL_REF = 69, + D3D_NAME_INNER_COVERAGE = 70, + D3D10_NAME_UNDEFINED = D3D_NAME_UNDEFINED, + D3D10_NAME_POSITION = D3D_NAME_POSITION, + D3D10_NAME_CLIP_DISTANCE = D3D_NAME_CLIP_DISTANCE, + D3D10_NAME_CULL_DISTANCE = D3D_NAME_CULL_DISTANCE, + D3D10_NAME_RENDER_TARGET_ARRAY_INDEX = D3D_NAME_RENDER_TARGET_ARRAY_INDEX, + D3D10_NAME_VIEWPORT_ARRAY_INDEX = D3D_NAME_VIEWPORT_ARRAY_INDEX, + D3D10_NAME_VERTEX_ID = D3D_NAME_VERTEX_ID, + D3D10_NAME_PRIMITIVE_ID = D3D_NAME_PRIMITIVE_ID, + D3D10_NAME_INSTANCE_ID = D3D_NAME_INSTANCE_ID, + D3D10_NAME_IS_FRONT_FACE = D3D_NAME_IS_FRONT_FACE, + D3D10_NAME_SAMPLE_INDEX = D3D_NAME_SAMPLE_INDEX, + D3D10_NAME_TARGET = D3D_NAME_TARGET, + D3D10_NAME_DEPTH = D3D_NAME_DEPTH, + D3D10_NAME_COVERAGE = D3D_NAME_COVERAGE, + D3D11_NAME_FINAL_QUAD_EDGE_TESSFACTOR = D3D_NAME_FINAL_QUAD_EDGE_TESSFACTOR, + D3D11_NAME_FINAL_QUAD_INSIDE_TESSFACTOR = D3D_NAME_FINAL_QUAD_INSIDE_TESSFACTOR, + D3D11_NAME_FINAL_TRI_EDGE_TESSFACTOR = D3D_NAME_FINAL_TRI_EDGE_TESSFACTOR, + D3D11_NAME_FINAL_TRI_INSIDE_TESSFACTOR = D3D_NAME_FINAL_TRI_INSIDE_TESSFACTOR, + D3D11_NAME_FINAL_LINE_DETAIL_TESSFACTOR = D3D_NAME_FINAL_LINE_DETAIL_TESSFACTOR, + D3D11_NAME_FINAL_LINE_DENSITY_TESSFACTOR = D3D_NAME_FINAL_LINE_DENSITY_TESSFACTOR, + D3D11_NAME_DEPTH_GREATER_EQUAL = D3D_NAME_DEPTH_GREATER_EQUAL, + D3D11_NAME_DEPTH_LESS_EQUAL = D3D_NAME_DEPTH_LESS_EQUAL, + D3D11_NAME_STENCIL_REF = D3D_NAME_STENCIL_REF, + D3D11_NAME_INNER_COVERAGE = D3D_NAME_INNER_COVERAGE, + D3D12_NAME_BARYCENTRICS = D3D_NAME_BARYCENTRICS, + D3D12_NAME_SHADINGRATE = D3D_NAME_SHADINGRATE, + D3D12_NAME_CULLPRIMITIVE = D3D_NAME_CULLPRIMITIVE +} D3D_NAME; + +typedef enum D3D_REGISTER_COMPONENT_TYPE { + D3D_REGISTER_COMPONENT_UNKNOWN = 0, + D3D_REGISTER_COMPONENT_UINT32 = 1, + D3D_REGISTER_COMPONENT_SINT32 = 2, + D3D_REGISTER_COMPONENT_FLOAT32 = 3, + D3D10_REGISTER_COMPONENT_UNKNOWN = D3D_REGISTER_COMPONENT_UNKNOWN, + 
D3D10_REGISTER_COMPONENT_UINT32 = D3D_REGISTER_COMPONENT_UINT32, + D3D10_REGISTER_COMPONENT_SINT32 = D3D_REGISTER_COMPONENT_SINT32, + D3D10_REGISTER_COMPONENT_FLOAT32 = D3D_REGISTER_COMPONENT_FLOAT32 +} D3D_REGISTER_COMPONENT_TYPE; + +typedef enum D3D_MIN_PRECISION { + D3D_MIN_PRECISION_DEFAULT = 0, + D3D_MIN_PRECISION_FLOAT_16 = 1, + D3D_MIN_PRECISION_FLOAT_2_8 = 2, + D3D_MIN_PRECISION_RESERVED = 3, + D3D_MIN_PRECISION_SINT_16 = 4, + D3D_MIN_PRECISION_UINT_16 = 5, + D3D_MIN_PRECISION_ANY_16 = 0xf0, + D3D_MIN_PRECISION_ANY_10 = 0xf1 +} D3D_MIN_PRECISION; + +typedef enum D3D_RESOURCE_RETURN_TYPE { + D3D_RETURN_TYPE_UNORM = 1, + D3D_RETURN_TYPE_SNORM = 2, + D3D_RETURN_TYPE_SINT = 3, + D3D_RETURN_TYPE_UINT = 4, + D3D_RETURN_TYPE_FLOAT = 5, + D3D_RETURN_TYPE_MIXED = 6, + D3D_RETURN_TYPE_DOUBLE = 7, + D3D_RETURN_TYPE_CONTINUED = 8, + D3D10_RETURN_TYPE_UNORM = D3D_RETURN_TYPE_UNORM, + D3D10_RETURN_TYPE_SNORM = D3D_RETURN_TYPE_SNORM, + D3D10_RETURN_TYPE_SINT = D3D_RETURN_TYPE_SINT, + D3D10_RETURN_TYPE_UINT = D3D_RETURN_TYPE_UINT, + D3D10_RETURN_TYPE_FLOAT = D3D_RETURN_TYPE_FLOAT, + D3D10_RETURN_TYPE_MIXED = D3D_RETURN_TYPE_MIXED, + D3D11_RETURN_TYPE_UNORM = D3D_RETURN_TYPE_UNORM, + D3D11_RETURN_TYPE_SNORM = D3D_RETURN_TYPE_SNORM, + D3D11_RETURN_TYPE_SINT = D3D_RETURN_TYPE_SINT, + D3D11_RETURN_TYPE_UINT = D3D_RETURN_TYPE_UINT, + D3D11_RETURN_TYPE_FLOAT = D3D_RETURN_TYPE_FLOAT, + D3D11_RETURN_TYPE_MIXED = D3D_RETURN_TYPE_MIXED, + D3D11_RETURN_TYPE_DOUBLE = D3D_RETURN_TYPE_DOUBLE, + D3D11_RETURN_TYPE_CONTINUED = D3D_RETURN_TYPE_CONTINUED +} D3D_RESOURCE_RETURN_TYPE; + +typedef enum _D3D_CBUFFER_TYPE { + D3D_CT_CBUFFER = 0, + D3D_CT_TBUFFER = (D3D_CT_CBUFFER + 1), + D3D_CT_INTERFACE_POINTERS = (D3D_CT_TBUFFER + 1), + D3D_CT_RESOURCE_BIND_INFO = (D3D_CT_INTERFACE_POINTERS + 1), + D3D10_CT_CBUFFER = D3D_CT_CBUFFER, + D3D10_CT_TBUFFER = D3D_CT_TBUFFER, + D3D11_CT_CBUFFER = D3D_CT_CBUFFER, + D3D11_CT_TBUFFER = D3D_CT_TBUFFER, + D3D11_CT_INTERFACE_POINTERS = 
D3D_CT_INTERFACE_POINTERS, + D3D11_CT_RESOURCE_BIND_INFO = D3D_CT_RESOURCE_BIND_INFO +} D3D_CBUFFER_TYPE; + +typedef enum _D3D_SHADER_VARIABLE_CLASS { + D3D_SVC_SCALAR = 0, + D3D_SVC_VECTOR = (D3D_SVC_SCALAR + 1), + D3D_SVC_MATRIX_ROWS = (D3D_SVC_VECTOR + 1), + D3D_SVC_MATRIX_COLUMNS = (D3D_SVC_MATRIX_ROWS + 1), + D3D_SVC_OBJECT = (D3D_SVC_MATRIX_COLUMNS + 1), + D3D_SVC_STRUCT = (D3D_SVC_OBJECT + 1), + D3D_SVC_INTERFACE_CLASS = (D3D_SVC_STRUCT + 1), + D3D_SVC_INTERFACE_POINTER = (D3D_SVC_INTERFACE_CLASS + 1), + D3D10_SVC_SCALAR = D3D_SVC_SCALAR, + D3D10_SVC_VECTOR = D3D_SVC_VECTOR, + D3D10_SVC_MATRIX_ROWS = D3D_SVC_MATRIX_ROWS, + D3D10_SVC_MATRIX_COLUMNS = D3D_SVC_MATRIX_COLUMNS, + D3D10_SVC_OBJECT = D3D_SVC_OBJECT, + D3D10_SVC_STRUCT = D3D_SVC_STRUCT, + D3D11_SVC_INTERFACE_CLASS = D3D_SVC_INTERFACE_CLASS, + D3D11_SVC_INTERFACE_POINTER = D3D_SVC_INTERFACE_POINTER, + D3D_SVC_FORCE_DWORD = 0x7fffffff +} D3D_SHADER_VARIABLE_CLASS; + +typedef enum _D3D_SHADER_VARIABLE_TYPE { + D3D_SVT_VOID = 0, + D3D_SVT_BOOL = 1, + D3D_SVT_INT = 2, + D3D_SVT_FLOAT = 3, + D3D_SVT_STRING = 4, + D3D_SVT_TEXTURE = 5, + D3D_SVT_TEXTURE1D = 6, + D3D_SVT_TEXTURE2D = 7, + D3D_SVT_TEXTURE3D = 8, + D3D_SVT_TEXTURECUBE = 9, + D3D_SVT_SAMPLER = 10, + D3D_SVT_SAMPLER1D = 11, + D3D_SVT_SAMPLER2D = 12, + D3D_SVT_SAMPLER3D = 13, + D3D_SVT_SAMPLERCUBE = 14, + D3D_SVT_PIXELSHADER = 15, + D3D_SVT_VERTEXSHADER = 16, + D3D_SVT_PIXELFRAGMENT = 17, + D3D_SVT_VERTEXFRAGMENT = 18, + D3D_SVT_UINT = 19, + D3D_SVT_UINT8 = 20, + D3D_SVT_GEOMETRYSHADER = 21, + D3D_SVT_RASTERIZER = 22, + D3D_SVT_DEPTHSTENCIL = 23, + D3D_SVT_BLEND = 24, + D3D_SVT_BUFFER = 25, + D3D_SVT_CBUFFER = 26, + D3D_SVT_TBUFFER = 27, + D3D_SVT_TEXTURE1DARRAY = 28, + D3D_SVT_TEXTURE2DARRAY = 29, + D3D_SVT_RENDERTARGETVIEW = 30, + D3D_SVT_DEPTHSTENCILVIEW = 31, + D3D_SVT_TEXTURE2DMS = 32, + D3D_SVT_TEXTURE2DMSARRAY = 33, + D3D_SVT_TEXTURECUBEARRAY = 34, + D3D_SVT_HULLSHADER = 35, + D3D_SVT_DOMAINSHADER = 36, + D3D_SVT_INTERFACE_POINTER = 37, + 
D3D_SVT_COMPUTESHADER = 38, + D3D_SVT_DOUBLE = 39, + D3D_SVT_RWTEXTURE1D = 40, + D3D_SVT_RWTEXTURE1DARRAY = 41, + D3D_SVT_RWTEXTURE2D = 42, + D3D_SVT_RWTEXTURE2DARRAY = 43, + D3D_SVT_RWTEXTURE3D = 44, + D3D_SVT_RWBUFFER = 45, + D3D_SVT_BYTEADDRESS_BUFFER = 46, + D3D_SVT_RWBYTEADDRESS_BUFFER = 47, + D3D_SVT_STRUCTURED_BUFFER = 48, + D3D_SVT_RWSTRUCTURED_BUFFER = 49, + D3D_SVT_APPEND_STRUCTURED_BUFFER = 50, + D3D_SVT_CONSUME_STRUCTURED_BUFFER = 51, + D3D_SVT_MIN8FLOAT = 52, + D3D_SVT_MIN10FLOAT = 53, + D3D_SVT_MIN16FLOAT = 54, + D3D_SVT_MIN12INT = 55, + D3D_SVT_MIN16INT = 56, + D3D_SVT_MIN16UINT = 57, + D3D_SVT_INT16 = 58, + D3D_SVT_UINT16 = 59, + D3D_SVT_FLOAT16 = 60, + D3D_SVT_INT64 = 61, + D3D_SVT_UINT64 = 62, + D3D10_SVT_VOID = D3D_SVT_VOID, + D3D10_SVT_BOOL = D3D_SVT_BOOL, + D3D10_SVT_INT = D3D_SVT_INT, + D3D10_SVT_FLOAT = D3D_SVT_FLOAT, + D3D10_SVT_STRING = D3D_SVT_STRING, + D3D10_SVT_TEXTURE = D3D_SVT_TEXTURE, + D3D10_SVT_TEXTURE1D = D3D_SVT_TEXTURE1D, + D3D10_SVT_TEXTURE2D = D3D_SVT_TEXTURE2D, + D3D10_SVT_TEXTURE3D = D3D_SVT_TEXTURE3D, + D3D10_SVT_TEXTURECUBE = D3D_SVT_TEXTURECUBE, + D3D10_SVT_SAMPLER = D3D_SVT_SAMPLER, + D3D10_SVT_SAMPLER1D = D3D_SVT_SAMPLER1D, + D3D10_SVT_SAMPLER2D = D3D_SVT_SAMPLER2D, + D3D10_SVT_SAMPLER3D = D3D_SVT_SAMPLER3D, + D3D10_SVT_SAMPLERCUBE = D3D_SVT_SAMPLERCUBE, + D3D10_SVT_PIXELSHADER = D3D_SVT_PIXELSHADER, + D3D10_SVT_VERTEXSHADER = D3D_SVT_VERTEXSHADER, + D3D10_SVT_PIXELFRAGMENT = D3D_SVT_PIXELFRAGMENT, + D3D10_SVT_VERTEXFRAGMENT = D3D_SVT_VERTEXFRAGMENT, + D3D10_SVT_UINT = D3D_SVT_UINT, + D3D10_SVT_UINT8 = D3D_SVT_UINT8, + D3D10_SVT_GEOMETRYSHADER = D3D_SVT_GEOMETRYSHADER, + D3D10_SVT_RASTERIZER = D3D_SVT_RASTERIZER, + D3D10_SVT_DEPTHSTENCIL = D3D_SVT_DEPTHSTENCIL, + D3D10_SVT_BLEND = D3D_SVT_BLEND, + D3D10_SVT_BUFFER = D3D_SVT_BUFFER, + D3D10_SVT_CBUFFER = D3D_SVT_CBUFFER, + D3D10_SVT_TBUFFER = D3D_SVT_TBUFFER, + D3D10_SVT_TEXTURE1DARRAY = D3D_SVT_TEXTURE1DARRAY, + D3D10_SVT_TEXTURE2DARRAY = D3D_SVT_TEXTURE2DARRAY, + 
D3D10_SVT_RENDERTARGETVIEW = D3D_SVT_RENDERTARGETVIEW, + D3D10_SVT_DEPTHSTENCILVIEW = D3D_SVT_DEPTHSTENCILVIEW, + D3D10_SVT_TEXTURE2DMS = D3D_SVT_TEXTURE2DMS, + D3D10_SVT_TEXTURE2DMSARRAY = D3D_SVT_TEXTURE2DMSARRAY, + D3D10_SVT_TEXTURECUBEARRAY = D3D_SVT_TEXTURECUBEARRAY, + D3D11_SVT_HULLSHADER = D3D_SVT_HULLSHADER, + D3D11_SVT_DOMAINSHADER = D3D_SVT_DOMAINSHADER, + D3D11_SVT_INTERFACE_POINTER = D3D_SVT_INTERFACE_POINTER, + D3D11_SVT_COMPUTESHADER = D3D_SVT_COMPUTESHADER, + D3D11_SVT_DOUBLE = D3D_SVT_DOUBLE, + D3D11_SVT_RWTEXTURE1D = D3D_SVT_RWTEXTURE1D, + D3D11_SVT_RWTEXTURE1DARRAY = D3D_SVT_RWTEXTURE1DARRAY, + D3D11_SVT_RWTEXTURE2D = D3D_SVT_RWTEXTURE2D, + D3D11_SVT_RWTEXTURE2DARRAY = D3D_SVT_RWTEXTURE2DARRAY, + D3D11_SVT_RWTEXTURE3D = D3D_SVT_RWTEXTURE3D, + D3D11_SVT_RWBUFFER = D3D_SVT_RWBUFFER, + D3D11_SVT_BYTEADDRESS_BUFFER = D3D_SVT_BYTEADDRESS_BUFFER, + D3D11_SVT_RWBYTEADDRESS_BUFFER = D3D_SVT_RWBYTEADDRESS_BUFFER, + D3D11_SVT_STRUCTURED_BUFFER = D3D_SVT_STRUCTURED_BUFFER, + D3D11_SVT_RWSTRUCTURED_BUFFER = D3D_SVT_RWSTRUCTURED_BUFFER, + D3D11_SVT_APPEND_STRUCTURED_BUFFER = D3D_SVT_APPEND_STRUCTURED_BUFFER, + D3D11_SVT_CONSUME_STRUCTURED_BUFFER = D3D_SVT_CONSUME_STRUCTURED_BUFFER, + D3D_SVT_FORCE_DWORD = 0x7fffffff +} D3D_SHADER_VARIABLE_TYPE; + +typedef enum D3D_TESSELLATOR_DOMAIN { + D3D_TESSELLATOR_DOMAIN_UNDEFINED = 0, + D3D_TESSELLATOR_DOMAIN_ISOLINE = 1, + D3D_TESSELLATOR_DOMAIN_TRI = 2, + D3D_TESSELLATOR_DOMAIN_QUAD = 3, + D3D11_TESSELLATOR_DOMAIN_UNDEFINED = D3D_TESSELLATOR_DOMAIN_UNDEFINED, + D3D11_TESSELLATOR_DOMAIN_ISOLINE = D3D_TESSELLATOR_DOMAIN_ISOLINE, + D3D11_TESSELLATOR_DOMAIN_TRI = D3D_TESSELLATOR_DOMAIN_TRI, + D3D11_TESSELLATOR_DOMAIN_QUAD = D3D_TESSELLATOR_DOMAIN_QUAD +} D3D_TESSELLATOR_DOMAIN; + +typedef enum D3D_TESSELLATOR_PARTITIONING { + D3D_TESSELLATOR_PARTITIONING_UNDEFINED = 0, + D3D_TESSELLATOR_PARTITIONING_INTEGER = 1, + D3D_TESSELLATOR_PARTITIONING_POW2 = 2, + D3D_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD = 3, + 
D3D_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN = 4, + D3D11_TESSELLATOR_PARTITIONING_UNDEFINED = D3D_TESSELLATOR_PARTITIONING_UNDEFINED, + D3D11_TESSELLATOR_PARTITIONING_INTEGER = D3D_TESSELLATOR_PARTITIONING_INTEGER, + D3D11_TESSELLATOR_PARTITIONING_POW2 = D3D_TESSELLATOR_PARTITIONING_POW2, + D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD = D3D_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD, + D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN = D3D_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN +} D3D_TESSELLATOR_PARTITIONING; + +typedef enum D3D_TESSELLATOR_OUTPUT_PRIMITIVE { + D3D_TESSELLATOR_OUTPUT_UNDEFINED = 0, + D3D_TESSELLATOR_OUTPUT_POINT = 1, + D3D_TESSELLATOR_OUTPUT_LINE = 2, + D3D_TESSELLATOR_OUTPUT_TRIANGLE_CW = 3, + D3D_TESSELLATOR_OUTPUT_TRIANGLE_CCW = 4, + D3D11_TESSELLATOR_OUTPUT_UNDEFINED = D3D_TESSELLATOR_OUTPUT_UNDEFINED, + D3D11_TESSELLATOR_OUTPUT_POINT = D3D_TESSELLATOR_OUTPUT_POINT, + D3D11_TESSELLATOR_OUTPUT_LINE = D3D_TESSELLATOR_OUTPUT_LINE, + D3D11_TESSELLATOR_OUTPUT_TRIANGLE_CW = D3D_TESSELLATOR_OUTPUT_TRIANGLE_CW, + D3D11_TESSELLATOR_OUTPUT_TRIANGLE_CCW = D3D_TESSELLATOR_OUTPUT_TRIANGLE_CCW +} D3D_TESSELLATOR_OUTPUT_PRIMITIVE; + +typedef enum D3D_PRIMITIVE { + D3D_PRIMITIVE_UNDEFINED = 0, + D3D_PRIMITIVE_POINT = 1, + D3D_PRIMITIVE_LINE = 2, + D3D_PRIMITIVE_TRIANGLE = 3, + D3D_PRIMITIVE_LINE_ADJ = 6, + D3D_PRIMITIVE_TRIANGLE_ADJ = 7, + D3D_PRIMITIVE_1_CONTROL_POINT_PATCH = 8, + D3D_PRIMITIVE_2_CONTROL_POINT_PATCH = 9, + D3D_PRIMITIVE_3_CONTROL_POINT_PATCH = 10, + D3D_PRIMITIVE_4_CONTROL_POINT_PATCH = 11, + D3D_PRIMITIVE_5_CONTROL_POINT_PATCH = 12, + D3D_PRIMITIVE_6_CONTROL_POINT_PATCH = 13, + D3D_PRIMITIVE_7_CONTROL_POINT_PATCH = 14, + D3D_PRIMITIVE_8_CONTROL_POINT_PATCH = 15, + D3D_PRIMITIVE_9_CONTROL_POINT_PATCH = 16, + D3D_PRIMITIVE_10_CONTROL_POINT_PATCH = 17, + D3D_PRIMITIVE_11_CONTROL_POINT_PATCH = 18, + D3D_PRIMITIVE_12_CONTROL_POINT_PATCH = 19, + D3D_PRIMITIVE_13_CONTROL_POINT_PATCH = 20, + D3D_PRIMITIVE_14_CONTROL_POINT_PATCH = 21, + 
D3D_PRIMITIVE_15_CONTROL_POINT_PATCH = 22, + D3D_PRIMITIVE_16_CONTROL_POINT_PATCH = 23, + D3D_PRIMITIVE_17_CONTROL_POINT_PATCH = 24, + D3D_PRIMITIVE_18_CONTROL_POINT_PATCH = 25, + D3D_PRIMITIVE_19_CONTROL_POINT_PATCH = 26, + D3D_PRIMITIVE_20_CONTROL_POINT_PATCH = 27, + D3D_PRIMITIVE_21_CONTROL_POINT_PATCH = 28, + D3D_PRIMITIVE_22_CONTROL_POINT_PATCH = 29, + D3D_PRIMITIVE_23_CONTROL_POINT_PATCH = 30, + D3D_PRIMITIVE_24_CONTROL_POINT_PATCH = 31, + D3D_PRIMITIVE_25_CONTROL_POINT_PATCH = 32, + D3D_PRIMITIVE_26_CONTROL_POINT_PATCH = 33, + D3D_PRIMITIVE_27_CONTROL_POINT_PATCH = 34, + D3D_PRIMITIVE_28_CONTROL_POINT_PATCH = 35, + D3D_PRIMITIVE_29_CONTROL_POINT_PATCH = 36, + D3D_PRIMITIVE_30_CONTROL_POINT_PATCH = 37, + D3D_PRIMITIVE_31_CONTROL_POINT_PATCH = 38, + D3D_PRIMITIVE_32_CONTROL_POINT_PATCH = 39, + D3D10_PRIMITIVE_UNDEFINED = D3D_PRIMITIVE_UNDEFINED, + D3D10_PRIMITIVE_POINT = D3D_PRIMITIVE_POINT, + D3D10_PRIMITIVE_LINE = D3D_PRIMITIVE_LINE, + D3D10_PRIMITIVE_TRIANGLE = D3D_PRIMITIVE_TRIANGLE, + D3D10_PRIMITIVE_LINE_ADJ = D3D_PRIMITIVE_LINE_ADJ, + D3D10_PRIMITIVE_TRIANGLE_ADJ = D3D_PRIMITIVE_TRIANGLE_ADJ, + D3D11_PRIMITIVE_UNDEFINED = D3D_PRIMITIVE_UNDEFINED, + D3D11_PRIMITIVE_POINT = D3D_PRIMITIVE_POINT, + D3D11_PRIMITIVE_LINE = D3D_PRIMITIVE_LINE, + D3D11_PRIMITIVE_TRIANGLE = D3D_PRIMITIVE_TRIANGLE, + D3D11_PRIMITIVE_LINE_ADJ = D3D_PRIMITIVE_LINE_ADJ, + D3D11_PRIMITIVE_TRIANGLE_ADJ = D3D_PRIMITIVE_TRIANGLE_ADJ, + D3D11_PRIMITIVE_1_CONTROL_POINT_PATCH = D3D_PRIMITIVE_1_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_2_CONTROL_POINT_PATCH = D3D_PRIMITIVE_2_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_3_CONTROL_POINT_PATCH = D3D_PRIMITIVE_3_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_4_CONTROL_POINT_PATCH = D3D_PRIMITIVE_4_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_5_CONTROL_POINT_PATCH = D3D_PRIMITIVE_5_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_6_CONTROL_POINT_PATCH = D3D_PRIMITIVE_6_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_7_CONTROL_POINT_PATCH = D3D_PRIMITIVE_7_CONTROL_POINT_PATCH, + 
D3D11_PRIMITIVE_8_CONTROL_POINT_PATCH = D3D_PRIMITIVE_8_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_9_CONTROL_POINT_PATCH = D3D_PRIMITIVE_9_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_10_CONTROL_POINT_PATCH = D3D_PRIMITIVE_10_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_11_CONTROL_POINT_PATCH = D3D_PRIMITIVE_11_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_12_CONTROL_POINT_PATCH = D3D_PRIMITIVE_12_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_13_CONTROL_POINT_PATCH = D3D_PRIMITIVE_13_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_14_CONTROL_POINT_PATCH = D3D_PRIMITIVE_14_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_15_CONTROL_POINT_PATCH = D3D_PRIMITIVE_15_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_16_CONTROL_POINT_PATCH = D3D_PRIMITIVE_16_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_17_CONTROL_POINT_PATCH = D3D_PRIMITIVE_17_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_18_CONTROL_POINT_PATCH = D3D_PRIMITIVE_18_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_19_CONTROL_POINT_PATCH = D3D_PRIMITIVE_19_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_20_CONTROL_POINT_PATCH = D3D_PRIMITIVE_20_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_21_CONTROL_POINT_PATCH = D3D_PRIMITIVE_21_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_22_CONTROL_POINT_PATCH = D3D_PRIMITIVE_22_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_23_CONTROL_POINT_PATCH = D3D_PRIMITIVE_23_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_24_CONTROL_POINT_PATCH = D3D_PRIMITIVE_24_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_25_CONTROL_POINT_PATCH = D3D_PRIMITIVE_25_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_26_CONTROL_POINT_PATCH = D3D_PRIMITIVE_26_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_27_CONTROL_POINT_PATCH = D3D_PRIMITIVE_27_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_28_CONTROL_POINT_PATCH = D3D_PRIMITIVE_28_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_29_CONTROL_POINT_PATCH = D3D_PRIMITIVE_29_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_30_CONTROL_POINT_PATCH = D3D_PRIMITIVE_30_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_31_CONTROL_POINT_PATCH = D3D_PRIMITIVE_31_CONTROL_POINT_PATCH, + D3D11_PRIMITIVE_32_CONTROL_POINT_PATCH = 
D3D_PRIMITIVE_32_CONTROL_POINT_PATCH +} D3D_PRIMITIVE; + +typedef enum _D3D_SHADER_INPUT_TYPE { + D3D_SIT_CBUFFER = 0, + D3D_SIT_TBUFFER = (D3D_SIT_CBUFFER + 1), + D3D_SIT_TEXTURE = (D3D_SIT_TBUFFER + 1), + D3D_SIT_SAMPLER = (D3D_SIT_TEXTURE + 1), + D3D_SIT_UAV_RWTYPED = (D3D_SIT_SAMPLER + 1), + D3D_SIT_STRUCTURED = (D3D_SIT_UAV_RWTYPED + 1), + D3D_SIT_UAV_RWSTRUCTURED = (D3D_SIT_STRUCTURED + 1), + D3D_SIT_BYTEADDRESS = (D3D_SIT_UAV_RWSTRUCTURED + 1), + D3D_SIT_UAV_RWBYTEADDRESS = (D3D_SIT_BYTEADDRESS + 1), + D3D_SIT_UAV_APPEND_STRUCTURED = (D3D_SIT_UAV_RWBYTEADDRESS + 1), + D3D_SIT_UAV_CONSUME_STRUCTURED = (D3D_SIT_UAV_APPEND_STRUCTURED + 1), + D3D_SIT_UAV_RWSTRUCTURED_WITH_COUNTER = (D3D_SIT_UAV_CONSUME_STRUCTURED + 1), + D3D_SIT_RTACCELERATIONSTRUCTURE = (D3D_SIT_UAV_RWSTRUCTURED_WITH_COUNTER + 1), + D3D_SIT_UAV_FEEDBACKTEXTURE = (D3D_SIT_RTACCELERATIONSTRUCTURE + 1), + D3D10_SIT_CBUFFER = D3D_SIT_CBUFFER, + D3D10_SIT_TBUFFER = D3D_SIT_TBUFFER, + D3D10_SIT_TEXTURE = D3D_SIT_TEXTURE, + D3D10_SIT_SAMPLER = D3D_SIT_SAMPLER, + D3D11_SIT_UAV_RWTYPED = D3D_SIT_UAV_RWTYPED, + D3D11_SIT_STRUCTURED = D3D_SIT_STRUCTURED, + D3D11_SIT_UAV_RWSTRUCTURED = D3D_SIT_UAV_RWSTRUCTURED, + D3D11_SIT_BYTEADDRESS = D3D_SIT_BYTEADDRESS, + D3D11_SIT_UAV_RWBYTEADDRESS = D3D_SIT_UAV_RWBYTEADDRESS, + D3D11_SIT_UAV_APPEND_STRUCTURED = D3D_SIT_UAV_APPEND_STRUCTURED, + D3D11_SIT_UAV_CONSUME_STRUCTURED = D3D_SIT_UAV_CONSUME_STRUCTURED, + D3D11_SIT_UAV_RWSTRUCTURED_WITH_COUNTER = D3D_SIT_UAV_RWSTRUCTURED_WITH_COUNTER +} D3D_SHADER_INPUT_TYPE; + +typedef enum D3D_SRV_DIMENSION { + D3D_SRV_DIMENSION_UNKNOWN = 0, + D3D_SRV_DIMENSION_BUFFER = 1, + D3D_SRV_DIMENSION_TEXTURE1D = 2, + D3D_SRV_DIMENSION_TEXTURE1DARRAY = 3, + D3D_SRV_DIMENSION_TEXTURE2D = 4, + D3D_SRV_DIMENSION_TEXTURE2DARRAY = 5, + D3D_SRV_DIMENSION_TEXTURE2DMS = 6, + D3D_SRV_DIMENSION_TEXTURE2DMSARRAY = 7, + D3D_SRV_DIMENSION_TEXTURE3D = 8, + D3D_SRV_DIMENSION_TEXTURECUBE = 9, + D3D_SRV_DIMENSION_TEXTURECUBEARRAY = 10, + 
D3D_SRV_DIMENSION_BUFFEREX = 11, + D3D10_SRV_DIMENSION_UNKNOWN = D3D_SRV_DIMENSION_UNKNOWN, + D3D10_SRV_DIMENSION_BUFFER = D3D_SRV_DIMENSION_BUFFER, + D3D10_SRV_DIMENSION_TEXTURE1D = D3D_SRV_DIMENSION_TEXTURE1D, + D3D10_SRV_DIMENSION_TEXTURE1DARRAY = D3D_SRV_DIMENSION_TEXTURE1DARRAY, + D3D10_SRV_DIMENSION_TEXTURE2D = D3D_SRV_DIMENSION_TEXTURE2D, + D3D10_SRV_DIMENSION_TEXTURE2DARRAY = D3D_SRV_DIMENSION_TEXTURE2DARRAY, + D3D10_SRV_DIMENSION_TEXTURE2DMS = D3D_SRV_DIMENSION_TEXTURE2DMS, + D3D10_SRV_DIMENSION_TEXTURE2DMSARRAY = D3D_SRV_DIMENSION_TEXTURE2DMSARRAY, + D3D10_SRV_DIMENSION_TEXTURE3D = D3D_SRV_DIMENSION_TEXTURE3D, + D3D10_SRV_DIMENSION_TEXTURECUBE = D3D_SRV_DIMENSION_TEXTURECUBE, + D3D10_1_SRV_DIMENSION_UNKNOWN = D3D_SRV_DIMENSION_UNKNOWN, + D3D10_1_SRV_DIMENSION_BUFFER = D3D_SRV_DIMENSION_BUFFER, + D3D10_1_SRV_DIMENSION_TEXTURE1D = D3D_SRV_DIMENSION_TEXTURE1D, + D3D10_1_SRV_DIMENSION_TEXTURE1DARRAY = D3D_SRV_DIMENSION_TEXTURE1DARRAY, + D3D10_1_SRV_DIMENSION_TEXTURE2D = D3D_SRV_DIMENSION_TEXTURE2D, + D3D10_1_SRV_DIMENSION_TEXTURE2DARRAY = D3D_SRV_DIMENSION_TEXTURE2DARRAY, + D3D10_1_SRV_DIMENSION_TEXTURE2DMS = D3D_SRV_DIMENSION_TEXTURE2DMS, + D3D10_1_SRV_DIMENSION_TEXTURE2DMSARRAY = D3D_SRV_DIMENSION_TEXTURE2DMSARRAY, + D3D10_1_SRV_DIMENSION_TEXTURE3D = D3D_SRV_DIMENSION_TEXTURE3D, + D3D10_1_SRV_DIMENSION_TEXTURECUBE = D3D_SRV_DIMENSION_TEXTURECUBE, + D3D10_1_SRV_DIMENSION_TEXTURECUBEARRAY = D3D_SRV_DIMENSION_TEXTURECUBEARRAY, + D3D11_SRV_DIMENSION_UNKNOWN = D3D_SRV_DIMENSION_UNKNOWN, + D3D11_SRV_DIMENSION_BUFFER = D3D_SRV_DIMENSION_BUFFER, + D3D11_SRV_DIMENSION_TEXTURE1D = D3D_SRV_DIMENSION_TEXTURE1D, + D3D11_SRV_DIMENSION_TEXTURE1DARRAY = D3D_SRV_DIMENSION_TEXTURE1DARRAY, + D3D11_SRV_DIMENSION_TEXTURE2D = D3D_SRV_DIMENSION_TEXTURE2D, + D3D11_SRV_DIMENSION_TEXTURE2DARRAY = D3D_SRV_DIMENSION_TEXTURE2DARRAY, + D3D11_SRV_DIMENSION_TEXTURE2DMS = D3D_SRV_DIMENSION_TEXTURE2DMS, + D3D11_SRV_DIMENSION_TEXTURE2DMSARRAY = D3D_SRV_DIMENSION_TEXTURE2DMSARRAY, + 
D3D11_SRV_DIMENSION_TEXTURE3D = D3D_SRV_DIMENSION_TEXTURE3D, + D3D11_SRV_DIMENSION_TEXTURECUBE = D3D_SRV_DIMENSION_TEXTURECUBE, + D3D11_SRV_DIMENSION_TEXTURECUBEARRAY = D3D_SRV_DIMENSION_TEXTURECUBEARRAY, + D3D11_SRV_DIMENSION_BUFFEREX = D3D_SRV_DIMENSION_BUFFEREX +} D3D_SRV_DIMENSION; + +typedef enum D3D_INTERPOLATION_MODE { + D3D_INTERPOLATION_UNDEFINED = 0, + D3D_INTERPOLATION_CONSTANT = 1, + D3D_INTERPOLATION_LINEAR = 2, + D3D_INTERPOLATION_LINEAR_CENTROID = 3, + D3D_INTERPOLATION_LINEAR_NOPERSPECTIVE = 4, + D3D_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID = 5, + D3D_INTERPOLATION_LINEAR_SAMPLE = 6, + D3D_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE = 7 +} D3D_INTERPOLATION_MODE; + +typedef enum _D3D_PARAMETER_FLAGS { + D3D_PF_NONE = 0, + D3D_PF_IN = 0x1, + D3D_PF_OUT = 0x2, + D3D_PF_FORCE_DWORD = 0x7fffffff +} D3D_PARAMETER_FLAGS; +////////////////////////////////////////////////////////////////////////// + // end of Mini Win32 for D3D12 #endif /*__mini_d3d12_h__*/ diff --git a/src/runtime/runtime_atomics.h b/src/runtime/runtime_atomics.h index 61139a622d75..1e9a2367c3e0 100644 --- a/src/runtime/runtime_atomics.h +++ b/src/runtime/runtime_atomics.h @@ -39,27 +39,27 @@ ALWAYS_INLINE T atomic_fetch_add_acquire_release(T *addr, T val) { } template::type> -ALWAYS_INLINE T atomic_fetch_add_sequentially_consistent(T *addr, TV val) { +ALWAYS_INLINE TV atomic_fetch_add_sequentially_consistent(T *addr, TV val) { return __sync_fetch_and_add(addr, val); } template::type> -ALWAYS_INLINE T atomic_fetch_sub_sequentially_consistent(T *addr, TV val) { +ALWAYS_INLINE TV atomic_fetch_sub_sequentially_consistent(T *addr, TV val) { return __sync_fetch_and_sub(addr, val); } template::type> -ALWAYS_INLINE T atomic_fetch_or_sequentially_consistent(T *addr, TV val) { +ALWAYS_INLINE TV atomic_fetch_or_sequentially_consistent(T *addr, TV val) { return __sync_fetch_and_or(addr, val); } -template -ALWAYS_INLINE T atomic_add_fetch_sequentially_consistent(T *addr, T val) { +template::type> 
+ALWAYS_INLINE TV atomic_add_fetch_sequentially_consistent(T *addr, TV val) { return __sync_add_and_fetch(addr, val); } -template -ALWAYS_INLINE T atomic_sub_fetch_sequentially_consistent(T *addr, T val) { +template::type> +ALWAYS_INLINE TV atomic_sub_fetch_sequentially_consistent(T *addr, TV val) { return __sync_sub_and_fetch(addr, val); } @@ -103,7 +103,7 @@ ALWAYS_INLINE T atomic_fetch_and_release(T *addr, T val) { } template::type> -ALWAYS_INLINE T atomic_fetch_and_sequentially_consistent(T *addr, TV val) { +ALWAYS_INLINE TV atomic_fetch_and_sequentially_consistent(T *addr, TV val) { return __sync_fetch_and_and(addr, val); } @@ -165,27 +165,27 @@ ALWAYS_INLINE T atomic_fetch_add_acquire_release(T *addr, T val) { } template::type> -ALWAYS_INLINE T atomic_fetch_add_sequentially_consistent(T *addr, TV val) { +ALWAYS_INLINE TV atomic_fetch_add_sequentially_consistent(T *addr, TV val) { return __atomic_fetch_add(addr, val, __ATOMIC_SEQ_CST); } template::type> -ALWAYS_INLINE T atomic_fetch_sub_sequentially_consistent(T *addr, TV val) { +ALWAYS_INLINE TV atomic_fetch_sub_sequentially_consistent(T *addr, TV val) { return __atomic_fetch_sub(addr, val, __ATOMIC_SEQ_CST); } template::type> -ALWAYS_INLINE T atomic_fetch_or_sequentially_consistent(T *addr, TV val) { +ALWAYS_INLINE TV atomic_fetch_or_sequentially_consistent(T *addr, TV val) { return __atomic_fetch_or(addr, val, __ATOMIC_SEQ_CST); } -template -ALWAYS_INLINE T atomic_add_fetch_sequentially_consistent(T *addr, T val) { +template::type> +ALWAYS_INLINE TV atomic_add_fetch_sequentially_consistent(T *addr, TV val) { return __atomic_add_fetch(addr, val, __ATOMIC_SEQ_CST); } -template -ALWAYS_INLINE T atomic_sub_fetch_sequentially_consistent(T *addr, T val) { +template::type> +ALWAYS_INLINE TV atomic_sub_fetch_sequentially_consistent(T *addr, TV val) { return __atomic_sub_fetch(addr, val, __ATOMIC_SEQ_CST); } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index bab103196493..613bf2a03ea6 100644 --- 
a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -17,7 +17,8 @@ endif () add_halide_test(_test_internal GROUPS internal) -Halide_feature(WITH_TEST_AUTO_SCHEDULE "Build autoscheduler tests" ON) +Halide_feature(WITH_TEST_AUTO_SCHEDULE "Build autoscheduler tests" AUTO + DEPENDS WITH_AUTOSCHEDULERS) if (WITH_TEST_AUTO_SCHEDULE) add_subdirectory(autoschedulers) endif () diff --git a/test/autoschedulers/adams2019/CMakeLists.txt b/test/autoschedulers/adams2019/CMakeLists.txt index 55a66cf20440..50fb103a3f28 100644 --- a/test/autoschedulers/adams2019/CMakeLists.txt +++ b/test/autoschedulers/adams2019/CMakeLists.txt @@ -1,8 +1,3 @@ -if (NOT TARGET Halide::Adams2019) - message(STATUS "Disabling adams2019 tests for static Halide") - return() -endif () - ## # Build rules for the Adams2019 autoscheduler library ## diff --git a/test/autoschedulers/anderson2021/CMakeLists.txt b/test/autoschedulers/anderson2021/CMakeLists.txt index 37cf5b40bcc4..4fd7649b1c9f 100644 --- a/test/autoschedulers/anderson2021/CMakeLists.txt +++ b/test/autoschedulers/anderson2021/CMakeLists.txt @@ -1,8 +1,3 @@ -if (NOT TARGET Halide::Anderson2021) - message(STATUS "Disabling anderson2021 tests for static Halide") - return() -endif () - ## # Build rules for the Anderson2021 autoscheduler library ## diff --git a/test/autoschedulers/li2018/CMakeLists.txt b/test/autoschedulers/li2018/CMakeLists.txt index ee49c585e8c1..2d4db3070554 100644 --- a/test/autoschedulers/li2018/CMakeLists.txt +++ b/test/autoschedulers/li2018/CMakeLists.txt @@ -1,8 +1,3 @@ -if (NOT TARGET Halide::Li2018) - message(STATUS "Disabling li2018 tests for static Halide") - return() -endif () - add_halide_generator(li2018_demo_gradient.generator SOURCES demo_generator.cpp) @@ -31,14 +26,14 @@ if (WITH_PYTHON_BINDINGS) if (Halide_TARGET MATCHES "webgpu") message(WARNING "li2018_gradient_autoscheduler_test_py is not supported with WebGPU.") else() - find_package(Python3 REQUIRED COMPONENTS Interpreter Development.Module) + 
find_package(Python 3.8 REQUIRED COMPONENTS Interpreter Development.Module) add_test( NAME li2018_gradient_autoscheduler_test_py - COMMAND ${Halide_PYTHON_LAUNCHER} "$" "${CMAKE_CURRENT_SOURCE_DIR}/test.py" $ + COMMAND ${Halide_PYTHON_LAUNCHER} "$" "${CMAKE_CURRENT_SOURCE_DIR}/test.py" $ ) - set(PYTHONPATH "$/..") + set(PYTHONPATH "$/..>") list(TRANSFORM PYTHONPATH PREPEND "PYTHONPATH=path_list_prepend:") set_tests_properties(li2018_gradient_autoscheduler_test_py PROPERTIES diff --git a/test/autoschedulers/mullapudi2016/CMakeLists.txt b/test/autoschedulers/mullapudi2016/CMakeLists.txt index 2fa20e801cd6..7fb9cd156b53 100644 --- a/test/autoschedulers/mullapudi2016/CMakeLists.txt +++ b/test/autoschedulers/mullapudi2016/CMakeLists.txt @@ -1,8 +1,3 @@ -if (NOT TARGET Halide::Mullapudi2016) - message(STATUS "Disabling mullapudi2016 tests for static Halide") - return() -endif() - tests(GROUPS mullapudi2016 autoschedulers_cpu SOURCES extern.cpp diff --git a/test/correctness/simd_op_check_sve2.cpp b/test/correctness/simd_op_check_sve2.cpp index 9f6b958d6c2a..d0e614d657a2 100644 --- a/test/correctness/simd_op_check_sve2.cpp +++ b/test/correctness/simd_op_check_sve2.cpp @@ -643,8 +643,13 @@ class SimdOpCheckArmSve : public SimdOpCheckTest { // No corresponding instructions exists for is_nan, is_inf, is_finite. // The instructions expected to be generated depends on CodeGen_LLVM::visit(const Call *op) add_arm64("nan", is_vector ? sel_op("", "fcmge", "fcmuo") : "fcmp", is_nan(f_1)); - add_arm64("inf", {{"fabs", bits, force_vectorized_lanes}}, vf, is_inf(f_1)); - add_arm64("finite", {{"fabs", bits, force_vectorized_lanes}}, vf, is_inf(f_1)); + if (Halide::Internal::get_llvm_version() >= 200) { + add_arm64("inf", is_vector ? sel_op("", "fcmge", "fcmeq") : "", is_inf(f_1)); + add_arm64("finite", is_vector ? 
sel_op("", "fcmge", "fcmeq") : "", is_inf(f_1)); + } else { + add_arm64("inf", {{"fabs", bits, force_vectorized_lanes}}, vf, is_inf(f_1)); + add_arm64("finite", {{"fabs", bits, force_vectorized_lanes}}, vf, is_inf(f_1)); + } } if (bits == 16) { diff --git a/test/generator/CMakeLists.txt b/test/generator/CMakeLists.txt index 431bd4d795ff..35d0ac13e32e 100644 --- a/test/generator/CMakeLists.txt +++ b/test/generator/CMakeLists.txt @@ -163,7 +163,6 @@ function(_add_halide_libraries TARGET) target_link_libraries(${TARGET} INTERFACE ${args_EXTERNS}) endif() - if (NOT args_OMIT_C_BACKEND) # The C backend basically ignores TARGETS (it emits a warning that the sources # will be compiled with the current CMake toolchain)... but making matters worse, @@ -583,6 +582,9 @@ if (${_USING_WASM}) # wasm doesn't support multitargets # TODO: currently, Halide_CMAKE_TARGET == Halide_HOST_TARGET when building for Emscripten; we should fix this set(MDT_TARGETS ${Halide_TARGET}) +elseif (Halide_CMAKE_TARGET MATCHES ";") + # multiarch doesn't support multitargets + set(MDT_TARGETS cmake) else() set(MDT_TARGETS cmake-no_bounds_query cmake) endif() @@ -629,7 +631,7 @@ if (NOT Halide_TARGET MATCHES "windows" AND NOT CMAKE_SYSTEM_NAME MATCHES "Windo endif () # multitarget and wasm don't mix well -if (NOT _USING_WASM) +if (NOT _USING_WASM AND NOT Halide_CMAKE_TARGET MATCHES ";") # multitarget_aottest.cpp # multitarget_generator.cpp _add_halide_libraries(multitarget diff --git a/test/performance/matrix_multiplication.cpp b/test/performance/matrix_multiplication.cpp index 873c2f06cb7a..8b37b001f7df 100644 --- a/test/performance/matrix_multiplication.cpp +++ b/test/performance/matrix_multiplication.cpp @@ -30,44 +30,78 @@ int main(int argc, char **argv) { ImageParam A(type_of(), 2); ImageParam B(type_of(), 2); - Var x("x"), xi("xi"), xo("xo"), y("y"), yo("yo"), yi("yi"), yii("yii"), xii("xii"); - Func matrix_mul("matrix_mul"); - + Var x("x"), y("y"); RDom k(0, matrix_size); - RVar ki; + + Func 
matrix_mul("matrix_mul"); matrix_mul(x, y) += A(k, y) * B(x, k); Func out; out(x, y) = matrix_mul(x, y); - Var xy; + // Now the schedule. Single-threaded, it hits 155 GFlops on Skylake-X + // i9-9960x with AVX-512 (80% of peak), and 87 GFlops with AVX2 (90% of + // peak). + // + // Using 16 threads (and no hyperthreading), hits 2080 GFlops (67% of peak) + // and 1310 GFLops (85% of peak) respectively. - out.tile(x, y, xi, yi, 24, 32) - .fuse(x, y, xy) - .parallel(xy) - .split(yi, yi, yii, 4) - .vectorize(xi, 8) + const int vec = target.natural_vector_size(); + + // Size the inner loop tiles to fit into the number of registers available + // on the target, using either 12 accumulator registers or 24. + const int inner_tile_x = 3 * vec; + const int inner_tile_y = (target.has_feature(Target::AVX512) || target.arch != Target::X86) ? 8 : 4; + + // The shape of the outer tiling + const int tile_y = matrix_size / 4; + const int tile_k = matrix_size / 16; + + Var xy("xy"), xi("xi"), yi("yi"), yii("yii"); + + out.tile(x, y, xi, yi, inner_tile_x, tile_y) + .split(yi, yi, yii, inner_tile_y) + .vectorize(xi, vec) .unroll(xi) - .unroll(yii); + .unroll(yii) + .fuse(x, y, xy) + .parallel(xy); + + RVar ko("ko"), ki("ki"); + Var z("z"); + matrix_mul.update().split(k, ko, ki, tile_k); + + // Factor the reduction so that we can do outer blocking over the reduction + // dimension. 
+ Func intm = matrix_mul.update().rfactor(ko, z); - matrix_mul.compute_at(out, yi) - .vectorize(x, 8) + intm.compute_at(matrix_mul, y) + .vectorize(x, vec) + .unroll(x) .unroll(y); - matrix_mul.update(0) - .reorder(x, y, k) - .vectorize(x, 8) + intm.update(0) + .reorder(x, y, ki) + .vectorize(x, vec) .unroll(x) - .unroll(y) - .unroll(k, 2); + .unroll(y); + + matrix_mul.compute_at(out, xy) + .vectorize(x, vec) + .unroll(x); + + matrix_mul.update() + .split(y, y, yi, inner_tile_y) + .reorder(x, yi, y, ko) + .vectorize(x, vec) + .unroll(x) + .unroll(yi); out .bound(x, 0, matrix_size) .bound(y, 0, matrix_size); - out.compile_jit(); - Buffer mat_A(matrix_size, matrix_size); Buffer mat_B(matrix_size, matrix_size); Buffer output(matrix_size, matrix_size); diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 275a94184916..0e338b6de84b 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -63,6 +63,15 @@ target_link_libraries( Halide_Generator INTERFACE "$" ) +## gengen tool for building runtimes in add_halide_runtime +add_executable(Halide_GenRT GenGen.cpp) +add_executable(Halide::GenRT ALIAS Halide_GenRT) +set_target_properties(Halide_GenRT PROPERTIES EXPORT_NAME GenRT OUTPUT_NAME gengen) + +target_link_libraries(Halide_GenRT PRIVATE Halide::Halide ${CMAKE_DL_LIBS}) + +_Halide_place_dll(Halide_GenRT) + ## # Dependency-free header-only libs ## diff --git a/tutorial/CMakeLists.txt b/tutorial/CMakeLists.txt index 6e872e2153d4..6f8c624a4617 100644 --- a/tutorial/CMakeLists.txt +++ b/tutorial/CMakeLists.txt @@ -162,13 +162,13 @@ else () target_link_libraries(lesson_16_rgb_generate PRIVATE Halide::Generator) add_halide_library(brighten_planar FROM lesson_16_rgb_generate - GENERATOR brighten PARAMS layout=planar) + TARGETS cmake GENERATOR brighten PARAMS layout=planar) add_halide_library(brighten_interleaved FROM lesson_16_rgb_generate - GENERATOR brighten PARAMS layout=interleaved) + TARGETS cmake GENERATOR brighten PARAMS layout=interleaved) 
add_halide_library(brighten_either FROM lesson_16_rgb_generate - GENERATOR brighten PARAMS layout=either) + TARGETS cmake GENERATOR brighten PARAMS layout=either) add_halide_library(brighten_specialized FROM lesson_16_rgb_generate - GENERATOR brighten PARAMS layout=specialized) + TARGETS cmake GENERATOR brighten PARAMS layout=specialized) add_executable(lesson_16_rgb_run lesson_16_rgb_run.cpp) target_link_libraries(lesson_16_rgb_run PRIVATE diff --git a/tutorial/lesson_12_using_the_gpu.cpp b/tutorial/lesson_12_using_the_gpu.cpp index 3fc108a87e82..d4cbff2c6245 100644 --- a/tutorial/lesson_12_using_the_gpu.cpp +++ b/tutorial/lesson_12_using_the_gpu.cpp @@ -112,6 +112,12 @@ class MyPipeline { // Now a schedule that uses CUDA or OpenCL. bool schedule_for_gpu() { Target target = find_gpu_target(); + + std::cout << "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" << std::endl; + std::cout << target << std::endl; + std::cout << target.get_d3d12_capability_lower_bound() << std::endl; + std::cout << "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" << std::endl; + if (!target.has_gpu_feature()) { return false; } @@ -248,6 +254,8 @@ class MyPipeline { } }; +#include + int main(int argc, char **argv) { // Load an input image. Buffer input = load_image("images/rgb.png"); @@ -261,6 +269,7 @@ int main(int argc, char **argv) { p1.curved.realize(reference_output); printf("Running pipeline on GPU:\n"); + putenv("HL_DEBUG_CODEGEN=1"); MyPipeline p2(input); bool has_gpu_target = p2.schedule_for_gpu(); if (has_gpu_target) { @@ -288,6 +297,7 @@ Target find_gpu_target() { Target target = get_host_target(); std::vector features_to_try; + //features_to_try.push_back(Target::CUDA); if (target.os == Target::Windows) { // Try D3D12 first; if that fails, try OpenCL. 
if (sizeof(void*) == 8) { @@ -307,6 +317,14 @@ Target find_gpu_target() { for (Target::Feature f : features_to_try) { Target new_target = target.with_feature(f); + if (new_target.has_feature(Target::D3D12Compute)) + { + new_target = new_target.with_feature(Target::D3D12ComputeSM60); + } + //if (new_target.has_feature(Target::CUDA)) + //{ + // new_target = new_target.with_feature(Target::CUDACapability80); + //} if (host_supports_target_device(new_target)) { return new_target; }