Expose parts of <cuda/std/memory> (#2502)

This exposes some parts of `<memory>` that are frequently used and safe to use everywhere. We do not expose some features like allocators and smart pointers until we are sure that they are usefull and properly implemented. Co-authored-by: Michael Schellenberger Costa <[email protected]>
NVIDIA · Oct 15, 2024 · 5e1c74f · 5e1c74f
1 parent 25bd198
commit 5e1c74f
Show file tree

Hide file tree

Showing 29 changed files with 329 additions and 77 deletions.
diff --git a/docs/libcudacxx/standard_api.rst b/docs/libcudacxx/standard_api.rst
@@ -74,6 +74,8 @@ Feature availability:
       they need to be used similar to type traits as language concepts
       are not available.
 
+-  C++20 ``std::assume_aligned`` in ``<memory>`` is available in C++11.
+
 -  C++20 ``<ranges>`` are available in C++17.
 
    -  all ``<ranges>`` concepts are available in C++17. However, they

diff --git a/docs/libcudacxx/standard_api/utility_library.rst b/docs/libcudacxx/standard_api/utility_library.rst
@@ -10,6 +10,7 @@ Utility Library
    utility_library/bitset
    utility_library/expected
    utility_library/functional
+   utility_library/memory
    utility_library/optional
    utility_library/tuple
    utility_library/type_traits
@@ -34,6 +35,9 @@ the information about the individual features for details.
      - Optional value with error channel
      - CCCL 2.3.0 / CUDA 12.4
    * - :ref:`libcudacxx-standard-api-utility-functional`
+     - ``std::assume_aligned``
+     - CCCL 2.9.0 / CUDA 12.9
+   * - :ref:`libcudacxx-standard-api-utility-memory`
      - Function objects and function wrappers
      - libcu++ 1.1.0 / CCCL 2.0.0 / CUDA 11.2
    * - :ref:`libcudacxx-standard-api-utility-optional`

diff --git a/docs/libcudacxx/standard_api/utility_library/memory.rst b/docs/libcudacxx/standard_api/utility_library/memory.rst
@@ -0,0 +1,25 @@
+.. _libcudacxx-standard-api-utility-memory:
+
+<cuda/std/memory>
+===================
+
+Provided functionalities
+------------------------
+
+- ``cuda::std::addressof``. See the C++ documentation of `std::addressof <https://en.cppreference.com/w/cpp/memory/addressof>`_
+- ``cuda::std::align``. See the C++ documentation of `std::align <https://en.cppreference.com/w/cpp/memory/align>`_
+- ``cuda::std::assume_aligned``. See the C++ documentation of `std::assume_aligned <https://en.cppreference.com/w/cpp/memory/assume_aligned>`_
+- Uninitialized memory algorithms. See the C++ documentation `<https://en.cppreference.com/w/cpp/memory>`_
+
+Extensions
+----------
+
+-  Most features are available from C++11 onwards.
+-  ``cuda::std::addressof`` is constexpr from C++11 on if compiler support is available
+-  ``cuda::std::assume_aligned`` is constexpr from C++14 on
+
+Restrictions
+------------
+
+- `construct_at` and is only available in C++20 as that is explicitly mentioned in the standard
+- The specialized memory algorithms are not parallel
diff --git a/libcudacxx/include/cuda/std/__cccl/builtin.h b/libcudacxx/include/cuda/std/__cccl/builtin.h
@@ -84,7 +84,17 @@
 
 #if _CCCL_HAS_BUILTIN(__array_extent)
 #  define _CCCL_BUILTIN_ARRAY_EXTENT(...) __array_extent(__VA_ARGS__)
-#endif // _CCCL_HAS_BUILTIN(array_extent)
+#endif // _CCCL_HAS_BUILTIN(__array_extent)
+
+#if _CCCL_HAS_BUILTIN(__builtin_assume_aligned) || (defined(_CCCL_COMPILER_MSVC) && _CCCL_MSVC_VERSION >= 1923) \
+  || defined(_CCCL_COMPILER_GCC)
+#  define _CCCL_BUILTIN_ASSUME_ALIGNED(...) __builtin_assume_aligned(__VA_ARGS__)
+#endif // _CCCL_HAS_BUILTIN(__builtin_assume_aligned)
+
+// NVCC below 11.2 treats this as a host only function
+#if defined(_CCCL_CUDACC_BELOW_11_2)
+#  undef _CCCL_BUILTIN_ASSUME_ALIGNED
+#endif // _CCCL_CUDACC_BELOW_11_2
 
 // nvhpc has a bug where it supports __builtin_addressof but does not mark it via _CCCL_CHECK_BUILTIN
 #if _CCCL_CHECK_BUILTIN(builtin_addressof) || (defined(_CCCL_COMPILER_GCC) && _CCCL_GCC_VERSION >= 70000) \

diff --git a/libcudacxx/include/cuda/std/__memory/assume_aligned.h b/libcudacxx/include/cuda/std/__memory/assume_aligned.h
@@ -0,0 +1,53 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCUDACXX___MEMORY_ASSUME_ALIGNED_H
+#define _LIBCUDACXX___MEMORY_ASSUME_ALIGNED_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#include <cuda/std/__bit/has_single_bit.h>
+#include <cuda/std/__type_traits/is_constant_evaluated.h>
+#include <cuda/std/cstddef> // size_t
+#include <cuda/std/cstdint> // uintptr_t
+
+_LIBCUDACXX_BEGIN_NAMESPACE_STD
+
+template <size_t _Align, class _Tp>
+_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 _Tp* assume_aligned(_Tp* __ptr) noexcept
+{
+  static_assert(_CUDA_VSTD::has_single_bit(_Align), "std::assume_aligned requires the alignment to be a power of 2!");
+#if defined(_CCCL_BUILTIN_IS_CONSTANT_EVALUATED) && defined(_CCCL_BUILTIN_ASSUME_ALIGNED)
+  if (!_CCCL_BUILTIN_IS_CONSTANT_EVALUATED())
+  {
+#  if !defined(_CCCL_COMPILER_MSVC) // MSVC checks within the builtin
+    _CCCL_ASSERT(reinterpret_cast<uintptr_t>(__ptr) % _Align == 0, "Alignment assumption is violated");
+#  endif // !_CCCL_COMPILER_MSVC
+    return static_cast<_Tp*>(_CCCL_BUILTIN_ASSUME_ALIGNED(__ptr, _Align));
+  }
+  else
+#endif // _CCCL_BUILTIN_IS_CONSTANT_EVALUATED && _CCCL_BUILTIN_ASSUME_ALIGNED
+  {
+    return __ptr;
+  }
+}
+
+_LIBCUDACXX_END_NAMESPACE_STD
+
+#endif // _LIBCUDACXX___MEMORY_ASSUME_ALIGNED_H
diff --git a/libcudacxx/include/cuda/std/__memory/construct_at.h b/libcudacxx/include/cuda/std/__memory/construct_at.h
@@ -4,7 +4,7 @@
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES
+// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES
 //
 //===----------------------------------------------------------------------===//
 
@@ -185,8 +185,8 @@ _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 _ForwardIterator __destroy(_Forw
 
 _CCCL_EXEC_CHECK_DISABLE
 template <class _Tp,
-          __enable_if_t<!is_array<_Tp>::value, int>                  = 0,
-          __enable_if_t<!is_trivially_destructible<_Tp>::value, int> = 0>
+          __enable_if_t<!_CCCL_TRAIT(is_array, _Tp), int>                  = 0,
+          __enable_if_t<!_CCCL_TRAIT(is_trivially_destructible, _Tp), int> = 0>
 _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 void __destroy_at(_Tp* __loc)
 {
   _CCCL_ASSERT(__loc != nullptr, "null pointer given to destroy_at");
@@ -195,22 +195,20 @@ _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 void __destroy_at(_Tp* __loc)
 
 _CCCL_EXEC_CHECK_DISABLE
 template <class _Tp,
-          __enable_if_t<!is_array<_Tp>::value, int>                 = 0,
-          __enable_if_t<is_trivially_destructible<_Tp>::value, int> = 0>
+          __enable_if_t<!_CCCL_TRAIT(is_array, _Tp), int>                 = 0,
+          __enable_if_t<_CCCL_TRAIT(is_trivially_destructible, _Tp), int> = 0>
 _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 void __destroy_at(_Tp* __loc)
 {
   _CCCL_ASSERT(__loc != nullptr, "null pointer given to destroy_at");
   (void) __loc;
 }
 
-#if _CCCL_STD_VER >= 2020
-template <class _Tp, __enable_if_t<is_array<_Tp>::value, int> = 0>
-_LIBCUDACXX_HIDE_FROM_ABI constexpr void __destroy_at(_Tp* __loc)
+template <class _Tp, __enable_if_t<_CCCL_TRAIT(is_array, _Tp), int> = 0>
+_LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 void __destroy_at(_Tp* __loc)
 {
   _CCCL_ASSERT(__loc != nullptr, "null pointer given to destroy_at");
   _CUDA_VSTD::__destroy(_CUDA_VSTD::begin(*__loc), _CUDA_VSTD::end(*__loc));
 }
-#endif // _CCCL_STD_VER >= 2020
 
 template <class _ForwardIterator>
 _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 _ForwardIterator
@@ -235,22 +233,18 @@ __reverse_destroy(_BidirectionalIterator __first, _BidirectionalIterator __last)
   return __last;
 }
 
-#if _CCCL_STD_VER >= 2017
-
-template <class _Tp, enable_if_t<!is_array_v<_Tp>, int> = 0>
-_LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX20 void destroy_at(_Tp* __loc) noexcept
+template <class _Tp, __enable_if_t<!_CCCL_TRAIT(is_array, _Tp), int> = 0>
+_LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX20 void destroy_at(_Tp* __loc)
 {
   _CCCL_ASSERT(__loc != nullptr, "null pointer given to destroy_at");
   __loc->~_Tp();
 }
 
-#  if _CCCL_STD_VER >= 2020
-template <class _Tp, enable_if_t<is_array_v<_Tp>, int> = 0>
-_LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX20 void destroy_at(_Tp* __loc) noexcept
+template <class _Tp, __enable_if_t<_CCCL_TRAIT(is_array, _Tp), int> = 0>
+_LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX20 void destroy_at(_Tp* __loc)
 {
   _CUDA_VSTD::__destroy_at(__loc);
 }
-#  endif // _CCCL_STD_VER >= 2020
 
 template <class _ForwardIterator>
 _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX20 void destroy(_ForwardIterator __first, _ForwardIterator __last) noexcept
@@ -268,8 +262,6 @@ _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX20 _ForwardIterator destroy_n(_Forw
   return __first;
 }
 
-#endif // _CCCL_STD_VER >= 2017
-
 _LIBCUDACXX_END_NAMESPACE_STD
 
 #endif // _LIBCUDACXX___MEMORY_CONSTRUCT_AT_H
diff --git a/libcudacxx/include/cuda/std/__memory_ b/libcudacxx/include/cuda/std/__memory_
@@ -8,8 +8,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef _CUDA_STD_MEMORY
-#define _CUDA_STD_MEMORY
+#ifndef _CUDA_STD___MEMORY_
+#define _CUDA_STD___MEMORY_
 
 #include <cuda/std/detail/__config>
 
@@ -21,25 +21,14 @@
 #  pragma system_header
 #endif // no system header
 
-#include <cuda/std/__memory/addressof.h>
-#include <cuda/std/__memory/align.h>
 #include <cuda/std/__memory/allocate_at_least.h>
 #include <cuda/std/__memory/allocation_guard.h>
 #include <cuda/std/__memory/allocator.h>
 #include <cuda/std/__memory/allocator_arg_t.h>
 #include <cuda/std/__memory/allocator_traits.h>
-#include <cuda/std/__memory/construct_at.h>
-#include <cuda/std/__memory/pointer_traits.h>
-#include <cuda/std/__memory/uninitialized_algorithms.h>
+#include <cuda/std/__memory/temporary_buffer.h>
 #include <cuda/std/__memory/unique_ptr.h>
 #include <cuda/std/__memory/uses_allocator.h>
+#include <cuda/std/memory>
 
-// standard-mandated includes
-#include <cuda/std/version>
-
-// [memory.syn]
-#ifndef _LIBCUDACXX_HAS_NO_SPACESHIP_OPERATOR
-#  include <cuda/std/compare>
-#endif // !_LIBCUDACXX_HAS_NO_SPACESHIP_OPERATOR
-
-#endif // _CUDA_STD_MEMORY
+#endif // _CUDA_STD___MEMORY_
diff --git a/libcudacxx/include/cuda/std/memory b/libcudacxx/include/cuda/std/memory
@@ -0,0 +1,39 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDA_STD_MEMORY
+#define _CUDA_STD_MEMORY
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#include <cuda/std/__memory/addressof.h>
+#include <cuda/std/__memory/align.h>
+#include <cuda/std/__memory/assume_aligned.h>
+#include <cuda/std/__memory/construct_at.h>
+#include <cuda/std/__memory/pointer_traits.h>
+#include <cuda/std/__memory/uninitialized_algorithms.h>
+
+// standard-mandated includes
+#include <cuda/std/version>
+
+// [memory.syn]
+#ifndef _LIBCUDACXX_HAS_NO_SPACESHIP_OPERATOR
+#  include <cuda/std/compare>
+#endif // !_LIBCUDACXX_HAS_NO_SPACESHIP_OPERATOR
+
+#endif // _CUDA_STD_MEMORY
diff --git a/libcudacxx/test/libcudacxx/std/containers/sequences/array/size_and_alignment.pass.cpp b/libcudacxx/test/libcudacxx/std/containers/sequences/array/size_and_alignment.pass.cpp
@@ -29,7 +29,7 @@
 TEST_NV_DIAG_SUPPRESS(cuda_demote_unsupported_floating_point)
 
 #if defined(TEST_COMPILER_MSVC)
-#  pragma warning(disable : 4324)
+#  pragma warning(disable : 4324) // structure was padded due to alignment specifier
 #endif // TEST_COMPILER_MSVC
 
 template <class T, cuda::std::size_t Size>

diff --git a/...est/libcudacxx/std/utilities/memory/default.allocator/allocator.members/allocate.pass.cpp b/...est/libcudacxx/std/utilities/memory/default.allocator/allocator.members/allocate.pass.cpp
@@ -23,7 +23,7 @@
 #include "test_macros.h"
 
 #if defined(TEST_COMPILER_MSVC)
-#  pragma warning(disable : 4324)
+#  pragma warning(disable : 4324) // structure was padded due to alignment specifier
 #endif // TEST_COMPILER_MSVC
 
 #ifdef TEST_HAS_NO_ALIGNED_ALLOCATION

diff --git a/libcudacxx/test/libcudacxx/std/utilities/memory/ptr.align/align.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/memory/ptr.align/align.pass.cpp
@@ -11,9 +11,9 @@
 
 // void* align(size_t alignment, size_t size, void*& ptr, size_t& space);
 
-#include <cuda/std/__memory_>
 #include <cuda/std/cassert>
 #include <cuda/std/cstddef>
+#include <cuda/std/memory>
 
 #include "test_macros.h"
 

diff --git a/libcudacxx/test/libcudacxx/std/utilities/memory/ptr.align/assume_aligned.nodiscard.fail.cpp b/libcudacxx/test/libcudacxx/std/utilities/memory/ptr.align/assume_aligned.nodiscard.fail.cpp
@@ -0,0 +1,30 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+// #include <memory>
+
+// template<size_t N, class T>
+// [[nodiscard]] constexpr T* assume_aligned(T* ptr);
+
+// UNSUPPORTED: nvrtc
+// nvrtc currently compiles the test with a warning
+
+#include <cuda/std/memory>
+
+__host__ __device__ void f()
+{
+  int* p = nullptr;
+  cuda::std::assume_aligned<4>(p); // expected-warning {{ignoring return value of function declared with 'nodiscard'
+                                   // attribute}}
+}
+
+int main(int, char**)
+{
+  return 0;
+}