Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HIP and MPI+HIP updates #361

Draft
wants to merge 27 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
c388fde
Port updated CUDA and MPI+CUDA codes (f function support) to HIP and …
ohearnk Apr 15, 2024
534ea10
Merge branch 'master' into hip-f-func-porting.
ohearnk Apr 18, 2024
43e5ad9
Merge branch 'master' into hip-f-func-porting.
ohearnk Apr 19, 2024
a77c08c
Fix uninitialized variable usage.
ohearnk Apr 19, 2024
5e3f244
Add missed file during CUDA source conversion via hipify-perl (*.cuh).
ohearnk Apr 19, 2024
6510281
Fix source file permissions. Remove unused code.
ohearnk Apr 19, 2024
4815e0b
Merge branch 'master' into hip-f-func-porting.
ohearnk Apr 30, 2024
779130d
Deduplicate GPU codes (CUDA/HIP). Change several static constants to …
ohearnk May 1, 2024
c0a1a64
Fix include path for HIP builds. Match preprocessor controlled code p…
ohearnk May 1, 2024
9d56048
Fix initialized data in GPU code (1e and 2e integrals, address later …
ohearnk Jun 22, 2024
0bf89e1
Conditionally compile ROCsolver code (for SCF diagonalizations) if to…
ohearnk Jun 26, 2024
f937da6
Further GPU code deduplication. Use faster math functions for simple …
ohearnk Jun 26, 2024
815b8c9
Remove unnecessary DGEMM in SCF for CUDA GPU codepaths.
ohearnk Jul 1, 2024
72782c8
Ensure QUICK GPU architectures are always set correctly for HIP build…
ohearnk Jul 1, 2024
01315f6
Remove improper legacy atomic support for double precision arithmetic…
ohearnk Aug 20, 2024
2ea2acc
Fix declaration for emulated double precision atomic addition.
ohearnk Aug 20, 2024
dc9031b
Reduce the number of atomics used during computation of operator mati…
ohearnk Aug 21, 2024
55d9ccd
Reduce the number of atomics used during computation of operator mati…
ohearnk Aug 21, 2024
e2b3f9b
Fix memory leaks.
ohearnk Aug 22, 2024
b15e410
Fix deallocation issue.
ohearnk Aug 22, 2024
0f0085c
OEI code tuning.
ohearnk Aug 22, 2024
6aa0c5d
Hand-tune ERI gradient code.
ohearnk Aug 22, 2024
5a0f95e
More ERI gradient tuning. Other code clean-up.
ohearnk Aug 23, 2024
609e55a
Fix truncation of double precision absolute value calculations for cu…
ohearnk Aug 30, 2024
d57901c
Remove superfluous arithmetic in generated one electron integral code.
ohearnk Aug 30, 2024
bcb47de
Indexing changes for intermediate data structures.
ohearnk Sep 19, 2024
6940f20
Remove stack size limits and cache config hints.
ohearnk Sep 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
44 changes: 22 additions & 22 deletions configure
Original file line number Diff line number Diff line change
Expand Up @@ -1306,7 +1306,7 @@ for buildtype in $buildtypes; do

if [ "$enablef" = 'yes' ]; then
echo "F functions will be compiled in the $buildtype version."
cuda_incl_flags="$cuda_incl_flags -DCUDA_SPDF"
cuda_incl_flags="$cuda_incl_flags -DGPU_SPDF"
fi

# set cew flag for nvcc
Expand All @@ -1331,7 +1331,7 @@ for buildtype in $buildtypes; do

if [ "$enablef" = 'yes' ]; then
echo "F functions will be compiled in the $buildtype version."
hip_incl_flags="$hip_incl_flags -DHIP_SPDF"
hip_incl_flags="$hip_incl_flags -DGPU_SPDF"
fi

fi
Expand All @@ -1345,31 +1345,31 @@ for buildtype in $buildtypes; do

elif [ "$buildtype" = 'cuda' ]; then

fort_flags="$fort_flags -DCUDA"
cc_flags="$cc_flags -DCUDA"
cxx_flags="$cxx_flags -DCUDA"
cuda_incl_flags="$cuda_incl_flags -DCUDA"
fort_flags="$fort_flags -DGPU -DCUDA"
cc_flags="$cc_flags -DGPU -DCUDA"
cxx_flags="$cxx_flags -DGPU -DCUDA"
cuda_incl_flags="$cuda_incl_flags -DGPU -DCUDA"

elif [ "$buildtype" = 'cudampi' ]; then

fort_flags="$fort_flags -DMPIV -DCUDA_MPIV"
cc_flags="$cc_flags -DMPIV -DCUDA_MPIV"
cxx_flags="$cxx_flags -DMPIV -DCUDA_MPIV"
cuda_incl_flags="$cuda_incl_flags -DMPIV -DCUDA_MPIV"
fort_flags="$fort_flags -DMPIV -DMPIV_GPU -DCUDA_MPIV"
cc_flags="$cc_flags -DMPIV -DMPIV_GPU -DCUDA_MPIV"
cxx_flags="$cxx_flags -DMPIV -DMPIV_GPU -DCUDA_MPIV"
cuda_incl_flags="$cuda_incl_flags -DMPIV -DMPIV_GPU -DCUDA_MPIV"

elif [ "$buildtype" = 'hip' ]; then

fort_flags="$fort_flags -DHIP"
cc_flags="$cc_flags -DHIP"
cxx_flags="$cxx_flags -DHIP"
hip_incl_flags="$hip_incl_flags -DHIP"
fort_flags="$fort_flags -DGPU -DHIP"
cc_flags="$cc_flags -DGPU -DHIP"
cxx_flags="$cxx_flags -DGPU -DHIP"
hip_incl_flags="$hip_incl_flags -DGPU -DHIP"

elif [ "$buildtype" = 'hipmpi' ]; then

fort_flags="$fort_flags -DMPIV -DHIP_MPIV"
cc_flags="$cc_flags -DMPIV -DHIP_MPIV"
cxx_flags="$cxx_flags -DMPIV -DHIP_MPIV"
hip_incl_flags="$hip_incl_flags -DMPIV -DHIP_MPIV"
fort_flags="$fort_flags -DMPIV -DMPIV_GPU -DHIP_MPIV"
cc_flags="$cc_flags -DMPIV -DMPIV_GPU -DHIP_MPIV"
cxx_flags="$cxx_flags -DMPIV -DMPIV_GPU -DHIP_MPIV"
hip_incl_flags="$hip_incl_flags -DMPIV -DMPIV_GPU -DHIP_MPIV"

fi

Expand Down Expand Up @@ -1438,13 +1438,13 @@ for buildtype in $buildtypes; do
if [ "$buildtype" = 'mpi' ]; then
fort_ext_lib_flags="$fort_ext_lib_flags -DMPIV"
elif [ "$buildtype" = 'cuda' ]; then
fort_ext_lib_flags="$fort_ext_lib_flags -DCUDA"
fort_ext_lib_flags="$fort_ext_lib_flags -DGPU -DCUDA"
elif [ "$buildtype" = 'cudampi' ]; then
fort_ext_lib_flags="$fort_ext_lib_flags -DMPIV -DCUDA_MPIV"
fort_ext_lib_flags="$fort_ext_lib_flags -DMPIV -DMPIV_GPU -DCUDA_MPIV"
elif [ "$buildtype" = 'hip' ]; then
fort_ext_lib_flags="$fort_ext_lib_flags -DHIP"
fort_ext_lib_flags="$fort_ext_lib_flags -DGPU -DHIP"
elif [ "$buildtype" = 'hipmpi' ]; then
fort_ext_lib_flags="$fort_ext_lib_flags -DMPIV -DHIP_MPIV"
fort_ext_lib_flags="$fort_ext_lib_flags -DMPIV -DMPIV_GPU -DHIP_MPIV"
fi

# set the installer
Expand Down
10 changes: 5 additions & 5 deletions quick-cmake/FindHipCUDA.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -890,12 +890,12 @@ endif()
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE CACHE BOOL "Add paths to linker search and installed rpath")

# Use target ID syntax if supported for AMDGPU_TARGETS
if(TARGET_ID_SUPPORT)
#if(TARGET_ID_SUPPORT)
# set(AMDGPU_TARGETS gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack- CACHE STRING "List of specific machine types for library to target")
set(AMDGPU_TARGETS ${QUICK_USER_ARCH} CACHE STRING "List of specific machine types for library to target")
else()
set(AMDGPU_TARGETS gfx803;gfx900;gfx906;gfx908;gfx90a CACHE STRING "List of specific machine types for library to target")
endif()
#else()
# set(AMDGPU_TARGETS gfx803;gfx900;gfx906;gfx908;gfx90a CACHE STRING "List of specific machine types for library to target")
#endif()
set(AMDGPU_TARGETS "${QUICK_USER_ARCH}" CACHE STRING "List of specific machine types for library to target")
set(AMDGPU_TEST_TARGETS "" CACHE STRING "List of specific device types to test for") # Leave empty for default system device

list(APPEND CMAKE_PREFIX_PATH /opt/rocm /opt/rocm/hip)
Expand Down
33 changes: 7 additions & 26 deletions quick-cmake/QUICKCudaConfig.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -61,50 +61,42 @@ if(CUDA)
message(STATUS "Configuring QUICK for SM3.0, SM3.5, SM3.7, SM5.0, SM5.2 and SM5.3")
message(STATUS "BE AWARE: CUDA 7.5 does not support GTX-1080, Titan-XP, DGX-1, V100 or other Pascal/Volta based GPUs.")
list(APPEND CUDA_NVCC_FLAGS ${SM30FLAGS} ${SM35FLAGS} ${SM37FLAGS} ${SM50FLAGS} ${SM52FLAGS} ${SM53FLAGS})
list(APPEND CUDA_NVCC_FLAGS -DUSE_LEGACY_ATOMICS)
set(DISABLE_OPTIMIZER_CONSTANTS TRUE)

elseif(${CUDA_VERSION} VERSION_EQUAL 8.0)
message(STATUS "Configuring QUICK for SM3.0, SM3.5, SM3.7, SM5.0, SM5.2, SM5.3, SM6.0 and SM6.1,")
message(STATUS "BE AWARE: CUDA 8.0 does not support V100, GV100, Titan-V or later GPUs")
list(APPEND CUDA_NVCC_FLAGS ${SM30FLAGS} ${SM35FLAGS} ${SM37FLAGS} ${SM50FLAGS} ${SM52FLAGS} ${SM53FLAGS} ${SM60FLAGS} ${SM61FLAGS})
list(APPEND CUDA_NVCC_FLAGS -DUSE_LEGACY_ATOMICS)
set(DISABLE_OPTIMIZER_CONSTANTS TRUE)

elseif((${CUDA_VERSION} VERSION_GREATER_EQUAL 9.0) AND (${CUDA_VERSION} VERSION_LESS 10.0))
message(STATUS "Configuring QUICK for SM3.0, SM3.5, SM3.7, SM5.0, SM5.2, SM5.3, SM6.0, SM6.1 and SM7.0")
list(APPEND CUDA_NVCC_FLAGS ${SM30FLAGS} ${SM35FLAGS} ${SM37FLAGS} ${SM50FLAGS} ${SM52FLAGS} ${SM53FLAGS} ${SM60FLAGS} ${SM61FLAGS} ${SM70FLAGS})
list(APPEND CUDA_NVCC_FLAGS -DUSE_LEGACY_ATOMICS)
set(DISABLE_OPTIMIZER_CONSTANTS TRUE)

elseif((${CUDA_VERSION} VERSION_GREATER_EQUAL 10.0) AND (${CUDA_VERSION} VERSION_LESS 11.0))
message(STATUS "Configuring QUICK for SM3.0, SM3.5, SM3.7, SM5.0, SM5.2, SM5.3, SM6.0, SM6.1, SM7.0 and SM7.5")
list(APPEND CUDA_NVCC_FLAGS ${SM30FLAGS} ${SM35FLAGS} ${SM37FLAGS} ${SM50FLAGS} ${SM52FLAGS} ${SM53FLAGS} ${SM60FLAGS} ${SM61FLAGS} ${SM70FLAGS} ${SM75FLAGS})
list(APPEND CUDA_NVCC_FLAGS -DUSE_LEGACY_ATOMICS)
set(DISABLE_OPTIMIZER_CONSTANTS TRUE)

elseif((${CUDA_VERSION} VERSION_EQUAL 11.0))
message(STATUS "Configuring QUICK for SM3.0, SM3.5, SM3.7, SM5.0, SM5.2, SM5.3, SM6.0, SM6.1, SM7.0, SM7.5 and SM8.0")
list(APPEND CUDA_NVCC_FLAGS ${SM30FLAGS} ${SM35FLAGS} ${SM37FLAGS} ${SM50FLAGS} ${SM52FLAGS} ${SM53FLAGS} ${SM60FLAGS} ${SM61FLAGS} ${SM70FLAGS} ${SM75FLAGS} ${SM80FLAGS})
list(APPEND CUDA_NVCC_FLAGS -DUSE_LEGACY_ATOMICS)
set(DISABLE_OPTIMIZER_CONSTANTS TRUE)

elseif((${CUDA_VERSION} VERSION_GREATER_EQUAL 11.1) AND (${CUDA_VERSION} VERSION_LESS_EQUAL 11.7))
message(STATUS "Configuring QUICK for SM3.5, SM3.7, SM5.0, SM5.2, SM5.3, SM6.0, SM6.1, SM7.0, SM7.5, SM8.0 and SM8.6")
list(APPEND CUDA_NVCC_FLAGS ${SM35FLAGS} ${SM37FLAGS} ${SM50FLAGS} ${SM52FLAGS} ${SM53FLAGS} ${SM60FLAGS} ${SM61FLAGS} ${SM70FLAGS} ${SM75FLAGS} ${SM80FLAGS} ${SM86FLAGS})
list(APPEND CUDA_NVCC_FLAGS -DUSE_LEGACY_ATOMICS)
set(DISABLE_OPTIMIZER_CONSTANTS TRUE)

elseif((${CUDA_VERSION} VERSION_EQUAL 11.8))
message(STATUS "Configuring QUICK for SM3.5, SM3.7, SM5.0, SM5.2, SM5.3, SM6.0, SM6.1, SM7.0, SM7.5, SM8.0, SM8.6, SM8.9 and SM9.0")
list(APPEND CUDA_NVCC_FLAGS ${SM35FLAGS} ${SM37FLAGS} ${SM50FLAGS} ${SM52FLAGS} ${SM53FLAGS} ${SM60FLAGS} ${SM61FLAGS} ${SM70FLAGS} ${SM75FLAGS} ${SM80FLAGS} ${SM86FLAGS} ${SM89FLAGS} ${SM90FLAGS})
list(APPEND CUDA_NVCC_FLAGS -DUSE_LEGACY_ATOMICS)
set(DISABLE_OPTIMIZER_CONSTANTS TRUE)

elseif((${CUDA_VERSION} VERSION_GREATER_EQUAL 12.0) AND (${CUDA_VERSION} VERSION_LESS 12.5))
message(STATUS "Configuring QUICK for SM5.0, SM5.2, SM5.3, SM6.0, SM6.1, SM7.0, SM7.5, SM8.0, SM8.6, SM8.9 and SM9.0")
list(APPEND CUDA_NVCC_FLAGS ${SM50FLAGS} ${SM52FLAGS} ${SM53FLAGS} ${SM60FLAGS} ${SM61FLAGS} ${SM70FLAGS} ${SM75FLAGS} ${SM80FLAGS} ${SM86FLAGS} ${SM89FLAGS} ${SM90FLAGS})
list(APPEND CUDA_NVCC_FLAGS -DUSE_LEGACY_ATOMICS)
set(DISABLE_OPTIMIZER_CONSTANTS TRUE)

else()
Expand All @@ -118,15 +110,13 @@ if(CUDA)
if("${QUICK_USER_ARCH}" MATCHES "kepler")
message(STATUS "Configuring QUICK for SM3.5")
list(APPEND CUDA_NVCC_FLAGS ${SM35FLAGS})
list(APPEND CUDA_NVCC_FLAGS -DUSE_LEGACY_ATOMICS)
set(DISABLE_OPTIMIZER_CONSTANTS TRUE)
set(FOUND "TRUE")
endif()

if("${QUICK_USER_ARCH}" MATCHES "maxwell")
message(STATUS "Configuring QUICK for SM5.0")
list(APPEND CUDA_NVCC_FLAGS ${SM50FLAGS})
list(APPEND CUDA_NVCC_FLAGS -DUSE_LEGACY_ATOMICS)
set(DISABLE_OPTIMIZER_CONSTANTS TRUE)
set(FOUND "TRUE")
endif()
Expand Down Expand Up @@ -274,16 +264,12 @@ if(CUDA)

# SPDF
if(ENABLEF)
list(APPEND CUDA_NVCC_FLAGS -DCUDA_SPDF)
list(APPEND CUDA_NVCC_FLAGS -DGPU_SPDF)
endif()

if(DISABLE_OPTIMIZER_CONSTANTS)
set(CUDA_DEVICE_CODE_FLAGS -Xptxas --disable-optimizer-constants)
endif()

if(USE_LEGACY_ATOMICS)
list(APPEND CUDA_NVCC_FLAGS -DUSE_LEGACY_ATOMICS)
endif()

if(NOT INSIDE_AMBER)
# --------------------------------------------------------------------
Expand All @@ -299,9 +285,6 @@ endif()
#option(HIP_RDC "Build relocatable device code, also known as separate compilation mode." FALSE)
#option(HIP_WARP64 "Build for CDNA AMD GPUs (warp size 64) or RDNA (warp size 32)" TRUE)
if(HIP)
# HIP builds currently unavailable (TODO: fix post release)
message(FATAL_ERROR "Error: HIP support is currently unavailable in this QUICK release. Support will be added back in a future release.")

set(QUICK_GPU_PLATFORM "HIP")
set(QUICK_GPU_TARGET_NAME "hip")
set(GPU_LD_FLAGS -fgpu-rdc --hip-link)
Expand All @@ -325,22 +308,16 @@ if(HIP)
endif()

list(APPEND AMD_HIP_FLAGS -fPIC -std=c++14)
set(TARGET_ID_SUPPORT ON)
#set(TARGET_ID_SUPPORT ON)

# if(HIP_WARP64)
# add_compile_definitions(QUICK_PLATFORM_AMD_WARP64)
# endif()

# HIP codes currently do not support f-functions with -DUSE_LEGACY_ATOMICS targets (gfx906 and gfx908)
if(ENABLEF AND (("${QUICK_USER_ARCH}" STREQUAL "") OR ("${QUICK_USER_ARCH}" MATCHES "gfx906") OR ("${QUICK_USER_ARCH}" MATCHES "gfx908")))
message(FATAL_ERROR "Error: Unsupported HIP options (ENABLEF with -DUSE_LEGACY_ATOMICS). ${PROJECT_NAME} support for f-functions requires newer HIP architecture targets not using LEGACY_ATOMICS. Please specify architectures with QUICK_USER_ARCH not needing LEGACY_ATOMICS (post-gfx908) or disable f-function support.")
endif()

if( NOT "${QUICK_USER_ARCH}" STREQUAL "")
set(FOUND "FALSE")
if("${QUICK_USER_ARCH}" MATCHES "gfx908")
message(STATUS "Configuring QUICK for gfx908")
list(APPEND AMD_HIP_FLAGS -DUSE_LEGACY_ATOMICS)
set(FOUND "TRUE")
endif()

Expand All @@ -354,13 +331,17 @@ if(HIP)
message(FATAL_ERROR "Invalid value for QUICK_USER_ARCH. Possible values are gfx908, gfx90a.")
endif()
else()
list(APPEND AMD_HIP_FLAGS -DUSE_LEGACY_ATOMICS)
set(QUICK_USER_ARCH "gfx908")
message(STATUS "AMD GPU architecture not specified. Code will be optimized for gfx908.")
endif()

find_package(HipCUDA REQUIRED)

if(QUICK_DEBUG_HIP_ASAN)
set(QUICK_USER_ARCH "${QUICK_USER_ARCH}:xnack+")
list(APPEND CUDA_NVCC_FLAGS -fsanitize=address -fsanitize-recover=address -shared-libsan -g --offload-arch=${QUICK_USER_ARCH})
endif()

list(APPEND CUDA_NVCC_FLAGS ${AMD_HIP_FLAGS})

set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE})
Expand Down
Loading
Loading