Skip to content

Commit

Permalink
Changing define names.
Browse files Browse the repository at this point in the history
  • Loading branch information
corbett5 committed Aug 12, 2020
1 parent ae95dc9 commit 2bf471c
Show file tree
Hide file tree
Showing 72 changed files with 249 additions and 286 deletions.
12 changes: 6 additions & 6 deletions benchmarks/benchmarkArray1DR2TensorMultiplication.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,12 +218,12 @@ void pointerRAJA( benchmark::State & state )

INDEX_TYPE const SERIAL_SIZE = (2 << 18) - 87;

#if defined(USE_OPENMP)
#if defined(LVARRAY_USE_OPENMP)
INDEX_TYPE const OMP_SIZE = (2 << 22) - 87;
#endif

// The non-Array benchmarks could be run without CHAI, but then what would be the point?
#if defined(USE_CUDA) && defined(USE_CHAI)
#if defined(LVARRAY_USE_CUDA) && defined(LVARRAY_USE_CHAI)
constexpr INDEX_TYPE CUDA_SIZE = (2 << 24) - 87;
#endif

Expand Down Expand Up @@ -271,11 +271,11 @@ void registerBenchmarks()
},
std::make_tuple( SERIAL_SIZE, RAJA::PERM_IJK {}, serialPolicy {} )
, std::make_tuple( SERIAL_SIZE, RAJA::PERM_KJI {}, serialPolicy {} )
#if defined(USE_OPENMP)
#if defined(LVARRAY_USE_OPENMP)
, std::make_tuple( OMP_SIZE, RAJA::PERM_IJK {}, parallelHostPolicy {} )
, std::make_tuple( OMP_SIZE, RAJA::PERM_KJI {}, parallelHostPolicy {} )
#endif
#if defined(USE_CUDA) && defined(USE_CHAI)
#if defined(LVARRAY_USE_CUDA) && defined(LVARRAY_USE_CHAI)
, std::make_tuple( CUDA_SIZE, RAJA::PERM_IJK {}, parallelDevicePolicy< THREADS_PER_BLOCK > {} )
, std::make_tuple( CUDA_SIZE, RAJA::PERM_KJI {}, parallelDevicePolicy< THREADS_PER_BLOCK > {} )
#endif
Expand All @@ -295,11 +295,11 @@ int main( int argc, char * * argv )
LVARRAY_LOG( "VALUE_TYPE = " << LvArray::system::demangleType< LvArray::benchmarking::VALUE_TYPE >() );
LVARRAY_LOG( "Serial problems of size ( " << LvArray::benchmarking::SERIAL_SIZE << ", 3, 3 )." );

#if defined(USE_OPENMP)
#if defined(LVARRAY_USE_OPENMP)
LVARRAY_LOG( "OMP problems of size ( " << LvArray::benchmarking::OMP_SIZE << ", 3, 3 )." );
#endif

#if defined(USE_CUDA) && defined(USE_CHAI)
#if defined(LVARRAY_USE_CUDA) && defined(LVARRAY_USE_CHAI)
LVARRAY_LOG( "CUDA problems of size ( " << LvArray::benchmarking::CUDA_SIZE << ", 3, 3 )." );
#endif

Expand Down
4 changes: 2 additions & 2 deletions benchmarks/benchmarkArray1DR2TensorMultiplicationKernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -290,12 +290,12 @@ template class ArrayOfR2TensorsNative< RAJA::PERM_KJI >;
template class ArrayOfR2TensorsRAJA< RAJA::PERM_IJK, serialPolicy >;
template class ArrayOfR2TensorsRAJA< RAJA::PERM_KJI, serialPolicy >;

#if defined(USE_OPENMP)
#if defined(LVARRAY_USE_OPENMP)
template class ArrayOfR2TensorsRAJA< RAJA::PERM_IJK, parallelHostPolicy >;
template class ArrayOfR2TensorsRAJA< RAJA::PERM_KJI, parallelHostPolicy >;
#endif

#if defined(USE_CUDA) && defined(USE_CHAI)
#if defined(LVARRAY_USE_CUDA) && defined(LVARRAY_USE_CHAI)
template class ArrayOfR2TensorsRAJA< RAJA::PERM_IJK, RAJA::cuda_exec< THREADS_PER_BLOCK > >;
template class ArrayOfR2TensorsRAJA< RAJA::PERM_KJI, RAJA::cuda_exec< THREADS_PER_BLOCK > >;
#endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ void registerBenchmarks()
REGISTER_BENCHMARK_TEMPLATE( WRAP( { nx, ny, nz } ), overAllocation, POLICY );
REGISTER_BENCHMARK_TEMPLATE( WRAP( { nx, ny, nz } ), resizeFromCapacities, POLICY );
}, std::make_tuple( NX, NY, NZ, serialPolicy {} )
#if defined(USE_OPENMP)
#if defined(LVARRAY_USE_OPENMP)
, std::make_tuple( NX, NY, NZ, parallelHostPolicy {} )
#endif
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ resizeFromCapacities( ArrayView< INDEX_TYPE const, 2, 1, INDEX_TYPE, DEFAULT_BUF
// Explicit instantiation of NodeToElemMapConstruction.
template class NodeToElemMapConstruction< serialPolicy >;

#if defined(USE_OPENMP)
#if defined(LVARRAY_USE_OPENMP)
template class NodeToElemMapConstruction< parallelHostPolicy >;
#endif

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class NaiveNodeToElemMapConstruction
{
CALI_CXX_MARK_SCOPE( "~NaiveNodeToElemMapConstruction" );

// #if defined(USE_OPENMP)
// #if defined(LVARRAY_USE_OPENMP)
// using EXEC_POLICY = parallelHostPolicy;
// #else
using EXEC_POLICY = serialPolicy;
Expand Down
12 changes: 6 additions & 6 deletions benchmarks/benchmarkEigendecomposition.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,13 @@ void eigenvectors( benchmark::State & state )
INDEX_TYPE const SERIAL_SIZE_2x2 = (2 << 22) - 87;
INDEX_TYPE const SERIAL_SIZE_3x3 = (2 << 19) - 87;

#if defined(USE_OPENMP)
#if defined(LVARRAY_USE_OPENMP)
INDEX_TYPE const OMP_SIZE_2x2 = (2 << 24) - 87;
INDEX_TYPE const OMP_SIZE_3x3 = (2 << 23) - 87;
#endif

// The non-Array benchmarks could be run without CHAI, but then what would be the point?
#if defined(USE_CUDA) && defined(USE_CHAI)
#if defined(LVARRAY_USE_CUDA) && defined(LVARRAY_USE_CHAI)
constexpr INDEX_TYPE CUDA_SIZE_2x2 = (2 << 24) - 87;
constexpr INDEX_TYPE CUDA_SIZE_3x3 = (2 << 24) - 87;
#endif
Expand All @@ -67,13 +67,13 @@ void registerBenchmarks()
, std::make_tuple( SERIAL_SIZE_2x2, std::integral_constant< int, 2 > {}, RAJA::PERM_JI {}, RAJA::PERM_KJI {}, serialPolicy {} )
, std::make_tuple( SERIAL_SIZE_3x3, std::integral_constant< int, 3 > {}, RAJA::PERM_IJ {}, RAJA::PERM_IJK {}, serialPolicy {} )
, std::make_tuple( SERIAL_SIZE_3x3, std::integral_constant< int, 3 > {}, RAJA::PERM_JI {}, RAJA::PERM_KJI {}, serialPolicy {} )
#if defined(USE_OPENMP)
#if defined(LVARRAY_USE_OPENMP)
, std::make_tuple( OMP_SIZE_2x2, std::integral_constant< int, 2 > {}, RAJA::PERM_IJ {}, RAJA::PERM_IJK {}, parallelHostPolicy {} )
, std::make_tuple( OMP_SIZE_2x2, std::integral_constant< int, 2 > {}, RAJA::PERM_JI {}, RAJA::PERM_KJI {}, parallelHostPolicy {} )
, std::make_tuple( OMP_SIZE_3x3, std::integral_constant< int, 3 > {}, RAJA::PERM_IJ {}, RAJA::PERM_IJK {}, parallelHostPolicy {} )
, std::make_tuple( OMP_SIZE_3x3, std::integral_constant< int, 3 > {}, RAJA::PERM_JI {}, RAJA::PERM_KJI {}, parallelHostPolicy {} )
#endif
#if defined(USE_CUDA) && defined(USE_CHAI)
#if defined(LVARRAY_USE_CUDA) && defined(LVARRAY_USE_CHAI)
, std::make_tuple( CUDA_SIZE_2x2, std::integral_constant< int, 2 > {}, RAJA::PERM_IJ {}, RAJA::PERM_IJK {}, parallelDevicePolicy< THREADS_PER_BLOCK > {} )
, std::make_tuple( CUDA_SIZE_2x2, std::integral_constant< int, 2 > {}, RAJA::PERM_JI {}, RAJA::PERM_KJI {}, parallelDevicePolicy< THREADS_PER_BLOCK > {} )
, std::make_tuple( CUDA_SIZE_3x3, std::integral_constant< int, 3 > {}, RAJA::PERM_IJ {}, RAJA::PERM_IJK {}, parallelDevicePolicy< THREADS_PER_BLOCK > {} )
Expand All @@ -97,12 +97,12 @@ int main( int argc, char * * argv )
LVARRAY_LOG( "Serial number of 2x2 matrices = " << LvArray::benchmarking::SERIAL_SIZE_2x2 );
LVARRAY_LOG( "Serial number of 3x3 matrices = " << LvArray::benchmarking::SERIAL_SIZE_3x3 );

#if defined(USE_OPENMP)
#if defined(LVARRAY_USE_OPENMP)
LVARRAY_LOG( "OMP number of 2x2 matrices = " << LvArray::benchmarking::OMP_SIZE_2x2 );
LVARRAY_LOG( "OMP number of 3x3 matrices = " << LvArray::benchmarking::OMP_SIZE_3x3 );
#endif

#if defined(USE_CUDA) && defined(USE_CHAI)
#if defined(LVARRAY_USE_CUDA) && defined(LVARRAY_USE_CHAI)
LVARRAY_LOG( "CUDA number of 2x2 matrices = " << LvArray::benchmarking::CUDA_SIZE_2x2 );
LVARRAY_LOG( "CUDA number of 3x3 matrices = " << LvArray::benchmarking::CUDA_SIZE_3x3 );
#endif
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/benchmarkEigendecompositionKernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,14 @@ template class Eigendecomposition< 2, RAJA::PERM_JI, RAJA::PERM_KJI, serialPolic
template class Eigendecomposition< 3, RAJA::PERM_IJ, RAJA::PERM_IJK, serialPolicy >;
template class Eigendecomposition< 3, RAJA::PERM_JI, RAJA::PERM_KJI, serialPolicy >;

#if defined(USE_OPENMP)
#if defined(LVARRAY_USE_OPENMP)
template class Eigendecomposition< 2, RAJA::PERM_IJ, RAJA::PERM_IJK, parallelHostPolicy >;
template class Eigendecomposition< 2, RAJA::PERM_JI, RAJA::PERM_KJI, parallelHostPolicy >;
template class Eigendecomposition< 3, RAJA::PERM_IJ, RAJA::PERM_IJK, parallelHostPolicy >;
template class Eigendecomposition< 3, RAJA::PERM_JI, RAJA::PERM_KJI, parallelHostPolicy >;
#endif

#if defined(USE_CUDA) && defined(USE_CHAI)
#if defined(LVARRAY_USE_CUDA) && defined(LVARRAY_USE_CHAI)
template class Eigendecomposition< 2, RAJA::PERM_IJ, RAJA::PERM_IJK, parallelDevicePolicy< THREADS_PER_BLOCK > >;
template class Eigendecomposition< 2, RAJA::PERM_JI, RAJA::PERM_KJI, parallelDevicePolicy< THREADS_PER_BLOCK > >;
template class Eigendecomposition< 3, RAJA::PERM_IJ, RAJA::PERM_IJK, parallelDevicePolicy< THREADS_PER_BLOCK > >;
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/benchmarkHelpers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#include <chrono>
#include <unordered_map>

#if defined(USE_CALIPER)
#if defined(LVARRAY_USE_CALIPER)
#include <caliper/cali.h>
#define CALI_CXX_MARK_PRETTY_FUNCTION cali::Function __cali_ann ## __func__( __PRETTY_FUNCTION__ )
#else
Expand All @@ -32,7 +32,7 @@ namespace LvArray
using namespace testing;


#if defined(USE_CHAI)
#if defined(LVARRAY_USE_CHAI)
static_assert( std::is_same< DEFAULT_BUFFER< int >, ChaiBuffer< int > >::value,
"The default buffer should be ChaiBuffer when chai is enabled." );
#endif
Expand Down
12 changes: 6 additions & 6 deletions benchmarks/benchmarkInnerProduct.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,12 +119,12 @@ void pointerRAJA( benchmark::State & state )


INDEX_TYPE const SERIAL_SIZE = (2 << 20) + 573;
#if defined(USE_OPENMP)
#if defined(LVARRAY_USE_OPENMP)
INDEX_TYPE const OMP_SIZE = SERIAL_SIZE;
#endif

// The non-Array benchmarks could be run without CHAI, but then what would be the point?
#if defined(USE_CUDA) && defined(USE_CHAI)
#if defined(LVARRAY_USE_CUDA) && defined(LVARRAY_USE_CHAI)
INDEX_TYPE const CUDA_SIZE = SERIAL_SIZE;
#endif

Expand Down Expand Up @@ -153,10 +153,10 @@ void registerBenchmarks()
REGISTER_BENCHMARK_TEMPLATE( { size }, pointerRAJA, POLICY );
},
std::make_tuple( SERIAL_SIZE, serialPolicy {} )
#if defined(USE_OPENMP)
#if defined(LVARRAY_USE_OPENMP)
, std::make_tuple( OMP_SIZE, parallelHostPolicy {} )
#endif
#if defined(USE_CUDA) && defined(USE_CHAI)
#if defined(LVARRAY_USE_CUDA) && defined(LVARRAY_USE_CHAI)
, std::make_tuple( CUDA_SIZE, parallelDevicePolicy< THREADS_PER_BLOCK > {} )
#endif
);
Expand All @@ -178,11 +178,11 @@ int main( int argc, char * * argv )

LVARRAY_LOG( "Serial problems of size ( " << LvArray::benchmarking::SERIAL_SIZE << " )." );

#if defined(USE_OPENMP)
#if defined(LVARRAY_USE_OPENMP)
LVARRAY_LOG( "OMP problems of size ( " << LvArray::benchmarking::OMP_SIZE << " )." );
#endif

#if defined(USE_CUDA) && defined(USE_CHAI)
#if defined(LVARRAY_USE_CUDA) && defined(LVARRAY_USE_CHAI)
LVARRAY_LOG( "CUDA problems of size ( " << LvArray::benchmarking::CUDA_SIZE << " )." );
#endif

Expand Down
4 changes: 2 additions & 2 deletions benchmarks/benchmarkInnerProductKernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,11 +109,11 @@ pointerKernel( INDEX_TYPE const N,

template class InnerProductRAJA< serialPolicy >;

#if defined(USE_OPENMP)
#if defined(LVARRAY_USE_OPENMP)
template class InnerProductRAJA< parallelHostPolicy >;
#endif

#if defined(USE_CUDA) && defined(USE_CHAI)
#if defined(LVARRAY_USE_CUDA) && defined(LVARRAY_USE_CHAI)
template class InnerProductRAJA< RAJA::cuda_exec< THREADS_PER_BLOCK > >;
#endif

Expand Down
12 changes: 6 additions & 6 deletions benchmarks/benchmarkMatrixMatrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,13 +121,13 @@ INDEX_TYPE const SERIAL_N = (2 << 7) + 73;
INDEX_TYPE const SERIAL_L = (2 << 7) - 71;
INDEX_TYPE const SERIAL_M = (2 << 7) - 3;

#if defined(USE_OPENMP)
#if defined(LVARRAY_USE_OPENMP)
INDEX_TYPE const OMP_N = SERIAL_N;
INDEX_TYPE const OMP_L = SERIAL_L;
INDEX_TYPE const OMP_M = SERIAL_M;
#endif

#if defined(USE_CUDA) && defined(USE_CHAI)
#if defined(LVARRAY_USE_CUDA) && defined(LVARRAY_USE_CHAI)
INDEX_TYPE const CUDA_N = SERIAL_N;
INDEX_TYPE const CUDA_L = SERIAL_L;
INDEX_TYPE const CUDA_M = SERIAL_M;
Expand Down Expand Up @@ -169,11 +169,11 @@ void registerBenchmarks()
},
std::make_tuple( SERIAL_N, SERIAL_L, SERIAL_M, RAJA::PERM_IJ {}, serialPolicy {} )
, std::make_tuple( SERIAL_N, SERIAL_L, SERIAL_M, RAJA::PERM_JI {}, serialPolicy {} )
#if defined(USE_OPENMP)
#if defined(LVARRAY_USE_OPENMP)
, std::make_tuple( OMP_N, SERIAL_L, OMP_M, RAJA::PERM_IJ {}, parallelHostPolicy {} )
, std::make_tuple( OMP_N, SERIAL_L, OMP_M, RAJA::PERM_JI {}, parallelHostPolicy {} )
#endif
#if defined(USE_CUDA) && defined(USE_CHAI)
#if defined(LVARRAY_USE_CUDA) && defined(LVARRAY_USE_CHAI)
, std::make_tuple( CUDA_N, SERIAL_L, CUDA_M, RAJA::PERM_IJ {}, parallelDevicePolicy< THREADS_PER_BLOCK > {} )
, std::make_tuple( CUDA_N, SERIAL_L, CUDA_M, RAJA::PERM_JI {}, parallelDevicePolicy< THREADS_PER_BLOCK > {} )
#endif
Expand All @@ -196,12 +196,12 @@ int main( int argc, char * * argv )
LVARRAY_LOG( "Serial problems of size ( " << LvArray::benchmarking::SERIAL_N << ", " <<
LvArray::benchmarking::SERIAL_L << ", " << LvArray::benchmarking::SERIAL_M << " )." );

#if defined(USE_OPENMP)
#if defined(LVARRAY_USE_OPENMP)
LVARRAY_LOG( "OMP problems of size ( " << LvArray::benchmarking::OMP_N << ", " <<
LvArray::benchmarking::OMP_L << ", " << LvArray::benchmarking::OMP_M << " )." );
#endif

#if defined(USE_CUDA) && defined(USE_CHAI)
#if defined(LVARRAY_USE_CUDA) && defined(LVARRAY_USE_CHAI)
LVARRAY_LOG( "CUDA problems of size ( " << LvArray::benchmarking::CUDA_N << ", " <<
LvArray::benchmarking::CUDA_L << ", " << LvArray::benchmarking::CUDA_M << " )." );
#endif
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/benchmarkMatrixMatrixKernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -204,14 +204,14 @@ template class MatrixMatrixNative< RAJA::PERM_JI >;
template class MatrixMatrixRAJA< RAJA::PERM_IJ, serialPolicy >;
template class MatrixMatrixRAJA< RAJA::PERM_JI, serialPolicy >;

#if defined(USE_OPENMP)
#if defined(LVARRAY_USE_OPENMP)

template class MatrixMatrixRAJA< RAJA::PERM_IJ, parallelHostPolicy >;
template class MatrixMatrixRAJA< RAJA::PERM_JI, parallelHostPolicy >;

#endif

#if defined(USE_CUDA) && defined(USE_CHAI)
#if defined(LVARRAY_USE_CUDA) && defined(LVARRAY_USE_CHAI)

template class MatrixMatrixRAJA< RAJA::PERM_IJ, RAJA::cuda_exec< THREADS_PER_BLOCK > >;
template class MatrixMatrixRAJA< RAJA::PERM_JI, RAJA::cuda_exec< THREADS_PER_BLOCK > >;
Expand Down
8 changes: 4 additions & 4 deletions benchmarks/benchmarkMatrixVector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,11 +161,11 @@ void registerBenchmarks()
},
std::make_tuple( SERIAL_N, SERIAL_M, RAJA::PERM_IJ {}, serialPolicy {} )
, std::make_tuple( SERIAL_N, SERIAL_M, RAJA::PERM_JI {}, serialPolicy {} )
#if defined(USE_OPENMP)
#if defined(LVARRAY_USE_OPENMP)
, std::make_tuple( OMP_N, OMP_M, RAJA::PERM_IJ {}, parallelHostPolicy {} )
, std::make_tuple( OMP_N, OMP_M, RAJA::PERM_JI {}, parallelHostPolicy {} )
#endif
#if defined(USE_CUDA) && defined(USE_CHAI)
#if defined(LVARRAY_USE_CUDA) && defined(LVARRAY_USE_CHAI)
, std::make_tuple( CUDA_N, CUDA_M, RAJA::PERM_IJ {}, parallelDevicePolicy< THREADS_PER_BLOCK > {} )
, std::make_tuple( CUDA_N, CUDA_M, RAJA::PERM_JI {}, parallelDevicePolicy< THREADS_PER_BLOCK > {} )
#endif
Expand All @@ -188,12 +188,12 @@ int main( int argc, char * * argv )
LVARRAY_LOG( "Serial problems of size ( " << LvArray::benchmarking::SERIAL_N << ", " <<
LvArray::benchmarking::SERIAL_M << " )." );

#if defined(USE_OPENMP)
#if defined(LVARRAY_USE_OPENMP)
LVARRAY_LOG( "OMP problems of size ( " << LvArray::benchmarking::OMP_N << ", " <<
LvArray::benchmarking::OMP_M << " )." );
#endif

#if defined(USE_CUDA) && defined(USE_CHAI)
#if defined(LVARRAY_USE_CUDA) && defined(LVARRAY_USE_CHAI)
LVARRAY_LOG( "CUDA problems of size ( " << LvArray::benchmarking::CUDA_N << ", " <<
LvArray::benchmarking::CUDA_M << " )." );
#endif
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/benchmarkMatrixVectorKernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,14 +171,14 @@ template class MatrixVectorNative< RAJA::PERM_JI >;
template class MatrixVectorRAJA< RAJA::PERM_IJ, serialPolicy >;
template class MatrixVectorRAJA< RAJA::PERM_JI, serialPolicy >;

#if defined(USE_OPENMP)
#if defined(LVARRAY_USE_OPENMP)

template class MatrixVectorRAJA< RAJA::PERM_IJ, parallelHostPolicy >;
template class MatrixVectorRAJA< RAJA::PERM_JI, parallelHostPolicy >;

#endif

#if defined(USE_CUDA) && defined(USE_CHAI)
#if defined(LVARRAY_USE_CUDA) && defined(LVARRAY_USE_CHAI)

template class MatrixVectorRAJA< RAJA::PERM_IJ, RAJA::cuda_exec< THREADS_PER_BLOCK > >;
template class MatrixVectorRAJA< RAJA::PERM_JI, RAJA::cuda_exec< THREADS_PER_BLOCK > >;
Expand Down
12 changes: 6 additions & 6 deletions benchmarks/benchmarkOuterProduct.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,12 @@ void pointerRAJA( benchmark::State & state )
INDEX_TYPE const SERIAL_N = (2 << 9) + 73;
INDEX_TYPE const SERIAL_M = (2 << 9) - 71;

#if defined(USE_OPENMP)
#if defined(LVARRAY_USE_OPENMP)
INDEX_TYPE const OMP_N = (2 << 9) + 73;
INDEX_TYPE const OMP_M = (2 << 9) - 71;
#endif

#if defined(USE_CUDA) && defined(USE_CHAI)
#if defined(LVARRAY_USE_CUDA) && defined(LVARRAY_USE_CHAI)
INDEX_TYPE const CUDA_N = (2 << 9) + 73;
INDEX_TYPE const CUDA_M = (2 << 9) - 71;
#endif
Expand Down Expand Up @@ -165,11 +165,11 @@ void registerBenchmarks()
},
std::make_tuple( SERIAL_N, SERIAL_M, RAJA::PERM_IJ {}, serialPolicy {} )
, std::make_tuple( SERIAL_N, SERIAL_M, RAJA::PERM_JI {}, serialPolicy {} )
#if defined(USE_OPENMP)
#if defined(LVARRAY_USE_OPENMP)
, std::make_tuple( OMP_N, OMP_M, RAJA::PERM_IJ {}, parallelHostPolicy {} )
, std::make_tuple( OMP_N, OMP_M, RAJA::PERM_JI {}, parallelHostPolicy {} )
#endif
#if defined(USE_CUDA) && defined(USE_CHAI)
#if defined(LVARRAY_USE_CUDA) && defined(LVARRAY_USE_CHAI)
, std::make_tuple( CUDA_N, CUDA_M, RAJA::PERM_IJ {}, parallelDevicePolicy< THREADS_PER_BLOCK > {} )
, std::make_tuple( CUDA_N, CUDA_M, RAJA::PERM_JI {}, parallelDevicePolicy< THREADS_PER_BLOCK > {} )
#endif
Expand All @@ -192,12 +192,12 @@ int main( int argc, char * * argv )
LVARRAY_LOG( "Serial problems of size ( " << LvArray::benchmarking::SERIAL_N << ", " <<
LvArray::benchmarking::SERIAL_M << " )." );

#if defined(USE_OPENMP)
#if defined(LVARRAY_USE_OPENMP)
LVARRAY_LOG( "OMP problems of size ( " << LvArray::benchmarking::OMP_N << ", " <<
LvArray::benchmarking::OMP_M << " )." );
#endif

#if defined(USE_CUDA) && defined(USE_CHAI)
#if defined(LVARRAY_USE_CUDA) && defined(LVARRAY_USE_CHAI)
LVARRAY_LOG( "CUDA problems of size ( " << LvArray::benchmarking::CUDA_N << ", " <<
LvArray::benchmarking::CUDA_M << " )." );
#endif
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/benchmarkOuterProductKernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,12 +170,12 @@ template class OuterProductNative< RAJA::PERM_JI >;
template class OuterProductRAJA< RAJA::PERM_IJ, serialPolicy >;
template class OuterProductRAJA< RAJA::PERM_JI, serialPolicy >;

#if defined(USE_OPENMP)
#if defined(LVARRAY_USE_OPENMP)
template class OuterProductRAJA< RAJA::PERM_IJ, parallelHostPolicy >;
template class OuterProductRAJA< RAJA::PERM_JI, parallelHostPolicy >;
#endif

#if defined(USE_CUDA) && defined(USE_CHAI)
#if defined(LVARRAY_USE_CUDA) && defined(LVARRAY_USE_CHAI)
template class OuterProductRAJA< RAJA::PERM_IJ, RAJA::cuda_exec< THREADS_PER_BLOCK > >;
template class OuterProductRAJA< RAJA::PERM_JI, RAJA::cuda_exec< THREADS_PER_BLOCK > >;
#endif
Expand Down
Loading

0 comments on commit 2bf471c

Please sign in to comment.