Skip to content

Commit

Permalink
Merged in singlePrecChebFilt (pull request #588)
Browse files Browse the repository at this point in the history
Single Precision Algorithm for Chebyshev Filtering

Approved-by: Sambit Das
Approved-by: Phani Motamarri
  • Loading branch information
knikhil1995 authored and phanimotamarri committed Jun 11, 2024
2 parents 48f3a12 + 4a589e9 commit e7930ab
Show file tree
Hide file tree
Showing 48 changed files with 3,405 additions and 781 deletions.
4 changes: 3 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ SET(TARGET_SRC
./utils/FEBasisOperations.cc
./utils/FEBasisOperationsKernels.cc
./src/force/locateAtomCoreNodesForce.cc
./src/atom/AtomicCenteredNonLocalOperator.cc
./src/atom/AtomPseudoWavefunctions.cc
./src/atom/AtomCenteredSphericalFunctionContainer.cc
./src/atom/AtomCenteredSphericalFunctionBase.cc
Expand All @@ -172,7 +173,8 @@ SET(TARGET_SRC
./src/atom/AtomCenteredSphericalFunctionValenceDensitySpline.cc
./src/atom/AtomCenteredSphericalFunctionCoreDensitySpline.cc
./src/atom/AtomCenteredSphericalFunctionLocalPotentialSpline.cc
./src/atom/AtomCenteredSphericalFunctionProjectorSpline.cc)
./src/atom/AtomCenteredSphericalFunctionProjectorSpline.cc
./src/pseudo/oncv/oncvClass.cc)

IF ("${GPU_LANG}" STREQUAL "cuda")

Expand Down
29 changes: 23 additions & 6 deletions doc/manual/parameters.tex
Original file line number Diff line number Diff line change
Expand Up @@ -2872,20 +2872,20 @@ \subsection{Parameters in section \tt SCF parameters/Eigen-solver parameters}


{\it Possible values:} A boolean value (true or false)
\item {\it Parameter name:} {\tt USE MIXED PREC CHEBY}
\phantomsection\label{parameters:SCF parameters/Eigen_2dsolver parameters/USE MIXED PREC CHEBY}
\label{parameters:SCF_20parameters/Eigen_2dsolver_20parameters/USE_20MIXED_20PREC_20CHEBY}
\item {\it Parameter name:} {\tt USE SINGLE PREC COMMUN CHEBY}
\phantomsection\label{parameters:SCF parameters/Eigen_2dsolver parameters/USE SINGLE PREC COMMUN CHEBY}
\label{parameters:SCF_20parameters/Eigen_2dsolver_20parameters/USE_20SINGLE_20PREC_20COMMUN_20CHEBY}


\index[prmindex]{USE MIXED PREC CHEBY}
\index[prmindexfull]{SCF parameters!Eigen-solver parameters!USE MIXED PREC CHEBY}
\index[prmindex]{USE SINGLE PREC COMMUN CHEBY}
\index[prmindexfull]{SCF parameters!Eigen-solver parameters!USE SINGLE PREC COMMUN CHEBY}
{\it Value:} false


{\it Default:} false


{\it Description:} [Advanced] Use mixed precision arithmetic in Chebyshev filtering. Currently this option is only available for real executable and USE ELPA=true for which DFT-FE also has to be linked to ELPA library. Default setting is false.
{\it Description:} [Advanced] Use single precision communication in Chebyshev filtering. Default setting is false.


{\it Possible values:} A boolean value (true or false)
Expand All @@ -2906,6 +2906,23 @@ \subsection{Parameters in section \tt SCF parameters/Eigen-solver parameters}


{\it Possible values:} A boolean value (true or false)
\item {\it Parameter name:} {\tt USE SINGLE PREC CHEBY}
\phantomsection\label{parameters:SCF parameters/Eigen_2dsolver parameters/USE SINGLE PREC CHEBY}
\label{parameters:SCF_20parameters/Eigen_2dsolver_20parameters/USE_20SINGLE_20PREC_20CHEBY}


\index[prmindex]{USE SINGLE PREC CHEBY}
\index[prmindexfull]{SCF parameters!Eigen-solver parameters!USE SINGLE PREC CHEBY}
{\it Value:} false


{\it Default:} false


{\it Description:} [Advanced] Use a modified single precision algorithm for Chebyshev filtering. This cannot be used in conjunction with spectrum splitting. Default setting is false.

{\it Possible values:} A boolean value (true or false)

\item {\it Parameter name:} {\tt USE MIXED PREC RR\_SR}
\phantomsection\label{parameters:SCF parameters/Eigen_2dsolver parameters/USE MIXED PREC RR_5fSR}
\label{parameters:SCF_20parameters/Eigen_2dsolver_20parameters/USE_20MIXED_20PREC_20RR_5fSR}
Expand Down
21 changes: 10 additions & 11 deletions include/AtomicCenteredNonLocalOperator.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ namespace dftfe
std::shared_ptr<dftfe::linearAlgebra::BLASWrapper<memorySpace>>
BLASWrapperPtr,
std::shared_ptr<
dftfe::basis::FEBasisOperations<ValueType, double, memorySpace>>
dftfe::basis::FEBasisOperations<dataTypes::number, double, memorySpace>>
basisOperatorPtr,
std::shared_ptr<AtomCenteredSphericalFunctionContainer>
atomCenteredSphericalFunctionContainer,
Expand Down Expand Up @@ -102,8 +102,9 @@ namespace dftfe
const std::vector<double> &kPointWeights,
const std::vector<double> &kPointCoordinates,
std::shared_ptr<
dftfe::basis::
FEBasisOperations<ValueType, double, dftfe::utils::MemorySpace::HOST>>
dftfe::basis::FEBasisOperations<dataTypes::number,
double,
dftfe::utils::MemorySpace::HOST>>
basisOperationsPtr,
const unsigned int quadratureIndex);
#if defined(DFTFE_WITH_DEVICE)
Expand Down Expand Up @@ -313,7 +314,7 @@ namespace dftfe
std::vector<unsigned int> d_numberCellsForEachAtom;

std::shared_ptr<
dftfe::basis::FEBasisOperations<ValueType, double, memorySpace>>
dftfe::basis::FEBasisOperations<dataTypes::number, double, memorySpace>>
d_basisOperatorPtr;


Expand Down Expand Up @@ -419,11 +420,11 @@ namespace dftfe
*/
void
computeCMatrixEntries(
std::shared_ptr<
dftfe::basis::
FEBasisOperations<ValueType, double, dftfe::utils::MemorySpace::HOST>>
basisOperationsPtr,
const unsigned int quadratureIndex);
std::shared_ptr<dftfe::basis::FEBasisOperations<
dataTypes::number,
double,
dftfe::utils::MemorySpace::HOST>> basisOperationsPtr,
const unsigned int quadratureIndex);

std::map<
unsigned int,
Expand Down Expand Up @@ -477,6 +478,4 @@ namespace dftfe


} // namespace dftfe
#include "../src/atom/AtomicCenteredNonLocalOperator.t.cc"

#endif // DFTFE_ATOMICCENTEREDNONLOCALOPERATOR_H
36 changes: 32 additions & 4 deletions include/BLASWrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,19 @@ namespace dftfe
const ValueType1 * x,
const ValueType2 beta,
ValueType1 * y) const;
/**
* @brief Kernel taking a scalar alpha, three read-only arrays (A, B, D)
* and one writable array (C); every operand has its own value type so
* that callers may mix precisions.
* NOTE(review): the name suggests C = A + alpha * B * D, but neither the
* formula nor the layout implied by m and n is visible in this header --
* confirm against the implementation.
* @param[in] m first size argument (meaning not shown here -- TODO confirm).
* @param[in] n second size argument (meaning not shown here -- TODO confirm).
* @param[in] alpha scalar factor.
* @param[in] A read-only input array.
* @param[in] B read-only input array.
* @param[in] D read-only input array.
* @param[out] C array written through a non-const pointer.
*/
template <typename ValueType0,
typename ValueType1,
typename ValueType2,
typename ValueType3,
typename ValueType4>
void
ApaBD(const unsigned int m,
const unsigned int n,
const ValueType0 alpha,
const ValueType1 * A,
const ValueType2 * B,
const ValueType3 * D,
ValueType4 * C) const;

template <typename ValueType>
void
Expand All @@ -513,14 +526,14 @@ namespace dftfe
const dftfe::global_size_type
*addToVecStartingContiguousBlockIds) const;

template <typename ValueType1, typename ValueType2>
template <typename ValueType1, typename ValueType2, typename ValueType3>
void
axpyStridedBlockAtomicAdd(const dftfe::size_type contiguousBlockSize,
const dftfe::size_type numContiguousBlocks,
const ValueType1 a,
const ValueType1 * s,
const ValueType2 * addFromVec,
ValueType2 * addToVec,
ValueType3 * addToVec,
const dftfe::global_size_type
*addToVecStartingContiguousBlockIds) const;

Expand Down Expand Up @@ -1030,6 +1043,21 @@ namespace dftfe
const ValueType2 beta,
ValueType1 * y) const;

/**
* @brief Kernel taking a scalar alpha, three read-only arrays (A, B, D)
* and one writable array (C); every operand has its own value type so
* that callers may mix precisions.
* NOTE(review): the name suggests C = A + alpha * B * D, but neither the
* formula nor the layout implied by m and n is visible in this header --
* confirm against the implementation.
* @param[in] m first size argument (meaning not shown here -- TODO confirm).
* @param[in] n second size argument (meaning not shown here -- TODO confirm).
* @param[in] alpha scalar factor.
* @param[in] A read-only input array.
* @param[in] B read-only input array.
* @param[in] D read-only input array.
* @param[out] C array written through a non-const pointer.
*/
template <typename ValueType0,
typename ValueType1,
typename ValueType2,
typename ValueType3,
typename ValueType4>
void
ApaBD(const unsigned int m,
const unsigned int n,
const ValueType0 alpha,
const ValueType1 * A,
const ValueType2 * B,
const ValueType3 * D,
ValueType4 * C) const;


template <typename ValueType>
void
axpyStridedBlockAtomicAdd(const dftfe::size_type contiguousBlockSize,
Expand All @@ -1039,14 +1067,14 @@ namespace dftfe
const dftfe::global_size_type
*addToVecStartingContiguousBlockIds) const;

template <typename ValueType1, typename ValueType2>
template <typename ValueType1, typename ValueType2, typename ValueType3>
void
axpyStridedBlockAtomicAdd(const dftfe::size_type contiguousBlockSize,
const dftfe::size_type numContiguousBlocks,
const ValueType1 a,
const ValueType1 * s,
const ValueType2 * addFromVec,
ValueType2 * addToVec,
ValueType3 * addToVec,
const dftfe::global_size_type
*addToVecStartingContiguousBlockIds) const;

Expand Down
30 changes: 30 additions & 0 deletions include/FEBasisOperations.h
Original file line number Diff line number Diff line change
Expand Up @@ -708,6 +708,17 @@ namespace dftfe
createScratchMultiVectors(const unsigned int vecBlockSize,
const unsigned int numMultiVecs = 1) const;

/**
* @brief Creates single precision scratch multivectors.
* @param[in] vecBlockSize Number of vectors in the multivector.
* @param[in] numMultiVecs number of scratch multivectors needed with
* this vecBlockSize (defaults to 1).
*/
void
createScratchMultiVectorsSinglePrec(
const unsigned int vecBlockSize,
const unsigned int numMultiVecs = 1) const;

/**
* @brief Clears scratch multivectors.
*/
Expand All @@ -724,6 +735,18 @@ namespace dftfe
getMultiVector(const unsigned int vecBlockSize,
const unsigned int index = 0) const;

/**
* @brief Gets single precision scratch multivectors.
* @param[in] vecBlockSize Number of vectors in the multivector.
* @param[in] index index of the multivector among those with the
* same vecBlockSize (defaults to 0).
* @return Reference to a multivector whose value type is
* dataTypes::singlePrecType<ValueTypeBasisCoeff>::type.
*/
dftfe::linearAlgebra::MultiVector<
typename dftfe::dataTypes::singlePrecType<ValueTypeBasisCoeff>::type,
memorySpace> &
getMultiVectorSinglePrec(const unsigned int vecBlockSize,
const unsigned int index = 0) const;

/**
* @brief Apply constraints on given multivector.
* @param[inout] multiVector the given multivector.
Expand Down Expand Up @@ -853,6 +876,13 @@ namespace dftfe
dftfe::linearAlgebra::MultiVector<ValueTypeBasisCoeff, memorySpace>>>
scratchMultiVectors;

mutable std::map<
unsigned int,
std::vector<dftfe::linearAlgebra::MultiVector<
typename dftfe::dataTypes::singlePrecType<ValueTypeBasisCoeff>::type,
memorySpace>>>
scratchMultiVectorsSinglePrec;

std::vector<unsigned int> d_quadratureIDsVector;
unsigned int d_quadratureID;
unsigned int d_quadratureIndex;
Expand Down
33 changes: 32 additions & 1 deletion include/KohnShamHamiltonianOperator.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,11 @@ namespace dftfe
const unsigned int index);


dftfe::linearAlgebra::MultiVector<dataTypes::numberFP32, memorySpace> &
getScratchFEMultivectorSinglePrec(const unsigned int numVectors,
const unsigned int index);


/**
* @brief Computes effective potential involving exchange-correlation functionals
* @param rhoValues electron-density
Expand Down Expand Up @@ -168,6 +173,19 @@ namespace dftfe
const bool skip2 = false,
const bool skip3 = false);

void
HXCheby(dftfe::linearAlgebra::MultiVector<dataTypes::numberFP32,
memorySpace> &src,
const double scalarHX,
const double scalarY,
const double scalarX,
dftfe::linearAlgebra::MultiVector<dataTypes::numberFP32,
memorySpace> &dst,
const bool onlyHPrimePartForFirstOrderDensityMatResponse,
const bool skip1,
const bool skip2,
const bool skip3);

void
HXRR(
dftfe::linearAlgebra::MultiVector<dataTypes::number, memorySpace> &src,
Expand All @@ -180,6 +198,10 @@ namespace dftfe
AtomicCenteredNonLocalOperator<dataTypes::number, memorySpace>>
d_ONCVnonLocalOperator;

std::shared_ptr<
AtomicCenteredNonLocalOperator<dataTypes::numberFP32, memorySpace>>
d_ONCVnonLocalOperatorSinglePrec;

std::shared_ptr<dftfe::linearAlgebra::BLASWrapper<memorySpace>>
d_BLASWrapperPtr;
std::shared_ptr<
Expand All @@ -197,6 +219,8 @@ namespace dftfe

std::vector<dftfe::utils::MemoryStorage<dataTypes::number, memorySpace>>
d_cellHamiltonianMatrix;
std::vector<dftfe::utils::MemoryStorage<dataTypes::numberFP32, memorySpace>>
d_cellHamiltonianMatrixSinglePrec;
dftfe::utils::MemoryStorage<double, memorySpace>
d_cellHamiltonianMatrixExtPot;

Expand All @@ -206,8 +230,15 @@ namespace dftfe
dftfe::utils::MemoryStorage<dataTypes::number, memorySpace>
d_cellWaveFunctionMatrixDst;

dftfe::utils::MemoryStorage<dataTypes::numberFP32, memorySpace>
d_cellWaveFunctionMatrixSrcSinglePrec;
dftfe::utils::MemoryStorage<dataTypes::numberFP32, memorySpace>
d_cellWaveFunctionMatrixDstSinglePrec;

dftfe::linearAlgebra::MultiVector<dataTypes::number, memorySpace>
d_ONCVNonLocalProjectorTimesVectorBlock;
d_ONCVNonLocalProjectorTimesVectorBlock;
dftfe::linearAlgebra::MultiVector<dataTypes::numberFP32, memorySpace>
d_ONCVNonLocalProjectorTimesVectorBlockSinglePrec;
dftfe::utils::MemoryStorage<double, memorySpace> d_VeffJxW;
dftfe::utils::MemoryStorage<double, memorySpace> d_VeffExtPotJxW;

Expand Down
34 changes: 12 additions & 22 deletions include/MPICommunicatorP2P.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,22 +58,6 @@ namespace dftfe
};


/**
 * @brief Type trait mapping a value type to its single precision
 * counterpart.
 *
 * The primary template is the identity mapping. The explicit
 * specializations below send double to float and std::complex<double>
 * to std::complex<float>.
 */
template <class ValueType>
struct singlePrecType
{
  using type = ValueType;
};

/// double maps to float.
template <>
struct singlePrecType<double>
{
  using type = float;
};

/// std::complex<double> maps to std::complex<float>.
template <>
struct singlePrecType<std::complex<double>>
{
  using type = std::complex<float>;
};

template <typename ValueType, MemorySpace memorySpace>
class MPICommunicatorP2P
{
Expand Down Expand Up @@ -136,10 +120,14 @@ namespace dftfe

MemoryStorage<float, memorySpace> d_tempFloatImagArrayForAtomics;

MemoryStorage<typename singlePrecType<ValueType>::type, memorySpace>
MemoryStorage<
typename dftfe::dataTypes::singlePrecType<ValueType>::type,
memorySpace>
d_sendRecvBufferSinglePrec;

MemoryStorage<typename singlePrecType<ValueType>::type, memorySpace>
MemoryStorage<
typename dftfe::dataTypes::singlePrecType<ValueType>::type,
memorySpace>
d_ghostDataCopySinglePrec;

#ifdef DFTFE_WITH_DEVICE
Expand All @@ -149,12 +137,14 @@ namespace dftfe
std::shared_ptr<MemoryStorage<ValueType, MemorySpace::HOST_PINNED>>
d_sendRecvBufferHostPinnedPtr;

std::shared_ptr<MemoryStorage<typename singlePrecType<ValueType>::type,
MemorySpace::HOST_PINNED>>
std::shared_ptr<MemoryStorage<
typename dftfe::dataTypes::singlePrecType<ValueType>::type,
MemorySpace::HOST_PINNED>>
d_ghostDataCopySinglePrecHostPinnedPtr;

std::shared_ptr<MemoryStorage<typename singlePrecType<ValueType>::type,
MemorySpace::HOST_PINNED>>
std::shared_ptr<MemoryStorage<
typename dftfe::dataTypes::singlePrecType<ValueType>::type,
MemorySpace::HOST_PINNED>>
d_sendRecvBufferSinglePrecHostPinnedPtr;
#endif // DFTFE_WITH_DEVICE

Expand Down
Loading

0 comments on commit e7930ab

Please sign in to comment.