Skip to content

Commit

Permalink
Merged in bugFixBLASWrappers (pull request #571)
Browse files Browse the repository at this point in the history
BLASWrapper bug fixes

Approved-by: Sambit Das
  • Loading branch information
phanimotamarri committed Mar 11, 2024
2 parents c16ac5b + 43061d7 commit c5fb5c7
Show file tree
Hide file tree
Showing 16 changed files with 1,291 additions and 71 deletions.
105 changes: 67 additions & 38 deletions include/BLASWrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,17 @@ namespace dftfe
const unsigned int INCY,
std::complex<double> * result) const;

// Complex dot product with MPI all-reduce call
void
xdot(const unsigned int N,
const std::complex<double> *X,
const unsigned int INCX,
const std::complex<double> *Y,
const unsigned int INCY,
const MPI_Comm & mpi_communicator,
std::complex<double> * result) const;


// Real double Ax+y
void
xaxpy(const unsigned int n,
Expand All @@ -233,14 +244,6 @@ namespace dftfe
double * y,
const unsigned int incy) const;

// Real copy of double data to float
void
xcopy(const unsigned int n,
double * x,
const unsigned int incx,
float * y,
const unsigned int incy) const;

// Complex double copy of data
void
xcopy(const unsigned int n,
Expand All @@ -265,13 +268,6 @@ namespace dftfe
std::complex<float> * y,
const unsigned int incy) const;

void
xcopy(const unsigned int n,
std::complex<double> *x,
const unsigned int incx,
std::complex<float> * y,
const unsigned int incy) const;

// Real double symmetric matrix-vector product
void
xsymv(const char UPLO,
Expand Down Expand Up @@ -500,6 +496,14 @@ namespace dftfe
const ValueType1 * copyFromVec,
ValueType2 * copyToVec);

template <typename ValueType1, typename ValueType2>
void
axpby(const unsigned int n,
const ValueType2 alpha,
const ValueType1 * x,
const ValueType2 beta,
ValueType1 * y) const;

template <typename ValueType>
void
axpyStridedBlockAtomicAdd(const dftfe::size_type contiguousBlockSize,
Expand All @@ -509,13 +513,14 @@ namespace dftfe
const dftfe::global_size_type
*addToVecStartingContiguousBlockIds) const;

template <typename ValueType>
template <typename ValueType1, typename ValueType2>
void
axpyStridedBlockAtomicAdd(const dftfe::size_type contiguousBlockSize,
const dftfe::size_type numContiguousBlocks,
const ValueType * addFromVec,
double * addToVecReal,
double * addToVecImag,
const ValueType1 a,
const ValueType1 * s,
const ValueType2 * addFromVec,
ValueType2 * addToVec,
const dftfe::global_size_type
*addToVecStartingContiguousBlockIds) const;

Expand All @@ -527,6 +532,17 @@ namespace dftfe
const ValueType1 * s,
ValueType2 * x);

template <typename ValueType1, typename ValueType2>
void
stridedBlockScaleCopy(
const dftfe::size_type contiguousBlockSize,
const dftfe::size_type numContiguousBlocks,
const ValueType1 a,
const ValueType1 * s,
const ValueType2 * copyFromVec,
ValueType2 * copyToVecBlock,
const dftfe::global_size_type *copyFromVecStartingContiguousBlockIds);

void
add(double * y,
const double * x,
Expand Down Expand Up @@ -721,6 +737,16 @@ namespace dftfe
const unsigned int INCY,
std::complex<double> * result) const;

// Complex dot product with MPI all-reduce call
void
xdot(const unsigned int N,
const std::complex<double> *X,
const unsigned int INCX,
const std::complex<double> *Y,
const unsigned int INCY,
const MPI_Comm & mpi_communicator,
std::complex<double> * result) const;

// Real double Ax+y
void
xaxpy(const unsigned int n,
Expand All @@ -747,14 +773,6 @@ namespace dftfe
double * y,
const unsigned int incy) const;

// Real copy of double data
void
xcopy(const unsigned int n,
double * x,
const unsigned int incx,
float * y,
const unsigned int incy) const;

// Complex double copy of data
void
xcopy(const unsigned int n,
Expand All @@ -779,13 +797,6 @@ namespace dftfe
std::complex<float> * y,
const unsigned int incy) const;

void
xcopy(const unsigned int n,
std::complex<double> *x,
const unsigned int incx,
std::complex<float> * y,
const unsigned int incy) const;

// Real double symmetric matrix-vector product
void
xsymv(const char UPLO,
Expand Down Expand Up @@ -1011,6 +1022,13 @@ namespace dftfe
const dftfe::size_type startingId,
const ValueType1 * copyFromVec,
ValueType2 * copyToVec);
template <typename ValueType1, typename ValueType2>
void
axpby(const unsigned int n,
const ValueType2 alpha,
const ValueType1 * x,
const ValueType2 beta,
ValueType1 * y) const;

template <typename ValueType>
void
Expand All @@ -1021,13 +1039,14 @@ namespace dftfe
const dftfe::global_size_type
*addToVecStartingContiguousBlockIds) const;

template <typename ValueType>
template <typename ValueType1, typename ValueType2>
void
axpyStridedBlockAtomicAdd(const dftfe::size_type contiguousBlockSize,
const dftfe::size_type numContiguousBlocks,
const ValueType * addFromVec,
double * addToVecReal,
double * addToVecImag,
const ValueType1 a,
const ValueType1 * s,
const ValueType2 * addFromVec,
ValueType2 * addToVec,
const dftfe::global_size_type
*addToVecStartingContiguousBlockIds) const;

Expand All @@ -1038,6 +1057,16 @@ namespace dftfe
const ValueType1 a,
const ValueType1 * s,
ValueType2 * x);
template <typename ValueType1, typename ValueType2>
void
stridedBlockScaleCopy(
const dftfe::size_type contiguousBlockSize,
const dftfe::size_type numContiguousBlocks,
const ValueType1 a,
const ValueType1 * s,
const ValueType2 * copyFromVec,
ValueType2 * copyToVecBlock,
const dftfe::global_size_type *copyFromVecStartingContiguousBlockIds);

void
add(double * y,
Expand Down
103 changes: 103 additions & 0 deletions testsGPU/pseudopotential/real/accuracyBenchmarks/outputMg2x_15
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
number of atoms: 31
number of atoms types: 1
Z:12
=============================================================================================================================
number of electrons: 310
number of eigen values: 180
=============================================================================================================================
-----------Simulation Domain bounding vectors (lattice vectors in fully periodic case)-------------
v1 : 1.176399999999999935e+01 0.000000000000000000e+00 0.000000000000000000e+00
v2 : 0.000000000000000000e+00 1.917200000000000060e+01 0.000000000000000000e+00
v3 : 0.000000000000000000e+00 0.000000000000000000e+00 2.037584570023999930e+01
-----------------------------------------------------------------------------------------
-----Fractional coordinates of atoms------
AtomId 0: 2.500000000000000000e-01 2.500000000000000000e-01 4.166666666669999741e-01
AtomId 1: 2.500000000000000000e-01 0.000000000000000000e+00 2.500000000000000000e-01
AtomId 2: 0.000000000000000000e+00 2.500000000000000000e-01 1.666666666670000019e-01
AtomId 3: 0.000000000000000000e+00 0.000000000000000000e+00 5.000000000000000000e-01
AtomId 4: 2.500000000000000000e-01 2.500000000000000000e-01 9.166666666670000296e-01
AtomId 5: 2.500000000000000000e-01 0.000000000000000000e+00 7.500000000000000000e-01
AtomId 6: 0.000000000000000000e+00 2.500000000000000000e-01 6.666666666670000296e-01
AtomId 7: 0.000000000000000000e+00 5.000000000000000000e-01 0.000000000000000000e+00
AtomId 8: 2.500000000000000000e-01 7.500000000000000000e-01 4.166666666669999741e-01
AtomId 9: 2.500000000000000000e-01 5.000000000000000000e-01 2.500000000000000000e-01
AtomId 10: 0.000000000000000000e+00 7.500000000000000000e-01 1.666666666670000019e-01
AtomId 11: 0.000000000000000000e+00 5.000000000000000000e-01 5.000000000000000000e-01
AtomId 12: 2.500000000000000000e-01 7.500000000000000000e-01 9.166666666670000296e-01
AtomId 13: 2.500000000000000000e-01 5.000000000000000000e-01 7.500000000000000000e-01
AtomId 14: 0.000000000000000000e+00 7.500000000000000000e-01 6.666666666670000296e-01
AtomId 15: 5.000000000000000000e-01 0.000000000000000000e+00 0.000000000000000000e+00
AtomId 16: 7.500000000000000000e-01 2.500000000000000000e-01 4.166666666669999741e-01
AtomId 17: 7.500000000000000000e-01 0.000000000000000000e+00 2.500000000000000000e-01
AtomId 18: 5.000000000000000000e-01 2.500000000000000000e-01 1.666666666670000019e-01
AtomId 19: 5.000000000000000000e-01 0.000000000000000000e+00 5.000000000000000000e-01
AtomId 20: 7.500000000000000000e-01 2.500000000000000000e-01 9.166666666670000296e-01
AtomId 21: 7.500000000000000000e-01 0.000000000000000000e+00 7.500000000000000000e-01
AtomId 22: 5.000000000000000000e-01 2.500000000000000000e-01 6.666666666670000296e-01
AtomId 23: 5.000000000000000000e-01 5.000000000000000000e-01 0.000000000000000000e+00
AtomId 24: 7.500000000000000000e-01 7.500000000000000000e-01 4.166666666669999741e-01
AtomId 25: 7.500000000000000000e-01 5.000000000000000000e-01 2.500000000000000000e-01
AtomId 26: 5.000000000000000000e-01 7.500000000000000000e-01 1.666666666670000019e-01
AtomId 27: 5.000000000000000000e-01 5.000000000000000000e-01 5.000000000000000000e-01
AtomId 28: 7.500000000000000000e-01 7.500000000000000000e-01 9.166666666670000296e-01
AtomId 29: 7.500000000000000000e-01 5.000000000000000000e-01 7.500000000000000000e-01
AtomId 30: 5.000000000000000000e-01 7.500000000000000000e-01 6.666666666670000296e-01
-----------------------------------------------------------------------------------------
Number Image Charges 2094

Finite element mesh information
-------------------------------------------------
FE interpolating polynomial order for Kohn-Sham eigenvalue problem: 3
FE interpolating polynomial order for electrostatics solve: 3
FE interpolating polynomial order for nodal electron density computation: 5
number of elements: 1440
number of degrees of freedom for the Kohn-Sham eigenvalue problem : 52791
-------------------------------------------------

Setting initial guess for wavefunctions....

Reading initial guess for electron-density.....

Pseudopotential initalization....

Starting SCF iterations....
SCF iterations converged to the specified tolerance after: 15 iterations.

Energy computations (Hartree)
-------------------
Total energy: -1673.62180253

Absolute values of ion forces (Hartree/Bohr)
--------------------------------------------------------------------------------------------
AtomId 0: 0.001805,0.001878,0.013126
AtomId 1: 0.002319,0.000000,0.003373
AtomId 2: 0.000233,0.001707,0.140373
AtomId 3: 0.000194,0.000000,0.004429
AtomId 4: 0.001175,0.001333,0.013673
AtomId 5: 0.001964,0.000000,0.000305
AtomId 6: 0.000233,0.000011,0.138744
AtomId 7: 0.000191,0.000000,0.000954
AtomId 8: 0.001805,0.001877,0.013126
AtomId 9: 0.003732,0.000000,0.001505
AtomId 10: 0.000233,0.001707,0.140373
AtomId 11: 0.000192,0.000000,0.000588
AtomId 12: 0.001175,0.001333,0.013673
AtomId 13: 0.000157,0.000000,0.001220
AtomId 14: 0.000233,0.000011,0.138744
AtomId 15: 0.000004,0.000000,0.001199
AtomId 16: 0.001805,0.001878,0.013126
AtomId 17: 0.002319,0.000000,0.003373
AtomId 18: 0.000000,0.001220,0.140761
AtomId 19: 0.000096,0.000000,0.000014
AtomId 20: 0.001175,0.001333,0.013673
AtomId 21: 0.001964,0.000000,0.000305
AtomId 22: 0.000000,0.002938,0.140140
AtomId 23: 0.000093,0.000000,0.004499
AtomId 24: 0.001805,0.001877,0.013126
AtomId 25: 0.003732,0.000000,0.001505
AtomId 26: 0.000000,0.001220,0.140761
AtomId 27: 0.000000,0.000000,0.000041
AtomId 28: 0.001175,0.001333,0.013673
AtomId 29: 0.000157,0.000000,0.001220
AtomId 30: 0.000000,0.002938,0.140140
--------------------------------------------------------------------------------------------
68 changes: 68 additions & 0 deletions testsGPU/pseudopotential/real/accuracyBenchmarks/outputMg2x_16
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
number of atoms: 31
number of atoms types: 1
Z:12
=============================================================================================================================
number of electrons: 310
number of eigen values: 180
=============================================================================================================================
-----------Simulation Domain bounding vectors (lattice vectors in fully periodic case)-------------
v1 : 1.176399999999999935e+01 0.000000000000000000e+00 0.000000000000000000e+00
v2 : 0.000000000000000000e+00 1.917200000000000060e+01 0.000000000000000000e+00
v3 : 0.000000000000000000e+00 0.000000000000000000e+00 2.037584570023999930e+01
-----------------------------------------------------------------------------------------
-----Fractional coordinates of atoms------
AtomId 0: 2.500000000000000000e-01 2.500000000000000000e-01 4.166666666669999741e-01
AtomId 1: 2.500000000000000000e-01 0.000000000000000000e+00 2.500000000000000000e-01
AtomId 2: 0.000000000000000000e+00 2.500000000000000000e-01 1.666666666670000019e-01
AtomId 3: 0.000000000000000000e+00 0.000000000000000000e+00 5.000000000000000000e-01
AtomId 4: 2.500000000000000000e-01 2.500000000000000000e-01 9.166666666670000296e-01
AtomId 5: 2.500000000000000000e-01 0.000000000000000000e+00 7.500000000000000000e-01
AtomId 6: 0.000000000000000000e+00 2.500000000000000000e-01 6.666666666670000296e-01
AtomId 7: 0.000000000000000000e+00 5.000000000000000000e-01 0.000000000000000000e+00
AtomId 8: 2.500000000000000000e-01 7.500000000000000000e-01 4.166666666669999741e-01
AtomId 9: 2.500000000000000000e-01 5.000000000000000000e-01 2.500000000000000000e-01
AtomId 10: 0.000000000000000000e+00 7.500000000000000000e-01 1.666666666670000019e-01
AtomId 11: 0.000000000000000000e+00 5.000000000000000000e-01 5.000000000000000000e-01
AtomId 12: 2.500000000000000000e-01 7.500000000000000000e-01 9.166666666670000296e-01
AtomId 13: 2.500000000000000000e-01 5.000000000000000000e-01 7.500000000000000000e-01
AtomId 14: 0.000000000000000000e+00 7.500000000000000000e-01 6.666666666670000296e-01
AtomId 15: 5.000000000000000000e-01 0.000000000000000000e+00 0.000000000000000000e+00
AtomId 16: 7.500000000000000000e-01 2.500000000000000000e-01 4.166666666669999741e-01
AtomId 17: 7.500000000000000000e-01 0.000000000000000000e+00 2.500000000000000000e-01
AtomId 18: 5.000000000000000000e-01 2.500000000000000000e-01 1.666666666670000019e-01
AtomId 19: 5.000000000000000000e-01 0.000000000000000000e+00 5.000000000000000000e-01
AtomId 20: 7.500000000000000000e-01 2.500000000000000000e-01 9.166666666670000296e-01
AtomId 21: 7.500000000000000000e-01 0.000000000000000000e+00 7.500000000000000000e-01
AtomId 22: 5.000000000000000000e-01 2.500000000000000000e-01 6.666666666670000296e-01
AtomId 23: 5.000000000000000000e-01 5.000000000000000000e-01 0.000000000000000000e+00
AtomId 24: 7.500000000000000000e-01 7.500000000000000000e-01 4.166666666669999741e-01
AtomId 25: 7.500000000000000000e-01 5.000000000000000000e-01 2.500000000000000000e-01
AtomId 26: 5.000000000000000000e-01 7.500000000000000000e-01 1.666666666670000019e-01
AtomId 27: 5.000000000000000000e-01 5.000000000000000000e-01 5.000000000000000000e-01
AtomId 28: 7.500000000000000000e-01 7.500000000000000000e-01 9.166666666670000296e-01
AtomId 29: 7.500000000000000000e-01 5.000000000000000000e-01 7.500000000000000000e-01
AtomId 30: 5.000000000000000000e-01 7.500000000000000000e-01 6.666666666670000296e-01
-----------------------------------------------------------------------------------------
Number Image Charges 2094

Finite element mesh information
-------------------------------------------------
FE interpolating polynomial order for Kohn-Sham eigenvalue problem: 3
FE interpolating polynomial order for electrostatics solve: 3
FE interpolating polynomial order for nodal electron density computation: 5
number of elements: 1440
number of degrees of freedom for the Kohn-Sham eigenvalue problem : 52791
-------------------------------------------------

Setting initial guess for wavefunctions....

Reading initial guess for electron-density.....

Pseudopotential initalization....

Starting SCF iterations....
SCF iterations converged to the specified tolerance after: 26 iterations.

Energy computations (Hartree)
-------------------
Total energy: -1666.902177
Loading

0 comments on commit c5fb5c7

Please sign in to comment.