diff --git a/doc/manual/parameters.tex b/doc/manual/parameters.tex index e2b7526af..bd4f02624 100644 --- a/doc/manual/parameters.tex +++ b/doc/manual/parameters.tex @@ -2285,6 +2285,23 @@ \subsection{Parameters in section \tt SCF parameters} {\it Description:} [Advanced] Boolean parameter specifying whether to compute the total energy at the end of every SCF. Setting it to false can lead to some computational time savings. Default value is false but is internally set to true if VERBOSITY==5 +{\it Possible values:} A boolean value (true or false) +\item {\it Parameter name:} {\tt USE ENERGY RESIDUAL METRIC} +\phantomsection\label{parameters:SCF parameters/USE ENERGY RESIDUAL METRIC} +\label{parameters:SCF_20parameters/USE_20ENERGY_20RESIDUAL_20METRIC} + + +\index[prmindex]{USE ENERGY RESIDUAL METRIC} +\index[prmindexfull]{SCF parameters!USE ENERGY RESIDUAL METRIC} +{\it Value:} false + + +{\it Default:} false + + +{\it Description:} [Advanced] Boolean parameter specifying whether to use the energy residual metric (equation 7.23 of Richard Matrin second edition) for convergence check. Setting it to false can lead to some computational time savings. Default value is false + + {\it Possible values:} A boolean value (true or false) \item {\it Parameter name:} {\tt CONSTRAINT MAGNETIZATION} \phantomsection\label{parameters:SCF parameters/CONSTRAINT MAGNETIZATION} diff --git a/include/dft.h b/include/dft.h index 15098fef3..0b2b75577 100644 --- a/include/dft.h +++ b/include/dft.h @@ -1437,7 +1437,9 @@ namespace dftfe // std::map> d_phiInValues, // d_phiOutValues; dftfe::utils::MemoryStorage - d_phiInQuadValues, d_phiOutQuadValues; + d_phiInQuadValues, d_phiOutQuadValues; + dftfe::utils::MemoryStorage + d_gradPhiInQuadValues, d_gradPhiOutQuadValues, d_gradPhiResQuadValues; MixingScheme d_mixingScheme; distributedCPUVec d_rhoInNodalValuesRead, d_rhoOutNodalValuesSplit, diff --git a/include/dftParameters.h b/include/dftParameters.h index 7c0d17af0..9774a0dc8 100644 --- a/include/dftParameters.h +++ b/include/dftParameters.h @@ -50,7 +50,8 @@ namespace dftfe double radiusAtomBall, mixingParameter, spinMixingEnhancementFactor; bool adaptAndersonMixingParameter; double absLinearSolverTolerance, selfConsistentSolverTolerance, TVal, - start_magnetization, absLinearSolverToleranceHelmholtz; + selfConsistentSolverEnergyTolerance, start_magnetization, + absLinearSolverToleranceHelmholtz; bool isPseudopotential, periodicX, periodicY, periodicZ, useSymm, timeReversal, pseudoTestsFlag, constraintMagnetization, writeDosFile, @@ -114,6 +115,7 @@ namespace dftfe unsigned int subspaceRotDofsBlockSize; unsigned int nbandGrps; bool computeEnergyEverySCF; + bool useEnergyResidualTolerance; unsigned int scalapackParalProcs; unsigned int scalapackBlockSize; unsigned int natoms; diff --git a/include/energyCalculator.h b/include/energyCalculator.h index ba13a4a4b..5d104fa05 100644 --- a/include/energyCalculator.h +++ b/include/energyCalculator.h @@ -147,6 +147,50 @@ namespace dftfe const bool print, const bool smearedNuclearCharges = false); + double + computeEnergyResidual( + const std::shared_ptr< + dftfe::basis::FEBasisOperations> + &basisOperationsPtr, + const std::shared_ptr< + dftfe::basis:: + FEBasisOperations> + & basisOperationsPtrElectro, + const unsigned int densityQuadratureID, + const unsigned int densityQuadratureIDElectro, + const unsigned int smearedChargeQuadratureIDElectro, + const unsigned int lpspQuadratureIDElectro, + const std::shared_ptr excManagerPtr, + const dftfe::utils::MemoryStorage + &phiTotRhoInValues, + const dftfe::utils::MemoryStorage + & phiTotRhoOutValues, + const distributedCPUVec &phiTotRhoIn, + const distributedCPUVec &phiTotRhoOut, + const std::vector< + dftfe::utils::MemoryStorage> + &densityInValues, + const std::vector< + dftfe::utils::MemoryStorage> + &densityOutValues, + const std::vector< + dftfe::utils::MemoryStorage> + &gradDensityInValues, + const std::vector< + dftfe::utils::MemoryStorage> + & gradDensityOutValues, + const std::map> &rhoCoreValues, + const std::map> &gradRhoCoreValues, + const std::map> &smearedbValues, + const std::map> + & smearedbNonTrivialAtomIds, + const std::vector> &localVselfs, + const std::map + & atomElectrostaticNodeIdToChargeMap, + const bool smearedNuclearCharges); + void computeXCEnergyTermsSpinPolarized( diff --git a/src/dft/dft.cc b/src/dft/dft.cc index 3a20f4067..2e57bea3e 100644 --- a/src/dft/dft.cc +++ b/src/dft/dft.cc @@ -2300,6 +2300,7 @@ namespace dftfe // unsigned int scfIter = 0; double norm = 1.0; + double energyResidual = 1.0; d_rankCurrentLRD = 0; d_relativeErrorJacInvApproxPrevScfLRD = 100.0; // CAUTION: Choosing a looser tolerance might lead to failed tests @@ -2309,8 +2310,7 @@ namespace dftfe pcout << std::endl; if (d_dftParamsPtr->verbosity == 0) pcout << "Starting SCF iterations...." << std::endl; - while ((norm > d_dftParamsPtr->selfConsistentSolverTolerance) && - (scfIter < d_dftParamsPtr->numSCFIterations)) + while (!scfConverged && (scfIter < d_dftParamsPtr->numSCFIterations)) { dealii::Timer local_timer(d_mpiCommParent, true); if (d_dftParamsPtr->verbosity >= 1) @@ -2541,7 +2541,10 @@ namespace dftfe d_phiTotRhoIn = d_phiTotRhoOut; computing_timer.leave_subsection("density mixing"); - if (!(norm > d_dftParamsPtr->selfConsistentSolverTolerance)) + if (!((norm > d_dftParamsPtr->selfConsistentSolverTolerance) || + (d_dftParamsPtr->useEnergyResidualTolerance && + energyResidual > + d_dftParamsPtr->selfConsistentSolverEnergyTolerance))) scfConverged = true; if (d_dftParamsPtr->multipoleBoundaryConditions) @@ -3303,8 +3306,9 @@ namespace dftfe // // phiTot with rhoOut // - if (d_dftParamsPtr->computeEnergyEverySCF && - d_numEigenValuesRR == d_numEigenValues) + if ((d_dftParamsPtr->computeEnergyEverySCF && + d_numEigenValuesRR == d_numEigenValues) || + d_dftParamsPtr->useEnergyResidualTolerance) { if (d_dftParamsPtr->verbosity >= 2) pcout @@ -3406,25 +3410,44 @@ namespace dftfe d_phiTotRhoOut, d_phiOutQuadValues, dummy); - - - // - // impose integral phi equals 0 - // - /* - if(d_dftParamsPtr->periodicX && d_dftParamsPtr->periodicY && - d_dftParamsPtr->periodicZ && !d_dftParamsPtr->pinnedNodeForPBC) - { - if(d_dftParamsPtr->verbosity>=2) - pcout<<"Value of integPhiOut: - "< &quadrature = - matrix_free_data.get_quadrature(d_densityQuadratureId); + } + if (d_dftParamsPtr->useEnergyResidualTolerance) + { + computing_timer.enter_subsection("Energy residual computation"); + energyResidual = energyCalc.computeEnergyResidual( + d_basisOperationsPtrHost, + d_basisOperationsPtrElectroHost, + d_densityQuadratureId, + d_densityQuadratureIdElectro, + d_smearedChargeQuadratureIdElectro, + d_lpspQuadratureIdElectro, + d_excManagerPtr, + d_phiInQuadValues, + d_phiOutQuadValues, + d_phiTotRhoIn, + d_phiTotRhoOut, + d_densityInQuadValues, + d_densityOutQuadValues, + d_gradDensityInQuadValues, + d_gradDensityOutQuadValues, + d_rhoCore, + d_gradRhoCore, + d_bQuadValuesAllAtoms, + d_bCellNonTrivialAtomIds, + d_localVselfs, + d_atomNodeIdToChargeMap, + d_dftParamsPtr->smearedNuclearCharges); + if (d_dftParamsPtr->verbosity >= 1) + pcout << "Energy residual : " << energyResidual << std::endl; + if (d_dftParamsPtr->reproducible_output) + pcout << "Energy residual : " << std::setprecision(4) + << energyResidual << std::endl; + computing_timer.leave_subsection("Energy residual computation"); + } + if (d_dftParamsPtr->computeEnergyEverySCF && + d_numEigenValuesRR == d_numEigenValues) + { d_dispersionCorr.computeDispresionCorrection( atomLocations, d_domainBoundingVectors); const double totalEnergy = energyCalc.computeEnergy( diff --git a/src/dft/energyCalculator.cc b/src/dft/energyCalculator.cc index 6512208fc..df13ae576 100644 --- a/src/dft/energyCalculator.cc +++ b/src/dft/energyCalculator.cc @@ -57,6 +57,37 @@ namespace dftfe } template double + computeFieldTimesDensityResidual( + const std::shared_ptr< + dftfe::basis:: + FEBasisOperations> + & basisOperationsPtr, + const unsigned int quadratureId, + const std::map> &fieldValues, + const dftfe::utils::MemoryStorage + &densityQuadValuesIn, + const dftfe::utils::MemoryStorage + &densityQuadValuesOut) + { + double result = 0.0; + basisOperationsPtr->reinit(0, 0, quadratureId, false); + const unsigned int nQuadsPerCell = basisOperationsPtr->nQuadsPerCell(); + for (unsigned int iCell = 0; iCell < basisOperationsPtr->nCells(); + ++iCell) + { + const std::vector &cellFieldValues = + fieldValues.find(basisOperationsPtr->cellID(iCell))->second; + for (unsigned int iQuad = 0; iQuad < nQuadsPerCell; ++iQuad) + result += + cellFieldValues[iQuad] * + (densityQuadValuesOut[iCell * nQuadsPerCell + iQuad] - + densityQuadValuesIn[iCell * nQuadsPerCell + iQuad]) * + basisOperationsPtr->JxWBasisData()[iCell * nQuadsPerCell + iQuad]; + } + return result; + } + template + double computeFieldTimesDensity( const std::shared_ptr< dftfe::basis:: @@ -82,6 +113,36 @@ namespace dftfe } return result; } + template + double + computeFieldTimesDensityResidual( + const std::shared_ptr< + dftfe::basis:: + FEBasisOperations> + & basisOperationsPtr, + const unsigned int quadratureId, + const dftfe::utils::MemoryStorage + &fieldValues, + const dftfe::utils::MemoryStorage + &densityQuadValuesIn, + const dftfe::utils::MemoryStorage + &densityQuadValuesOut) + { + double result = 0.0; + basisOperationsPtr->reinit(0, 0, quadratureId, false); + const unsigned int nQuadsPerCell = basisOperationsPtr->nQuadsPerCell(); + for (unsigned int iCell = 0; iCell < basisOperationsPtr->nCells(); + ++iCell) + { + for (unsigned int iQuad = 0; iQuad < nQuadsPerCell; ++iQuad) + result += + fieldValues[iCell * nQuadsPerCell + iQuad] * + (densityQuadValuesOut[iCell * nQuadsPerCell + iQuad] - + densityQuadValuesIn[iCell * nQuadsPerCell + iQuad]) * + basisOperationsPtr->JxWBasisData()[iCell * nQuadsPerCell + iQuad]; + } + return result; + } void printEnergy(const double bandEnergy, const double totalkineticEnergy, @@ -374,6 +435,71 @@ namespace dftfe return 0.5 * (phiContribution - vSelfContribution); } + double + nuclearElectrostaticEnergyResidualLocal( + const distributedCPUVec & phiTotRhoIn, + const distributedCPUVec & phiTotRhoOut, + const std::map> &smearedbValues, + const std::map> + & smearedbNonTrivialAtomIds, + const dealii::DoFHandler<3> &dofHandlerElectrostatic, + const dealii::Quadrature<3> &quadratureSmearedCharge, + const std::map + & atomElectrostaticNodeIdToChargeMap, + const bool smearedNuclearCharges = false) + { + double phiContribution = 0.0, vSelfContribution = 0.0; + + if (!smearedNuclearCharges) + { + for (std::map::const_iterator + it = atomElectrostaticNodeIdToChargeMap.begin(); + it != atomElectrostaticNodeIdToChargeMap.end(); + ++it) + phiContribution += + (-it->second) * (phiTotRhoOut(it->first) - + phiTotRhoIn(it->first)); //-charge*potential + } + else + { + distributedCPUVec phiRes; + phiRes = phiTotRhoOut; + phiRes -= phiTotRhoIn; + dealii::FEValues<3> fe_values(dofHandlerElectrostatic.get_fe(), + quadratureSmearedCharge, + dealii::update_values | + dealii::update_JxW_values); + const unsigned int n_q_points = quadratureSmearedCharge.size(); + dealii::DoFHandler<3>::active_cell_iterator + cell = dofHandlerElectrostatic.begin_active(), + endc = dofHandlerElectrostatic.end(); + + for (; cell != endc; ++cell) + if (cell->is_locally_owned()) + { + if ((smearedbNonTrivialAtomIds.find(cell->id())->second) + .size() > 0) + { + const std::vector &bQuadValuesCell = + smearedbValues.find(cell->id())->second; + fe_values.reinit(cell); + + std::vector tempPhiTot(n_q_points); + fe_values.get_function_values(phiRes, tempPhiTot); + + double temp = 0; + for (unsigned int q = 0; q < n_q_points; ++q) + temp += + tempPhiTot[q] * bQuadValuesCell[q] * fe_values.JxW(q); + + phiContribution += temp; + } + } + } + + return 0.5 * (phiContribution); + } + double computeRepulsiveEnergy( @@ -627,6 +753,154 @@ namespace dftfe return totalEnergy; } + // compute energie residual, + // E_KS-E_HWF=\int(V_{in}(\rho_{out}-\rho_{in}))+E_{pot}[\rho_{out}]-E_{pot}[\rho_{in}] + double + energyCalculator::computeEnergyResidual( + const std::shared_ptr< + dftfe::basis::FEBasisOperations> + &basisOperationsPtr, + const std::shared_ptr< + dftfe::basis:: + FEBasisOperations> + & basisOperationsPtrElectro, + const unsigned int densityQuadratureID, + const unsigned int densityQuadratureIDElectro, + const unsigned int smearedChargeQuadratureIDElectro, + const unsigned int lpspQuadratureIDElectro, + const std::shared_ptr excManagerPtr, + const dftfe::utils::MemoryStorage + &phiTotRhoInValues, + const dftfe::utils::MemoryStorage + & phiTotRhoOutValues, + const distributedCPUVec &phiTotRhoIn, + const distributedCPUVec &phiTotRhoOut, + const std::vector< + dftfe::utils::MemoryStorage> + &densityInValues, + const std::vector< + dftfe::utils::MemoryStorage> + &densityOutValues, + const std::vector< + dftfe::utils::MemoryStorage> + &gradDensityInValues, + const std::vector< + dftfe::utils::MemoryStorage> + & gradDensityOutValues, + const std::map> &rhoCoreValues, + const std::map> &gradRhoCoreValues, + const std::map> &smearedbValues, + const std::map> + & smearedbNonTrivialAtomIds, + const std::vector> &localVselfs, + const std::map + & atomElectrostaticNodeIdToChargeMap, + const bool smearedNuclearCharges) + { + const dealii::ConditionalOStream scout( + std::cout, + (dealii::Utilities::MPI::this_mpi_process(mpi_communicator) == 0)); + double excCorrPotentialTimesRho = 0.0, electrostaticPotentialTimesRho = 0.0, + exchangeEnergy = 0.0, correlationEnergy = 0.0, + electrostaticEnergyTotPot = 0.0; + + + electrostaticPotentialTimesRho = + internal::computeFieldTimesDensityResidual(basisOperationsPtr, + densityQuadratureID, + phiTotRhoInValues, + densityInValues[0], + densityOutValues[0]); + electrostaticEnergyTotPot = + 0.5 * (internal::computeFieldTimesDensity(basisOperationsPtrElectro, + densityQuadratureIDElectro, + phiTotRhoOutValues, + densityOutValues[0]) - + internal::computeFieldTimesDensity(basisOperationsPtrElectro, + densityQuadratureIDElectro, + phiTotRhoInValues, + densityInValues[0])); + if (d_dftParams.spinPolarized == 1) + computeXCEnergyTermsSpinPolarized(basisOperationsPtr, + densityQuadratureID, + excManagerPtr, + densityInValues, + densityInValues, + gradDensityInValues, + gradDensityInValues, + rhoCoreValues, + gradRhoCoreValues, + exchangeEnergy, + correlationEnergy, + excCorrPotentialTimesRho); + else + computeXCEnergyTerms(basisOperationsPtr, + densityQuadratureID, + excManagerPtr, + densityInValues, + densityInValues, + gradDensityInValues, + gradDensityInValues, + rhoCoreValues, + gradRhoCoreValues, + exchangeEnergy, + correlationEnergy, + excCorrPotentialTimesRho); + excCorrPotentialTimesRho *= -1.0; + exchangeEnergy *= -1.0; + correlationEnergy *= -1.0; + if (d_dftParams.spinPolarized == 1) + computeXCEnergyTermsSpinPolarized(basisOperationsPtr, + densityQuadratureID, + excManagerPtr, + densityInValues, + densityOutValues, + gradDensityInValues, + gradDensityOutValues, + rhoCoreValues, + gradRhoCoreValues, + exchangeEnergy, + correlationEnergy, + excCorrPotentialTimesRho); + else + computeXCEnergyTerms(basisOperationsPtr, + densityQuadratureID, + excManagerPtr, + densityInValues, + densityOutValues, + gradDensityInValues, + gradDensityOutValues, + rhoCoreValues, + gradRhoCoreValues, + exchangeEnergy, + correlationEnergy, + excCorrPotentialTimesRho); + const double potentialTimesRho = + excCorrPotentialTimesRho + electrostaticPotentialTimesRho; + const double nuclearElectrostaticEnergy = + internal::nuclearElectrostaticEnergyResidualLocal( + phiTotRhoIn, + phiTotRhoOut, + smearedbValues, + smearedbNonTrivialAtomIds, + basisOperationsPtrElectro->getDofHandler(), + basisOperationsPtrElectro->matrixFreeData().get_quadrature( + smearedChargeQuadratureIDElectro), + atomElectrostaticNodeIdToChargeMap, + smearedNuclearCharges); + + double energy = -potentialTimesRho + exchangeEnergy + correlationEnergy + + electrostaticEnergyTotPot + nuclearElectrostaticEnergy; + + + // sum over all processors + double totalEnergy = dealii::Utilities::MPI::sum(energy, mpi_communicator); + + return std::abs(totalEnergy); + } + void energyCalculator::computeXCEnergyTermsSpinPolarized( const std::shared_ptr< diff --git a/src/dft/psiInitialGuess.cc b/src/dft/psiInitialGuess.cc index 171de2205..83489ccba 100644 --- a/src/dft/psiInitialGuess.cc +++ b/src/dft/psiInitialGuess.cc @@ -82,7 +82,7 @@ namespace dftfe DFTFE_PATH, Z, n, - l); + l); } } else diff --git a/src/linAlg/mixingClass.cc b/src/linAlg/mixingClass.cc index 329c77bfe..bd45c0473 100644 --- a/src/linAlg/mixingClass.cc +++ b/src/linAlg/mixingClass.cc @@ -261,19 +261,23 @@ namespace dftfe d_mixingParameter[key] *= ci; } if (d_verbosity > 0) - if (d_adaptMixingParameter[mixingVariable::rho]) - pcout << "Adaptive Anderson mixing parameter for Rho: " - << d_mixingParameter[mixingVariable::rho] << std::endl; - else - pcout << "Anderson mixing parameter for Rho: " - << d_mixingParameter[mixingVariable::rho] << std::endl; - if (d_verbosity > 0) - if (d_adaptMixingParameter[mixingVariable::magZ]) - pcout << "Adaptive Anderson mixing parameter for magZ: " - << d_mixingParameter[mixingVariable::rho] << std::endl; - else - pcout << "Anderson mixing parameter for magZ: " - << d_mixingParameter[mixingVariable::magZ] << std::endl; + for (const auto &[key, value] : d_variableHistoryIn) + { + if (key == mixingVariable::rho && + d_adaptMixingParameter[mixingVariable::rho]) + pcout << "Adaptive Anderson mixing parameter for Rho: " + << d_mixingParameter[mixingVariable::rho] << std::endl; + else if (key == mixingVariable::rho) + pcout << "Anderson mixing parameter for Rho: " + << d_mixingParameter[mixingVariable::rho] << std::endl; + if (key == mixingVariable::magZ && + d_adaptMixingParameter[mixingVariable::magZ]) + pcout << "Adaptive Anderson mixing parameter for magZ: " + << d_mixingParameter[mixingVariable::magZ] << std::endl; + else if (key == mixingVariable::magZ) + pcout << "Anderson mixing parameter for magZ: " + << d_mixingParameter[mixingVariable::magZ] << std::endl; + } } // Fucntions to add to the history diff --git a/testsGPU/pseudopotential/complex/accuracyBenchmarks/outputMg2x_8 b/testsGPU/pseudopotential/complex/accuracyBenchmarks/outputMg2x_8 new file mode 100644 index 000000000..e8474b169 --- /dev/null +++ b/testsGPU/pseudopotential/complex/accuracyBenchmarks/outputMg2x_8 @@ -0,0 +1,154 @@ +number of atoms: 31 +number of atoms types: 1 +Z:12 +============================================================================================================================= +number of electrons: 310 +number of eigen values: 180 +============================================================================================================================= +Total number of k-points 2 +-----------Simulation Domain bounding vectors (lattice vectors in fully periodic case)------------- +v1 : 1.176399999999999935e+01 0.000000000000000000e+00 0.000000000000000000e+00 +v2 : 0.000000000000000000e+00 1.917200000000000060e+01 0.000000000000000000e+00 +v3 : 0.000000000000000000e+00 0.000000000000000000e+00 2.037584570023999930e+01 +----------------------------------------------------------------------------------------- +-----Fractional coordinates of atoms------ +AtomId 0: 2.500000000000000000e-01 2.500000000000000000e-01 4.166666666669999741e-01 +AtomId 1: 2.500000000000000000e-01 0.000000000000000000e+00 2.500000000000000000e-01 +AtomId 2: 0.000000000000000000e+00 2.500000000000000000e-01 1.666666666670000019e-01 +AtomId 3: 0.000000000000000000e+00 0.000000000000000000e+00 5.000000000000000000e-01 +AtomId 4: 2.500000000000000000e-01 2.500000000000000000e-01 9.166666666670000296e-01 +AtomId 5: 2.500000000000000000e-01 0.000000000000000000e+00 7.500000000000000000e-01 +AtomId 6: 0.000000000000000000e+00 2.500000000000000000e-01 6.666666666670000296e-01 +AtomId 7: 0.000000000000000000e+00 5.000000000000000000e-01 0.000000000000000000e+00 +AtomId 8: 2.500000000000000000e-01 7.500000000000000000e-01 4.166666666669999741e-01 +AtomId 9: 2.500000000000000000e-01 5.000000000000000000e-01 2.500000000000000000e-01 +AtomId 10: 0.000000000000000000e+00 7.500000000000000000e-01 1.666666666670000019e-01 +AtomId 11: 0.000000000000000000e+00 5.000000000000000000e-01 5.000000000000000000e-01 +AtomId 12: 2.500000000000000000e-01 7.500000000000000000e-01 9.166666666670000296e-01 +AtomId 13: 2.500000000000000000e-01 5.000000000000000000e-01 7.500000000000000000e-01 +AtomId 14: 0.000000000000000000e+00 7.500000000000000000e-01 6.666666666670000296e-01 +AtomId 15: 5.000000000000000000e-01 0.000000000000000000e+00 0.000000000000000000e+00 +AtomId 16: 7.500000000000000000e-01 2.500000000000000000e-01 4.166666666669999741e-01 +AtomId 17: 7.500000000000000000e-01 0.000000000000000000e+00 2.500000000000000000e-01 +AtomId 18: 5.000000000000000000e-01 2.500000000000000000e-01 1.666666666670000019e-01 +AtomId 19: 5.000000000000000000e-01 0.000000000000000000e+00 5.000000000000000000e-01 +AtomId 20: 7.500000000000000000e-01 2.500000000000000000e-01 9.166666666670000296e-01 +AtomId 21: 7.500000000000000000e-01 0.000000000000000000e+00 7.500000000000000000e-01 +AtomId 22: 5.000000000000000000e-01 2.500000000000000000e-01 6.666666666670000296e-01 +AtomId 23: 5.000000000000000000e-01 5.000000000000000000e-01 0.000000000000000000e+00 +AtomId 24: 7.500000000000000000e-01 7.500000000000000000e-01 4.166666666669999741e-01 +AtomId 25: 7.500000000000000000e-01 5.000000000000000000e-01 2.500000000000000000e-01 +AtomId 26: 5.000000000000000000e-01 7.500000000000000000e-01 1.666666666670000019e-01 +AtomId 27: 5.000000000000000000e-01 5.000000000000000000e-01 5.000000000000000000e-01 +AtomId 28: 7.500000000000000000e-01 7.500000000000000000e-01 9.166666666670000296e-01 +AtomId 29: 7.500000000000000000e-01 5.000000000000000000e-01 7.500000000000000000e-01 +AtomId 30: 5.000000000000000000e-01 7.500000000000000000e-01 6.666666666670000296e-01 +----------------------------------------------------------------------------------------- +Number Image Charges 2094 + +Finite element mesh information +------------------------------------------------- +FE interpolating polynomial order for Kohn-Sham eigenvalue problem: 3 +FE interpolating polynomial order for electrostatics solve: 3 +FE interpolating polynomial order for nodal electron density computation: 5 +number of elements: 1440 +number of degrees of freedom for the Kohn-Sham eigenvalue problem : 52791 +------------------------------------------------- + +Setting initial guess for wavefunctions.... + +Reading initial guess for electron-density..... + +Pseudopotential initalization.... + +Starting SCF iterations.... +Energy residual : 2.8460e+01 +Energy residual : 2.2528e+01 +Energy residual : 2.1117e+00 +Energy residual : 9.6181e-01 +Energy residual : 2.5866e-01 +Energy residual : 1.4833e-01 +Energy residual : 5.0433e-02 +Energy residual : 7.0491e-02 +Energy residual : 6.3258e-02 +Energy residual : 4.8374e-02 +Energy residual : 3.6016e-02 +Energy residual : 3.6163e-02 +Energy residual : 3.1624e-02 +Energy residual : 2.6834e-02 +Energy residual : 1.6281e-02 +Energy residual : 1.3537e-02 +Energy residual : 6.1284e-03 +Energy residual : 3.8847e-03 +Energy residual : 9.1916e-04 +Energy residual : 4.9047e-04 +Energy residual : 2.3086e-04 +Energy residual : 4.0514e-04 +Energy residual : 3.5866e-04 +Energy residual : 3.0853e-04 +Energy residual : 1.3754e-04 +Energy residual : 2.2222e-04 +Energy residual : 1.5146e-04 +Energy residual : 1.1316e-04 +Energy residual : 1.0185e-04 +Energy residual : 7.0003e-05 +Energy residual : 4.6342e-05 +Energy residual : 2.7636e-05 +Energy residual : 1.5984e-05 +Energy residual : 3.3789e-06 +Energy residual : 1.0235e-05 +Energy residual : 5.1345e-06 +Energy residual : 6.5343e-06 +Energy residual : 4.0463e-06 +Energy residual : 1.8893e-06 +Energy residual : 4.4723e-07 +Energy residual : 3.5242e-07 +Energy residual : 7.1301e-07 +Energy residual : 6.6343e-07 +Energy residual : 5.2183e-07 +Energy residual : 2.3832e-07 +Energy residual : 2.5120e-07 +Energy residual : 1.5266e-07 +Energy residual : 1.1084e-07 +Energy residual : 7.5416e-08 +Energy residual : 4.2787e-08 +SCF iterations converged to the specified tolerance after: 50 iterations. + +Energy computations (Hartree) +------------------- + Total energy: -1673.56610057 + +Absolute values of ion forces (Hartree/Bohr) +-------------------------------------------------------------------------------------------- +AtomId 0: 0.000820,0.001892,0.012176 +AtomId 1: 0.001957,0.000000,0.000544 +AtomId 2: 0.000000,0.001558,0.141588 +AtomId 3: 0.000050,0.000000,0.001169 +AtomId 4: 0.000591,0.001197,0.010672 +AtomId 5: 0.002144,0.000000,0.000842 +AtomId 6: 0.000000,0.000888,0.144485 +AtomId 7: 0.000007,0.000000,0.001078 +AtomId 8: 0.000820,0.001892,0.012176 +AtomId 9: 0.000298,0.000000,0.001855 +AtomId 10: 0.000000,0.001558,0.141588 +AtomId 11: 0.000007,0.000000,0.000027 +AtomId 12: 0.000591,0.001197,0.010672 +AtomId 13: 0.000615,0.000000,0.002027 +AtomId 14: 0.000000,0.000889,0.144485 +AtomId 15: 0.000048,0.000000,0.001309 +AtomId 16: 0.000820,0.001892,0.012176 +AtomId 17: 0.001957,0.000000,0.000544 +AtomId 18: 0.000059,0.001056,0.143329 +AtomId 19: 0.000039,0.000000,0.000261 +AtomId 20: 0.000591,0.001197,0.010672 +AtomId 21: 0.002144,0.000000,0.000842 +AtomId 22: 0.000059,0.001258,0.144065 +AtomId 23: 0.000007,0.000000,0.000710 +AtomId 24: 0.000820,0.001892,0.012176 +AtomId 25: 0.000298,0.000000,0.001855 +AtomId 26: 0.000059,0.001056,0.143329 +AtomId 27: 0.000007,0.000000,0.000171 +AtomId 28: 0.000591,0.001197,0.010672 +AtomId 29: 0.000615,0.000000,0.002027 +AtomId 30: 0.000059,0.001258,0.144065 +-------------------------------------------------------------------------------------------- diff --git a/testsGPU/pseudopotential/complex/jobscripts/crusher.slurm b/testsGPU/pseudopotential/complex/jobscripts/crusher.slurm index 15df4fb3c..cb33089d2 100644 --- a/testsGPU/pseudopotential/complex/jobscripts/crusher.slurm +++ b/testsGPU/pseudopotential/complex/jobscripts/crusher.slurm @@ -28,5 +28,7 @@ srun -n 6 -c 1 ./dftfe parameterFileMg2x_6.prm > outputMg2x_6 srun -n 6 -c 1 ./dftfe parameterFileMg2x_7.prm > outputMg2x_7 +srun -n 6 -c 1 ./dftfe parameterFileMg2x_8.prm > outputMg2x_8 + srun -n 6 -c 1 ./dftfe parameterFileBe.prm > outputBe diff --git a/testsGPU/pseudopotential/complex/jobscripts/frontierJobScript6GCDs6MPITasks.rc b/testsGPU/pseudopotential/complex/jobscripts/frontierJobScript6GCDs6MPITasks.rc index 8de363644..153ea4342 100644 --- a/testsGPU/pseudopotential/complex/jobscripts/frontierJobScript6GCDs6MPITasks.rc +++ b/testsGPU/pseudopotential/complex/jobscripts/frontierJobScript6GCDs6MPITasks.rc @@ -30,4 +30,5 @@ srun -n 6 -c 7 --gpu-bind closest $BASE/dftfe parameterFileMg2x_4.prm > outputMg srun -n 6 -c 7 --gpu-bind closest $BASE/dftfe parameterFileMg2x_5.prm > outputMg2x_5 srun -n 6 -c 7 --gpu-bind closest $BASE/dftfe parameterFileMg2x_6.prm > outputMg2x_6 srun -n 6 -c 7 --gpu-bind closest $BASE/dftfe parameterFileMg2x_7.prm > outputMg2x_7 +srun -n 6 -c 7 --gpu-bind closest $BASE/dftfe parameterFileMg2x_8.prm > outputMg2x_8 srun -n 6 -c 7 --gpu-bind closest $BASE/dftfe parameterFileBe.prm > outputBe diff --git a/testsGPU/pseudopotential/complex/jobscripts/matrixlabgpu18Tasks6GPUs.slurm b/testsGPU/pseudopotential/complex/jobscripts/matrixlabgpu18Tasks6GPUs.slurm index 27e23dd78..e5639ffeb 100644 --- a/testsGPU/pseudopotential/complex/jobscripts/matrixlabgpu18Tasks6GPUs.slurm +++ b/testsGPU/pseudopotential/complex/jobscripts/matrixlabgpu18Tasks6GPUs.slurm @@ -29,4 +29,5 @@ srun -n $SLURM_NTASKS --mpi=pmi2 $DFTFE_PATH/dftfe parameterFileMg2x_4.prm > out srun -n $SLURM_NTASKS --mpi=pmi2 $DFTFE_PATH/dftfe parameterFileMg2x_5.prm > outputMg2x_5 srun -n $SLURM_NTASKS --mpi=pmi2 $DFTFE_PATH/dftfe parameterFileMg2x_6.prm > outputMg2x_6 srun -n $SLURM_NTASKS --mpi=pmi2 $DFTFE_PATH/dftfe parameterFileMg2x_7.prm > outputMg2x_7 +srun -n $SLURM_NTASKS --mpi=pmi2 $DFTFE_PATH/dftfe parameterFileMg2x_8.prm > outputMg2x_8 srun -n $SLURM_NTASKS --mpi=pmi2 $DFTFE_PATH/dftfe parameterFileBe.prm > outputBe \ No newline at end of file diff --git a/testsGPU/pseudopotential/complex/jobscripts/perlmutter.slurm b/testsGPU/pseudopotential/complex/jobscripts/perlmutter.slurm index d688032eb..537065377 100644 --- a/testsGPU/pseudopotential/complex/jobscripts/perlmutter.slurm +++ b/testsGPU/pseudopotential/complex/jobscripts/perlmutter.slurm @@ -33,5 +33,7 @@ srun ./dftfe parameterFileMg2x_6.prm > outputMg2x_6 srun ./dftfe parameterFileMg2x_7.prm > outputMg2x_7 +srun ./dftfe parameterFileMg2x_8.prm > outputMg2x_8 + srun -n 6 -c 1 ./dftfe parameterFileBe.prm > outputBe diff --git a/testsGPU/pseudopotential/complex/jobscripts/summit.lsf b/testsGPU/pseudopotential/complex/jobscripts/summit.lsf index d2c1bc50f..334434fa2 100644 --- a/testsGPU/pseudopotential/complex/jobscripts/summit.lsf +++ b/testsGPU/pseudopotential/complex/jobscripts/summit.lsf @@ -25,4 +25,6 @@ jsrun --smpiargs "-gpu" -n 6 -a 1 -c 7 -g 1 -r 6 -d packed -b packed:7 ./dftfe p jsrun --smpiargs "-gpu" -n 6 -a 1 -c 7 -g 1 -r 6 -d packed -b packed:7 ./dftfe parameterFileMg2x_7.prm > outputMg2x_7 +jsrun --smpiargs "-gpu" -n 6 -a 1 -c 7 -g 1 -r 6 -d packed -b packed:7 ./dftfe parameterFileMg2x_8.prm > outputMg2x_8 + jsrun --smpiargs "-gpu" -n 6 -a 1 -c 7 -g 1 -r 6 -d packed -b packed:7 ./dftfe parameterFileBe.prm > outputBe diff --git a/testsGPU/pseudopotential/complex/parameterFileMg2x_8.prm b/testsGPU/pseudopotential/complex/parameterFileMg2x_8.prm new file mode 100644 index 000000000..ec9136862 --- /dev/null +++ b/testsGPU/pseudopotential/complex/parameterFileMg2x_8.prm @@ -0,0 +1,86 @@ +set VERBOSITY = 0 +set REPRODUCIBLE OUTPUT=true +set USE GPU=true +subsection GPU + set AUTO GPU BLOCK SIZES=false + set USE GPUDIRECT MPI ALL REDUCE = true + set USE ELPA GPU KERNEL=false +end + +subsection Boundary conditions + set SMEARED NUCLEAR CHARGES=true + set FLOATING NUCLEAR CHARGES=true + set CONSTRAINTS FROM SERIAL DOFHANDLER = false + set CONSTRAINTS PARALLEL CHECK = false + set PERIODIC1 = true + set PERIODIC2 = true + set PERIODIC3 = true + set SELF POTENTIAL RADIUS = 3.0 + set POINT WISE DIRICHLET CONSTRAINT =false +end + + +subsection DFT functional parameters + set EXCHANGE CORRELATION TYPE = 4 + set PSEUDOPOTENTIAL CALCULATION = true + set PSEUDOPOTENTIAL FILE NAMES LIST = pseudoMg.inp + set SPIN POLARIZATION=1 + set START MAGNETIZATION=0.1 +end + + +subsection Finite element mesh parameters + set POLYNOMIAL ORDER = 3 + subsection Auto mesh generation parameters + set AUTO ADAPT BASE MESH SIZE=false + set ATOM BALL RADIUS = 2.0 + set BASE MESH SIZE = 4.0 + set MESH SIZE AROUND ATOM = 1.0 + set MESH SIZE AT ATOM = 1.0 + end +end + +subsection Geometry + set NATOMS=31 + set NATOM TYPES=1 + set ATOMIC COORDINATES FILE = coordinatesMg2x.inp + set DOMAIN VECTORS FILE = domainVectorsMg2x.inp + subsection Optimization + set ION FORCE = true + end +end + +subsection Brillouin zone k point sampling options + set USE TIME REVERSAL SYMMETRY = false + subsection Monkhorst-Pack (MP) grid generation + set SAMPLING POINTS 1 = 2 + set SAMPLING POINTS 2 = 1 + set SAMPLING POINTS 3 = 1 + set SAMPLING SHIFT 1 = 1 + set SAMPLING SHIFT 2 = 0 + set SAMPLING SHIFT 3 = 0 + end +end + + +subsection Parallelization + set NPBAND=1 + set NPKPT=2 +end + +subsection SCF parameters + set COMPUTE ENERGY EACH ITER = false + set MAXIMUM ITERATIONS = 100 + set TEMPERATURE = 500 + set TOLERANCE = 1e-6 + set STARTING WFC=ATOMIC + set USE ENERGY RESIDUAL METRIC = true + subsection Eigen-solver parameters + set CHEBYSHEV POLYNOMIAL DEGREE = 20 + set NUMBER OF KOHN-SHAM WAVEFUNCTIONS = 180 + set CHEBY WFC BLOCK SIZE=90 + set WFC BLOCK SIZE=90 + set CHEBYSHEV FILTER TOLERANCE=1e-5 + set SCALAPACKPROCS=2 + end +end diff --git a/utils/dftParameters.cc b/utils/dftParameters.cc index aca4cf417..a6fa9d58b 100644 --- a/utils/dftParameters.cc +++ b/utils/dftParameters.cc @@ -734,6 +734,12 @@ namespace dftfe dealii::Patterns::Double(1e-12, 1.0), "[Standard] SCF iterations stopping tolerance in terms of $L_2$ norm of the electron-density difference between two successive iterations. The default tolerance of is set to a tight value of 1e-5 for accurate ionic forces and cell stresses keeping structural optimization and molecular dynamics in mind. A tolerance of 1e-4 would be accurate enough for calculations without structural optimization and dynamics. CAUTION: A tolerance close to 1e-7 or lower can deteriorate the SCF convergence due to the round-off error accumulation."); + prm.declare_entry( + "ENERGY TOLERANCE", + "1e-07", + dealii::Patterns::Double(1e-12, 1.0), + "[Standard] SCF iterations stopping tolerance in terms of difference between Harris-Foulkes and Kohn-Sham energies. The default tolerance of is set to a tight value of 1e-7 for accurate ionic forces and cell stresses keeping structural optimization and molecular dynamics in mind. A tolerance of 1e-6 would be accurate enough for calculations without structural optimization and dynamics."); + prm.declare_entry( "MIXING HISTORY", "10", @@ -749,7 +755,7 @@ namespace dftfe prm.declare_entry( "SPIN MIXING ENHANCEMENT FACTOR", "4.0", - dealii::Patterns::Double(-1e-12, 10.0), + dealii::Patterns::Double(-1e-12, 100.0), "[Standard] Scales the mixing parameter for the spin densities as SPIN MIXING ENHANCEMENT FACTOR times MIXING PARAMETER. This parameter is not used for LOW\_RANK\_DIELECM\_PRECOND mixing method."); prm.declare_entry( @@ -802,6 +808,12 @@ namespace dftfe dealii::Patterns::Bool(), "[Advanced] Boolean parameter specifying whether to compute the total energy at the end of every SCF. Setting it to false can lead to some computational time savings. Default value is false but is internally set to true if VERBOSITY==5"); + prm.declare_entry( + "USE ENERGY RESIDUAL METRIC", + "false", + dealii::Patterns::Bool(), + "[Advanced] Boolean parameter specifying whether to use the energy residual metric (equation 7.23 of Richard Matrin second edition) for convergence check. Setting it to false can lead to some computational time savings. Default value is false"); + prm.enter_subsection("LOW RANK DIELECM PRECOND"); { @@ -1557,19 +1569,21 @@ namespace dftfe TVal = prm.get_double("TEMPERATURE"); numSCFIterations = prm.get_integer("MAXIMUM ITERATIONS"); selfConsistentSolverTolerance = prm.get_double("TOLERANCE"); - mixingHistory = prm.get_integer("MIXING HISTORY"); - mixingParameter = prm.get_double("MIXING PARAMETER"); + selfConsistentSolverEnergyTolerance = prm.get_double("ENERGY TOLERANCE"); + mixingHistory = prm.get_integer("MIXING HISTORY"); + mixingParameter = prm.get_double("MIXING PARAMETER"); spinMixingEnhancementFactor = prm.get_double("SPIN MIXING ENHANCEMENT FACTOR"); adaptAndersonMixingParameter = prm.get_bool("ADAPT ANDERSON MIXING PARAMETER"); - kerkerParameter = prm.get_double("KERKER MIXING PARAMETER"); - restaFermiWavevector = prm.get_double("RESTA FERMI WAVEVECTOR"); - restaScreeningLength = prm.get_double("RESTA SCREENING LENGTH"); - mixingMethod = prm.get("MIXING METHOD"); - constraintMagnetization = prm.get_bool("CONSTRAINT MAGNETIZATION"); - startingWFCType = prm.get("STARTING WFC"); - computeEnergyEverySCF = prm.get_bool("COMPUTE ENERGY EACH ITER"); + kerkerParameter = prm.get_double("KERKER MIXING PARAMETER"); + restaFermiWavevector = prm.get_double("RESTA FERMI WAVEVECTOR"); + restaScreeningLength = prm.get_double("RESTA SCREENING LENGTH"); + mixingMethod = prm.get("MIXING METHOD"); + constraintMagnetization = prm.get_bool("CONSTRAINT MAGNETIZATION"); + startingWFCType = prm.get("STARTING WFC"); + computeEnergyEverySCF = prm.get_bool("COMPUTE ENERGY EACH ITER"); + useEnergyResidualTolerance = prm.get_bool("USE ENERGY RESIDUAL METRIC"); prm.enter_subsection("LOW RANK DIELECM PRECOND"); {