This was my quick "get my feet wet" try at using autodiff. It tries to train a simple fully connected neural network, implemented as a template class. Everything compiles, but it hangs when computing the gradient, in what looks like infinite recursion.
I let it run for an hour and it never completed.
There could be math bugs in the code; I was never able to run it past the gradient calculation (I realize this "neural network" is missing an activation function), but that shouldn't have anything to do with the hang.
autodiff_test.exe!autodiff::detail::MulExpr<float>::propagate(const float & wprime) Line 384 C++
autodiff_test.exe!autodiff::detail::DependentVariableExpr<float>::propagate(const float & wprime) Line 237 C++
autodiff_test.exe!autodiff::detail::AddExpr<float>::propagate(const float & wprime) Line 329 C++
autodiff_test.exe!autodiff::detail::DependentVariableExpr<float>::propagate(const float & wprime) Line 237 C++
autodiff_test.exe!autodiff::detail::AddExpr<float>::propagate(const float & wprime) Line 330 C++
autodiff_test.exe!autodiff::detail::DependentVariableExpr<float>::propagate(const float & wprime) Line 237 C++
autodiff_test.exe!autodiff::detail::AddExpr<float>::propagate(const float & wprime) Line 329 C++
autodiff_test.exe!autodiff::detail::DependentVariableExpr<float>::propagate(const float & wprime) Line 237 C++
autodiff_test.exe!autodiff::detail::DependentVariableExpr<float>::propagate(const float & wprime) Line 237 C++
autodiff_test.exe!autodiff::detail::MulExpr<float>::propagate(const float & wprime) Line 386 C++
...
You can see that this code mostly consists of indexing operations to cast elements of the parameter vector into matrices, and that I range-check those conversions.
Note: I was able to get a much more trivial reverse-mode diff problem to work, one without temporary variables and only 3 parameters (roughly the sketch below). But it fell apart when I tried something more substantive. I suspect some issue with the expression templates?
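For reference, the trivial case that did work was along these lines. This is a sketch of the shape of it rather than the exact code, using the library's documented derivatives()/wrt() reverse-mode API with the default double-precision var:
#include <iostream>
#include <autodiff/reverse/var.hpp>
using namespace autodiff;
int main()
{
    // three independent parameters, no intermediate Variable temporaries
    var x = 1.0, y = 2.0, z = 3.0;
    var u = x * y + z * z;                                  // scalar built directly from the leaves
    auto [ux, uy, uz] = derivatives( u, wrt( x, y, z ) );   // reverse-mode partials of u
    std::cout << ux << " " << uy << " " << uz << std::endl; // expect 2 1 6
    return 0;
}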
#include <iostream>
#include <Eigen/Dense>
#include <autodiff/reverse/var.hpp>
#include <autodiff/reverse/var/eigen.hpp> // Eigen support for Variable<T> (gradient(), typedef macro)
using namespace autodiff;
using varf = Variable<float>;
// Declare float versions of eigen types
AUTODIFF_DEFINE_EIGEN_TYPEDEFS_ALL_SIZES( autodiff::Variable<float>, varf );
template<int nNumRows, int nNumCols>
INLINE auto CastVectorToMatrix( auto &matinput, int nStartingOffset, int nLimit )
{
Assert( nStartingOffset + nNumCols * nNumRows <= nLimit );
using T = std::remove_reference_t<decltype( matinput( 0, 0 ) )>;
return Eigen::Map<Eigen::Matrix<T, nNumRows, nNumCols>>( matinput.data() + nStartingOffset );
}
template<int NUM_INPUTS, int NUM_OUTPUTS, int NUM_HIDDEN_LAYERS, int HIDDEN_LAYER_WIDTH>
struct CNetworkParms
{
static constexpr int nNumWeights = ( NUM_INPUTS * HIDDEN_LAYER_WIDTH ) + ( NUM_HIDDEN_LAYERS * HIDDEN_LAYER_WIDTH * HIDDEN_LAYER_WIDTH )
+ ( HIDDEN_LAYER_WIDTH * NUM_OUTPUTS );
static constexpr int nNumBiases = ( HIDDEN_LAYER_WIDTH + NUM_HIDDEN_LAYERS * HIDDEN_LAYER_WIDTH + NUM_OUTPUTS );
static constexpr int nNumParms = nNumWeights + nNumBiases;
Eigen::Matrix<float, nNumParms, 1> m_flParams;
// accessors to get at the submatrices
inline auto InputLayerWeights( auto &flParams)
{
return CastVectorToMatrix<HIDDEN_LAYER_WIDTH, NUM_INPUTS>( flParams, 0, nNumParms );
}
inline auto InputLayerBiases( auto &flParams)
{
return CastVectorToMatrix<HIDDEN_LAYER_WIDTH, 1>( flParams, nNumWeights, nNumParms ); // biases stored after weights
}
inline auto HiddenLayerWeights( auto &flParams, int nLayerIndex )
{
return CastVectorToMatrix<HIDDEN_LAYER_WIDTH, HIDDEN_LAYER_WIDTH>( flParams,
NUM_INPUTS * HIDDEN_LAYER_WIDTH +
nLayerIndex * HIDDEN_LAYER_WIDTH * HIDDEN_LAYER_WIDTH, nNumParms );
}
inline auto HiddenLayerBiases( auto &flParams, int nLayerIndex )
{
return CastVectorToMatrix<1, HIDDEN_LAYER_WIDTH>( flParams,
nNumWeights + HIDDEN_LAYER_WIDTH +
nLayerIndex * HIDDEN_LAYER_WIDTH, nNumParms );
}
inline auto OutputLayerWeights( auto &flParams )
{
return CastVectorToMatrix<HIDDEN_LAYER_WIDTH, NUM_OUTPUTS>( flParams,
NUM_INPUTS * HIDDEN_LAYER_WIDTH +
NUM_HIDDEN_LAYERS * HIDDEN_LAYER_WIDTH * HIDDEN_LAYER_WIDTH, nNumParms );
}
inline auto OutputLayerBiases( auto &flParams )
{
return CastVectorToMatrix<NUM_OUTPUTS, 1>( flParams,
nNumWeights + HIDDEN_LAYER_WIDTH +
NUM_HIDDEN_LAYERS * HIDDEN_LAYER_WIDTH, nNumParms );
}
auto gradient_Loss()
{
// attempt to train an NN to normalize the vector [1,2,3].
Eigen::Matrix<float, nNumParms, 1> vRet;
vRet.setZero();
Eigen::Matrix<varf, 1, HIDDEN_LAYER_WIDTH> vPrevLayerOutputs[NUM_HIDDEN_LAYERS+1];
Eigen::Matrix<varf, 3, 1> vTrialInput;
vTrialInput << 1,2,3; //.setRandom();
Eigen::Matrix<varf, nNumParms, 1 > parmVars = m_flParams;
vPrevLayerOutputs[0] = InputLayerWeights( parmVars ) * vTrialInput + InputLayerBiases( parmVars );
for( int i =0; i < NUM_HIDDEN_LAYERS; i++ )
{
vPrevLayerOutputs[i+1] = vPrevLayerOutputs[i] * HiddenLayerWeights( parmVars, i ) + HiddenLayerBiases( parmVars, i );
}
// now, calculate the output
Eigen::Matrix<varf, 1, NUM_OUTPUTS> vResult = vPrevLayerOutputs[NUM_HIDDEN_LAYERS] * OutputLayerWeights( parmVars );
vResult += OutputLayerBiases( parmVars );
// and update the loss
varf flDiffMag = vResult.squaredNorm() - 1.f;
flDiffMag *= flDiffMag;
vRet += gradient( flDiffMag, parmVars );
std::cout << "gad!" << vRet << std::endl;
return vRet.eval();
}
bool Iterate( float flLearningRate = 0.1f ) // returns false if done
{
auto vGradient = gradient_Loss();
m_flParams -= flLearningRate * vGradient;
return false;
}
};
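For context, a minimal driver for the class would look roughly like this; the network sizes here are arbitrary placeholders, not anything tuned:
int main()
{
    // 3 inputs, 3 outputs, 2 hidden layers of width 8 -- arbitrary sizes for testing
    CNetworkParms<3, 3, 2, 8> net;
    net.m_flParams.setRandom();  // random initial weights and biases in [-1, 1]
    for ( int i = 0; i < 100; i++ )
    {
        net.Iterate( 0.1f );     // one gradient-descent step; this is where it hangs inside gradient()
    }
    return 0;
}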