This was my quick "get my feet wet" try at using autodiff. It tries to train a simple fully connected neural network, implemented as a template class. Everything compiles, but it hangs when computing the gradient, in what looks like infinite recursion.
I let it run for an hour and it never completed.
There could be math bugs in the code; I was never able to run it past the gradient calculation (I realize this "neural network" is missing an activation function), but that shouldn't have anything to do with the hang.
autodiff_test.exe!autodiff::detail::MulExpr<float>::propagate(const float & wprime) Line 384 C++
autodiff_test.exe!autodiff::detail::DependentVariableExpr<float>::propagate(const float & wprime) Line 237 C++
autodiff_test.exe!autodiff::detail::AddExpr<float>::propagate(const float & wprime) Line 329 C++
autodiff_test.exe!autodiff::detail::DependentVariableExpr<float>::propagate(const float & wprime) Line 237 C++
autodiff_test.exe!autodiff::detail::AddExpr<float>::propagate(const float & wprime) Line 330 C++
autodiff_test.exe!autodiff::detail::DependentVariableExpr<float>::propagate(const float & wprime) Line 237 C++
autodiff_test.exe!autodiff::detail::AddExpr<float>::propagate(const float & wprime) Line 329 C++
autodiff_test.exe!autodiff::detail::DependentVariableExpr<float>::propagate(const float & wprime) Line 237 C++
autodiff_test.exe!autodiff::detail::DependentVariableExpr<float>::propagate(const float & wprime) Line 237 C++
autodiff_test.exe!autodiff::detail::MulExpr<float>::propagate(const float & wprime) Line 386 C++
...
You can see that this code mostly consists of indexing operations to cast elements of the parameter vector into matrices, and that I range-check those conversions.
Note: I was able to get a much more trivial reverse-mode diff problem to work, one without temporary variables and only 3 parameters (roughly the sketch below). But it fell apart when I tried something more substantive. I suspect some issue with the expression templates?
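For reference, the trivial case that did work was along these lines. This is a sketch of the shape of it rather than the exact code, using the library's documented derivatives()/wrt() reverse-mode API with the default double-precision var:
#include <iostream>
#include <autodiff/reverse/var.hpp>
using namespace autodiff;
int main()
{
    // three independent parameters, no intermediate Variable temporaries
    var x = 1.0, y = 2.0, z = 3.0;
    var u = x * y + z * z;                                  // scalar built directly from the leaves
    auto [ux, uy, uz] = derivatives( u, wrt( x, y, z ) );   // reverse-mode partials of u
    std::cout << ux << " " << uy << " " << uz << std::endl; // expect 2 1 6
    return 0;
}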
#include <iostream>
#include <Eigen/Dense>
#include <autodiff/reverse/var.hpp>
#include <autodiff/reverse/var/eigen.hpp> // Eigen support for Variable<T> (gradient(), typedef macro)
using namespace autodiff;
using varf = Variable<float>;
// Declare float versions of eigen types
AUTODIFF_DEFINE_EIGEN_TYPEDEFS_ALL_SIZES( autodiff::Variable<float>, varf );
template<int nNumRows, int nNumCols>
INLINE auto CastVectorToMatrix( auto &matinput, int nStartingOffset, int nLimit )
{
Assert( nStartingOffset + nNumCols * nNumRows <= nLimit );
using T = std::remove_reference_t<decltype( matinput( 0, 0 ) )>;
return Eigen::Map<Eigen::Matrix<T, nNumRows, nNumCols>>( matinput.data() + nStartingOffset );
}
template<int NUM_INPUTS, int NUM_OUTPUTS, int NUM_HIDDEN_LAYERS, int HIDDEN_LAYER_WIDTH>
struct CNetworkParms
{
static constexpr int nNumWeights = ( NUM_INPUTS * HIDDEN_LAYER_WIDTH ) + ( NUM_HIDDEN_LAYERS * HIDDEN_LAYER_WIDTH * HIDDEN_LAYER_WIDTH )
+ ( HIDDEN_LAYER_WIDTH * NUM_OUTPUTS );
static constexpr int nNumBiases = ( HIDDEN_LAYER_WIDTH + NUM_HIDDEN_LAYERS * HIDDEN_LAYER_WIDTH + NUM_OUTPUTS );
static constexpr int nNumParms = nNumWeights + nNumBiases;
Eigen::Matrix<float, nNumParms, 1> m_flParams;
// accessors to get at the submatrices
inline auto InputLayerWeights( auto &flParams)
{
return CastVectorToMatrix<HIDDEN_LAYER_WIDTH, NUM_INPUTS>( flParams, 0, nNumParms );
}
inline auto InputLayerBiases( auto &flParams)
{
return CastVectorToMatrix<HIDDEN_LAYER_WIDTH, 1>( flParams, nNumWeights, nNumParms ); // biases stored after weights
}
inline auto HiddenLayerWeights( auto &flParams, int nLayerIndex )
{
return CastVectorToMatrix<HIDDEN_LAYER_WIDTH, HIDDEN_LAYER_WIDTH>( flParams,
NUM_INPUTS * HIDDEN_LAYER_WIDTH +
nLayerIndex * HIDDEN_LAYER_WIDTH * HIDDEN_LAYER_WIDTH, nNumParms );
}
inline auto HiddenLayerBiases( auto &flParams, int nLayerIndex )
{
return CastVectorToMatrix<1, HIDDEN_LAYER_WIDTH>( flParams,
nNumWeights + HIDDEN_LAYER_WIDTH +
nLayerIndex * HIDDEN_LAYER_WIDTH, nNumParms );
}
inline auto OutputLayerWeights( auto &flParams )
{
return CastVectorToMatrix<HIDDEN_LAYER_WIDTH, NUM_OUTPUTS>( flParams,
NUM_INPUTS * HIDDEN_LAYER_WIDTH +
NUM_HIDDEN_LAYERS * HIDDEN_LAYER_WIDTH * HIDDEN_LAYER_WIDTH, nNumParms );
}
inline auto OutputLayerBiases( auto &flParams )
{
return CastVectorToMatrix<NUM_OUTPUTS, 1>( flParams,
nNumWeights + HIDDEN_LAYER_WIDTH +
NUM_HIDDEN_LAYERS * HIDDEN_LAYER_WIDTH, nNumParms );
}
auto gradient_Loss()
{
// attempt to train an NN to normalize the vector [1,2,3].
Eigen::Matrix<float, nNumParms, 1> vRet;
vRet.setZero();
Eigen::Matrix<varf, 1, HIDDEN_LAYER_WIDTH> vPrevLayerOutputs[NUM_HIDDEN_LAYERS+1];
Eigen::Matrix<varf, 3, 1> vTrialInput;
vTrialInput << 1,2,3; //.setRandom();
Eigen::Matrix<varf, nNumParms, 1 > parmVars = m_flParams;
vPrevLayerOutputs[0] = InputLayerWeights( parmVars ) * vTrialInput + InputLayerBiases( parmVars );
for( int i =0; i < NUM_HIDDEN_LAYERS; i++ )
{
vPrevLayerOutputs[i+1] = vPrevLayerOutputs[i] * HiddenLayerWeights( parmVars, i ) + HiddenLayerBiases( parmVars, i );
}
// now, calculate the output
Eigen::Matrix<varf, 1, NUM_OUTPUTS> vResult = vPrevLayerOutputs[NUM_HIDDEN_LAYERS] * OutputLayerWeights( parmVars );
vResult += OutputLayerBiases( parmVars );
// and update the loss
varf flDiffMag = vResult.squaredNorm() - 1.f;
flDiffMag *= flDiffMag;
vRet += gradient( flDiffMag, parmVars );
std::cout << "gad!" << vRet << std::endl;
return vRet.eval();
}
bool Iterate( float flLearningRate = 0.1f ) // returns false if done
{
auto vGradient = gradient_Loss();
m_flParams -= flLearningRate * vGradient;
return false;
}
};
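For context, a minimal driver for the class would look roughly like this; the network sizes here are arbitrary placeholders, not anything tuned:
int main()
{
    // 3 inputs, 3 outputs, 2 hidden layers of width 8 -- arbitrary sizes for testing
    CNetworkParms<3, 3, 2, 8> net;
    net.m_flParams.setRandom();  // random initial weights and biases in [-1, 1]
    for ( int i = 0; i < 100; i++ )
    {
        net.Iterate( 0.1f );     // one gradient-descent step; this is where it hangs inside gradient()
    }
    return 0;
}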