diff --git a/.clang-format b/.clang-format index 36bcaf1..1177faf 100644 --- a/.clang-format +++ b/.clang-format @@ -1,4 +1,4 @@ ---- +--- AccessModifierOffset: '-4' AlignAfterOpenBracket: Align AlignConsecutiveAssignments: 'false' @@ -7,6 +7,7 @@ AlignEscapedNewlinesLeft: 'false' AlignOperands: 'true' AlignTrailingComments: 'false' AllowAllParametersOfDeclarationOnNextLine: 'false' +AllowAllArgumentsOnNextLine: 'true' AllowShortBlocksOnASingleLine: 'true' AllowShortCaseLabelsOnASingleLine: 'true' AllowShortFunctionsOnASingleLine: 'true' @@ -48,6 +49,7 @@ IncludeCategories: IndentCaseLabels: 'true' IndentWidth: '4' IndentWrappedFunctionNames: 'false' +LambdaBodyIndentation: Signature Language: Cpp MaxEmptyLinesToKeep: '3' NamespaceIndentation: Inner diff --git a/benchmark/Benchmark.hpp b/benchmark/Benchmark.hpp index 276f133..bf4e058 100644 --- a/benchmark/Benchmark.hpp +++ b/benchmark/Benchmark.hpp @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include @@ -19,14 +19,6 @@ #define MATHTER_TSC_USES_CHRONO #endif -#ifdef _MSC_VER -#define MATHTER_NOINLINE __declspec(noinline) -#define MATHTER_FORCEINLINE __forceinline -#else -#define MATHTER_NOINLINE __attribute__((noinline)) -#define MATHTER_FORCEINLINE __attribute__((always_inline)) -#endif - namespace impl { diff --git a/include/Mathter/CMakeLists.txt b/include/Mathter/CMakeLists.txt index 20b22be..0b75126 100644 --- a/include/Mathter/CMakeLists.txt +++ b/include/Mathter/CMakeLists.txt @@ -15,7 +15,7 @@ target_sources(Mathter # Common "Common/DeterministicInitializer.hpp" "Common/Functional.hpp" - "Common/LoopUtil.hpp" + "Common/OptimizationUtil.hpp" "Common/MathUtil.hpp" "Common/Range.hpp" "Common/Types.hpp" diff --git a/include/Mathter/Common/LoopUtil.hpp b/include/Mathter/Common/LoopUtil.hpp deleted file mode 100644 index d4005dd..0000000 --- a/include/Mathter/Common/LoopUtil.hpp +++ /dev/null @@ -1,24 +0,0 @@ -#pragma once - -#include -#include - - -namespace mathter { - -namespace impl { - - template - auto LoopUnroll(Func&& func, std::index_sequence) { - return func(Indices...); - } - -} // namespace impl - - -template -auto LoopUnroll(Func&& func) { - return impl::LoopUnroll(std::forward(func), std::make_index_sequence{}); -} - -} // namespace mathter \ No newline at end of file diff --git a/include/Mathter/Common/OptimizationUtil.hpp b/include/Mathter/Common/OptimizationUtil.hpp new file mode 100644 index 0000000..26f7542 --- /dev/null +++ b/include/Mathter/Common/OptimizationUtil.hpp @@ -0,0 +1,69 @@ +#pragma once + +#include +#include + + +namespace mathter { + + +#ifdef _MSC_VER +#define MATHTER_NOINLINE __declspec(noinline) +#define MATHTER_FORCEINLINE __forceinline +#define MATHTER_FLATTEN [[msvc::flatten]] +#else +#define MATHTER_NOINLINE __attribute__((noinline)) +#define MATHTER_FORCEINLINE __attribute__((always_inline)) +#define MATHTER_FLATTEN __attribute__((flatten)) +#endif + + +namespace impl { + + template + MATHTER_FORCEINLINE auto LoopUnrollHelper(std::index_sequence, Func&& func, Args&&... args) { + return func(std::forward(args)..., Indices...); + } + +} // namespace impl + + +template +MATHTER_FORCEINLINE auto LoopUnroll(Func&& func, Args&&... args) { + return impl::LoopUnrollHelper(std::make_index_sequence{}, std::forward(func), std::forward(args)...); +} + + +template +void ForUnrolled(std::integral_constant, + std::integral_constant last, + std::integral_constant step, + std::integral_constant limit, + Fun&& fun, + Args&&... args) { + constexpr auto Count = Last - First / Step; + if constexpr (Count <= Limit) { + if constexpr (0 < Step ? First < Last : First > Last) { + fun(First, args...); + ForUnrolled(std::integral_constant{}, last, step, limit, std::move(fun), std::forward(args)...); + } + } + else { + for (auto i = First; i < Last; i += Step) { + fun(i, args...); + } + } +} + + +template +auto ForUnrolled(Fun&& fun, Args&&... args) -> std::enable_if_t>> { + ForUnrolled(std::integral_constant{}, + std::integral_constant{}, + std::integral_constant{}, + std::integral_constant{}, + std::forward(fun), + std::forward(args)...); +} + +} // namespace mathter \ No newline at end of file diff --git a/include/Mathter/Matrix/Arithmetic.hpp b/include/Mathter/Matrix/Arithmetic.hpp index b72b05c..db09e7c 100644 --- a/include/Mathter/Matrix/Arithmetic.hpp +++ b/include/Mathter/Matrix/Arithmetic.hpp @@ -5,9 +5,10 @@ #pragma once -#include "../Common/LoopUtil.hpp" +#include "../Common/OptimizationUtil.hpp" #include "../Vector/Arithmetic.hpp" #include "../Vector/Math.hpp" +#include "Cast.hpp" #include "Matrix.hpp" namespace mathter { @@ -20,21 +21,19 @@ namespace impl { auto Multiply(const Matrix& lhs, const Matrix& rhs) { using T = common_arithmetic_type_t; - using M = Matrix; - - const auto calcPartialRow = [&lhs, &rhs](size_t rowIdx, size_t runIdx) { - return lhs(rowIdx, runIdx) * rhs.Row(runIdx); - }; - - const auto calcRow = [&calcPartialRow](size_t rowIdx) { - return ::mathter::LoopUnroll([rowIdx, &calcPartialRow](auto... runIdx) { - return (... + calcPartialRow(rowIdx, runIdx)); - }); - }; + constexpr auto Packed = Packed1 && Packed2; + using Mat = Matrix; + using Vec = Vector; + + Mat m; + for (size_t rowIdx = 0; rowIdx < Rows1; ++rowIdx) { + m.Row(rowIdx, lhs(rowIdx, 0) * rhs.Row(0)); + for (size_t runIdx = 1; runIdx < Match; ++runIdx) { + m.Row(rowIdx, MultiplyAdd(Vec(lhs(rowIdx, runIdx)), rhs.Row(runIdx), m.Row(rowIdx))); + } + } - return ::mathter::LoopUnroll([&calcRow](auto... rowIdx) { - return M(stripeArg, calcRow(rowIdx)...); - }); + return m; } @@ -44,21 +43,15 @@ namespace impl { auto Multiply(const Matrix& lhs, const Matrix& rhs) { using T = common_arithmetic_type_t; - using M = Matrix; - - const auto calcElement = [&lhs, &rhs](size_t rowIdx, size_t colIdx) { - return Sum(lhs.Row(rowIdx) * rhs.Column(colIdx)); - }; + using Mat = Matrix; - const auto calcRow = [&calcElement](size_t rowIdx) { - return ::mathter::LoopUnroll([rowIdx, &calcElement](auto... runIdx) { - return Vector(calcElement(rowIdx, runIdx)...); - }); - }; - - return ::mathter::LoopUnroll([&calcRow](auto... rowIdx) { - return M(stripeArg, calcRow(rowIdx)...); - }); + Mat m; + for (size_t rowIdx = 0; rowIdx < Rows1; ++rowIdx) { + for (size_t colIdx = 0; colIdx < Columns2; ++colIdx) { + m(rowIdx, colIdx) = Sum(lhs.Row(rowIdx) * rhs.Column(colIdx)); + } + } + return m; } @@ -68,21 +61,18 @@ namespace impl { auto Multiply(const Matrix& lhs, const Matrix& rhs) { using T = common_arithmetic_type_t; - using M = Matrix; - - const auto calcPartialColumn = [&lhs, &rhs](size_t colIdx, size_t runIdx) { - return lhs.Column(runIdx) * rhs(runIdx, colIdx); - }; - - const auto calcColumn = [&calcPartialColumn](size_t colIdx) { - return ::mathter::LoopUnroll([colIdx, &calcPartialColumn](auto... runIdx) { - return (... + calcPartialColumn(colIdx, runIdx)); - }); - }; - - return ::mathter::LoopUnroll([&calcColumn](auto... colIdx) { - return M(stripeArg, calcColumn(colIdx)...); - }); + constexpr auto Packed = Packed1 && Packed2; + using Mat = Matrix; + using Vec = Vector; + + Mat m; + for (size_t colIdx = 0; colIdx < Columns2; ++colIdx) { + m.Column(colIdx, lhs.Column(0) * rhs(0, colIdx)); + for (size_t runIdx = 1; runIdx < Match; ++runIdx) { + m.Column(colIdx, MultiplyAdd(lhs.Column(runIdx), Vec(rhs(runIdx, colIdx)), m.Column(colIdx))); + } + } + return m; } @@ -94,20 +84,18 @@ namespace impl { const Matrix& rhs, Func&& func) { using T = std::invoke_result_t; - using M = Matrix; + using Mat = Matrix; - const auto calcStripe = [&lhs, &rhs, &func](size_t stripeIdx) { + Mat m; + for (size_t stripeIdx = 0; stripeIdx < Mat::stripeCount; ++stripeIdx) { if constexpr (Layout1 == eMatrixLayout::ROW_MAJOR) { - return func(lhs.Row(stripeIdx), rhs.Row(stripeIdx)); + m.Row(stripeIdx, func(lhs.Row(stripeIdx), rhs.Row(stripeIdx))); } else { - return func(lhs.Column(stripeIdx), rhs.Column(stripeIdx)); + m.Column(stripeIdx, func(lhs.Column(stripeIdx), rhs.Column(stripeIdx))); } - }; - - return ::mathter::LoopUnroll([&calcStripe](auto... stripeIdx) { - return M(stripeArg, calcStripe(stripeIdx)...); - }); + } + return m; } @@ -119,11 +107,13 @@ namespace impl { const T2& rhs, Func&& func) { using T = std::invoke_result_t; - using M = Matrix; + using Mat = Matrix; - return ::mathter::LoopUnroll([&lhs, &rhs, &func](auto... stripeIdx) { - return M(stripeArg, func(lhs.stripes[stripeIdx], rhs)...); - }); + Mat m; + for (size_t stripeIdx = 0; stripeIdx < Mat::stripeCount; ++stripeIdx) { + m.stripes[stripeIdx] = func(lhs.stripes[stripeIdx], rhs); + } + return m; } @@ -135,11 +125,13 @@ namespace impl { const Matrix& rhs, Func&& func) { using T = std::invoke_result_t; - using M = Matrix; + using Mat = Matrix; - return ::mathter::LoopUnroll([&lhs, &rhs, &func](auto... stripeIdx) { - return M(stripeArg, func(lhs, rhs.stripes[stripeIdx])...); - }); + Mat m; + for (size_t stripeIdx = 0; stripeIdx < Mat::stripeCount; ++stripeIdx) { + m.stripes[stripeIdx] = func(lhs, rhs.stripes[stripeIdx]); + } + return m; } template auto Multiply(const Vector& lhs, const Matrix& rhs) { - return ::mathter::LoopUnroll([lhs, rhs](auto... runIdx) { - return (... + (lhs(runIdx) * rhs.Row(runIdx))); - }); + auto v = lhs(0) * rhs.Row(0); + using Vec = std::decay_t; + for (size_t runIdx = 1; runIdx < Match; ++runIdx) { + v = MultiplyAdd(Vec(lhs(runIdx)), rhs.Row(runIdx), v); + } + return v; } template auto Multiply(const Vector& lhs, const Matrix& rhs) { - return ::mathter::LoopUnroll([lhs, rhs](auto... elementIdx) { - return Vector(Sum(lhs * rhs.Column(elementIdx))...); - }); + using Vec = Vector, Columns2, Packed1 && Packed2>; + Vec v; + for (size_t elementIdx = 0; elementIdx < Columns2; ++elementIdx) { + v[elementIdx] = Sum(lhs * rhs.Column(elementIdx)); + } + return v; } template auto Multiply(const Matrix& lhs, const Vector& rhs) { - return ::mathter::LoopUnroll([lhs, rhs](auto... runIdx) { - return (... + (lhs.Column(runIdx) * rhs(runIdx))); - }); + return Multiply(rhs, FlipLayoutAndOrder(lhs)); } template auto Multiply(const Matrix& lhs, const Vector& rhs) { - return ::mathter::LoopUnroll([lhs, rhs](auto... elementIdx) { - return Vector(Sum(lhs.Row(elementIdx) * rhs)...); - }); + return Multiply(rhs, FlipLayoutAndOrder(lhs)); } } // namespace impl @@ -402,15 +396,19 @@ auto& operator*=(Vector& lhs, //------------------------------------------------------------------------------ template -auto operator+(const Matrix& mat) { - return mat; +auto operator+(const Matrix& arg) { + return arg; } template -auto operator-(const Matrix& mat) { - return ::mathter::LoopUnroll::stripeCount>([&mat](auto... stripeIdx) { - return Matrix(stripeArg, -mat.stripes[stripeIdx]...); - }); +auto operator-(const Matrix& arg) { + using Mat = std::decay_t; + + Mat m; + for (size_t i = 0; i < Mat::stripeCount; ++i) { + m.stripes[i] = -arg.stripes[i]; + } + return m; } } // namespace mathter \ No newline at end of file diff --git a/include/Mathter/Matrix/Cast.hpp b/include/Mathter/Matrix/Cast.hpp index ea5d03c..39b2202 100644 --- a/include/Mathter/Matrix/Cast.hpp +++ b/include/Mathter/Matrix/Cast.hpp @@ -5,7 +5,7 @@ #pragma once -#include "../Common/LoopUtil.hpp" +#include "../Common/OptimizationUtil.hpp" #include "Matrix.hpp" @@ -45,7 +45,9 @@ auto FlipOrder(const Matrix& m, std::in return Mat(m); } else { - return LoopUnroll([&m](auto... indices) { return Mat(stripeArg, m.stripes[indices]...); }); + Mat out; + out.stripes = m.stripes; + return out; } } @@ -60,7 +62,9 @@ auto SetOrder(const Matrix& m, std::int return Mat(m); } else { - return LoopUnroll([&m](auto... indices) { return Mat(stripeArg, m.stripes[indices]...); }); + Mat out; + out.stripes = m.stripes; + return out; } } diff --git a/include/Mathter/Transforms/ZeroBuilder.hpp b/include/Mathter/Transforms/ZeroBuilder.hpp index 4316852..d0163cb 100644 --- a/include/Mathter/Transforms/ZeroBuilder.hpp +++ b/include/Mathter/Transforms/ZeroBuilder.hpp @@ -6,7 +6,7 @@ #pragma once -#include "../Common/LoopUtil.hpp" +#include "../Common/OptimizationUtil.hpp" #include "../Matrix/Matrix.hpp" #include "../Quaternion/Quaternion.hpp" #include "../Vector/Vector.hpp" diff --git a/include/Mathter/Vector/Vector.hpp b/include/Mathter/Vector/Vector.hpp index c792d73..1c73ec6 100644 --- a/include/Mathter/Vector/Vector.hpp +++ b/include/Mathter/Vector/Vector.hpp @@ -6,7 +6,7 @@ #pragma once #include "../Common/DeterministicInitializer.hpp" -#include "../Common/LoopUtil.hpp" +#include "../Common/OptimizationUtil.hpp" #include "../Common/TypeTraits.hpp" #include "SIMDUtil.hpp" #include "Swizzle.hpp"