From 2eea6a579bb183a7a28a3d3d9aefc49d5c0820bc Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 23 Aug 2021 13:23:40 +0200 Subject: [PATCH] MSL: Consider that function/private variables can be block-like. Handles a special case with array copies. The implementation of this fix is not perfect, but should be good enough for the time being. --- ...py-memory-block-like-thread-local.asm.frag | 170 ++++++++++++++++++ ...py-memory-block-like-thread-local.asm.frag | 50 ++++++ spirv_msl.cpp | 14 ++ 3 files changed, 234 insertions(+) create mode 100644 reference/shaders-msl-no-opt/asm/frag/copy-memory-block-like-thread-local.asm.frag create mode 100644 shaders-msl-no-opt/asm/frag/copy-memory-block-like-thread-local.asm.frag diff --git a/reference/shaders-msl-no-opt/asm/frag/copy-memory-block-like-thread-local.asm.frag b/reference/shaders-msl-no-opt/asm/frag/copy-memory-block-like-thread-local.asm.frag new file mode 100644 index 000000000..5af18ebf3 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/copy-memory-block-like-thread-local.asm.frag @@ -0,0 +1,170 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include <metal_stdlib> +#include <simd/simd.h> + +using namespace metal; + +template<typename T, size_t Num> +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct _3 +{ + float _m0[4]; +}; + +template<typename T, uint A> +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template<typename T, uint A> +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template<typename T, uint A> +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template<typename T, uint A> +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template<typename T, uint A> +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template<typename T, uint A> +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template<typename T, uint A> +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template<typename T, uint A> +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant 
T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template<typename T, uint A> +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template<typename T, uint A> +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template<typename T, uint A> +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template<typename T, uint A> +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +fragment void main0() +{ + spvUnsafeArray<float, 4> _20; + _20[0u] = 0.0; + _20[1u] = 0.0; + _20[2u] = 0.0; + _20[3u] = 0.0; + _3 _19; + spvArrayCopyFromStackToStack1(_19._m0, _20.elements); +} + diff --git a/shaders-msl-no-opt/asm/frag/copy-memory-block-like-thread-local.asm.frag b/shaders-msl-no-opt/asm/frag/copy-memory-block-like-thread-local.asm.frag new file mode 100644 index 000000000..ebab7fd0c --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/copy-memory-block-like-thread-local.asm.frag @@ -0,0 +1,50 @@ +; SPIR-V +; Version: 1.3 +; Generator: Google rspirv; 0 +; Bound: 43 +; Schema: 0 + OpCapability ImageQuery + OpCapability Int8 + OpCapability RuntimeDescriptorArray + OpCapability StorageImageWriteWithoutFormat + OpCapability Shader + OpCapability VulkanMemoryModel + OpExtension "SPV_EXT_descriptor_indexing" + OpExtension "SPV_KHR_vulkan_memory_model" + OpMemoryModel Logical Vulkan + OpEntryPoint Fragment %1 "main" + OpExecutionMode %1 OriginUpperLeft + OpDecorate %2 ArrayStride 4 + OpMemberDecorate %3 0 Offset 0 + %4 = OpTypeInt 32 0 + %5 = OpTypeFloat 32 + %6 = OpTypePointer Function %5 + %7 = OpTypeVoid + %8 = OpTypeFunction %7 + %9 = OpConstant %4 0 + %10 = OpConstant %4 1 + %11 = OpConstant %4 2 + %12 = 
OpConstant %4 4 + %13 = OpConstant %4 3 + %14 = OpConstant %5 0 + %2 = OpTypeArray %5 %12 + %15 = OpTypePointer Function %2 + %16 = OpTypeFunction %7 %15 + %3 = OpTypeStruct %2 + %17 = OpTypePointer Function %3 + %1 = OpFunction %7 None %8 + %31 = OpLabel + %33 = OpVariable %17 Function + %34 = OpVariable %15 Function + %39 = OpAccessChain %6 %34 %9 + OpStore %39 %14 + %40 = OpAccessChain %6 %34 %10 + OpStore %40 %14 + %41 = OpAccessChain %6 %34 %11 + OpStore %41 %14 + %42 = OpAccessChain %6 %34 %13 + OpStore %42 %14 + %37 = OpAccessChain %15 %33 %9 + OpCopyMemory %37 %34 + OpReturn + OpFunctionEnd diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 1e4aa4bdd..56794c32e 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -8511,13 +8511,27 @@ void CompilerMSL::emit_array_copy(const string &lhs, uint32_t lhs_id, uint32_t r // Special considerations for stage IO variables. // If the variable is actually backed by non-user visible device storage, we use array templates for those. + // + // Another special consideration is given to thread local variables which happen to have Offset decorations + // applied to them. Block-like types do not use array templates, so we need to force POD path if we detect + // these scenarios. This check isn't perfect since it would be technically possible to mix and match these things, + // and for a fully correct solution we might have to track array template state through access chains as well, + // but for all reasonable use cases, this should suffice. + // This special case should also only apply to Function/Private storage classes. + // We should not check backing variable for temporaries. 
auto *lhs_var = maybe_get_backing_variable(lhs_id); if (lhs_var && lhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(lhs_var->storage)) lhs_is_array_template = true; + else if (lhs_var && (lhs_storage == StorageClassFunction || lhs_storage == StorageClassPrivate) && + type_is_block_like(get<SPIRType>(lhs_var->basetype))) + lhs_is_array_template = false; auto *rhs_var = maybe_get_backing_variable(rhs_id); if (rhs_var && rhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(rhs_var->storage)) rhs_is_array_template = true; + else if (rhs_var && (rhs_storage == StorageClassFunction || rhs_storage == StorageClassPrivate) && + type_is_block_like(get<SPIRType>(rhs_var->basetype))) + rhs_is_array_template = false; // If threadgroup storage qualifiers are *not* used: // Avoid spvCopy* wrapper functions; Otherwise, spvUnsafeArray<> template cannot be used with that storage qualifier.