From af4e4ff018112712ea3664ffb41355fab2ed1b87 Mon Sep 17 00:00:00 2001 From: Nicolas Portmann Date: Sun, 14 Feb 2021 02:17:05 +0100 Subject: [PATCH 1/2] Fix vectorized subsampling --- .../Formats/Jpeg/Components/Block8x8F.cs | 41 ++++++++++--------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index 56afae68c7..84c59cd989 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -494,32 +494,35 @@ private static void Scale16X16To8X8Vectorized(ref Block8x8F destination, ReadOnl var f2 = Vector256.Create(2f); var f025 = Vector256.Create(0.25f); Vector256 switchInnerDoubleWords = Unsafe.As>(ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskSwitchInnerDWords8x32)); - - ref Vector256 in1 = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); - ref Vector256 in2 = ref Unsafe.As>(ref Unsafe.Add(ref MemoryMarshal.GetReference(source), 1)); ref Vector256 destRef = ref Unsafe.As>(ref destination); - for (int i = 0; i < 8; i++) + for (int i = 0; i < 2; i++) { - Vector256 a = in1; - Vector256 b = Unsafe.Add(ref in1, 1); - Vector256 c = in2; - Vector256 d = Unsafe.Add(ref in2, 1); + ref Vector256 in1 = ref Unsafe.As>(ref Unsafe.Add(ref MemoryMarshal.GetReference(source), 2 * i)); + ref Vector256 in2 = ref Unsafe.As>(ref Unsafe.Add(ref MemoryMarshal.GetReference(source), (2 * i) + 1)); + + for (int j = 0; j < 4; j++) + { + Vector256 a = in1; + Vector256 b = Unsafe.Add(ref in1, 1); + Vector256 c = in2; + Vector256 d = Unsafe.Add(ref in2, 1); - Vector256 calc1 = Avx.Shuffle(a, c, 0b10_00_10_00); - Vector256 calc2 = Avx.Shuffle(a, c, 0b11_01_11_01); - Vector256 calc3 = Avx.Shuffle(b, d, 0b10_00_10_00); - Vector256 calc4 = Avx.Shuffle(b, d, 0b11_01_11_01); + Vector256 calc1 = Avx.Shuffle(a, c, 0b10_00_10_00); + Vector256 calc2 = Avx.Shuffle(a, c, 0b11_01_11_01); + Vector256 calc3 = Avx.Shuffle(b, d, 0b10_00_10_00); + Vector256 calc4 = Avx.Shuffle(b, d, 0b11_01_11_01); - Vector256 sum = Avx.Add(Avx.Add(calc1, calc2), Avx.Add(calc3, calc4)); - Vector256 add = Avx.Add(sum, f2); - Vector256 res = Avx.Multiply(add, f025); + Vector256 sum = Avx.Add(Avx.Add(calc1, calc2), Avx.Add(calc3, calc4)); + Vector256 add = Avx.Add(sum, f2); + Vector256 res = Avx.Multiply(add, f025); - destRef = Avx2.PermuteVar8x32(res, switchInnerDoubleWords); - destRef = ref Unsafe.Add(ref destRef, 1); + destRef = Avx2.PermuteVar8x32(res, switchInnerDoubleWords); + destRef = ref Unsafe.Add(ref destRef, 1); - in1 = ref Unsafe.Add(ref in1, 2); - in2 = ref Unsafe.Add(ref in2, 2); + in1 = ref Unsafe.Add(ref in1, 2); + in2 = ref Unsafe.Add(ref in2, 2); + } } #endif } From f7d467548241b80fddcda4c0017823ebda40fb96 Mon Sep 17 00:00:00 2001 From: Nicolas Portmann Date: Sun, 14 Feb 2021 02:50:34 +0100 Subject: [PATCH 2/2] Cleanup --- src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index 84c59cd989..2d19f5ce26 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -501,12 +501,12 @@ private static void Scale16X16To8X8Vectorized(ref Block8x8F destination, ReadOnl ref Vector256 in1 = ref Unsafe.As>(ref Unsafe.Add(ref MemoryMarshal.GetReference(source), 2 * i)); ref Vector256 in2 = ref Unsafe.As>(ref Unsafe.Add(ref MemoryMarshal.GetReference(source), (2 * i) + 1)); - for (int j = 0; j < 4; j++) + for (int j = 0; j < 8; j += 2) { - Vector256 a = in1; - Vector256 b = Unsafe.Add(ref in1, 1); - Vector256 c = in2; - Vector256 d = Unsafe.Add(ref in2, 1); + Vector256 a = Unsafe.Add(ref in1, j); + Vector256 b = Unsafe.Add(ref in1, j + 1); + Vector256 c = Unsafe.Add(ref in2, j); + Vector256 d = Unsafe.Add(ref in2, j + 1); Vector256 calc1 = Avx.Shuffle(a, c, 0b10_00_10_00); Vector256 calc2 = Avx.Shuffle(a, c, 0b11_01_11_01); @@ -519,9 +519,6 @@ private static void Scale16X16To8X8Vectorized(ref Block8x8F destination, ReadOnl destRef = Avx2.PermuteVar8x32(res, switchInnerDoubleWords); destRef = ref Unsafe.Add(ref destRef, 1); - - in1 = ref Unsafe.Add(ref in1, 2); - in2 = ref Unsafe.Add(ref in2, 2); } } #endif