Skip to content

Commit

Permalink
Merge pull request #1551 from tkp1n/bugfix/fix-vectorized-subsampling
Browse files: browse the repository at this point in the history
Fix vectorized 4:2:0 subsampling
  • Loading branch information
JimBobSquarePants authored Feb 15, 2021
2 parents 2d88f2c + f7d4675 commit 523cc22
Showing 1 changed file with 22 additions and 22 deletions.
44 changes: 22 additions & 22 deletions src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
Original file line number | Diff line number | Diff line change
Expand Up @@ -494,32 +494,32 @@ private static void Scale16X16To8X8Vectorized(ref Block8x8F destination, ReadOnl
var f2 = Vector256.Create(2f);
var f025 = Vector256.Create(0.25f);
Vector256<int> switchInnerDoubleWords = Unsafe.As<byte, Vector256<int>>(ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskSwitchInnerDWords8x32));

ref Vector256<float> in1 = ref Unsafe.As<Block8x8F, Vector256<float>>(ref MemoryMarshal.GetReference(source));
ref Vector256<float> in2 = ref Unsafe.As<Block8x8F, Vector256<float>>(ref Unsafe.Add(ref MemoryMarshal.GetReference(source), 1));
ref Vector256<float> destRef = ref Unsafe.As<Block8x8F, Vector256<float>>(ref destination);

for (int i = 0; i < 8; i++)
for (int i = 0; i < 2; i++)
{
Vector256<float> a = in1;
Vector256<float> b = Unsafe.Add(ref in1, 1);
Vector256<float> c = in2;
Vector256<float> d = Unsafe.Add(ref in2, 1);

Vector256<float> calc1 = Avx.Shuffle(a, c, 0b10_00_10_00);
Vector256<float> calc2 = Avx.Shuffle(a, c, 0b11_01_11_01);
Vector256<float> calc3 = Avx.Shuffle(b, d, 0b10_00_10_00);
Vector256<float> calc4 = Avx.Shuffle(b, d, 0b11_01_11_01);

Vector256<float> sum = Avx.Add(Avx.Add(calc1, calc2), Avx.Add(calc3, calc4));
Vector256<float> add = Avx.Add(sum, f2);
Vector256<float> res = Avx.Multiply(add, f025);
ref Vector256<float> in1 = ref Unsafe.As<Block8x8F, Vector256<float>>(ref Unsafe.Add(ref MemoryMarshal.GetReference(source), 2 * i));
ref Vector256<float> in2 = ref Unsafe.As<Block8x8F, Vector256<float>>(ref Unsafe.Add(ref MemoryMarshal.GetReference(source), (2 * i) + 1));

destRef = Avx2.PermuteVar8x32(res, switchInnerDoubleWords);
destRef = ref Unsafe.Add(ref destRef, 1);

in1 = ref Unsafe.Add(ref in1, 2);
in2 = ref Unsafe.Add(ref in2, 2);
for (int j = 0; j < 8; j += 2)
{
Vector256<float> a = Unsafe.Add(ref in1, j);
Vector256<float> b = Unsafe.Add(ref in1, j + 1);
Vector256<float> c = Unsafe.Add(ref in2, j);
Vector256<float> d = Unsafe.Add(ref in2, j + 1);

Vector256<float> calc1 = Avx.Shuffle(a, c, 0b10_00_10_00);
Vector256<float> calc2 = Avx.Shuffle(a, c, 0b11_01_11_01);
Vector256<float> calc3 = Avx.Shuffle(b, d, 0b10_00_10_00);
Vector256<float> calc4 = Avx.Shuffle(b, d, 0b11_01_11_01);

Vector256<float> sum = Avx.Add(Avx.Add(calc1, calc2), Avx.Add(calc3, calc4));
Vector256<float> add = Avx.Add(sum, f2);
Vector256<float> res = Avx.Multiply(add, f025);

destRef = Avx2.PermuteVar8x32(res, switchInnerDoubleWords);
destRef = ref Unsafe.Add(ref destRef, 1);
}
}
#endif
}
Expand Down

0 comments on commit 523cc22

Please sign in to comment.