[x265] [PATCH 2/6] pixel-util-sve2.S: Fix accumulators in pixel_var_*_sve2
George Steed
george.steed at arm.com
Mon Jan 6 17:16:43 UTC 2025
The z1 accumulator register in pixel_var_16x16_sve2 was previously left
uninitialized, leading to incorrect results when running with longer SVE
vectors. Initialize it to zero.
In pixel_var_64x64_sve2, the z2 register is used as an accumulator when
running with longer SVE vector lengths; however, the existing code
mistakenly initializes z1 instead. Adjust the initialization code to
correctly zero the z2 register.
Co-authored-by: Hari Limaye <hari.limaye at arm.com>
---
source/common/aarch64/pixel-util-sve2.S | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/source/common/aarch64/pixel-util-sve2.S b/source/common/aarch64/pixel-util-sve2.S
index 2af5d63c1..00aa2f984 100644
--- a/source/common/aarch64/pixel-util-sve2.S
+++ b/source/common/aarch64/pixel-util-sve2.S
@@ -74,6 +74,7 @@ function PFX(pixel_var_16x16_sve2)
.vl_gt_16_pixel_var_16x16:
ptrue p0.h, vl16
mov z0.d, #0
+ mov z1.d, #0
.rept 16
ld1b {z4.h}, p0/z, [x0]
add x0, x0, x1
@@ -194,7 +195,7 @@ function PFX(pixel_var_64x64_sve2)
bgt .vl_gt_112_pixel_var_64x64
ptrue p0.b, vl64
mov z0.d, #0
- mov z1.d, #0
+ mov z2.d, #0
.rept 64
ld1b {z4.b}, p0/z, [x0]
add x0, x0, x1
--
2.34.1
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0002-pixel-util-sve2.S-Fix-accumulators-in-pixel_var_-_sv.patch
Type: text/x-diff
Size: 1797 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20250106/b82ce9ac/attachment.patch>
More information about the x265-devel
mailing list