[x265] [PATCH 6/6] pixel-util-sve2.S: Fix normFact/ssimDist64 for longer SVE vectors

Mon Jan 6 17:18:01 UTC 2025

The existing code for vectors of at least 1024 bits uses predication to
ensure that only the low 1024 bits of the vector are operated on.
However, the address arithmetic is performed relative to the vector
length, so for 2048-bit vectors it is incorrect: the "#1, mul vl" offset
advances by 64 bytes instead of the intended 32. Since we are operating
on the fixed low 1024 bits of the vectors here, hard-code the address
offset to 32 bytes to fix this.
---
 source/common/aarch64/pixel-util-sve2.S | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/source/common/aarch64/pixel-util-sve2.S b/source/common/aarch64/pixel-util-sve2.S
index 5b872f437..c7ff0b35e 100644
--- a/source/common/aarch64/pixel-util-sve2.S
+++ b/source/common/aarch64/pixel-util-sve2.S
@@ -1339,12 +1339,13 @@ function PFX(ssimDist64_sve2)
 .vl_gt_112_ssimDist64:
     ssimDist_start_sve2
     ptrue           p0.s, vl32
+    mov             x5, #32
 .vl_gt_112_loop_ssimDist64_sve2:
     sub             w12, w12, #1
     ld1b            {z2.s}, p0/z, [x0]
-    ld1b            {z3.s}, p0/z, [x0, #1, mul vl]
+    ld1b            {z3.s}, p0/z, [x0, x5]
     ld1b            {z23.s}, p0/z, [x2]
-    ld1b            {z24.s}, p0/z, [x2, #1, mul vl]
+    ld1b            {z24.s}, p0/z, [x2, x5]
     ssimDist_1_sve2 z2, z3, z23, z24
     add             x0, x0, x1
     add             x2, x2, x3
@@ -1596,10 +1597,11 @@ function PFX(normFact64_sve2)
 .vl_gt_112_normFact64:
     normFact_start_sve2
     ptrue           p0.s, vl32
+    mov             x4, #32
 .vl_gt_112_loop_normFact64_sve2:
     sub             w12, w12, #1
     ld1b            {z4.s}, p0/z, [x0]
-    ld1b            {z5.s}, p0/z, [x0, #1, mul vl]
+    ld1b            {z5.s}, p0/z, [x0, x4]
     normFact_1_sve2 z4, z5
     add             x0, x0, x1
     cbnz            w12, .vl_gt_112_loop_normFact64_sve2
-- 
2.34.1

-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0006-pixel-util-sve2.S-Fix-normFact-ssimDist64-for-longer.patch
Type: text/x-diff
Size: 2256 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20250106/dc6dcae6/attachment.patch>