[x265] [PATCH] p2s-sve.S: Fix filterPixelToShort_48x64 for longer SVE vectors
George Steed
george.steed at arm.com
Wed Apr 16 12:51:53 UTC 2025
Ping :)
On Tue, Mar 11, 2025 at 04:11:17PM +0000, George Steed wrote:
> The existing filterPixelToShort_48x64 high bit-depth code for vectors of
> at least 256 bits uses predication to ensure that only the low 256 bits
> of the vector are operated on. However, the address arithmetic is
> performed relative to the vector length (the "#N, mul vl" addressing
> form), so for vectors of 512 bits or longer the offsets are incorrect:
> with 512-bit vectors, "#1, mul vl" advances by 64 bytes rather than 32.
> Since we are operating on the fixed low 256 bits of the vectors here,
> fix the code by hard-coding the address offsets to multiples of 32 bytes.
> ---
> source/common/aarch64/p2s-sve.S | 10 ++++++----
> 1 file changed, 6 insertions(+), 4 deletions(-)
>
> diff --git a/source/common/aarch64/p2s-sve.S b/source/common/aarch64/p2s-sve.S
> index 85bb14b3d..11e63ddab 100644
> --- a/source/common/aarch64/p2s-sve.S
> +++ b/source/common/aarch64/p2s-sve.S
> @@ -401,10 +401,12 @@ function PFX(filterPixelToShort_48x64_sve)
> ret
> .vl_gt_16_filterPixelToShort_high_48x64:
> ptrue p0.h, vl16
> + mov x4, #16
> + mov x5, #32
> .rept 64
> ld1h {z0.h}, p0/z, [x0]
> - ld1h {z1.h}, p0/z, [x0, #1, mul vl]
> - ld1h {z2.h}, p0/z, [x0, #2, mul vl]
> + ld1h {z1.h}, p0/z, [x0, x4, lsl #1]
> + ld1h {z2.h}, p0/z, [x0, x5, lsl #1]
> add x0, x0, x1
> lsl z0.h, p0/m, z0.h, #P2S_SHIFT
> lsl z1.h, p0/m, z1.h, #P2S_SHIFT
> @@ -413,8 +415,8 @@ function PFX(filterPixelToShort_48x64_sve)
> add z1.h, p0/m, z1.h, z31.h
> add z2.h, p0/m, z2.h, z31.h
> st1h {z0.h}, p0, [x2]
> - st1h {z1.h}, p0, [x2, #1, mul vl]
> - st1h {z2.h}, p0, [x2, #2, mul vl]
> + st1h {z1.h}, p0, [x2, x4, lsl #1]
> + st1h {z2.h}, p0, [x2, x5, lsl #1]
> add x2, x2, x3
> .endr
> ret
> --
> 2.34.1
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-p2s-sve.S-Fix-filterPixelToShort_48x64-for-longer-SV.patch
Type: text/x-diff
Size: 2021 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20250416/9b4e95bc/attachment.patch>
More information about the x265-devel mailing list