[x265] [PATCH 1/4] AArch64: Remove SVE assembly implementation of quant

Mon Aug 12 21:15:09 UTC 2024

Remove the SVE assembly implementation of the quant primitive, as it
gives no performance improvement over the Neon implementation.
---
 source/common/aarch64/asm-primitives.cpp |  3 --
 source/common/aarch64/fun-decls.h        |  2 -
 source/common/aarch64/pixel-util-sve.S   | 57 ------------------------
 3 files changed, 62 deletions(-)

diff --git a/source/common/aarch64/asm-primitives.cpp b/source/common/aarch64/asm-primitives.cpp
index 32d75ee35..33c2e5864 100644
--- a/source/common/aarch64/asm-primitives.cpp
+++ b/source/common/aarch64/asm-primitives.cpp
@@ -1030,9 +1030,6 @@ void setupSvePrimitives(EncoderPrimitives &p)
     p.cu[BLOCK_4x4].sa8d   = PFX(pixel_satd_4x4_sve);
     p.chroma[X265_CSP_I420].cu[BLOCK_8x8].sa8d = PFX(pixel_satd_4x4_sve);
 #endif
-
-    // quant
-    p.quant = PFX(quant_sve);
 }
 #endif
 
diff --git a/source/common/aarch64/fun-decls.h b/source/common/aarch64/fun-decls.h
index 363725fef..09ec1755a 100644
--- a/source/common/aarch64/fun-decls.h
+++ b/source/common/aarch64/fun-decls.h
@@ -266,8 +266,6 @@ int PFX(pixel_satd_32x16_sve(const pixel* pix1, intptr_t stride_pix1, const pixe
 int PFX(pixel_satd_32x32_sve(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
 int PFX(pixel_satd_64x48_sve(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
 
-uint32_t PFX(quant_sve)(const int16_t* coef, const int32_t* quantCoeff, int32_t* deltaU, int16_t* qCoef, int qBits, int add, int numCoeff);
-
 void PFX(dequant_scaling_sve2(const int16_t* quantCoef, const int32_t* deQuantCoef, int16_t* coef, int num, int per, int shift));
 void PFX(dequant_normal_sve2(const int16_t* quantCoef, int16_t* coef, int num, int scale, int shift));
 
diff --git a/source/common/aarch64/pixel-util-sve.S b/source/common/aarch64/pixel-util-sve.S
index c1d6b4129..3d073d42e 100644
--- a/source/common/aarch64/pixel-util-sve.S
+++ b/source/common/aarch64/pixel-util-sve.S
@@ -314,60 +314,3 @@ function PFX(pixel_satd_64x48_sve)
     mov             x0, x7
     ret             x10
 endfunc
-
-/********* ssim ***********/
-// uint32_t quant_c(const int16_t* coef, const int32_t* quantCoeff, int32_t* deltaU, int16_t* qCoef, int qBits, int add, int numCoeff)
-// No need to fully use sve instructions for this function
-function PFX(quant_sve)
-    mov             w9, #1
-    lsl             w9, w9, w4
-    mov             z0.s, w9
-    neg             w9, w4
-    mov             z1.s, w9
-    add             w9, w9, #8
-    mov             z2.s, w9
-    mov             z3.s, w5
-
-    lsr             w6, w6, #2
-    eor             z4.d, z4.d, z4.d
-    eor             w10, w10, w10
-    eor             z17.d, z17.d, z17.d
-
-.Loop_quant_sve:
-    ld1             {v18.4h}, [x0], #8
-    ld1             {v7.4s}, [x1], #16
-    sxtl            v6.4s, v18.4h
-
-    cmlt            v5.4s, v6.4s, #0
-
-    abs             v6.4s, v6.4s
-
-
-    mul             v6.4s, v6.4s, v7.4s
-
-    add             v7.4s, v6.4s, v3.4s
-    sshl            v7.4s, v7.4s, v1.4s
-
-    mls             v6.4s, v7.4s, v0.s[0]
-    sshl            v16.4s, v6.4s, v2.4s
-    st1             {v16.4s}, [x2], #16
-
-    // numsig
-    cmeq            v16.4s, v7.4s, v17.4s
-    add             v4.4s, v4.4s, v16.4s
-    add             w10, w10, #4
-
-    // level *= sign
-    eor             z16.d, z7.d, z5.d
-    sub             v16.4s, v16.4s, v5.4s
-    sqxtn           v5.4h, v16.4s
-    st1             {v5.4h}, [x3], #8
-
-    subs            w6, w6, #1
-    b.ne             .Loop_quant_sve
-
-    addv            s4, v4.4s
-    mov             w9, v4.s[0]
-    add             w0, w10, w9
-    ret
-endfunc
-- 
2.42.1

-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-AArch64-Remove-SVE-assembly-implementation-of-quant.patch
Type: text/x-patch
Size: 4130 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20240812/b7dedc24/attachment.bin>