[x265] [PATCH 12/12] AArch64: Use PFX macro for all assembly functions

Pop, Sebastian spop at amazon.com
Fri May 3 22:22:07 UTC 2024


The 12 patches in this series look good.
Thank you for the patches.

Sebastian

On 5/2/24, 4:24 PM, "x265-devel on behalf of Hari Limaye" <x265-devel-bounces at videolan.org on behalf of hari.limaye at arm.com> wrote:


CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.






Use the PFX macro to wrap all AArch64 function declarations and
definitions, instead of hardcoding the prefix as `x265_`, to fix
compilation when X265_NS is changed.
---
source/common/aarch64/fun-decls.h | 140 +++++++++++++-------------
source/common/aarch64/ipfilter-sve2.S | 16 +--
source/common/aarch64/ipfilter.S | 30 +++---
3 files changed, 93 insertions(+), 93 deletions(-)


diff --git a/source/common/aarch64/fun-decls.h b/source/common/aarch64/fun-decls.h
index 1a1f3b489..ec17deda2 100644
--- a/source/common/aarch64/fun-decls.h
+++ b/source/common/aarch64/fun-decls.h
@@ -155,69 +155,69 @@ DECLS(sve);
DECLS(sve2);




-void x265_pixel_planecopy_cp_neon(const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift);
+void PFX(pixel_planecopy_cp_neon(const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift));


-uint64_t x265_pixel_var_8x8_neon(const pixel* pix, intptr_t stride);
-uint64_t x265_pixel_var_16x16_neon(const pixel* pix, intptr_t stride);
-uint64_t x265_pixel_var_32x32_neon(const pixel* pix, intptr_t stride);
-uint64_t x265_pixel_var_64x64_neon(const pixel* pix, intptr_t stride);
+uint64_t PFX(pixel_var_8x8_neon(const pixel* pix, intptr_t stride));
+uint64_t PFX(pixel_var_16x16_neon(const pixel* pix, intptr_t stride));
+uint64_t PFX(pixel_var_32x32_neon(const pixel* pix, intptr_t stride));
+uint64_t PFX(pixel_var_64x64_neon(const pixel* pix, intptr_t stride));


-void x265_getResidual4_neon(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
-void x265_getResidual8_neon(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
-void x265_getResidual16_neon(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
-void x265_getResidual32_neon(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
+void PFX(getResidual4_neon(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride));
+void PFX(getResidual8_neon(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride));
+void PFX(getResidual16_neon(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride));
+void PFX(getResidual32_neon(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride));


-void x265_scale1D_128to64_neon(pixel *dst, const pixel *src);
-void x265_scale2D_64to32_neon(pixel* dst, const pixel* src, intptr_t stride);
+void PFX(scale1D_128to64_neon(pixel *dst, const pixel *src));
+void PFX(scale2D_64to32_neon(pixel* dst, const pixel* src, intptr_t stride));


-int x265_pixel_satd_4x4_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_4x8_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_4x16_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_4x32_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_8x4_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_8x8_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_8x12_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_8x16_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_8x32_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_8x64_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_12x16_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_12x32_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_16x4_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_16x8_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_16x12_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_16x16_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_16x24_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_16x32_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_16x64_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_24x32_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_24x64_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_32x8_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_32x16_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_32x24_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_32x32_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_32x48_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_32x64_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_48x64_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_64x16_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_64x32_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_64x48_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_64x64_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
+int PFX(pixel_satd_4x4_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_4x8_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_4x16_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_4x32_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_8x4_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_8x8_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_8x12_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_8x16_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_8x32_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_8x64_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_12x16_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_12x32_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_16x4_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_16x8_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_16x12_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_16x16_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_16x24_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_16x32_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_16x64_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_24x32_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_24x64_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_32x8_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_32x16_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_32x24_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_32x32_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_32x48_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_32x64_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_48x64_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_64x16_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_64x32_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_64x48_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_64x64_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));


-int x265_pixel_sa8d_8x8_neon(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2);
-int x265_pixel_sa8d_8x16_neon(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2);
-int x265_pixel_sa8d_16x16_neon(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2);
-int x265_pixel_sa8d_16x32_neon(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2);
-int x265_pixel_sa8d_32x32_neon(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2);
-int x265_pixel_sa8d_32x64_neon(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2);
-int x265_pixel_sa8d_64x64_neon(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2);
+int PFX(pixel_sa8d_8x8_neon(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2));
+int PFX(pixel_sa8d_8x16_neon(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2));
+int PFX(pixel_sa8d_16x16_neon(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2));
+int PFX(pixel_sa8d_16x32_neon(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2));
+int PFX(pixel_sa8d_32x32_neon(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2));
+int PFX(pixel_sa8d_32x64_neon(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2));
+int PFX(pixel_sa8d_64x64_neon(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2));


uint32_t PFX(quant_neon)(const int16_t* coef, const int32_t* quantCoeff, int32_t* deltaU, int16_t* qCoef, int qBits, int add, int numCoeff);
uint32_t PFX(nquant_neon)(const int16_t* coef, const int32_t* quantCoeff, int16_t* qCoef, int qBits, int add, int numCoeff);


-void x265_dequant_scaling_neon(const int16_t* quantCoef, const int32_t* deQuantCoef, int16_t* coef, int num, int per, int shift);
-void x265_dequant_normal_neon(const int16_t* quantCoef, int16_t* coef, int num, int scale, int shift);
+void PFX(dequant_scaling_neon(const int16_t* quantCoef, const int32_t* deQuantCoef, int16_t* coef, int num, int per, int shift));
+void PFX(dequant_normal_neon(const int16_t* quantCoef, int16_t* coef, int num, int scale, int shift));


-void x265_ssim_4x4x2_core_neon(const pixel* pix1, intptr_t stride1, const pixel* pix2, intptr_t stride2, int sums[2][4]);
+void PFX(ssim_4x4x2_core_neon(const pixel* pix1, intptr_t stride1, const pixel* pix2, intptr_t stride2, int sums[2][4]));


int PFX(psyCost_4x4_neon)(const pixel* source, intptr_t sstride, const pixel* recon, intptr_t rstride);
int PFX(psyCost_8x8_neon)(const pixel* source, intptr_t sstride, const pixel* recon, intptr_t rstride);
@@ -226,30 +226,30 @@ void PFX(weight_sp_neon)(const int16_t* src, pixel* dst, intptr_t srcStride, int
int PFX(scanPosLast_neon)(const uint16_t *scan, const coeff_t *coeff, uint16_t *coeffSign, uint16_t *coeffFlag, uint8_t *coeffNum, int numSig, const uint16_t* scanCG4x4, const int trSize);
uint32_t PFX(costCoeffNxN_neon)(const uint16_t *scan, const coeff_t *coeff, intptr_t trSize, uint16_t *absCoeff, const uint8_t *tabSigCtx, uint32_t scanFlagMask, uint8_t *baseCtx, int offset, int scanPosSigOff, int subPosBase);


-uint64_t x265_pixel_var_8x8_sve2(const pixel* pix, intptr_t stride);
-uint64_t x265_pixel_var_16x16_sve2(const pixel* pix, intptr_t stride);
-uint64_t x265_pixel_var_32x32_sve2(const pixel* pix, intptr_t stride);
-uint64_t x265_pixel_var_64x64_sve2(const pixel* pix, intptr_t stride);
+uint64_t PFX(pixel_var_8x8_sve2(const pixel* pix, intptr_t stride));
+uint64_t PFX(pixel_var_16x16_sve2(const pixel* pix, intptr_t stride));
+uint64_t PFX(pixel_var_32x32_sve2(const pixel* pix, intptr_t stride));
+uint64_t PFX(pixel_var_64x64_sve2(const pixel* pix, intptr_t stride));


-void x265_getResidual16_sve2(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
-void x265_getResidual32_sve2(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
+void PFX(getResidual16_sve2(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride));
+void PFX(getResidual32_sve2(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride));


-void x265_scale1D_128to64_sve2(pixel *dst, const pixel *src);
-void x265_scale2D_64to32_sve2(pixel* dst, const pixel* src, intptr_t stride);
+void PFX(scale1D_128to64_sve2(pixel *dst, const pixel *src));
+void PFX(scale2D_64to32_sve2(pixel* dst, const pixel* src, intptr_t stride));


-int x265_pixel_satd_4x4_sve(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_8x4_sve(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_8x12_sve(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_32x16_sve(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_32x32_sve(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
-int x265_pixel_satd_64x48_sve(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
+int PFX(pixel_satd_4x4_sve(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_8x4_sve(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_8x12_sve(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_32x16_sve(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_32x32_sve(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));
+int PFX(pixel_satd_64x48_sve(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2));


uint32_t PFX(quant_sve)(const int16_t* coef, const int32_t* quantCoeff, int32_t* deltaU, int16_t* qCoef, int qBits, int add, int numCoeff);


-void x265_dequant_scaling_sve2(const int16_t* quantCoef, const int32_t* deQuantCoef, int16_t* coef, int num, int per, int shift);
-void x265_dequant_normal_sve2(const int16_t* quantCoef, int16_t* coef, int num, int scale, int shift);
+void PFX(dequant_scaling_sve2(const int16_t* quantCoef, const int32_t* deQuantCoef, int16_t* coef, int num, int per, int shift));
+void PFX(dequant_normal_sve2(const int16_t* quantCoef, int16_t* coef, int num, int scale, int shift));


-void x265_ssim_4x4x2_core_sve2(const pixel* pix1, intptr_t stride1, const pixel* pix2, intptr_t stride2, int sums[2][4]);
+void PFX(ssim_4x4x2_core_sve2(const pixel* pix1, intptr_t stride1, const pixel* pix2, intptr_t stride2, int sums[2][4]));


int PFX(psyCost_8x8_sve2)(const pixel* source, intptr_t sstride, const pixel* recon, intptr_t rstride);
void PFX(weight_sp_sve2)(const int16_t* src, pixel* dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
diff --git a/source/common/aarch64/ipfilter-sve2.S b/source/common/aarch64/ipfilter-sve2.S
index 525ed1172..ab0ad2fae 100644
--- a/source/common/aarch64/ipfilter-sve2.S
+++ b/source/common/aarch64/ipfilter-sve2.S
@@ -456,7 +456,7 @@


// void interp_vert_pp_c(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx)
.macro LUMA_VPP_SVE2 w, h
-function x265_interp_8tap_vert_pp_\w\()x\h\()_sve2
+function PFX(interp_8tap_vert_pp_\w\()x\h\()_sve2)
cmp x4, #0
b.eq 0f
cmp x4, #1
@@ -501,7 +501,7 @@ LUMA_VPP_SVE2 64, 48


// void interp_vert_ps_c(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx)
.macro LUMA_VPS_4xN_SVE2 h
-function x265_interp_8tap_vert_ps_4x\h\()_sve2
+function PFX(interp_8tap_vert_ps_4x\h\()_sve2)
lsl x3, x3, #1
lsl x5, x4, #6
lsl x4, x1, #2
@@ -568,7 +568,7 @@ LUMA_VPS_4xN_SVE2 16


// void interp_vert_sp_c(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx)
.macro LUMA_VSP_4xN_SVE2 h
-function x265_interp_8tap_vert_sp_4x\h\()_sve2
+function PFX(interp_8tap_vert_sp_4x\h\()_sve2)
lsl x5, x4, #6
lsl x1, x1, #1
lsl x4, x1, #2
@@ -736,7 +736,7 @@ LUMA_VSP_4xN_SVE2 16


// void interp_vert_ps_c(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx)
.macro LUMA_VPS_SVE2 w, h
-function x265_interp_8tap_vert_ps_\w\()x\h\()_sve2
+function PFX(interp_8tap_vert_ps_\w\()x\h\()_sve2)
cmp x4, #0
beq 0f
cmp x4, #1
@@ -830,7 +830,7 @@ LUMA_VPS_SVE2 64, 48


// void interp_vert_ss_c(const int16_t* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx)
.macro LUMA_VSS_SVE2 w, h
-function x265_interp_8tap_vert_ss_\w\()x\h\()_sve2
+function PFX(interp_8tap_vert_ss_\w\()x\h\()_sve2)
cmp x4, #0
beq 0f
cmp x4, #1
@@ -924,7 +924,7 @@ LUMA_VSS_SVE2 48, 64


// void interp_vert_pp_c(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx)
.macro CHROMA_VPP_SVE2 w, h
-function x265_interp_4tap_vert_pp_\w\()x\h\()_sve2
+function PFX(interp_4tap_vert_pp_\w\()x\h\()_sve2)
cmp x4, #0
beq 0f
cmp x4, #1
@@ -1047,7 +1047,7 @@ CHROMA_VPP_SVE2 48, 64


// void interp_vert_ps_c(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx)
.macro CHROMA_VPS_SVE2 w, h
-function x265_interp_4tap_vert_ps_\w\()x\h\()_sve2
+function PFX(interp_4tap_vert_ps_\w\()x\h\()_sve2)
cmp x4, #0
beq 0f
cmp x4, #1
@@ -1210,7 +1210,7 @@ CHROMA_VPS_SVE2 48, 64


// void interp_vert_ss_c(const int16_t* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx)
.macro CHROMA_VSS_SVE2 w, h
-function x265_interp_4tap_vert_ss_\w\()x\h\()_sve2
+function PFX(interp_4tap_vert_ss_\w\()x\h\()_sve2)
cmp x4, #0
beq 0f
cmp x4, #1
diff --git a/source/common/aarch64/ipfilter.S b/source/common/aarch64/ipfilter.S
index 228ffae29..0d1a374eb 100644
--- a/source/common/aarch64/ipfilter.S
+++ b/source/common/aarch64/ipfilter.S
@@ -51,7 +51,7 @@
// ***** luma_vpp *****
// void interp_vert_pp_c(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx)
.macro LUMA_VPP_4xN h
-function x265_interp_8tap_vert_pp_4x\h\()_neon
+function PFX(interp_8tap_vert_pp_4x\h\()_neon)
movrel x10, g_luma_s16
sub x0, x0, x1
sub x0, x0, x1, lsl #1 // src -= 3 * srcStride
@@ -135,7 +135,7 @@ LUMA_VPP_4xN 16


// void interp_vert_pp_c(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx)
.macro LUMA_VPP w, h
-function x265_interp_8tap_vert_pp_\w\()x\h\()_neon
+function PFX(interp_8tap_vert_pp_\w\()x\h\()_neon)
cmp x4, #0
b.eq 0f
cmp x4, #1
@@ -181,7 +181,7 @@ LUMA_VPP 64, 48
// ***** luma_vps *****
// void interp_vert_ps_c(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx)
.macro LUMA_VPS_4xN h
-function x265_interp_8tap_vert_ps_4x\h\()_neon
+function PFX(interp_8tap_vert_ps_4x\h\()_neon)
lsl x3, x3, #1
lsl x5, x4, #6
lsl x4, x1, #2
@@ -263,7 +263,7 @@ LUMA_VPS_4xN 16


// void interp_vert_ps_c(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx)
.macro LUMA_VPS w, h
-function x265_interp_8tap_vert_ps_\w\()x\h\()_neon
+function PFX(interp_8tap_vert_ps_\w\()x\h\()_neon)
cmp x4, #0
beq 0f
cmp x4, #1
@@ -309,7 +309,7 @@ LUMA_VPS 64, 48
// ***** luma_vsp *****
// void interp_vert_sp_c(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx)
.macro LUMA_VSP_4xN h
-function x265_interp_8tap_vert_sp_4x\h\()_neon
+function PFX(interp_8tap_vert_sp_4x\h\()_neon)
lsl x5, x4, #6
lsl x1, x1, #1
lsl x4, x1, #2
@@ -379,7 +379,7 @@ LUMA_VSP_4xN 16


// void interp_vert_sp_c(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx)
.macro LUMA_VSP w, h
-function x265_interp_8tap_vert_sp_\w\()x\h\()_neon
+function PFX(interp_8tap_vert_sp_\w\()x\h\()_neon)
cmp x4, #0
beq 0f
cmp x4, #1
@@ -425,7 +425,7 @@ LUMA_VSP 48, 64
// ***** luma_vss *****
// void interp_vert_ss_c(const int16_t* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx)
.macro LUMA_VSS w, h
-function x265_interp_8tap_vert_ss_\w\()x\h\()_neon
+function PFX(interp_8tap_vert_ss_\w\()x\h\()_neon)
cmp x4, #0
beq 0f
cmp x4, #1
@@ -474,7 +474,7 @@ LUMA_VSS 48, 64
// ***** luma_hpp *****
// void interp_horiz_pp_c(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx)
.macro LUMA_HPP w, h
-function x265_interp_horiz_pp_\w\()x\h\()_neon
+function PFX(interp_horiz_pp_\w\()x\h\()_neon)
cmp x4, #0
beq 0f
cmp x4, #1
@@ -523,7 +523,7 @@ LUMA_HPP 64, 64
// ***** luma_hps *****
// void interp_horiz_ps_c(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt)
.macro LUMA_HPS w, h
-function x265_interp_horiz_ps_\w\()x\h\()_neon
+function PFX(interp_horiz_ps_\w\()x\h\()_neon)
mov w10, #\h
cmp w5, #0
b.eq 6f
@@ -580,7 +580,7 @@ LUMA_HPS 64, 64
// ***** chroma_vpp *****
// void interp_vert_pp_c(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx)
.macro CHROMA_VPP w, h
-function x265_interp_4tap_vert_pp_\w\()x\h\()_neon
+function PFX(interp_4tap_vert_pp_\w\()x\h\()_neon)
cmp x4, #0
beq 0f
cmp x4, #1
@@ -660,7 +660,7 @@ CHROMA_VPP 48, 64
// ***** chroma_vps *****
// void interp_vert_ps_c(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx)
.macro CHROMA_VPS w, h
-function x265_interp_4tap_vert_ps_\w\()x\h\()_neon
+function PFX(interp_4tap_vert_ps_\w\()x\h\()_neon)
cmp x4, #0
beq 0f
cmp x4, #1
@@ -740,7 +740,7 @@ CHROMA_VPS 48, 64
// ***** chroma_vsp *****
// void interp_vert_sp_c(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx)
.macro CHROMA_VSP w, h
-function x265_interp_4tap_vert_sp_\w\()x\h\()_neon
+function PFX(interp_4tap_vert_sp_\w\()x\h\()_neon)
cmp x4, #0
beq 0f
cmp x4, #1
@@ -814,7 +814,7 @@ CHROMA_VSP 48, 64
// ***** chroma_vss *****
// void interp_vert_ss_c(const int16_t* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx)
.macro CHROMA_VSS w, h
-function x265_interp_4tap_vert_ss_\w\()x\h\()_neon
+function PFX(interp_4tap_vert_ss_\w\()x\h\()_neon)
cmp x4, #0
beq 0f
cmp x4, #1
@@ -888,7 +888,7 @@ CHROMA_VSS 48, 64
// ***** chroma_hpp *****
// void interp_horiz_pp_c(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx)
.macro CHROMA_HPP w, h
-function x265_interp_4tap_horiz_pp_\w\()x\h\()_neon
+function PFX(interp_4tap_horiz_pp_\w\()x\h\()_neon)
cmp x4, #0
beq 0f
cmp x4, #1
@@ -968,7 +968,7 @@ CHROMA_HPP 64, 64
// ***** chroma_hps *****
// void interp_horiz_ps_c(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt)
.macro CHROMA_HPS w, h
-function x265_interp_4tap_horiz_ps_\w\()x\h\()_neon
+function PFX(interp_4tap_horiz_ps_\w\()x\h\()_neon)
cmp x4, #0
beq 0f
cmp x4, #1
--
2.42.1


IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.


_______________________________________________
x265-devel mailing list
x265-devel at videolan.org
https://mailman.videolan.org/listinfo/x265-devel







More information about the x265-devel mailing list