[x265] [PATCH 11/14] AArch64: Add Armv8.6 Neon I8MM implementation of interp_hv_pp

Hari Limaye hari.limaye at arm.com
Fri Sep 6 13:35:38 UTC 2024


Add an implementation of luma_hvpp, using the Neon I8MM implementation
for the horizontal part, and the Armv8.0 Neon implementation for the
vertical part.
---
 source/common/aarch64/filter-neon-i8mm.cpp | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/source/common/aarch64/filter-neon-i8mm.cpp b/source/common/aarch64/filter-neon-i8mm.cpp
index f8334016d..fb42d6672 100644
--- a/source/common/aarch64/filter-neon-i8mm.cpp
+++ b/source/common/aarch64/filter-neon-i8mm.cpp
@@ -755,9 +755,29 @@ void interp4_horiz_pp_i8mm(const uint8_t *src, intptr_t srcStride, uint8_t *dst,
     }
 }
 
+// Forward declaration of the Neon vertical filter called by interp_hv_pp_i8mm().
+template<int N, int width, int height>
+void interp_vert_sp_neon(const int16_t *src, intptr_t srcStride, uint8_t *dst,
+                         intptr_t dstStride, int coeffIdx);
+
+// luma_hvpp: the I8MM horizontal pass writes into immed (height + N_TAPS - 1 rows
+// of int16_t); the Armv8.0 Neon vertical pass reads it from row N_TAPS / 2 - 1.
+template<int width, int height>
+void interp_hv_pp_i8mm(const pixel *src, intptr_t srcStride, pixel *dst,
+                       intptr_t dstStride, int idxX, int idxY)
+{
+    const int N_TAPS = 8;
+    ALIGN_VAR_32(int16_t, immed[width * (height + N_TAPS - 1)]);
+
+    interp8_horiz_ps_i8mm<width, height>(src, srcStride, immed, width, idxX, 1);
+    interp_vert_sp_neon<N_TAPS, width, height>(immed + (N_TAPS / 2 - 1) * width,
+                                               width, dst, dstStride, idxY);
+}
+
 #define LUMA_I8MM(W, H) \
         p.pu[LUMA_ ## W ## x ## H].luma_hpp = interp8_horiz_pp_i8mm<W, H>; \
-        p.pu[LUMA_ ## W ## x ## H].luma_hps = interp8_horiz_ps_i8mm<W, H>;
+        p.pu[LUMA_ ## W ## x ## H].luma_hps = interp8_horiz_ps_i8mm<W, H>; \
+        p.pu[LUMA_ ## W ## x ## H].luma_hvpp = interp_hv_pp_i8mm<W, H>;
 
 #define CHROMA_420_I8MM(W, H) \
         p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_hpp = \
-- 
2.42.1

-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0011-AArch64-Add-Armv8.6-Neon-I8MM-implementation-of-inte.patch
Type: text/x-patch
Size: 2411 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20240906/6b3a6d31/attachment.bin>


More information about the x265-devel mailing list