[x265] [PATCH 11/14] AArch64: Add Armv8.6 Neon I8MM implementation of interp_hv_pp
Hari Limaye
hari.limaye at arm.com
Fri Sep 6 13:35:38 UTC 2024
Add an implementation of luma_hvpp that uses the Neon I8MM
implementation for the horizontal part and the Armv8.0 Neon
implementation for the vertical part.
---
source/common/aarch64/filter-neon-i8mm.cpp | 22 +++++++++++++++++++++-
1 file changed, 21 insertions(+), 1 deletion(-)
diff --git a/source/common/aarch64/filter-neon-i8mm.cpp b/source/common/aarch64/filter-neon-i8mm.cpp
index f8334016d..fb42d6672 100644
--- a/source/common/aarch64/filter-neon-i8mm.cpp
+++ b/source/common/aarch64/filter-neon-i8mm.cpp
@@ -755,9 +755,29 @@ void interp4_horiz_pp_i8mm(const uint8_t *src, intptr_t srcStride, uint8_t *dst,
}
}
+// Declaration for use in interp_hv_pp_i8mm().
+template<int N, int width, int height>
+void interp_vert_sp_neon(const int16_t *src, intptr_t srcStride, uint8_t *dst,
+ intptr_t dstStride, int coeffIdx);
+
+// Implementation of luma_hvpp, using Neon i8mm implementation for the
+// horizontal part, and Armv8.0 Neon implementation for the vertical part.
+template<int width, int height>
+void interp_hv_pp_i8mm(const pixel *src, intptr_t srcStride, pixel *dst,
+ intptr_t dstStride, int idxX, int idxY)
+{
+ const int N_TAPS = 8;
+ ALIGN_VAR_32(int16_t, immed[width * (height + N_TAPS - 1)]);
+
+ interp8_horiz_ps_i8mm<width, height>(src, srcStride, immed, width, idxX, 1);
+ interp_vert_sp_neon<N_TAPS, width, height>(immed + (N_TAPS / 2 - 1) * width,
+ width, dst, dstStride, idxY);
+}
+
#define LUMA_I8MM(W, H) \
p.pu[LUMA_ ## W ## x ## H].luma_hpp = interp8_horiz_pp_i8mm<W, H>; \
- p.pu[LUMA_ ## W ## x ## H].luma_hps = interp8_horiz_ps_i8mm<W, H>;
+ p.pu[LUMA_ ## W ## x ## H].luma_hps = interp8_horiz_ps_i8mm<W, H>; \
+ p.pu[LUMA_ ## W ## x ## H].luma_hvpp = interp_hv_pp_i8mm<W, H>;
#define CHROMA_420_I8MM(W, H) \
p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_hpp = \
--
2.42.1
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0011-AArch64-Add-Armv8.6-Neon-I8MM-implementation-of-inte.patch
Type: text/x-patch
Size: 2411 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20240906/6b3a6d31/attachment.bin>
More information about the x265-devel
mailing list