[x265] [PATCH 06/14] AArch64: Add Armv8.4 Neon DotProd implementation of interp_hv_pp
Hari Limaye
hari.limaye at arm.com
Fri Sep 6 13:33:51 UTC 2024
Add an implementation of luma_hvpp, using the Neon DotProd implementation
for the horizontal part and the Armv8.0 Neon implementation for the
vertical part.
---
source/common/aarch64/filter-neon-dotprod.cpp | 23 ++++++++++++++++++-
1 file changed, 22 insertions(+), 1 deletion(-)
diff --git a/source/common/aarch64/filter-neon-dotprod.cpp b/source/common/aarch64/filter-neon-dotprod.cpp
index eb42b43f2..1828440d1 100644
--- a/source/common/aarch64/filter-neon-dotprod.cpp
+++ b/source/common/aarch64/filter-neon-dotprod.cpp
@@ -642,9 +642,30 @@ void interp4_horiz_ps_dotprod(const uint8_t *src, intptr_t srcStride,
}
}
+// Forward declaration of the vertical filter used by interp_hv_pp_dotprod().
+template<int N, int width, int height>
+void interp_vert_sp_neon(const int16_t *src, intptr_t srcStride, uint8_t *dst,
+ intptr_t dstStride, int coeffIdx);
+
+// Implementation of luma_hvpp: the horizontal pass uses the Neon DotProd
+// kernel and the vertical pass uses the Armv8.0 Neon kernel.
+template<int width, int height>
+void interp_hv_pp_dotprod(const pixel *src, intptr_t srcStride, pixel *dst,
+ intptr_t dstStride, int idxX, int idxY)
+{
+ const int N_TAPS = 8; // tap count, also passed to the vertical filter
+ ALIGN_VAR_32(int16_t, immed[width * (height + N_TAPS - 1)]); // 16-bit intermediate, row stride = width
+
+ interp8_horiz_ps_dotprod<width, height>(src, srcStride, immed, width, idxX,
+ 1); // NOTE(review): final argument presumably requests the extra N_TAPS - 1 rows - confirm
+ interp_vert_sp_neon<N_TAPS, width, height>(immed + (N_TAPS / 2 - 1) * width,
+ width, dst, dstStride, idxY); // source pointer starts N_TAPS / 2 - 1 rows into immed
+}
+
#define LUMA_DOTPROD(W, H) \
p.pu[LUMA_ ## W ## x ## H].luma_hpp = interp8_horiz_pp_dotprod<W, H>; \
- p.pu[LUMA_ ## W ## x ## H].luma_hps = interp8_horiz_ps_dotprod<W, H>;
+ p.pu[LUMA_ ## W ## x ## H].luma_hps = interp8_horiz_ps_dotprod<W, H>; \
+ p.pu[LUMA_ ## W ## x ## H].luma_hvpp = interp_hv_pp_dotprod<W, H>;
#define CHROMA_420_DOTPROD(W, H) \
p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_hpp = \
--
2.42.1
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0006-AArch64-Add-Armv8.4-Neon-DotProd-implementation-of-i.patch
Type: text/x-patch
Size: 2496 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20240906/483864f5/attachment.bin>
More information about the x265-devel
mailing list