[x265] [PATCH 11/12] AArch64: Define all low bitdepth Neon ipfilter primitives

Hari Limaye hari.limaye at arm.com
Fri Aug 30 19:20:14 UTC 2024


Currently the Neon intrinsics-based ipfilter primitives are defined only
on certain platforms (selected by `defined(__APPLE__)`) and for a subset
of the block sizes. This patch defines them on all platforms for all
block sizes, for low bitdepth, so that they are always tested.

As the high bitdepth paths are not yet implemented for all block sizes,
only the currently supported block sizes are defined for high bitdepth.
---
 source/common/aarch64/filter-prim.cpp | 334 +++++++-------------------
 1 file changed, 91 insertions(+), 243 deletions(-)

diff --git a/source/common/aarch64/filter-prim.cpp b/source/common/aarch64/filter-prim.cpp
index 8e15cdeb9..71dfc0d63 100644
--- a/source/common/aarch64/filter-prim.cpp
+++ b/source/common/aarch64/filter-prim.cpp
@@ -2682,63 +2682,29 @@ void interp_hv_pp_neon(const pixel *src, intptr_t srcStride, pixel *dst, intptr_
 
 
 
-#if defined(__APPLE__)
 #define CHROMA_420(W, H) \
     p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_hpp = interp_horiz_pp_neon<4, W, H>; \
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_hps = interp_horiz_ps_neon<4, W, H>; \
     p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_vpp = interp_vert_pp_neon<4, W, H>;  \
     p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_vps = interp_vert_ps_neon<4, W, H>;  \
     p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_vsp = interp_vert_sp_neon<4, W, H>;  \
     p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_vss = interp_vert_ss_neon<4, W, H>;  \
     p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].p2s[NONALIGNED] = filterPixelToShort_neon<W, H>;\
     p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].p2s[ALIGNED] = filterPixelToShort_neon<W, H>;
-    
-#define CHROMA_FILTER_420(W, H) \
-    p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_hps = interp_horiz_ps_neon<4, W, H>;
-    
-#else // defined(__APPLE__)
-#define CHROMA_420(W, H) \
-    p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_vss = interp_vert_ss_neon<4, W, H>; \
-    p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].p2s[NONALIGNED] = filterPixelToShort_neon<W, H>;\
-    p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].p2s[ALIGNED] = filterPixelToShort_neon<W, H>;
-    
-#define CHROMA_FILTER_420(W, H) \
-    p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_hpp = interp_horiz_pp_neon<4, W, H>; \
-    p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_hps = interp_horiz_ps_neon<4, W, H>; \
-    p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_vpp = interp_vert_pp_neon<4, W, H>;  \
-    p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_vps = interp_vert_ps_neon<4, W, H>;  \
-    p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_vsp = interp_vert_sp_neon<4, W, H>;
-#endif // defined(__APPLE__)
 
-#if defined(__APPLE__)
 #define CHROMA_422(W, H) \
     p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_hpp = interp_horiz_pp_neon<4, W, H>; \
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_hps = interp_horiz_ps_neon<4, W, H>; \
     p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_vpp = interp_vert_pp_neon<4, W, H>;  \
     p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_vps = interp_vert_ps_neon<4, W, H>;  \
     p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_vsp = interp_vert_sp_neon<4, W, H>;  \
     p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_vss = interp_vert_ss_neon<4, W, H>;  \
     p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].p2s[NONALIGNED] = filterPixelToShort_neon<W, H>;\
     p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].p2s[ALIGNED] = filterPixelToShort_neon<W, H>;
-    
-#define CHROMA_FILTER_422(W, H) \
-    p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_hps = interp_horiz_ps_neon<4, W, H>;
-    
-#else // defined(__APPLE__)
-#define CHROMA_422(W, H) \
-    p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_vss = interp_vert_ss_neon<4, W, H>; \
-    p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].p2s[NONALIGNED] = filterPixelToShort_neon<W, H>;\
-    p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].p2s[ALIGNED] = filterPixelToShort_neon<W, H>;
-    
-#define CHROMA_FILTER_422(W, H) \
-    p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_hpp = interp_horiz_pp_neon<4, W, H>; \
-    p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_hps = interp_horiz_ps_neon<4, W, H>; \
-    p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_vpp = interp_vert_pp_neon<4, W, H>;  \
-    p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_vps = interp_vert_ps_neon<4, W, H>;  \
-    p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_vsp = interp_vert_sp_neon<4, W, H>;
-#endif // defined(__APPLE__)
 
-#if defined(__APPLE__)
 #define CHROMA_444(W, H) \
     p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_hpp = interp_horiz_pp_neon<4, W, H>; \
+    p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_hps = interp_horiz_ps_neon<4, W, H>; \
     p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vpp = interp_vert_pp_neon<4, W, H>;  \
     p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vps = interp_vert_ps_neon<4, W, H>;  \
     p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vsp = interp_vert_sp_neon<4, W, H>;  \
@@ -2746,26 +2712,9 @@ void interp_hv_pp_neon(const pixel *src, intptr_t srcStride, pixel *dst, intptr_
     p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].p2s[NONALIGNED] = filterPixelToShort_neon<W, H>;\
     p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].p2s[ALIGNED] = filterPixelToShort_neon<W, H>;
 
-#define CHROMA_FILTER_444(W, H) \
-    p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_hps = interp_horiz_ps_neon<4, W, H>;
-    
-#else // defined(__APPLE__)
-#define CHROMA_444(W, H) \
-    p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].p2s[NONALIGNED] = filterPixelToShort_neon<W, H>;\
-    p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].p2s[ALIGNED] = filterPixelToShort_neon<W, H>;
-    
-#define CHROMA_FILTER_444(W, H) \
-    p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_hpp = interp_horiz_pp_neon<4, W, H>; \
-    p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_hps = interp_horiz_ps_neon<4, W, H>; \
-    p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vpp = interp_vert_pp_neon<4, W, H>;  \
-    p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vps = interp_vert_ps_neon<4, W, H>;  \
-    p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vsp = interp_vert_sp_neon<4, W, H>;  \
-    p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vss = interp_vert_ss_neon<4, W, H>;
-#endif // defined(__APPLE__)
-
-#if defined(__APPLE__)
 #define LUMA(W, H) \
     p.pu[LUMA_ ## W ## x ## H].luma_hpp     = interp_horiz_pp_neon<8, W, H>; \
+    p.pu[LUMA_ ## W ## x ## H].luma_hps     = interp_horiz_ps_neon<8, W, H>; \
     p.pu[LUMA_ ## W ## x ## H].luma_vpp     = interp_vert_pp_neon<8, W, H>;  \
     p.pu[LUMA_ ## W ## x ## H].luma_vps     = interp_vert_ps_neon<8, W, H>;  \
     p.pu[LUMA_ ## W ## x ## H].luma_vsp     = interp_vert_sp_neon<8, W, H>;  \
@@ -2773,219 +2722,118 @@ void interp_hv_pp_neon(const pixel *src, intptr_t srcStride, pixel *dst, intptr_
     p.pu[LUMA_ ## W ## x ## H].luma_hvpp    = interp_hv_pp_neon<8, W, H>; \
     p.pu[LUMA_ ## W ## x ## H].convert_p2s[NONALIGNED] = filterPixelToShort_neon<W, H>;\
     p.pu[LUMA_ ## W ## x ## H].convert_p2s[ALIGNED] = filterPixelToShort_neon<W, H>;
-    
-#else // defined(__APPLE__)
-#define LUMA(W, H) \
-    p.pu[LUMA_ ## W ## x ## H].luma_vss     = interp_vert_ss_neon<8, W, H>;  \
-    p.pu[LUMA_ ## W ## x ## H].convert_p2s[NONALIGNED] = filterPixelToShort_neon<W, H>;\
-    p.pu[LUMA_ ## W ## x ## H].convert_p2s[ALIGNED] = filterPixelToShort_neon<W, H>;
-    
-#define LUMA_FILTER(W, H) \
-    p.pu[LUMA_ ## W ## x ## H].luma_hpp     = interp_horiz_pp_neon<8, W, H>; \
-    p.pu[LUMA_ ## W ## x ## H].luma_vpp     = interp_vert_pp_neon<8, W, H>;  \
-    p.pu[LUMA_ ## W ## x ## H].luma_vps     = interp_vert_ps_neon<8, W, H>;  \
-    p.pu[LUMA_ ## W ## x ## H].luma_vsp     = interp_vert_sp_neon<8, W, H>;  \
-    p.pu[LUMA_ ## W ## x ## H].luma_hvpp    = interp_hv_pp_neon<8, W, H>;
-#endif // defined(__APPLE__)
 
 void setupFilterPrimitives_neon(EncoderPrimitives &p)
 {
-
-    // All neon functions assume width of multiple of 8, (2,4,12 variants are not optimized)
-
-    LUMA(8, 8);
+#if !HIGH_BIT_DEPTH
+    LUMA(4, 4);
+    LUMA(4, 8);
+    LUMA(4, 16);
+    LUMA(12, 16);
+#endif
     LUMA(8, 4);
-    LUMA(16, 16);
-    CHROMA_420(8,  8);
-    LUMA(16,  8);
-    CHROMA_420(8,  4);
+    LUMA(8, 8);
     LUMA(8, 16);
+    LUMA(8, 32);
+    LUMA(16, 4);
+    LUMA(16, 8);
     LUMA(16, 12);
-    CHROMA_420(8,  6);
-    LUMA(16,  4);
-    CHROMA_420(8,  2);
-    LUMA(32, 32);
-    CHROMA_420(16, 16);
-    LUMA(32, 16);
-    CHROMA_420(16, 8);
+    LUMA(16, 16);
     LUMA(16, 32);
-    CHROMA_420(8,  16);
-    LUMA(32, 24);
-    CHROMA_420(16, 12);
+    LUMA(16, 64);
     LUMA(24, 32);
-    LUMA(32,  8);
-    CHROMA_420(16, 4);
-    LUMA(8, 32);
-    LUMA(64, 64);
-    CHROMA_420(32, 32);
-    LUMA(64, 32);
-    CHROMA_420(32, 16);
+    LUMA(32, 8);
+    LUMA(32, 16);
+    LUMA(32, 24);
+    LUMA(32, 32);
     LUMA(32, 64);
-    CHROMA_420(16, 32);
-    LUMA(64, 48);
-    CHROMA_420(32, 24);
     LUMA(48, 64);
-    CHROMA_420(24, 32);
     LUMA(64, 16);
+    LUMA(64, 32);
+    LUMA(64, 48);
+    LUMA(64, 64);
+
+#if !HIGH_BIT_DEPTH
+    CHROMA_420(2, 4);
+    CHROMA_420(2, 8);
+    CHROMA_420(4, 2);
+    CHROMA_420(4, 4);
+    CHROMA_420(4, 8);
+    CHROMA_420(4, 16);
+    CHROMA_420(6, 8);
+    CHROMA_420(12, 16);
+#endif
+    CHROMA_420(8, 2);
+    CHROMA_420(8, 4);
+    CHROMA_420(8, 6);
+    CHROMA_420(8, 8);
+    CHROMA_420(8, 16);
+    CHROMA_420(8, 32);
+    CHROMA_420(16, 4);
+    CHROMA_420(16, 8);
+    CHROMA_420(16, 12);
+    CHROMA_420(16, 16);
+    CHROMA_420(16, 32);
+    CHROMA_420(24, 32);
     CHROMA_420(32, 8);
-    LUMA(16, 64);
-    CHROMA_420(8,  32);
-    CHROMA_422(8,  16);
-    CHROMA_422(8,  8);
-    CHROMA_422(8,  12);
-    CHROMA_422(8,  4);
-    CHROMA_422(16, 32);
+    CHROMA_420(32, 16);
+    CHROMA_420(32, 24);
+    CHROMA_420(32, 32);
+
+#if !HIGH_BIT_DEPTH
+    CHROMA_422(2, 8);
+    CHROMA_422(2, 16);
+    CHROMA_422(4, 4);
+    CHROMA_422(4, 8);
+    CHROMA_422(4, 16);
+    CHROMA_422(4, 32);
+    CHROMA_422(6, 16);
+    CHROMA_422(12, 32);
+#endif
+    CHROMA_422(8, 4);
+    CHROMA_422(8, 8);
+    CHROMA_422(8, 12);
+    CHROMA_422(8, 16);
+    CHROMA_422(8, 32);
+    CHROMA_422(8, 64);
+    CHROMA_422(16, 8);
     CHROMA_422(16, 16);
-    CHROMA_422(8,  32);
     CHROMA_422(16, 24);
-    CHROMA_422(16, 8);
-    CHROMA_422(32, 64);
-    CHROMA_422(32, 32);
+    CHROMA_422(16, 32);
     CHROMA_422(16, 64);
-    CHROMA_422(32, 48);
     CHROMA_422(24, 64);
     CHROMA_422(32, 16);
-    CHROMA_422(8,  64);
-    CHROMA_444(8,  8);
-    CHROMA_444(8,  4);
-    CHROMA_444(16, 16);
+    CHROMA_422(32, 32);
+    CHROMA_422(32, 48);
+    CHROMA_422(32, 64);
+
+#if !HIGH_BIT_DEPTH
+    CHROMA_444(4, 4);
+    CHROMA_444(4, 8);
+    CHROMA_444(4, 16);
+    CHROMA_444(12, 16);
+#endif
+    CHROMA_444(8, 4);
+    CHROMA_444(8, 8);
+    CHROMA_444(8, 16);
+    CHROMA_444(8, 32);
+    CHROMA_444(16, 4);
     CHROMA_444(16, 8);
-    CHROMA_444(8,  16);
     CHROMA_444(16, 12);
-    CHROMA_444(16, 4);
-    CHROMA_444(32, 32);
-    CHROMA_444(32, 16);
+    CHROMA_444(16, 16);
     CHROMA_444(16, 32);
-    CHROMA_444(32, 24);
+    CHROMA_444(16, 64);
     CHROMA_444(24, 32);
     CHROMA_444(32, 8);
-    CHROMA_444(8,  32);
-    CHROMA_444(64, 64);
-    CHROMA_444(64, 32);
+    CHROMA_444(32, 16);
+    CHROMA_444(32, 24);
+    CHROMA_444(32, 32);
     CHROMA_444(32, 64);
-    CHROMA_444(64, 48);
     CHROMA_444(48, 64);
     CHROMA_444(64, 16);
-    CHROMA_444(16, 64);
-
-#if defined(__APPLE__) || HIGH_BIT_DEPTH
-    p.pu[LUMA_8x4].luma_hps     = interp_horiz_ps_neon<8, 8, 4>;
-    p.pu[LUMA_8x8].luma_hps     = interp_horiz_ps_neon<8, 8, 8>;
-    p.pu[LUMA_8x16].luma_hps     = interp_horiz_ps_neon<8, 8, 16>;
-    p.pu[LUMA_8x32].luma_hps     = interp_horiz_ps_neon<8, 8, 32>;
-#endif // HIGH_BIT_DEPTH
-
-#if !defined(__APPLE__) && HIGH_BIT_DEPTH
-    p.pu[LUMA_24x32].luma_hps     = interp_horiz_ps_neon<8, 24, 32>;
-#endif // !defined(__APPLE__)
-
-#if !defined(__APPLE__)
-    p.pu[LUMA_32x8].luma_hpp      = interp_horiz_pp_neon<8, 32, 8>;
-    p.pu[LUMA_32x16].luma_hpp     = interp_horiz_pp_neon<8, 32, 16>;
-    p.pu[LUMA_32x24].luma_hpp     = interp_horiz_pp_neon<8, 32, 24>;
-    p.pu[LUMA_32x32].luma_hpp     = interp_horiz_pp_neon<8, 32, 32>;
-    p.pu[LUMA_32x64].luma_hpp     = interp_horiz_pp_neon<8, 32, 64>;
-    p.pu[LUMA_48x64].luma_hpp     = interp_horiz_pp_neon<8, 48, 64>;
-    p.pu[LUMA_64x16].luma_hpp     = interp_horiz_pp_neon<8, 64, 16>;
-    p.pu[LUMA_64x32].luma_hpp     = interp_horiz_pp_neon<8, 64, 32>;
-    p.pu[LUMA_64x48].luma_hpp     = interp_horiz_pp_neon<8, 64, 48>;
-    p.pu[LUMA_64x64].luma_hpp     = interp_horiz_pp_neon<8, 64, 64>;
-
-    LUMA_FILTER(8, 4);
-    LUMA_FILTER(8, 8);
-    LUMA_FILTER(8, 16);
-    LUMA_FILTER(8, 32);
-    LUMA_FILTER(24, 32);
-
-    LUMA_FILTER(16, 32);
-    LUMA_FILTER(32, 16);
-    LUMA_FILTER(32, 24);
-    LUMA_FILTER(32, 32);
-    LUMA_FILTER(32, 64);
-    LUMA_FILTER(48, 64);
-    LUMA_FILTER(64, 32);
-    LUMA_FILTER(64, 48);
-    LUMA_FILTER(64, 64);
-    
-    CHROMA_FILTER_420(24, 32);
-    
-    p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].filter_hpp = interp_horiz_pp_neon<4, 32, 8>;
-    p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].filter_hpp = interp_horiz_pp_neon<4, 32, 16>;
-    p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].filter_hpp = interp_horiz_pp_neon<4, 32, 24>;
-    p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].filter_hpp = interp_horiz_pp_neon<4, 32, 32>;
-    
-    CHROMA_FILTER_422(24, 64);
-    
-    p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].filter_hpp = interp_horiz_pp_neon<4, 32, 16>;
-    p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].filter_hpp = interp_horiz_pp_neon<4, 32, 32>;
-    p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].filter_hpp = interp_horiz_pp_neon<4, 32, 48>;
-    p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].filter_hpp = interp_horiz_pp_neon<4, 32, 64>;
-    
-    CHROMA_FILTER_444(24, 32);
-    
-    p.chroma[X265_CSP_I444].pu[LUMA_32x8].filter_hpp  = interp_horiz_pp_neon<4, 32, 8>;
-    p.chroma[X265_CSP_I444].pu[LUMA_32x16].filter_hpp = interp_horiz_pp_neon<4, 32, 16>;
-    p.chroma[X265_CSP_I444].pu[LUMA_32x24].filter_hpp = interp_horiz_pp_neon<4, 32, 24>;
-    p.chroma[X265_CSP_I444].pu[LUMA_32x32].filter_hpp = interp_horiz_pp_neon<4, 32, 32>;
-    p.chroma[X265_CSP_I444].pu[LUMA_32x64].filter_hpp = interp_horiz_pp_neon<4, 32, 64>;
-    p.chroma[X265_CSP_I444].pu[LUMA_48x64].filter_hpp = interp_horiz_pp_neon<4, 48, 64>;
-    p.chroma[X265_CSP_I444].pu[LUMA_64x16].filter_hpp = interp_horiz_pp_neon<4, 64, 16>;
-    p.chroma[X265_CSP_I444].pu[LUMA_64x32].filter_hpp = interp_horiz_pp_neon<4, 64, 32>;
-    p.chroma[X265_CSP_I444].pu[LUMA_64x48].filter_hpp = interp_horiz_pp_neon<4, 64, 48>;
-    p.chroma[X265_CSP_I444].pu[LUMA_64x64].filter_hpp = interp_horiz_pp_neon<4, 64, 64>;
-    
-    p.chroma[X265_CSP_I444].pu[LUMA_16x4].filter_vss  = interp_vert_ss_neon<4, 16, 4>;
-    p.chroma[X265_CSP_I444].pu[LUMA_16x8].filter_vss  = interp_vert_ss_neon<4, 16, 8>;
-    p.chroma[X265_CSP_I444].pu[LUMA_16x12].filter_vss = interp_vert_ss_neon<4, 16, 12>;
-    p.chroma[X265_CSP_I444].pu[LUMA_16x16].filter_vss = interp_vert_ss_neon<4, 16, 16>;
-    p.chroma[X265_CSP_I444].pu[LUMA_16x32].filter_vss = interp_vert_ss_neon<4, 16, 32>;
-    p.chroma[X265_CSP_I444].pu[LUMA_16x64].filter_vss = interp_vert_ss_neon<4, 16, 64>;
-    p.chroma[X265_CSP_I444].pu[LUMA_32x8].filter_vss  = interp_vert_ss_neon<4, 32, 8>;
-    p.chroma[X265_CSP_I444].pu[LUMA_32x16].filter_vss = interp_vert_ss_neon<4, 32, 16>;
-    p.chroma[X265_CSP_I444].pu[LUMA_32x24].filter_vss = interp_vert_ss_neon<4, 32, 24>;
-    p.chroma[X265_CSP_I444].pu[LUMA_32x32].filter_vss = interp_vert_ss_neon<4, 32, 32>;
-    p.chroma[X265_CSP_I444].pu[LUMA_32x64].filter_vss = interp_vert_ss_neon<4, 32, 64>;
-#endif // !defined(__APPLE__)
-
-    CHROMA_FILTER_420(8, 2);
-    CHROMA_FILTER_420(8, 4);
-    CHROMA_FILTER_420(8, 6);
-    CHROMA_FILTER_420(8, 8);
-    CHROMA_FILTER_420(8, 16);
-    CHROMA_FILTER_420(8, 32);
-    
-    CHROMA_FILTER_422(8, 4);
-    CHROMA_FILTER_422(8, 8);
-    CHROMA_FILTER_422(8, 12);
-    CHROMA_FILTER_422(8, 16);
-    CHROMA_FILTER_422(8, 32);
-    CHROMA_FILTER_422(8, 64);
-    
-    CHROMA_FILTER_444(8, 4);
-    CHROMA_FILTER_444(8, 8);
-    CHROMA_FILTER_444(8, 16);
-    CHROMA_FILTER_444(8, 32);
-    
-#if defined(__APPLE__)
-    CHROMA_FILTER_420(16, 4);
-    CHROMA_FILTER_420(16, 8);
-    CHROMA_FILTER_420(16, 12);
-    CHROMA_FILTER_420(16, 16);
-    CHROMA_FILTER_420(16, 32);
-
-    CHROMA_FILTER_422(16, 8);
-    CHROMA_FILTER_422(16, 16);
-    CHROMA_FILTER_422(16, 24);
-    CHROMA_FILTER_422(16, 32);
-    CHROMA_FILTER_422(16, 64);
-    
-    CHROMA_FILTER_444(16, 4);
-    CHROMA_FILTER_444(16, 8);
-    CHROMA_FILTER_444(16, 12);
-    CHROMA_FILTER_444(16, 16);
-    CHROMA_FILTER_444(16, 32);
-    CHROMA_FILTER_444(16, 64);
-#endif // defined(__APPLE__)
+    CHROMA_444(64, 32);
+    CHROMA_444(64, 48);
+    CHROMA_444(64, 64);
 }
 
 };
-- 
2.42.1

-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0011-AArch64-Define-all-low-bitdepth-Neon-ipfilter-primit.patch
Type: text/x-patch
Size: 18053 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20240830/04077a65/attachment-0001.bin>


More information about the x265-devel mailing list