[x265] [PATCH v2 09/10] AArch64: Enable existing interp_vert_ss impl for HBD

Gerda Zsejke More gerdazsejke.more at arm.com
Tue Mar 11 19:47:57 UTC 2025


The SBD 4-tap and 8-tap Neon implementations of interp_vert_ss_neon
are also used for HBD. Extend these functions to support all CHROMA
and LUMA block sizes.
---
 source/common/aarch64/filter-prim.cpp | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/source/common/aarch64/filter-prim.cpp b/source/common/aarch64/filter-prim.cpp
index 568e6f40f..5032567f3 100644
--- a/source/common/aarch64/filter-prim.cpp
+++ b/source/common/aarch64/filter-prim.cpp
@@ -5097,6 +5097,24 @@ void setupFilterPrimitives_neon(EncoderPrimitives &p)
     p.pu[LUMA_4x16].luma_hvpp                               = interp_hv_pp_neon<8, 4, 16>;
     p.pu[LUMA_12x16].luma_hvpp                              = interp_hv_pp_neon<8, 12, 16>;
 
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_2x8].filter_vss   = interp_vert_ss_neon<4, 2, 8>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_2x16].filter_vss  = interp_vert_ss_neon<4, 2, 16>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_4x4].filter_vss   = interp_vert_ss_neon<4, 4, 4>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].filter_vss   = interp_vert_ss_neon<4, 4, 8>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].filter_vss  = interp_vert_ss_neon<4, 4, 16>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].filter_vss  = interp_vert_ss_neon<4, 4, 32>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].filter_vss  = interp_vert_ss_neon<4, 6, 16>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].filter_vss = interp_vert_ss_neon<4, 12, 32>;
+
+    p.chroma[X265_CSP_I444].pu[LUMA_4x4].filter_vss         = interp_vert_ss_neon<4, 4, 4>;
+    p.chroma[X265_CSP_I444].pu[LUMA_4x8].filter_vss         = interp_vert_ss_neon<4, 4, 8>;
+    p.chroma[X265_CSP_I444].pu[LUMA_4x16].filter_vss        = interp_vert_ss_neon<4, 4, 16>;
+    p.chroma[X265_CSP_I444].pu[LUMA_12x16].filter_vss       = interp_vert_ss_neon<4, 12, 16>;
+
+    p.pu[LUMA_4x4].luma_vss                                 = interp_vert_ss_neon<8, 4, 4>;
+    p.pu[LUMA_4x8].luma_vss                                 = interp_vert_ss_neon<8, 4, 8>;
+    p.pu[LUMA_4x16].luma_vss                                = interp_vert_ss_neon<8, 4, 16>;
+    p.pu[LUMA_12x16].luma_vss                               = interp_vert_ss_neon<8, 12, 16>;
 #endif // HIGH_BIT_DEPTH
 }
 
-- 
2.39.5 (Apple Git-154)

-------------- next part --------------
From d5a5d2e4ec03bff5a120481eba489fc078fe8bb4 Mon Sep 17 00:00:00 2001
Message-Id: <d5a5d2e4ec03bff5a120481eba489fc078fe8bb4.1741721714.git.gerdazsejke.more at arm.com>
In-Reply-To: <cover.1741721714.git.gerdazsejke.more at arm.com>
References: <cover.1741721714.git.gerdazsejke.more at arm.com>
From: Gerda Zsejke More <gerdazsejke.more at arm.com>
Date: Thu, 20 Feb 2025 18:50:54 +0100
Subject: [PATCH v2 09/10] AArch64: Enable existing interp_vert_ss impl for HBD

The SBD 4-tap and 8-tap Neon implementations of interp_vert_ss_neon
are also used for HBD. Extend these functions to support all CHROMA
and LUMA block sizes.
---
 source/common/aarch64/filter-prim.cpp | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/source/common/aarch64/filter-prim.cpp b/source/common/aarch64/filter-prim.cpp
index 568e6f40f..5032567f3 100644
--- a/source/common/aarch64/filter-prim.cpp
+++ b/source/common/aarch64/filter-prim.cpp
@@ -5097,6 +5097,24 @@ void setupFilterPrimitives_neon(EncoderPrimitives &p)
     p.pu[LUMA_4x16].luma_hvpp                               = interp_hv_pp_neon<8, 4, 16>;
     p.pu[LUMA_12x16].luma_hvpp                              = interp_hv_pp_neon<8, 12, 16>;
 
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_2x8].filter_vss   = interp_vert_ss_neon<4, 2, 8>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_2x16].filter_vss  = interp_vert_ss_neon<4, 2, 16>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_4x4].filter_vss   = interp_vert_ss_neon<4, 4, 4>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].filter_vss   = interp_vert_ss_neon<4, 4, 8>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].filter_vss  = interp_vert_ss_neon<4, 4, 16>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].filter_vss  = interp_vert_ss_neon<4, 4, 32>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].filter_vss  = interp_vert_ss_neon<4, 6, 16>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].filter_vss = interp_vert_ss_neon<4, 12, 32>;
+
+    p.chroma[X265_CSP_I444].pu[LUMA_4x4].filter_vss         = interp_vert_ss_neon<4, 4, 4>;
+    p.chroma[X265_CSP_I444].pu[LUMA_4x8].filter_vss         = interp_vert_ss_neon<4, 4, 8>;
+    p.chroma[X265_CSP_I444].pu[LUMA_4x16].filter_vss        = interp_vert_ss_neon<4, 4, 16>;
+    p.chroma[X265_CSP_I444].pu[LUMA_12x16].filter_vss       = interp_vert_ss_neon<4, 12, 16>;
+
+    p.pu[LUMA_4x4].luma_vss                                 = interp_vert_ss_neon<8, 4, 4>;
+    p.pu[LUMA_4x8].luma_vss                                 = interp_vert_ss_neon<8, 4, 8>;
+    p.pu[LUMA_4x16].luma_vss                                = interp_vert_ss_neon<8, 4, 16>;
+    p.pu[LUMA_12x16].luma_vss                               = interp_vert_ss_neon<8, 12, 16>;
 #endif // HIGH_BIT_DEPTH
 }
 
-- 
2.39.5 (Apple Git-154)



More information about the x265-devel mailing list