[x265] [PATCH 09/10] AArch64: Enable existing interp_vert_ss impl for HBD
Gerda Zsejke More
gerdazsejke.more at arm.com
Fri Feb 21 16:08:34 UTC 2025
The SBD 4-tap and 8-tap Neon implementations of interp_vert_ss_neon are
also used for HBD. Extend these functions to support all CHROMA and
LUMA block sizes.
---
source/common/aarch64/filter-prim.cpp | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/source/common/aarch64/filter-prim.cpp b/source/common/aarch64/filter-prim.cpp
index b2e6a8210..88fbe89c9 100644
--- a/source/common/aarch64/filter-prim.cpp
+++ b/source/common/aarch64/filter-prim.cpp
@@ -5097,6 +5097,24 @@ void setupFilterPrimitives_neon(EncoderPrimitives &p)
p.pu[LUMA_4x16].luma_hvpp = interp_hv_pp_neon<8, 4, 16>;
p.pu[LUMA_12x16].luma_hvpp = interp_hv_pp_neon<8, 12, 16>;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_2x8].filter_vss = interp_vert_ss_neon<4, 2, 8>;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_2x16].filter_vss = interp_vert_ss_neon<4, 2, 16>;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_4x4].filter_vss = interp_vert_ss_neon<4, 4, 4>;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].filter_vss = interp_vert_ss_neon<4, 4, 8>;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].filter_vss = interp_vert_ss_neon<4, 4, 16>;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].filter_vss = interp_vert_ss_neon<4, 4, 32>;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].filter_vss = interp_vert_ss_neon<4, 6, 16>;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].filter_vss = interp_vert_ss_neon<4, 12, 32>;
+
+ p.chroma[X265_CSP_I444].pu[LUMA_4x4].filter_vss = interp_vert_ss_neon<4, 4, 4>;
+ p.chroma[X265_CSP_I444].pu[LUMA_4x8].filter_vss = interp_vert_ss_neon<4, 4, 8>;
+ p.chroma[X265_CSP_I444].pu[LUMA_4x16].filter_vss = interp_vert_ss_neon<4, 4, 16>;
+ p.chroma[X265_CSP_I444].pu[LUMA_12x16].filter_vss = interp_vert_ss_neon<4, 12, 16>;
+
+ p.pu[LUMA_4x4].luma_vss = interp_vert_ss_neon<8, 4, 4>;
+ p.pu[LUMA_4x8].luma_vss = interp_vert_ss_neon<8, 4, 8>;
+ p.pu[LUMA_4x16].luma_vss = interp_vert_ss_neon<8, 4, 16>;
+ p.pu[LUMA_12x16].luma_vss = interp_vert_ss_neon<8, 12, 16>;
#endif // HIGH_BIT_DEPTH
}
--
2.39.5 (Apple Git-154)
-------------- next part --------------
From 19a75e4edcca7a1a53c92b9b04966ef90073df3a Mon Sep 17 00:00:00 2001
Message-Id: <19a75e4edcca7a1a53c92b9b04966ef90073df3a.1740153395.git.gerdazsejke.more at arm.com>
In-Reply-To: <cover.1740153395.git.gerdazsejke.more at arm.com>
References: <cover.1740153395.git.gerdazsejke.more at arm.com>
From: Gerda Zsejke More <gerdazsejke.more at arm.com>
Date: Thu, 20 Feb 2025 18:50:54 +0100
Subject: [PATCH 09/10] AArch64: Enable existing interp_vert_ss impl for HBD
The SBD 4-tap and 8-tap Neon implementations of interp_vert_ss_neon are
also used for HBD. Extend these functions to support all CHROMA and
LUMA block sizes.
---
source/common/aarch64/filter-prim.cpp | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/source/common/aarch64/filter-prim.cpp b/source/common/aarch64/filter-prim.cpp
index b2e6a8210..88fbe89c9 100644
--- a/source/common/aarch64/filter-prim.cpp
+++ b/source/common/aarch64/filter-prim.cpp
@@ -5097,6 +5097,24 @@ void setupFilterPrimitives_neon(EncoderPrimitives &p)
p.pu[LUMA_4x16].luma_hvpp = interp_hv_pp_neon<8, 4, 16>;
p.pu[LUMA_12x16].luma_hvpp = interp_hv_pp_neon<8, 12, 16>;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_2x8].filter_vss = interp_vert_ss_neon<4, 2, 8>;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_2x16].filter_vss = interp_vert_ss_neon<4, 2, 16>;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_4x4].filter_vss = interp_vert_ss_neon<4, 4, 4>;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].filter_vss = interp_vert_ss_neon<4, 4, 8>;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].filter_vss = interp_vert_ss_neon<4, 4, 16>;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].filter_vss = interp_vert_ss_neon<4, 4, 32>;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].filter_vss = interp_vert_ss_neon<4, 6, 16>;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].filter_vss = interp_vert_ss_neon<4, 12, 32>;
+
+ p.chroma[X265_CSP_I444].pu[LUMA_4x4].filter_vss = interp_vert_ss_neon<4, 4, 4>;
+ p.chroma[X265_CSP_I444].pu[LUMA_4x8].filter_vss = interp_vert_ss_neon<4, 4, 8>;
+ p.chroma[X265_CSP_I444].pu[LUMA_4x16].filter_vss = interp_vert_ss_neon<4, 4, 16>;
+ p.chroma[X265_CSP_I444].pu[LUMA_12x16].filter_vss = interp_vert_ss_neon<4, 12, 16>;
+
+ p.pu[LUMA_4x4].luma_vss = interp_vert_ss_neon<8, 4, 4>;
+ p.pu[LUMA_4x8].luma_vss = interp_vert_ss_neon<8, 4, 8>;
+ p.pu[LUMA_4x16].luma_vss = interp_vert_ss_neon<8, 4, 16>;
+ p.pu[LUMA_12x16].luma_vss = interp_vert_ss_neon<8, 12, 16>;
#endif // HIGH_BIT_DEPTH
}
--
2.39.5 (Apple Git-154)
More information about the x265-devel mailing list