[x265] [PATCH 10/12] AArch64: Separate setup of optimized primitives
Hari Limaye
hari.limaye at arm.com
Thu May 2 21:19:45 UTC 2024
Currently the intrinsics-based optimized primitives for AArch64 are
set up at the start of setupAssemblyPrimitives, rather than in the
separate, intrinsics-specific function. As this same combined setup
function is used when setting up the tests, only a specific subset of
the intrinsics primitives is tested in testbench.cpp.
This patch moves the setup for AArch64 intrinsics implementations to
setupIntrinsicPrimitives, and enables separate testing of the
intrinsics primitives. This change does not alter the function table
produced in x265_setup_primitives(), as the intrinsics primitives are
still set up prior to the ASM primitives.
---
source/common/aarch64/asm-primitives.cpp | 35 +++++++++---------------
source/common/primitives.cpp | 2 +-
source/test/testbench.cpp | 4 +--
3 files changed, 16 insertions(+), 25 deletions(-)
diff --git a/source/common/aarch64/asm-primitives.cpp b/source/common/aarch64/asm-primitives.cpp
index 8c46bfd21..f20d1e57d 100644
--- a/source/common/aarch64/asm-primitives.cpp
+++ b/source/common/aarch64/asm-primitives.cpp
@@ -708,12 +708,6 @@ void interp_8tap_hv_pp_cpu(const pixel *src, intptr_t srcStride, pixel *dst, int
void setupNeonPrimitives(EncoderPrimitives &p)
{
- setupPixelPrimitives_neon(p);
- setupFilterPrimitives_neon(p);
- setupDCTPrimitives_neon(p);
- setupLoopFilterPrimitives_neon(p);
- setupIntraPrimitives_neon(p);
-
ALL_CHROMA_420_PU(p2s[NONALIGNED], filterPixelToShort, neon);
ALL_CHROMA_422_PU(p2s[ALIGNED], filterPixelToShort, neon);
ALL_CHROMA_444_PU(p2s[ALIGNED], filterPixelToShort, neon);
@@ -1083,14 +1077,6 @@ void setupNeonPrimitives(EncoderPrimitives &p)
#if defined(HAVE_SVE2) || defined(HAVE_SVE)
void setupSvePrimitives(EncoderPrimitives &p)
{
- // When these primitives will use SVE/SVE2 instructions set,
- // change the following definitions to point to the SVE/SVE2 implementation
- setupPixelPrimitives_neon(p);
- setupFilterPrimitives_neon(p);
- setupDCTPrimitives_neon(p);
- setupLoopFilterPrimitives_neon(p);
- setupIntraPrimitives_neon(p);
-
CHROMA_420_PU_FILTER_PIXEL_TO_SHORT_NEON(p2s[NONALIGNED]);
CHROMA_420_PU_SVE_FILTER_PIXEL_TO_SHORT(p2s[NONALIGNED]);
CHROMA_422_PU_NEON_FILTER_PIXEL_TO_SHORT(p2s[ALIGNED]);
@@ -1499,14 +1485,6 @@ void setupSvePrimitives(EncoderPrimitives &p)
#if defined(HAVE_SVE2)
void setupSve2Primitives(EncoderPrimitives &p)
{
- // When these primitives will use SVE/SVE2 instructions set,
- // change the following definitions to point to the SVE/SVE2 implementation
- setupPixelPrimitives_neon(p);
- setupFilterPrimitives_neon(p);
- setupDCTPrimitives_neon(p);
- setupLoopFilterPrimitives_neon(p);
- setupIntraPrimitives_neon(p);
-
CHROMA_420_PU_FILTER_PIXEL_TO_SHORT_NEON(p2s[NONALIGNED]);
CHROMA_420_PU_SVE_FILTER_PIXEL_TO_SHORT(p2s[NONALIGNED]);
CHROMA_422_PU_NEON_FILTER_PIXEL_TO_SHORT(p2s[ALIGNED]);
@@ -1961,4 +1939,17 @@ void setupAssemblyPrimitives(EncoderPrimitives &p, int cpuMask)
#endif
}
+
+void setupIntrinsicPrimitives(EncoderPrimitives &p, int cpuMask)
+{
+ if (cpuMask & X265_CPU_NEON)
+ {
+ setupPixelPrimitives_neon(p);
+ setupFilterPrimitives_neon(p);
+ setupDCTPrimitives_neon(p);
+ setupLoopFilterPrimitives_neon(p);
+ setupIntraPrimitives_neon(p);
+ }
+}
+
} // namespace X265_NS
diff --git a/source/common/primitives.cpp b/source/common/primitives.cpp
index 0a7278f2e..56d4319bf 100644
--- a/source/common/primitives.cpp
+++ b/source/common/primitives.cpp
@@ -258,7 +258,7 @@ void x265_setup_primitives(x265_param *param)
primitives.cu[i].intra_pred_allangs = NULL;
#if ENABLE_ASSEMBLY
-#if X265_ARCH_X86
+#if defined(X265_ARCH_X86) || defined(X265_ARCH_ARM64)
setupIntrinsicPrimitives(primitives, param->cpuid);
#endif
setupAssemblyPrimitives(primitives, param->cpuid);
diff --git a/source/test/testbench.cpp b/source/test/testbench.cpp
index 20d29182d..45da893a7 100644
--- a/source/test/testbench.cpp
+++ b/source/test/testbench.cpp
@@ -190,7 +190,7 @@ int main(int argc, char *argv[])
else
continue;
-#if X265_ARCH_X86
+#if defined(X265_ARCH_X86) || defined(X265_ARCH_ARM64)
EncoderPrimitives vecprim;
memset(&vecprim, 0, sizeof(vecprim));
setupIntrinsicPrimitives(vecprim, test_arch[i].flag);
@@ -231,7 +231,7 @@ int main(int argc, char *argv[])
EncoderPrimitives optprim;
memset(&optprim, 0, sizeof(optprim));
-#if X265_ARCH_X86
+#if defined(X265_ARCH_X86) || defined(X265_ARCH_ARM64)
setupIntrinsicPrimitives(optprim, cpuid);
#endif
--
2.42.1
IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.
More information about the x265-devel
mailing list