[x265] [PATCH 1/3] AArch64: Delete redundant SAD Neon intrinsics primitives
Jonathan Wright
jonathan.wright at arm.com
Tue Jan 7 16:26:34 UTC 2025
Delete the setup code for SAD Neon intrinsics primitives since we now
have optimized Neon assembly implementations for all block sizes and
bitdepths.
The sad_pp_neon function is retained as it is used in psyCost_pp_neon
for both 4x4 and 8x8 blocks.
---
source/common/aarch64/pixel-prim.cpp | 11 -----------
1 file changed, 11 deletions(-)
diff --git a/source/common/aarch64/pixel-prim.cpp b/source/common/aarch64/pixel-prim.cpp
index 1ceec869d..c57057f5d 100644
--- a/source/common/aarch64/pixel-prim.cpp
+++ b/source/common/aarch64/pixel-prim.cpp
@@ -1641,7 +1641,6 @@ void setupPixelPrimitives_neon(EncoderPrimitives &p)
p.pu[LUMA_ ## W ## x ## H].copy_pp = blockcopy_pp_neon<W, H>; \
p.pu[LUMA_ ## W ## x ## H].addAvg[NONALIGNED] = addAvg_neon<W, H>; \
p.pu[LUMA_ ## W ## x ## H].addAvg[ALIGNED] = addAvg_neon<W, H>; \
- p.pu[LUMA_ ## W ## x ## H].sad = sad_pp_neon<W, H>; \
p.pu[LUMA_ ## W ## x ## H].sad_x3 = sad_x3_neon<W, H>; \
p.pu[LUMA_ ## W ## x ## H].sad_x4 = sad_x4_neon<W, H>; \
p.pu[LUMA_ ## W ## x ## H].pixelavg_pp[NONALIGNED] = pixelavg_pp_neon<W, H>; \
@@ -1702,16 +1701,6 @@ void setupPixelPrimitives_neon(EncoderPrimitives &p)
LUMA_PU(48, 64);
LUMA_PU(64, 16);
LUMA_PU(16, 64);
-
-#if defined(__APPLE__)
- p.pu[LUMA_4x4].sad = sad_pp_neon<4, 4>;
- p.pu[LUMA_4x8].sad = sad_pp_neon<4, 8>;
- p.pu[LUMA_4x16].sad = sad_pp_neon<4, 16>;
-#endif // defined(__APPLE__)
- p.pu[LUMA_8x4].sad = sad_pp_neon<8, 4>;
- p.pu[LUMA_8x8].sad = sad_pp_neon<8, 8>;
- p.pu[LUMA_8x16].sad = sad_pp_neon<8, 16>;
- p.pu[LUMA_8x32].sad = sad_pp_neon<8, 32>;
#if !(HIGH_BIT_DEPTH)
p.pu[LUMA_4x4].sad_x3 = sad_x3_neon<4, 4>;
--
2.39.3 (Apple Git-146)
-------------- next part --------------
From 0db3ecb6d9eb6ea4ad186cc11fd19bedc5c0df3a Mon Sep 17 00:00:00 2001
Message-Id: <0db3ecb6d9eb6ea4ad186cc11fd19bedc5c0df3a.1736263010.git.jonathan.wright at arm.com>
In-Reply-To: <cover.1736263010.git.jonathan.wright at arm.com>
References: <cover.1736263010.git.jonathan.wright at arm.com>
From: Jonathan Wright <jonathan.wright at arm.com>
Date: Mon, 9 Dec 2024 11:36:52 +0000
Subject: [PATCH 1/3] AArch64: Delete redundant SAD Neon intrinsics primitives
Delete the setup code for SAD Neon intrinsics primitives since we now
have optimized Neon assembly implementations for all block sizes and
bitdepths.
The sad_pp_neon function is retained as it is used in psyCost_pp_neon
for both 4x4 and 8x8 blocks.
---
source/common/aarch64/pixel-prim.cpp | 11 -----------
1 file changed, 11 deletions(-)
diff --git a/source/common/aarch64/pixel-prim.cpp b/source/common/aarch64/pixel-prim.cpp
index 1ceec869d..c57057f5d 100644
--- a/source/common/aarch64/pixel-prim.cpp
+++ b/source/common/aarch64/pixel-prim.cpp
@@ -1641,7 +1641,6 @@ void setupPixelPrimitives_neon(EncoderPrimitives &p)
p.pu[LUMA_ ## W ## x ## H].copy_pp = blockcopy_pp_neon<W, H>; \
p.pu[LUMA_ ## W ## x ## H].addAvg[NONALIGNED] = addAvg_neon<W, H>; \
p.pu[LUMA_ ## W ## x ## H].addAvg[ALIGNED] = addAvg_neon<W, H>; \
- p.pu[LUMA_ ## W ## x ## H].sad = sad_pp_neon<W, H>; \
p.pu[LUMA_ ## W ## x ## H].sad_x3 = sad_x3_neon<W, H>; \
p.pu[LUMA_ ## W ## x ## H].sad_x4 = sad_x4_neon<W, H>; \
p.pu[LUMA_ ## W ## x ## H].pixelavg_pp[NONALIGNED] = pixelavg_pp_neon<W, H>; \
@@ -1702,16 +1701,6 @@ void setupPixelPrimitives_neon(EncoderPrimitives &p)
LUMA_PU(48, 64);
LUMA_PU(64, 16);
LUMA_PU(16, 64);
-
-#if defined(__APPLE__)
- p.pu[LUMA_4x4].sad = sad_pp_neon<4, 4>;
- p.pu[LUMA_4x8].sad = sad_pp_neon<4, 8>;
- p.pu[LUMA_4x16].sad = sad_pp_neon<4, 16>;
-#endif // defined(__APPLE__)
- p.pu[LUMA_8x4].sad = sad_pp_neon<8, 4>;
- p.pu[LUMA_8x8].sad = sad_pp_neon<8, 8>;
- p.pu[LUMA_8x16].sad = sad_pp_neon<8, 16>;
- p.pu[LUMA_8x32].sad = sad_pp_neon<8, 32>;
#if !(HIGH_BIT_DEPTH)
p.pu[LUMA_4x4].sad_x3 = sad_x3_neon<4, 4>;
--
2.39.3 (Apple Git-146)
More information about the x265-devel mailing list