[x265] [PATCH 11/11] AArch64: Delete pixel_ssd_s_neon implementation
Gerda Zsejke More
gerdazsejke.more at arm.com
Tue Dec 10 16:04:43 UTC 2024
The Neon intrinsics implementation of SSD_S is no longer used because
a faster assembly implementation exists. Delete the now-unused
pixel_ssd_s_neon function template.
Change-Id: I419bc8609f6f3d8bfb07d06d77b817947ce1ccc1
---
source/common/aarch64/pixel-prim.cpp | 28 ----------------------------
1 file changed, 28 deletions(-)
diff --git a/source/common/aarch64/pixel-prim.cpp b/source/common/aarch64/pixel-prim.cpp
index 63b30604c..1ceec869d 100644
--- a/source/common/aarch64/pixel-prim.cpp
+++ b/source/common/aarch64/pixel-prim.cpp
@@ -1625,34 +1625,6 @@ void transpose_neon<64>(pixel *dst, const pixel *src, intptr_t stride)
}
-template<int size>
-sse_t pixel_ssd_s_neon(const int16_t *a, intptr_t dstride)
-{
- sse_t sum = 0;
-
-
- int32x4_t vsum = vdupq_n_s32(0);
-
- for (int y = 0; y < size; y++)
- {
- int x = 0;
-
- for (; (x + 8) <= size; x += 8)
- {
- int16x8_t in = vld1q_s16(a + x);
- vsum = vmlal_s16(vsum, vget_low_s16(in), vget_low_s16(in));
- vsum = vmlal_high_s16(vsum, (in), (in));
- }
- for (; x < size; x++)
- {
- sum += a[x] * a[x];
- }
-
- a += dstride;
- }
- return sum + vaddvq_s32(vsum);
-}
-
};
--
2.39.5 (Apple Git-154)
-------------- next part --------------
From 25369992e65c239c350255c4a3ae7a53682bab7a Mon Sep 17 00:00:00 2001
Message-Id: <25369992e65c239c350255c4a3ae7a53682bab7a.1733846134.git.gerdazsejke.more at arm.com>
In-Reply-To: <cover.1733846134.git.gerdazsejke.more at arm.com>
References: <cover.1733846134.git.gerdazsejke.more at arm.com>
From: Gerda Zsejke More <gerdazsejke.more at arm.com>
Date: Sat, 7 Dec 2024 10:55:33 +0100
Subject: [PATCH 11/11] AArch64: Delete pixel_ssd_s_neon implementation
The Neon intrinsics implementation of SSD_S is no longer used because
a faster assembly implementation exists. Delete the now-unused
pixel_ssd_s_neon function template.
Change-Id: I419bc8609f6f3d8bfb07d06d77b817947ce1ccc1
---
source/common/aarch64/pixel-prim.cpp | 28 ----------------------------
1 file changed, 28 deletions(-)
diff --git a/source/common/aarch64/pixel-prim.cpp b/source/common/aarch64/pixel-prim.cpp
index 63b30604c..1ceec869d 100644
--- a/source/common/aarch64/pixel-prim.cpp
+++ b/source/common/aarch64/pixel-prim.cpp
@@ -1625,34 +1625,6 @@ void transpose_neon<64>(pixel *dst, const pixel *src, intptr_t stride)
}
-template<int size>
-sse_t pixel_ssd_s_neon(const int16_t *a, intptr_t dstride)
-{
- sse_t sum = 0;
-
-
- int32x4_t vsum = vdupq_n_s32(0);
-
- for (int y = 0; y < size; y++)
- {
- int x = 0;
-
- for (; (x + 8) <= size; x += 8)
- {
- int16x8_t in = vld1q_s16(a + x);
- vsum = vmlal_s16(vsum, vget_low_s16(in), vget_low_s16(in));
- vsum = vmlal_high_s16(vsum, (in), (in));
- }
- for (; x < size; x++)
- {
- sum += a[x] * a[x];
- }
-
- a += dstride;
- }
- return sum + vaddvq_s32(vsum);
-}
-
};
--
2.39.5 (Apple Git-154)
More information about the x265-devel mailing list