[x265] [PATCH 11/11] AArch64: Delete pixel_ssd_s_neon implementation

Gerda Zsejke More gerdazsejke.more at arm.com
Tue Dec 10 16:04:43 UTC 2024


The Neon intrinsics implementation of SSD_S is not used anymore given
that a faster asm implementation exists. Delete the pixel_ssd_s_neon
function.

Change-Id: I419bc8609f6f3d8bfb07d06d77b817947ce1ccc1
---
 source/common/aarch64/pixel-prim.cpp | 28 ----------------------------
 1 file changed, 28 deletions(-)

diff --git a/source/common/aarch64/pixel-prim.cpp b/source/common/aarch64/pixel-prim.cpp
index 63b30604c..1ceec869d 100644
--- a/source/common/aarch64/pixel-prim.cpp
+++ b/source/common/aarch64/pixel-prim.cpp
@@ -1625,34 +1625,6 @@ void transpose_neon<64>(pixel *dst, const pixel *src, intptr_t stride)
 }
 
 
-template<int size>
-sse_t pixel_ssd_s_neon(const int16_t *a, intptr_t dstride)
-{
-    sse_t sum = 0;
-
-
-    int32x4_t vsum = vdupq_n_s32(0);
-
-    for (int y = 0; y < size; y++)
-    {
-        int x = 0;
-
-        for (; (x + 8) <= size; x += 8)
-        {
-            int16x8_t in = vld1q_s16(a + x);
-            vsum = vmlal_s16(vsum, vget_low_s16(in), vget_low_s16(in));
-            vsum = vmlal_high_s16(vsum, (in), (in));
-        }
-        for (; x < size; x++)
-        {
-            sum += a[x] * a[x];
-        }
-
-        a += dstride;
-    }
-    return sum + vaddvq_s32(vsum);
-}
-
 
 };
 
-- 
2.39.5 (Apple Git-154)

-------------- next part --------------
From 25369992e65c239c350255c4a3ae7a53682bab7a Mon Sep 17 00:00:00 2001
Message-Id: <25369992e65c239c350255c4a3ae7a53682bab7a.1733846134.git.gerdazsejke.more at arm.com>
In-Reply-To: <cover.1733846134.git.gerdazsejke.more at arm.com>
References: <cover.1733846134.git.gerdazsejke.more at arm.com>
From: Gerda Zsejke More <gerdazsejke.more at arm.com>
Date: Sat, 7 Dec 2024 10:55:33 +0100
Subject: [PATCH 11/11] AArch64: Delete pixel_ssd_s_neon implementation

The Neon intrinsics implementation of SSD_S is not used anymore given
that a faster asm implementation exists. Delete the pixel_ssd_s_neon
function.

Change-Id: I419bc8609f6f3d8bfb07d06d77b817947ce1ccc1
---
 source/common/aarch64/pixel-prim.cpp | 28 ----------------------------
 1 file changed, 28 deletions(-)

diff --git a/source/common/aarch64/pixel-prim.cpp b/source/common/aarch64/pixel-prim.cpp
index 63b30604c..1ceec869d 100644
--- a/source/common/aarch64/pixel-prim.cpp
+++ b/source/common/aarch64/pixel-prim.cpp
@@ -1625,34 +1625,6 @@ void transpose_neon<64>(pixel *dst, const pixel *src, intptr_t stride)
 }
 
 
-template<int size>
-sse_t pixel_ssd_s_neon(const int16_t *a, intptr_t dstride)
-{
-    sse_t sum = 0;
-
-
-    int32x4_t vsum = vdupq_n_s32(0);
-
-    for (int y = 0; y < size; y++)
-    {
-        int x = 0;
-
-        for (; (x + 8) <= size; x += 8)
-        {
-            int16x8_t in = vld1q_s16(a + x);
-            vsum = vmlal_s16(vsum, vget_low_s16(in), vget_low_s16(in));
-            vsum = vmlal_high_s16(vsum, (in), (in));
-        }
-        for (; x < size; x++)
-        {
-            sum += a[x] * a[x];
-        }
-
-        a += dstride;
-    }
-    return sum + vaddvq_s32(vsum);
-}
-
 
 };
 
-- 
2.39.5 (Apple Git-154)



More information about the x265-devel mailing list