[x264-devel] [PATCH 3/3] PPC: Improve SSD functions by using vec_extract
Michail Alvanos
malvanos at gmail.com
Sun Apr 14 12:45:11 CEST 2019
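Improve the SSD functions by using vec_extract to read the final sum
directly from element 3 of the vector, instead of broadcasting it with
vec_splat and storing it to an aligned local variable with vec_ste.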
Benchmark results (before --> after; lower is better):
Power9:
ssd_8x8_altivec: 105 --> 95
ssd_16x16_altivec: 158 --> 146
Power8:
ssd_8x8_altivec: 109 --> 88
ssd_16x16_altivec: 225 --> 196
---
common/ppc/pixel.c | 12 ++----------
1 file changed, 2 insertions(+), 10 deletions(-)
diff --git a/common/ppc/pixel.c b/common/ppc/pixel.c
index d1a14ca9..995aca3a 100644
--- a/common/ppc/pixel.c
+++ b/common/ppc/pixel.c
@@ -940,8 +940,6 @@ PIXEL_SAD_X4_ALTIVEC( pixel_sad_x4_8x16_altivec, 8 )
static int pixel_ssd_16x16_altivec( uint8_t *pix1, intptr_t i_stride_pix1,
uint8_t *pix2, intptr_t i_stride_pix2 )
{
- ALIGNED_16( int sum );
-
LOAD_ZERO;
vec_u8_t pix1vA, pix2vA, pix1vB, pix2vB;
vec_u32_t sumv;
@@ -986,16 +984,12 @@ static int pixel_ssd_16x16_altivec( uint8_t *pix1, intptr_t i_stride_pix1,
sumv = vec_msum(diffB, diffB, sumv);
sumv = (vec_u32_t) vec_sums((vec_s32_t) sumv, zero_s32v);
- sumv = vec_splat(sumv, 3);
- vec_ste((vec_s32_t) sumv, 0, &sum);
- return sum;
+ return vec_extract(sumv,3);
}
static int pixel_ssd_8x8_altivec( uint8_t *pix1, intptr_t i_stride_pix1,
uint8_t *pix2, intptr_t i_stride_pix2 )
{
- ALIGNED_16( int sum );
-
LOAD_ZERO;
vec_u8_t pix1v, pix2v;
vec_u32_t sumv;
@@ -1020,10 +1014,8 @@ static int pixel_ssd_8x8_altivec( uint8_t *pix1, intptr_t i_stride_pix1,
sumv = vec_sel( zero_u32v, sumv, sel );
sumv = (vec_u32_t) vec_sums((vec_s32_t) sumv, zero_s32v);
- sumv = vec_splat(sumv, 3);
- vec_ste((vec_s32_t) sumv, 0, &sum);
- return sum;
+ return vec_extract(sumv,3);
}
--
2.17.1
More information about the x264-devel mailing list