[x264-devel] [PATCH 1/3] ppc: Rework the adds in satd8x8

Luca Barbato lu_zero at gentoo.org
Sun Aug 19 09:28:40 CEST 2018


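Take the absolute values, pair them up with vec_adds, and accumulate the
pairs with vec_sum4s instead of chaining eight VEC_ADD_ABS calls.

About 10% faster.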
---
 common/ppc/pixel.c | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/common/ppc/pixel.c b/common/ppc/pixel.c
index 5e1a1cec..8de70a82 100644
--- a/common/ppc/pixel.c
+++ b/common/ppc/pixel.c
@@ -279,6 +279,7 @@ static int pixel_satd_8x8_altivec( uint8_t *pix1, intptr_t i_pix1,
 
     VEC_HADAMAR( diff0v, diff1v, diff2v, diff3v,
                  temp0v, temp1v, temp2v, temp3v );
+
     VEC_HADAMAR( diff4v, diff5v, diff6v, diff7v,
                  temp4v, temp5v, temp6v, temp7v );
 
@@ -292,14 +293,29 @@ static int pixel_satd_8x8_altivec( uint8_t *pix1, intptr_t i_pix1,
     VEC_HADAMAR( diff4v, diff5v, diff6v, diff7v,
                  temp4v, temp5v, temp6v, temp7v );
 
-    VEC_ADD_ABS( temp0v, zero_s32v, satdv );
-    VEC_ADD_ABS( temp1v, satdv,     satdv );
-    VEC_ADD_ABS( temp2v, satdv,     satdv );
-    VEC_ADD_ABS( temp3v, satdv,     satdv );
-    VEC_ADD_ABS( temp4v, satdv,     satdv );
-    VEC_ADD_ABS( temp5v, satdv,     satdv );
-    VEC_ADD_ABS( temp6v, satdv,     satdv );
-    VEC_ADD_ABS( temp7v, satdv,     satdv );
+    vec_s16_t t0 = vec_abs( temp0v );
+    vec_s16_t t1 = vec_abs( temp1v );
+    vec_s16_t t2 = vec_abs( temp2v );
+    vec_s16_t t3 = vec_abs( temp3v );
+
+    vec_s16_t s0 = vec_adds(t0, t1);
+    vec_s16_t s1 = vec_adds(t2, t3);
+
+    vec_s32_t s01 = vec_sum4s( s0, zero_s32v );
+    vec_s32_t s23 = vec_sum4s( s1, zero_s32v );
+
+    vec_s16_t t4 = vec_abs( temp4v );
+    vec_s16_t t5 = vec_abs( temp5v );
+    vec_s16_t t6 = vec_abs( temp6v );
+    vec_s16_t t7 = vec_abs( temp7v );
+
+    vec_s16_t s2 = vec_adds(t4, t5);
+    vec_s16_t s3 = vec_adds(t6, t7);
+
+    vec_s32_t s0145 = vec_sum4s( s2, s01 );
+    vec_s32_t s2367 = vec_sum4s( s3, s23 );
+
+    satdv = vec_add(s0145, s2367);
 
     satdv = vec_sums( satdv, zero_s32v );
     satdv = vec_splat( satdv, 3 );
-- 
2.12.2



More information about the x264-devel mailing list