[x264-devel] [PATCH 1/1] PPC: Improve SATD by using vec_extract.

Michail Alvanos malvanos at gmail.com
Sun Apr 7 11:52:45 CEST 2019


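Speed up the intra_satd_x3 and satd_* functions by around 1-10% by
reading the result with vec_extract instead of vec_splat followed by
vec_ste. In the 4x4 and 8x4 variants the vec_sum2s reduction is also
replaced by a scalar add of the two extracted elements.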

---
 common/ppc/pixel.c | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/common/ppc/pixel.c b/common/ppc/pixel.c
index f94029e3..11424e36 100644
--- a/common/ppc/pixel.c
+++ b/common/ppc/pixel.c
@@ -161,9 +161,7 @@ static int pixel_satd_4x4_altivec( uint8_t *pix1, intptr_t i_pix1,
 
     satdv = add_abs_4( temp0v, temp1v, temp2v, temp3v );
 
-    satdv = vec_sum2s( satdv, zero_s32v );
-    satdv = vec_splat( satdv, 1 );
-    vec_ste( satdv, 0, &i_satd );
+    i_satd =  vec_extract(satdv,0) + vec_extract(satdv,1);
 
     return i_satd >> 1;
 }
@@ -280,9 +278,7 @@ static int pixel_satd_8x4_altivec( uint8_t *pix1, intptr_t i_pix1,
     satdv = add_abs_8( temp0v, temp1v, temp2v, temp3v,
                        temp4v, temp5v, temp6v, temp7v );
 
-    satdv = vec_sum2s( satdv, zero_s32v );
-    satdv = vec_splat( satdv, 1 );
-    vec_ste( satdv, 0, &i_satd );
+    i_satd =  vec_extract(satdv,0) + vec_extract(satdv,1);
 
     return i_satd >> 1;
 }
@@ -330,8 +326,8 @@ static int pixel_satd_8x8_altivec( uint8_t *pix1, intptr_t i_pix1,
                        temp4v, temp5v, temp6v, temp7v );
 
     satdv = vec_sums( satdv, zero_s32v );
-    satdv = vec_splat( satdv, 3 );
-    vec_ste( satdv, 0, &i_satd );
+
+    i_satd =  vec_extract(satdv,3) ;
 
     return i_satd >> 1;
 }
@@ -400,8 +396,8 @@ static int pixel_satd_8x16_altivec( uint8_t *pix1, intptr_t i_pix1,
                                        temp4v, temp5v, temp6v, temp7v ) );
 
     satdv = vec_sums( satdv, zero_s32v );
-    satdv = vec_splat( satdv, 3 );
-    vec_ste( satdv, 0, &i_satd );
+
+    i_satd =  vec_extract(satdv,3) ;
 
     return i_satd >> 1;
 }
@@ -470,8 +466,8 @@ static int pixel_satd_16x8_altivec( uint8_t *pix1, intptr_t i_pix1,
                                        temp4v, temp5v, temp6v, temp7v ) );
 
     satdv = vec_sums( satdv, zero_s32v );
-    satdv = vec_splat( satdv, 3 );
-    vec_ste( satdv, 0, &i_satd );
+
+    i_satd =  vec_extract(satdv,3) ;
 
     return i_satd >> 1;
 }
@@ -575,8 +571,7 @@ static int pixel_satd_16x16_altivec( uint8_t *pix1, intptr_t i_pix1,
                                        temp4v, temp5v, temp6v, temp7v ) );
 
     satdv = vec_sums( satdv, zero_s32v );
-    satdv = vec_splat( satdv, 3 );
-    vec_ste( satdv, 0, &i_satd );
+    i_satd =  vec_extract(satdv,3) ;
 
     return i_satd >> 1;
 }
-- 
2.17.1



More information about the x264-devel mailing list