[x264-devel] [PATCH 1/2] ppc: Use vec_splats in mc

Luca Barbato lu_zero at gentoo.org
Thu Sep 6 12:25:13 CEST 2018


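Replace the "store a scalar into a union, then vec_splat element 0" idiom
with direct vec_splats() calls. No overall speedup, just tidier code.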
---
 common/ppc/mc.c | 96 ++++++++++++++++-----------------------------------------
 1 file changed, 27 insertions(+), 69 deletions(-)

diff --git a/common/ppc/mc.c b/common/ppc/mc.c
index 3ceb1ac8..125ecfa2 100644
--- a/common/ppc/mc.c
+++ b/common/ppc/mc.c
@@ -802,20 +802,13 @@ void x264_hpel_filter_altivec( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint
 
     vec_u16_t twov, fourv, fivev, sixv;
     vec_s16_t sixteenv, thirtytwov;
-    vec_u16_u temp_u;
-
-    temp_u.s[0]=2;
-    twov = vec_splat( temp_u.v, 0 );
-    temp_u.s[0]=4;
-    fourv = vec_splat( temp_u.v, 0 );
-    temp_u.s[0]=5;
-    fivev = vec_splat( temp_u.v, 0 );
-    temp_u.s[0]=6;
-    sixv = vec_splat( temp_u.v, 0 );
-    temp_u.s[0]=16;
-    sixteenv = (vec_s16_t)vec_splat( temp_u.v, 0 );
-    temp_u.s[0]=32;
-    thirtytwov = (vec_s16_t)vec_splat( temp_u.v, 0 );
+
+    twov = vec_splats( (uint16_t)2 );
+    fourv = vec_splats( (uint16_t)4 );
+    fivev = vec_splats( (uint16_t)5 );
+    sixv = vec_splats( (uint16_t)6 );
+    sixteenv = vec_splats( (int16_t)16 );
+    thirtytwov = vec_splats( (int16_t)32 );
 
     for( int y = 0; y < i_height; y++ )
     {
@@ -995,23 +988,16 @@ static void mc_weight_w2_altivec( uint8_t *dst, intptr_t i_dst, uint8_t *src, in
     vec_u8_t srcv;
     vec_s16_t weightv;
     vec_s16_t scalev, offsetv, denomv, roundv;
-    vec_s16_u loadv;
 
     int denom = weight->i_denom;
 
-    loadv.s[0] = weight->i_scale;
-    scalev = vec_splat( loadv.v, 0 );
-
-    loadv.s[0] = weight->i_offset;
-    offsetv = vec_splat( loadv.v, 0 );
+    scalev = vec_splats( (int16_t)weight->i_scale );
+    offsetv = vec_splats( (int16_t)weight->i_offset );
 
     if( denom >= 1 )
     {
-        loadv.s[0] = denom;
-        denomv = vec_splat( loadv.v, 0 );
-
-        loadv.s[0] = 1<<(denom - 1);
-        roundv = vec_splat( loadv.v, 0 );
+        denomv = vec_splats( (int16_t) denom );
+        roundv = vec_splats( (int16_t)(1<<(denom - 1)) );
 
         for( int y = 0; y < i_height; y++, dst += i_dst, src += i_src )
         {
@@ -1047,23 +1033,16 @@ static void mc_weight_w4_altivec( uint8_t *dst, intptr_t i_dst, uint8_t *src, in
     vec_u8_t srcv;
     vec_s16_t weightv;
     vec_s16_t scalev, offsetv, denomv, roundv;
-    vec_s16_u loadv;
 
     int denom = weight->i_denom;
 
-    loadv.s[0] = weight->i_scale;
-    scalev = vec_splat( loadv.v, 0 );
-
-    loadv.s[0] = weight->i_offset;
-    offsetv = vec_splat( loadv.v, 0 );
+    scalev = vec_splats( (int16_t)weight->i_scale );
+    offsetv = vec_splats( (int16_t)weight->i_offset );
 
     if( denom >= 1 )
     {
-        loadv.s[0] = denom;
-        denomv = vec_splat( loadv.v, 0 );
-
-        loadv.s[0] = 1<<(denom - 1);
-        roundv = vec_splat( loadv.v, 0 );
+        denomv = vec_splats( (int16_t) denom );
+        roundv = vec_splats( (int16_t)(1<<(denom - 1)) );
 
         for( int y = 0; y < i_height; y++, dst += i_dst, src += i_src )
         {
@@ -1099,23 +1078,16 @@ static void mc_weight_w8_altivec( uint8_t *dst, intptr_t i_dst, uint8_t *src, in
     vec_u8_t srcv;
     vec_s16_t weightv;
     vec_s16_t scalev, offsetv, denomv, roundv;
-    vec_s16_u loadv;
 
     int denom = weight->i_denom;
 
-    loadv.s[0] = weight->i_scale;
-    scalev = vec_splat( loadv.v, 0 );
-
-    loadv.s[0] = weight->i_offset;
-    offsetv = vec_splat( loadv.v, 0 );
+    scalev = vec_splats( (int16_t)weight->i_scale );
+    offsetv = vec_splats( (int16_t)weight->i_offset );
 
     if( denom >= 1 )
     {
-        loadv.s[0] = denom;
-        denomv = vec_splat( loadv.v, 0 );
-
-        loadv.s[0] = 1<<(denom - 1);
-        roundv = vec_splat( loadv.v, 0 );
+        denomv = vec_splats( (int16_t) denom );
+        roundv = vec_splats( (int16_t)(1<<(denom - 1)) );
 
         for( int y = 0; y < i_height; y++, dst += i_dst, src += i_src )
         {
@@ -1151,23 +1123,16 @@ static void mc_weight_w16_altivec( uint8_t *dst, intptr_t i_dst, uint8_t *src, i
     vec_u8_t srcv;
     vec_s16_t weight_lv, weight_hv;
     vec_s16_t scalev, offsetv, denomv, roundv;
-    vec_s16_u loadv;
 
     int denom = weight->i_denom;
 
-    loadv.s[0] = weight->i_scale;
-    scalev = vec_splat( loadv.v, 0 );
-
-    loadv.s[0] = weight->i_offset;
-    offsetv = vec_splat( loadv.v, 0 );
+    scalev = vec_splats( (int16_t)weight->i_scale );
+    offsetv = vec_splats( (int16_t)weight->i_offset );
 
     if( denom >= 1 )
     {
-        loadv.s[0] = denom;
-        denomv = vec_splat( loadv.v, 0 );
-
-        loadv.s[0] = 1<<(denom - 1);
-        roundv = vec_splat( loadv.v, 0 );
+        denomv = vec_splats( (int16_t) denom );
+        roundv = vec_splats( (int16_t)(1<<(denom - 1)) );
 
         for( int y = 0; y < i_height; y++, dst += i_dst, src += i_src )
         {
@@ -1209,15 +1174,11 @@ static void mc_weight_w20_altivec( uint8_t *dst, intptr_t i_dst, uint8_t *src, i
     vec_u8_t srcv, srcv2;
     vec_s16_t weight_lv, weight_hv, weight_3v;
     vec_s16_t scalev, offsetv, denomv, roundv;
-    vec_s16_u loadv;
 
     int denom = weight->i_denom;
 
-    loadv.s[0] = weight->i_scale;
-    scalev = vec_splat( loadv.v, 0 );
-
-    loadv.s[0] = weight->i_offset;
-    offsetv = vec_splat( loadv.v, 0 );
+    scalev = vec_splats( (int16_t)weight->i_scale );
+    offsetv = vec_splats( (int16_t)weight->i_offset );
 
     if( denom >= 1 )
     {
@@ -1229,11 +1190,8 @@ static void mc_weight_w20_altivec( uint8_t *dst, intptr_t i_dst, uint8_t *src, i
             { round, round, round, round, 0, 0, 0, 0 },
         };
 
-        loadv.s[0] = denom;
-        denomv = vec_splat( loadv.v, 0 );
-
-        loadv.s[0] = round;
-        roundv = vec_splat( loadv.v, 0 );
+        denomv = vec_splats( (int16_t)denom );
+        roundv = vec_splats( (int16_t)(1<<(denom - 1)) );
 
         for( int y = 0; y < i_height; y++, dst += i_dst, src += i_src )
         {
-- 
2.12.2



More information about the x264-devel mailing list