[x264-devel] [PATCH 1/1] PPC: Improve SATD by using vec_extract.
Michail Alvanos
malvanos at gmail.com
Sun Apr 7 11:52:45 CEST 2019
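Speed up the intra_satd_x3 and satd_* functions by roughly 1-10% by
reading the result with vec_extract instead of vec_splat followed by
vec_ste. In the 4x4 and 8x4 variants the vec_sum2s reduction is also
dropped; the two partial sums are extracted and added as scalars.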
---
common/ppc/pixel.c | 23 +++++++++--------------
1 file changed, 9 insertions(+), 14 deletions(-)
diff --git a/common/ppc/pixel.c b/common/ppc/pixel.c
index f94029e3..11424e36 100644
--- a/common/ppc/pixel.c
+++ b/common/ppc/pixel.c
@@ -161,9 +161,7 @@ static int pixel_satd_4x4_altivec( uint8_t *pix1, intptr_t i_pix1,
satdv = add_abs_4( temp0v, temp1v, temp2v, temp3v );
- satdv = vec_sum2s( satdv, zero_s32v );
- satdv = vec_splat( satdv, 1 );
- vec_ste( satdv, 0, &i_satd );
+ i_satd = vec_extract(satdv,0) + vec_extract(satdv,1);
return i_satd >> 1;
}
@@ -280,9 +278,7 @@ static int pixel_satd_8x4_altivec( uint8_t *pix1, intptr_t i_pix1,
satdv = add_abs_8( temp0v, temp1v, temp2v, temp3v,
temp4v, temp5v, temp6v, temp7v );
- satdv = vec_sum2s( satdv, zero_s32v );
- satdv = vec_splat( satdv, 1 );
- vec_ste( satdv, 0, &i_satd );
+ i_satd = vec_extract(satdv,0) + vec_extract(satdv,1);
return i_satd >> 1;
}
@@ -330,8 +326,8 @@ static int pixel_satd_8x8_altivec( uint8_t *pix1, intptr_t i_pix1,
temp4v, temp5v, temp6v, temp7v );
satdv = vec_sums( satdv, zero_s32v );
- satdv = vec_splat( satdv, 3 );
- vec_ste( satdv, 0, &i_satd );
+
+ i_satd = vec_extract(satdv,3) ;
return i_satd >> 1;
}
@@ -400,8 +396,8 @@ static int pixel_satd_8x16_altivec( uint8_t *pix1, intptr_t i_pix1,
temp4v, temp5v, temp6v, temp7v ) );
satdv = vec_sums( satdv, zero_s32v );
- satdv = vec_splat( satdv, 3 );
- vec_ste( satdv, 0, &i_satd );
+
+ i_satd = vec_extract(satdv,3) ;
return i_satd >> 1;
}
@@ -470,8 +466,8 @@ static int pixel_satd_16x8_altivec( uint8_t *pix1, intptr_t i_pix1,
temp4v, temp5v, temp6v, temp7v ) );
satdv = vec_sums( satdv, zero_s32v );
- satdv = vec_splat( satdv, 3 );
- vec_ste( satdv, 0, &i_satd );
+
+ i_satd = vec_extract(satdv,3) ;
return i_satd >> 1;
}
@@ -575,8 +571,7 @@ static int pixel_satd_16x16_altivec( uint8_t *pix1, intptr_t i_pix1,
temp4v, temp5v, temp6v, temp7v ) );
satdv = vec_sums( satdv, zero_s32v );
- satdv = vec_splat( satdv, 3 );
- vec_ste( satdv, 0, &i_satd );
+ i_satd = vec_extract(satdv,3) ;
return i_satd >> 1;
}
--
2.17.1
More information about the x264-devel mailing list