[x264-devel] [PATCH 2/3] ppc: Use a single store to write the scores for sad_x4_8x8

Luca Barbato lu_zero at gentoo.org
Sun Aug 19 17:27:54 CEST 2018


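Yet another use of xxpermdi: pack the four SAD sums into one vector with
vec_mergel + xxpermdi and write them to scores[] with a single vec_vsx_st,
instead of four vec_ste stores followed by four scalar copies. Another 10% gain.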
---
 common/ppc/pixel.c | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/common/ppc/pixel.c b/common/ppc/pixel.c
index a1b52ca3..0e733a0d 100644
--- a/common/ppc/pixel.c
+++ b/common/ppc/pixel.c
@@ -1120,20 +1120,11 @@ static void pixel_sad_x4_8x8_altivec( uint8_t *fenc,
     sum2v = vec_sums( sum2v, zero_s32v );
     sum3v = vec_sums( sum3v, zero_s32v );
 
-    sum0v = vec_splat( sum0v, 3 );
-    sum1v = vec_splat( sum1v, 3 );
-    sum2v = vec_splat( sum2v, 3 );
-    sum3v = vec_splat( sum3v, 3 );
+    vec_s32_t s01 = vec_mergel(sum0v, sum1v);
+    vec_s32_t s23 = vec_mergel(sum2v, sum3v);
+    vec_s32_t s = xxpermdi(s01, s23, 3);
 
-    vec_ste( sum0v, 0, &sum0);
-    vec_ste( sum1v, 0, &sum1);
-    vec_ste( sum2v, 0, &sum2);
-    vec_ste( sum3v, 0, &sum3);
-
-    scores[0] = sum0;
-    scores[1] = sum1;
-    scores[2] = sum2;
-    scores[3] = sum3;
+    vec_vsx_st(s, 0, scores);
 }
 
 static void pixel_sad_x3_8x8_altivec( uint8_t *fenc, uint8_t *pix0,
-- 
2.12.2



More information about the x264-devel mailing list