[x264-devel] [PATCH 2/2] PPC: Remove vec_perm from zigzag_scan_4x4_field on Power9.

Michail Alvanos malvanos at gmail.com
Sat Apr 6 15:23:49 CEST 2019


checkasm8 --bench results on Power9:
Before: zigzag_scan_4x4_field_c: 56 zigzag_scan_4x4_field_altivec: 55
After: zigzag_scan_4x4_field_c: 55 zigzag_scan_4x4_field_altivec: 50

---
 common/ppc/dct.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/common/ppc/dct.c b/common/ppc/dct.c
index 18ad5a69..c6ca57c7 100644
--- a/common/ppc/dct.c
+++ b/common/ppc/dct.c
@@ -648,6 +648,7 @@ void x264_zigzag_scan_4x4_field_altivec( int16_t level[16], int16_t dct[16] )
     dct0v = vec_ld(0x00, dct);
     dct1v = vec_ld(0x10, dct);
 
+#ifndef __POWER9_VECTOR__
     const vec_u8_t sel0 = (vec_u8_t) CV(0,1,2,3,8,9,4,5,6,7,10,11,12,13,14,15);
 
     tmp0v = vec_perm( dct0v, dct1v, sel0 );
@@ -655,6 +656,15 @@ void x264_zigzag_scan_4x4_field_altivec( int16_t level[16], int16_t dct[16] )
 
     vec_st( tmp0v, 0x00, level );
     vec_st( tmp1v, 0x10, level );
+
+#else
+    vec_st( dct0v, 0x00, level );
+    vec_st( dct1v, 0x10, level );
+
+    * (uint32_t *) &level[3] = * (uint32_t *) &dct[2];
+    level[2] = dct[4];
+#endif
+
 }
 
 void x264_zigzag_scan_8x8_frame_altivec( int16_t level[64], int16_t dct[64] )
-- 
2.17.1



More information about the x264-devel mailing list