[x264-devel] [PATCH 2/2] PPC: Remove vec_perm from zigzag_scan_4x4_field on Power9.
Michail Alvanos
malvanos at gmail.com
Sat Apr 6 15:23:49 CEST 2019
Checkasm8 --bench on Power9:
Before: zigzag_scan_4x4_field_c: 56 zigzag_scan_4x4_field_altivec: 55
After: zigzag_scan_4x4_field_c: 55 zigzag_scan_4x4_field_altivec: 50
---
common/ppc/dct.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/common/ppc/dct.c b/common/ppc/dct.c
index 18ad5a69..c6ca57c7 100644
--- a/common/ppc/dct.c
+++ b/common/ppc/dct.c
@@ -648,6 +648,7 @@ void x264_zigzag_scan_4x4_field_altivec( int16_t level[16], int16_t dct[16] )
dct0v = vec_ld(0x00, dct);
dct1v = vec_ld(0x10, dct);
+#ifndef __POWER9_VECTOR__
const vec_u8_t sel0 = (vec_u8_t) CV(0,1,2,3,8,9,4,5,6,7,10,11,12,13,14,15);
tmp0v = vec_perm( dct0v, dct1v, sel0 );
@@ -655,6 +656,15 @@ void x264_zigzag_scan_4x4_field_altivec( int16_t level[16], int16_t dct[16] )
vec_st( tmp0v, 0x00, level );
vec_st( tmp1v, 0x10, level );
+
+#else
+ vec_st( dct0v, 0x00, level );
+ vec_st( dct1v, 0x10, level );
+
+ * (uint32_t *) &level[3] = * (uint32_t *) &dct[2];
+ level[2] = dct[4];
+#endif
+
}
void x264_zigzag_scan_8x8_frame_altivec( int16_t level[64], int16_t dct[64] )
--
2.17.1
More information about the x264-devel mailing list