[x264-devel] x86: Don't use explicitly aligned versions of SAD on AVX CPUs

Mon May 20 23:06:47 CEST 2013

x264 | branch: master | Henrik Gramner <henrik at gramner.com> | Fri May  3 23:06:10 2013 +0200| [b4be6e56629cf8fdcf53adc6b879969d8f6760b3] | committer: Jason Garrett-Glaser

x86: Don't use explicitly aligned versions of SAD on AVX CPUs

On modern CPUs, movdqu isn't slower than movdqa when used on aligned data, and using the same code in both cases saves cache.

This was already done for the high bit-depth AVX2 implementation, but the aligned version still exists as dead code, so remove it.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=b4be6e56629cf8fdcf53adc6b879969d8f6760b3
---

 common/pixel.c         |    2 ++
 common/x86/pixel.h     |    1 -
 common/x86/sad16-a.asm |    3 ---
 3 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/common/pixel.c b/common/pixel.c
index 145fac7..2b323cb 100644
--- a/common/pixel.c
+++ b/common/pixel.c
@@ -966,6 +966,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
     }
     if( cpu&X264_CPU_AVX )
     {
+        INIT5_NAME( sad_aligned, sad, _ssse3 ); /* AVX-capable CPUs doesn't benefit from an aligned version */
         INIT_ADS( _avx );
         INIT6( satd, _avx );
         pixf->satd[PIXEL_4x16] = x264_pixel_satd_4x16_avx;
@@ -1234,6 +1235,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
 
     if( cpu&X264_CPU_AVX )
     {
+        INIT2_NAME( sad_aligned, sad, _sse2 ); /* AVX-capable CPUs doesn't benefit from an aligned version */
         INIT8( satd, _avx );
         INIT7( satd_x3, _avx );
         INIT7( satd_x4, _avx );
diff --git a/common/x86/pixel.h b/common/x86/pixel.h
index bddf528..555c4ec 100644
--- a/common/x86/pixel.h
+++ b/common/x86/pixel.h
@@ -53,7 +53,6 @@ DECL_X1( sad, sse2_aligned )
 DECL_X1( sad, ssse3 )
 DECL_X1( sad, ssse3_aligned )
 DECL_X1( sad, avx2 )
-DECL_X1( sad, avx2_aligned )
 DECL_X4( sad, mmx2 )
 DECL_X4( sad, sse2 )
 DECL_X4( sad, sse3 )
diff --git a/common/x86/sad16-a.asm b/common/x86/sad16-a.asm
index 8c48723..7606da3 100644
--- a/common/x86/sad16-a.asm
+++ b/common/x86/sad16-a.asm
@@ -205,9 +205,6 @@ SAD  8,  8
 INIT_YMM avx2
 SAD 16, 16
 SAD 16,  8
-INIT_YMM avx2, aligned
-SAD 16, 16
-SAD 16,  8
 
 ;=============================================================================
 ; SAD x3/x4