[x264-devel] x86: Don't use explicitly aligned versions of SAD on AVX CPUs
Henrik Gramner
git at videolan.org
Mon May 20 23:06:47 CEST 2013
x264 | branch: master | Henrik Gramner <henrik at gramner.com> | Fri May 3 23:06:10 2013 +0200| [b4be6e56629cf8fdcf53adc6b879969d8f6760b3] | committer: Jason Garrett-Glaser
x86: Don't use explicitly aligned versions of SAD on AVX CPUs
On modern CPUs movdqu isn't slower than movdqa when used on aligned data, and using the same code in both cases saves cache.
This was already done for the high bit-depth AVX2 implementation, but the aligned version still exists as dead code, so remove it.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=b4be6e56629cf8fdcf53adc6b879969d8f6760b3
---
common/pixel.c | 2 ++
common/x86/pixel.h | 1 -
common/x86/sad16-a.asm | 3 ---
3 files changed, 2 insertions(+), 4 deletions(-)
diff --git a/common/pixel.c b/common/pixel.c
index 145fac7..2b323cb 100644
--- a/common/pixel.c
+++ b/common/pixel.c
@@ -966,6 +966,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
}
if( cpu&X264_CPU_AVX )
{
+ INIT5_NAME( sad_aligned, sad, _ssse3 ); /* AVX-capable CPUs doesn't benefit from an aligned version */
INIT_ADS( _avx );
INIT6( satd, _avx );
pixf->satd[PIXEL_4x16] = x264_pixel_satd_4x16_avx;
@@ -1234,6 +1235,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
if( cpu&X264_CPU_AVX )
{
+ INIT2_NAME( sad_aligned, sad, _sse2 ); /* AVX-capable CPUs doesn't benefit from an aligned version */
INIT8( satd, _avx );
INIT7( satd_x3, _avx );
INIT7( satd_x4, _avx );
diff --git a/common/x86/pixel.h b/common/x86/pixel.h
index bddf528..555c4ec 100644
--- a/common/x86/pixel.h
+++ b/common/x86/pixel.h
@@ -53,7 +53,6 @@ DECL_X1( sad, sse2_aligned )
DECL_X1( sad, ssse3 )
DECL_X1( sad, ssse3_aligned )
DECL_X1( sad, avx2 )
-DECL_X1( sad, avx2_aligned )
DECL_X4( sad, mmx2 )
DECL_X4( sad, sse2 )
DECL_X4( sad, sse3 )
diff --git a/common/x86/sad16-a.asm b/common/x86/sad16-a.asm
index 8c48723..7606da3 100644
--- a/common/x86/sad16-a.asm
+++ b/common/x86/sad16-a.asm
@@ -205,9 +205,6 @@ SAD 8, 8
INIT_YMM avx2
SAD 16, 16
SAD 16, 8
-INIT_YMM avx2, aligned
-SAD 16, 16
-SAD 16, 8
;=============================================================================
; SAD x3/x4
More information about the x264-devel
mailing list