[x264-devel] commit: satd_4x4_ssse3 (Jason Garrett-Glaser)
git version control
git at videolan.org
Thu Mar 20 02:11:18 CET 2008
x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Wed Mar 19 14:00:34 2008 -0600| [1df5f84baf226141548948d94c84a1f3b1792c0b]
satd_4x4_ssse3
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=1df5f84baf226141548948d94c84a1f3b1792c0b
---
common/pixel.c | 8 ++++----
common/x86/pixel-a.asm | 12 ++++++++----
2 files changed, 12 insertions(+), 8 deletions(-)
diff --git a/common/pixel.c b/common/pixel.c
index 5200c66..ae90845 100644
--- a/common/pixel.c
+++ b/common/pixel.c
@@ -357,7 +357,7 @@ SATD_X_DECL7()
SATD_X_DECL7( _mmxext )
SATD_X_DECL5( _sse2 )
#ifdef HAVE_SSE3
-SATD_X_DECL5( _ssse3 )
+SATD_X_DECL7( _ssse3 )
#endif
#endif
@@ -630,9 +630,9 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
if( cpu&X264_CPU_SSSE3 )
{
- INIT5( satd, _ssse3 );
- INIT5( satd_x3, _ssse3 );
- INIT5( satd_x4, _ssse3 );
+ INIT7( satd, _ssse3 );
+ INIT7( satd_x3, _ssse3 );
+ INIT7( satd_x4, _ssse3 );
INIT_ADS( _ssse3 );
pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_ssse3;
pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_ssse3;
diff --git a/common/x86/pixel-a.asm b/common/x86/pixel-a.asm
index b4593d6..557aeb8 100644
--- a/common/x86/pixel-a.asm
+++ b/common/x86/pixel-a.asm
@@ -513,19 +513,21 @@ cglobal x264_pixel_satd_8x4_mmxext, 4,6
paddw mm0, mm1
SATD_END_MMX
-cglobal x264_pixel_satd_4x8_mmxext, 4,6
+%macro SATD_W4 1
+cglobal x264_pixel_satd_4x8_%1, 4,6
SATD_START_MMX
SATD_4x4_MMX mm0, 0, 1
SATD_4x4_MMX mm1, 0, 0
paddw mm0, mm1
SATD_END_MMX
-cglobal x264_pixel_satd_4x4_mmxext, 4,6
+cglobal x264_pixel_satd_4x4_%1, 4,6
SATD_START_MMX
SATD_4x4_MMX mm0, 0, 0
SATD_END_MMX
+%endmacro
-
+SATD_W4 mmxext
%macro SATD_START_SSE2 0
pxor xmm6, xmm6
@@ -1211,10 +1213,11 @@ cglobal x264_intra_satd_x3_8x8c_%1, 0,6
%endmacro
; instantiate satds
-; FIXME width4 can benefit from pabsw even if not sse2
+%ifndef ARCH_X86_64
cextern x264_pixel_sa8d_8x8_mmxext
SA8D_16x16_32 mmxext
+%endif
%define ABS1 ABS1_MMX
%define ABS2 ABS2_MMX
@@ -1229,6 +1232,7 @@ SATDS_SSE2 ssse3
SA8D_16x16_32 ssse3
INTRA_SA8D_SSE2 ssse3
INTRA_SATDS_MMX ssse3
+SATD_W4 ssse3 ; mmx, but uses pabsw from ssse3.
%endif
More information about the x264-devel mailing list