[x264-devel] commit: satd_4x4_ssse3 (Jason Garrett-Glaser)

git version control git at videolan.org
Thu Mar 20 02:11:18 CET 2008


x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Wed Mar 19 14:00:34 2008 -0600| [1df5f84baf226141548948d94c84a1f3b1792c0b]

satd_4x4_ssse3

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=1df5f84baf226141548948d94c84a1f3b1792c0b
---

 common/pixel.c         |    8 ++++----
 common/x86/pixel-a.asm |   12 ++++++++----
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/common/pixel.c b/common/pixel.c
index 5200c66..ae90845 100644
--- a/common/pixel.c
+++ b/common/pixel.c
@@ -357,7 +357,7 @@ SATD_X_DECL7()
 SATD_X_DECL7( _mmxext )
 SATD_X_DECL5( _sse2 )
 #ifdef HAVE_SSE3
-SATD_X_DECL5( _ssse3 )
+SATD_X_DECL7( _ssse3 )
 #endif
 #endif
 
@@ -630,9 +630,9 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
 
     if( cpu&X264_CPU_SSSE3 )
     {
-        INIT5( satd, _ssse3 );
-        INIT5( satd_x3, _ssse3 );
-        INIT5( satd_x4, _ssse3 );
+        INIT7( satd, _ssse3 );
+        INIT7( satd_x3, _ssse3 );
+        INIT7( satd_x4, _ssse3 );
         INIT_ADS( _ssse3 );
         pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_ssse3;
         pixf->sa8d[PIXEL_8x8]  = x264_pixel_sa8d_8x8_ssse3;
diff --git a/common/x86/pixel-a.asm b/common/x86/pixel-a.asm
index b4593d6..557aeb8 100644
--- a/common/x86/pixel-a.asm
+++ b/common/x86/pixel-a.asm
@@ -513,19 +513,21 @@ cglobal x264_pixel_satd_8x4_mmxext, 4,6
     paddw       mm0, mm1
     SATD_END_MMX
 
-cglobal x264_pixel_satd_4x8_mmxext, 4,6
+%macro SATD_W4 1
+cglobal x264_pixel_satd_4x8_%1, 4,6
     SATD_START_MMX
     SATD_4x4_MMX mm0, 0, 1
     SATD_4x4_MMX mm1, 0, 0
     paddw       mm0, mm1
     SATD_END_MMX
 
-cglobal x264_pixel_satd_4x4_mmxext, 4,6
+cglobal x264_pixel_satd_4x4_%1, 4,6
     SATD_START_MMX
     SATD_4x4_MMX mm0, 0, 0
     SATD_END_MMX
+%endmacro
 
-
+SATD_W4 mmxext
 
 %macro SATD_START_SSE2 0
     pxor    xmm6, xmm6
@@ -1211,10 +1213,11 @@ cglobal x264_intra_satd_x3_8x8c_%1, 0,6
 %endmacro
 
 ; instantiate satds
-; FIXME width4 can benefit from pabsw even if not sse2
 
+%ifndef ARCH_X86_64
 cextern x264_pixel_sa8d_8x8_mmxext
 SA8D_16x16_32 mmxext
+%endif
 
 %define ABS1 ABS1_MMX
 %define ABS2 ABS2_MMX
@@ -1229,6 +1232,7 @@ SATDS_SSE2 ssse3
 SA8D_16x16_32 ssse3
 INTRA_SA8D_SSE2 ssse3
 INTRA_SATDS_MMX ssse3
+SATD_W4 ssse3 ; mmx, but uses pabsw from ssse3.
 %endif
 
 



More information about the x264-devel mailing list