[x264-devel] x86: AVX2 high bit-depth vsad

Henrik Gramner git at videolan.org
Tue Apr 23 23:03:08 CEST 2013


x264 | branch: master | Henrik Gramner <henrik at gramner.com> | Tue Apr 16 23:27:43 2013 +0200| [ead88a021acd034a39769f63dc3aaf5369bb0d94] | committer: Jason Garrett-Glaser

x86: AVX2 high bit-depth vsad

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=ead88a021acd034a39769f63dc3aaf5369bb0d94
---

 common/pixel.c         |    1 +
 common/x86/pixel.h     |    1 +
 common/x86/sad16-a.asm |   32 ++++++++++++++++++++++++++++++++
 3 files changed, 34 insertions(+)

diff --git a/common/pixel.c b/common/pixel.c
index 97d87f9..bbe43be 100644
--- a/common/pixel.c
+++ b/common/pixel.c
@@ -991,6 +991,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
     {
         INIT2( sad, _avx2 );
         INIT2_NAME( sad_aligned, sad, _avx2 );
+        pixf->vsad = x264_pixel_vsad_avx2;
     }
 #endif // HAVE_MMX
 #else // !HIGH_BIT_DEPTH
diff --git a/common/x86/pixel.h b/common/x86/pixel.h
index 6221dec..5618fcc 100644
--- a/common/x86/pixel.h
+++ b/common/x86/pixel.h
@@ -174,6 +174,7 @@ int  x264_pixel_vsad_mmx2 ( pixel *src, intptr_t stride, int height );
 int  x264_pixel_vsad_sse2 ( pixel *src, intptr_t stride, int height );
 int  x264_pixel_vsad_ssse3( pixel *src, intptr_t stride, int height );
 int  x264_pixel_vsad_xop  ( pixel *src, intptr_t stride, int height );
+int  x264_pixel_vsad_avx2 ( uint16_t *src, intptr_t stride, int height );
 int x264_pixel_asd8_sse2 ( pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2, int height );
 int x264_pixel_asd8_ssse3( pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2, int height );
 int x264_pixel_asd8_xop  ( pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2, int height );
diff --git a/common/x86/sad16-a.asm b/common/x86/sad16-a.asm
index 921c5ff..a3a6892 100644
--- a/common/x86/sad16-a.asm
+++ b/common/x86/sad16-a.asm
@@ -4,6 +4,7 @@
 ;* Copyright (C) 2010-2013 x264 project
 ;*
 ;* Authors: Oskar Arvidsson <oskar at irock.se>
+;*          Henrik Gramner <henrik at gramner.com>
 ;*
 ;* This program is free software; you can redistribute it and/or modify
 ;* it under the terms of the GNU General Public License as published by
@@ -420,6 +421,37 @@ PIXEL_VSAD
 INIT_XMM xop
 PIXEL_VSAD
 
+INIT_YMM avx2                      ; x86inc: m0..mN name ymm registers, function gets the _avx2 suffix
+cglobal pixel_vsad, 3,3            ; int pixel_vsad( uint16_t *src, intptr_t stride, int height ): sum of |row[y] - row[y+1]|; r0=src, r1=stride, r2=height
+    mova      m0, [r0]             ; m0 = row 0 (one vector of 16-bit samples); mova is x86inc's aligned move - presumably src rows are vector-aligned, TODO confirm
+    mova      m1, [r0+2*r1]        ; m1 = row 1; stride is scaled by 2, i.e. it appears to be counted in 16-bit samples - confirm against callers
+    lea       r0, [r0+4*r1]        ; step src forward by two rows
+    psubw     m0, m1               ; m0 = row0 - row1, per 16-bit lane
+    pabsw     m0, m0               ; m0 = |row0 - row1|; m0 is the running accumulator from here on
+    sub      r2d, 2                ; two rows consumed
+    je .end                        ; height was exactly 2: only one row pair to compare
+.loop:                             ; invariant: m1 = last row already loaded, m0 = per-lane sum so far
+    mova      m2, [r0]             ; m2 = next row (y)
+    mova      m3, [r0+2*r1]        ; m3 = row after that (y+1)
+    lea       r0, [r0+4*r1]        ; step src forward by two rows
+    psubw     m1, m2               ; m1 = row(y-1) - row(y); must read m2 before the next line overwrites it
+    psubw     m2, m3               ; m2 = row(y) - row(y+1)
+    pabsw     m1, m1               ; absolute value of both differences
+    pabsw     m2, m2
+    paddw     m0, m1               ; accumulate in 16-bit lanes (wrapping add); NOTE(review): assumes per-lane sums fit in 16 bits - confirm for max height/bit depth
+    paddw     m0, m2
+    mova      m1, m3               ; keep the newest row as "previous row" for the next iteration
+    sub      r2d, 2                ; two more rows consumed
+    jg .loop                       ; continue while rows remain; rows are always read in pairs
+.end:                              ; m0 holds per-lane partial sums; m1 is free to be used as scratch
+%if BIT_DEPTH == 9
+    HADDW     m0, m1               ; horizontal add of the 16-bit lanes (macro defined outside this hunk); presumably the signed variant suffices at 9-bit - confirm
+%else
+    HADDUW    m0, m1               ; horizontal add variant used for the other bit depths; presumably treats lanes as unsigned - confirm against the macro
+%endif
+    movd     eax, xm0              ; return value = low 32 bits of the reduced sum
+    RET
+
 ;-----------------------------------------------------------------------------
 ; void pixel_sad_xK_MxN( uint16_t *fenc, uint16_t *pix0, uint16_t *pix1,
 ;                        uint16_t *pix2, intptr_t i_stride, int scores[3] )



More information about the x264-devel mailing list