[x264-devel] x86: AVX2 high bit-depth vsad
Henrik Gramner
git at videolan.org
Tue Apr 23 23:03:08 CEST 2013
x264 | branch: master | Henrik Gramner <henrik at gramner.com> | Tue Apr 16 23:27:43 2013 +0200| [ead88a021acd034a39769f63dc3aaf5369bb0d94] | committer: Jason Garrett-Glaser
x86: AVX2 high bit-depth vsad
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=ead88a021acd034a39769f63dc3aaf5369bb0d94
---
common/pixel.c | 1 +
common/x86/pixel.h | 1 +
common/x86/sad16-a.asm | 32 ++++++++++++++++++++++++++++++++
3 files changed, 34 insertions(+)
diff --git a/common/pixel.c b/common/pixel.c
index 97d87f9..bbe43be 100644
--- a/common/pixel.c
+++ b/common/pixel.c
@@ -991,6 +991,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
{
INIT2( sad, _avx2 );
INIT2_NAME( sad_aligned, sad, _avx2 );
+ pixf->vsad = x264_pixel_vsad_avx2;
}
#endif // HAVE_MMX
#else // !HIGH_BIT_DEPTH
diff --git a/common/x86/pixel.h b/common/x86/pixel.h
index 6221dec..5618fcc 100644
--- a/common/x86/pixel.h
+++ b/common/x86/pixel.h
@@ -174,6 +174,7 @@ int x264_pixel_vsad_mmx2 ( pixel *src, intptr_t stride, int height );
int x264_pixel_vsad_sse2 ( pixel *src, intptr_t stride, int height );
int x264_pixel_vsad_ssse3( pixel *src, intptr_t stride, int height );
int x264_pixel_vsad_xop ( pixel *src, intptr_t stride, int height );
+int x264_pixel_vsad_avx2 ( uint16_t *src, intptr_t stride, int height );
int x264_pixel_asd8_sse2 ( pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2, int height );
int x264_pixel_asd8_ssse3( pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2, int height );
int x264_pixel_asd8_xop ( pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2, int height );
diff --git a/common/x86/sad16-a.asm b/common/x86/sad16-a.asm
index 921c5ff..a3a6892 100644
--- a/common/x86/sad16-a.asm
+++ b/common/x86/sad16-a.asm
@@ -4,6 +4,7 @@
;* Copyright (C) 2010-2013 x264 project
;*
;* Authors: Oskar Arvidsson <oskar at irock.se>
+;* Henrik Gramner <henrik at gramner.com>
;*
;* This program is free software; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
@@ -420,6 +421,37 @@ PIXEL_VSAD
INIT_XMM xop
PIXEL_VSAD
+INIT_YMM avx2
+cglobal pixel_vsad, 3,3 ; int pixel_vsad( uint16_t *src, intptr_t stride, int height ): sum of |row[i] - row[i+1]| over consecutive rows
+ mova m0, [r0] ; m0 = first row (presumably r0 = src -- confirm against x86inc arg mapping)
+ mova m1, [r0+2*r1] ; m1 = second row; r1 is scaled by 2, consistent with a stride counted in uint16_t units
+ lea r0, [r0+4*r1] ; advance the row pointer by two rows
+ psubw m0, m1
+ pabsw m0, m0 ; m0 = per-word |row0 - row1|; m0 is the accumulator from here on
+ sub r2d, 2 ; two rows consumed so far
+ je .end ; height == 2: only one row difference exists, skip the loop
+.loop: ; invariant: m1 = last row loaded, m0 = running per-word sums
+ mova m2, [r0] ; m2 = next row
+ mova m3, [r0+2*r1] ; m3 = the row after that
+ lea r0, [r0+4*r1] ; advance the row pointer by two rows
+ psubw m1, m2 ; previous row - next row
+ psubw m2, m3 ; next row - row after
+ pabsw m1, m1
+ pabsw m2, m2 ; both differences made absolute
+ paddw m0, m1
+ paddw m0, m2 ; accumulate in 16-bit lanes
+ mova m1, m3 ; carry the last loaded row into the next iteration
+ sub r2d, 2 ; two more rows consumed
+ jg .loop ; continue while rows remain
+.end:
+%if BIT_DEPTH == 9
+ HADDW m0, m1 ; horizontal sum of the word lanes of m0, m1 as scratch (macro defined outside this view; presumably the signed variant -- confirm)
+%else
+ HADDUW m0, m1 ; same reduction; presumably the unsigned variant for larger sample ranges -- confirm against the macro definition
+%endif
+ movd eax, xm0 ; return value = low dword of the reduced sum
+ RET
+
;-----------------------------------------------------------------------------
; void pixel_sad_xK_MxN( uint16_t *fenc, uint16_t *pix0, uint16_t *pix1,
; uint16_t *pix2, intptr_t i_stride, int scores[3] )
More information about the x264-devel
mailing list