<div style="line-height:1.7;color:#000000;font-size:14px;font-family:arial"><DIV>>+;-----------------------------------------------------------------------------<BR>>+; int pixel_ssd_WxH( uint16_t *, intptr_t, uint16_t *, intptr_t )<BR>>+;-----------------------------------------------------------------------------<BR>>+<BR>>+%macro HEVC_SSD_SS 0<BR>>+cglobal pixel_ssd_ss_4x4, 4,7,6<BR>>+ pxor m0, m0<BR>This pxor can be removed.</DIV>
<DIV> </DIV>
<DIV>>+ pmovsxwd m1, [r0]<BR>>+ pmovsxwd m2, [r2]<BR>>+ psubd m1, m2<BR></DIV>
<DIV>>+ pmulld m1, m1<BR>>+ paddd m0, m1<BR>It would be better to use pmulld's destination register directly here.</DIV>
<DIV> </DIV>
<DIV>>+ lea r0, [r0 + r1*2]<BR>>+ lea r2, [r2 + r3*2]</DIV>
<DIV>Please fix the code indentation here.</DIV>
<DIV><BR>>+ pmovsxwd m1, [r0]<BR>>+ pmovsxwd m2, [r2]<BR>>+ psubd m1, m2<BR>>+ pmulld m1, m1<BR>>+ paddd m0, m1<BR>>+ lea r0, [r0 + r1*2]<BR>>+ lea r2, [r2 + r3*2]<BR>>+ pmovsxwd m1, [r0]<BR>>+ pmovsxwd m2, [r2]<BR>>+ psubd m1, m2<BR>>+ pmulld m1, m1<BR>>+ paddd m0, m1<BR>>+ lea r0, [r0 + r1*2]<BR>>+ lea r2, [r2 + r3*2]<BR>>+ pmovsxwd m1, [r0]<BR>>+ pmovsxwd m2, [r2]<BR>>+ psubd m1, m2<BR>>+ pmulld m1, m1<BR>>+ paddd m0, m1<BR>>+ phaddd m0, m0<BR>>+ phaddd m0, m0<BR>>+ movd eax, m0<BR>>+ RET<BR>>+%endmacro<BR>>+<BR>> %if HIGH_BIT_DEPTH == 0<BR>> %macro SSD_LOAD_FULL 5<BR>> mova m1, [t0+%1]<BR>>@@ -512,12 +551,17 @@<BR>> %define SSD_CORE SSD_CORE_SSE2<BR>> %define JOIN JOIN_SSE2<BR>> HEVC_SSD<BR>>+HEVC_SSD_SS<BR>> INIT_XMM ssse3<BR>> %define SSD_CORE SSD_CORE_SSSE3<BR>> %define JOIN JOIN_SSSE3<BR>> HEVC_SSD<BR>>+HEVC_SSD_SS<BR>>+INIT_XMM sse4<BR>>+HEVC_SSD_SS<BR>> INIT_XMM avx<BR>> HEVC_SSD<BR>>+HEVC_SSD_SS<BR>> INIT_MMX ssse3<BR>> SSD 4, 4<BR>> SSD 4, 8<BR>>diff -r d2173ec27a15 -r 98bcf33302ef source/common/x86/pixel.h<BR>>--- a/source/common/x86/pixel.h Thu Nov 21 20:16:39 2013 +0530<BR>>+++ b/source/common/x86/pixel.h Fri Nov 22 18:57:18 2013 +0530<BR>>@@ -59,6 +59,9 @@<BR>> #define DECL_X1(name, suffix) \<BR>> DECL_PIXELS(int, name, suffix, (pixel *, intptr_t, pixel *, intptr_t))<BR>> <BR>>+#define DECL_X1_SS(name, suffix) \<BR>>+ DECL_PIXELS(int, name, suffix, (int16_t *, intptr_t, int16_t *, intptr_t))<BR>>+<BR>> #define DECL_X4(name, suffix) \<BR>> DECL_PIXELS(void, name ## _x3, suffix, (pixel *, pixel *, pixel *, pixel *, intptr_t, int *)) \<BR>> DECL_PIXELS(void, name ## _x4, suffix, (pixel *, pixel *, pixel *, pixel *, pixel *, intptr_t, int *))<BR>>@@ -86,6 +89,15 @@<BR>> DECL_X1(ssd, avx)<BR>> DECL_X1(ssd, xop)<BR>> DECL_X1(ssd, avx2)<BR>>+DECL_X1_SS(ssd_ss, mmx)<BR>>+DECL_X1_SS(ssd_ss, mmx2)<BR>>+DECL_X1_SS(ssd_ss, sse2slow)<BR>>+DECL_X1_SS(ssd_ss, sse2)<BR>>+DECL_X1_SS(ssd_ss, ssse3)<BR>>+DECL_X1_SS(ssd_ss, sse4)<BR>>+DECL_X1_SS(ssd_ss, 
avx)<BR>>+DECL_X1_SS(ssd_ss, xop)<BR>>+DECL_X1_SS(ssd_ss, avx2)<BR>> DECL_X1(satd, mmx2)<BR>> DECL_X1(satd, sse2)<BR>> DECL_X1(satd, ssse3)<BR>>_______________________________________________<BR>>x265-devel mailing list<BR>>x265-devel@videolan.org<BR>>https://mailman.videolan.org/listinfo/x265-devel<BR></DIV></div>