<div style="line-height:1.7;color:#000000;font-size:14px;font-family:arial"><DIV>>+;-----------------------------------------------------------------------------<BR>>+; int pixel_ssd_sp_4x4( int16_t *, intptr_t, uint8_t *, intptr_t )<BR>>+;-----------------------------------------------------------------------------<BR>>+INIT_XMM sse4<BR>>+cglobal pixel_ssd_sp_4x4, 4, 6, 8, src1, stride1, src2, stride2<BR>I think you only use 5 registers here, so the register count in this cglobal line (currently 6) could be reduced to 5. </DIV>
<DIV> </DIV>
<DIV> </DIV>
<DIV>>+ pxor m7, m7<BR>>+ add r1, r1<BR>>+ lea r4, [r1 * 3]<BR>>+ call pixel_ssd_sp_4x4_internal<BR>>+ HADDD m7, m1<BR>>+ movd eax, m7<BR>>+ RET<BR>>+<BR>>+;-----------------------------------------------------------------------------<BR>>+; int pixel_ssd_sp_4x8( int16_t *, intptr_t, uint8_t *, intptr_t )<BR>>+;-----------------------------------------------------------------------------<BR>>+INIT_XMM sse4<BR>>+cglobal pixel_ssd_sp_4x8, 4, 6, 8, src1, stride1, src2, stride2<BR>>+ pxor m7, m7<BR>>+ add r1, r1<BR>>+ lea r4, [r1 * 3]<BR>>+ call pixel_ssd_sp_4x4_internal<BR>>+ lea r0, [r0 + 4 * r1]<BR>>+ lea r2, [r2 + 2 * r3]<BR>>+ call pixel_ssd_sp_4x4_internal<BR>>+ HADDD m7, m1<BR>>+ movd eax, m7<BR>>+ RET<BR>>+<BR>>+;-----------------------------------------------------------------------------<BR>>+; int pixel_ssd_sp_4x16( int16_t *, intptr_t, uint8_t *, intptr_t )<BR>>+;-----------------------------------------------------------------------------<BR>>+INIT_XMM sse4<BR>>+cglobal pixel_ssd_sp_4x16, 4, 6, 8, src1, stride1, src2, stride2<BR>>+ pxor m7, m7<BR>>+ add r1, r1<BR>>+ lea r4, [r1 * 3]<BR>>+ call pixel_ssd_sp_4x4_internal<BR>>+ lea r0, [r0 + 4 * r1]<BR>>+ lea r2, [r2 + 2 * r3]<BR>>+ call pixel_ssd_sp_4x4_internal<BR>>+ lea r0, [r0 + 4 * r1]<BR>>+ lea r2, [r2 + 2 * r3]<BR>>+ call pixel_ssd_sp_4x4_internal<BR>>+ lea r0, [r0 + 4 * r1]<BR>>+ lea r2, [r2 + 2 * r3]<BR>>+ call pixel_ssd_sp_4x4_internal<BR>>+ HADDD m7, m1<BR>>+ movd eax, m7<BR>>+ RET<BR>>+<BR>> cglobal pixel_ssd_sp_8x4_internal<BR>> movu m0, [r0]<BR>> movu m1, [r0 + r1]<BR>>diff -r 949f85337789 -r a0fbadcf1f91 source/common/x86/pixel.h<BR>>--- a/source/common/x86/pixel.h Wed Nov 27 18:10:14 2013 -0600<BR>>+++ b/source/common/x86/pixel.h Thu Nov 28 14:57:10 2013 +0530<BR>>@@ -402,6 +402,9 @@<BR>> void x265_weight_pp_sse4(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);<BR>> void x265_weight_sp_sse4(int16_t *src, pixel 
*dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);<BR>> <BR>>+int x265_pixel_ssd_sp_4x4_sse4(int16_t *, intptr_t, pixel *, intptr_t);<BR>>+int x265_pixel_ssd_sp_4x8_sse4(int16_t *, intptr_t, pixel *, intptr_t);<BR>>+int x265_pixel_ssd_sp_4x16_sse4(int16_t *, intptr_t, pixel *, intptr_t);<BR>> int x265_pixel_ssd_sp_8x4_sse4(int16_t *, intptr_t, pixel *, intptr_t);<BR>> int x265_pixel_ssd_sp_8x8_sse4(int16_t *, intptr_t, pixel *, intptr_t);<BR>> int x265_pixel_ssd_sp_8x16_sse4(int16_t *, intptr_t, pixel *, intptr_t);<BR>>_______________________________________________<BR>>x265-devel mailing list<BR>>x265-devel@videolan.org<BR>>https://mailman.videolan.org/listinfo/x265-devel<BR></DIV></div>