<div dir="ltr"><br><div class="gmail_extra"><br><br><div class="gmail_quote">On Tue, Oct 29, 2013 at 6:15 AM, <span dir="ltr">&lt;<a href="mailto:yuvaraj@multicorewareinc.com" target="_blank">yuvaraj@multicorewareinc.com</a>&gt;</span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Yuvaraj Venkatesh &lt;<a href="mailto:yuvaraj@multicorewareinc.com">yuvaraj@multicorewareinc.com</a>&gt;<br>
# Date 1383044811 -19800<br>
# Tue Oct 29 16:36:51 2013 +0530<br>
# Node ID fc35a117efd17270eb15aa56aad7cc90bb7bdd35<br>
# Parent e2f512dbd2424d099d9984c72bfc7d0729be25fe<br>
assembly code for pixel_sad_x3_32xN<br></blockquote><div><br></div><div>When you mark patches as review only, it would be helpful if you described why you believe the patch needs review or why it is unfinished.</div><div>
</div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
diff -r e2f512dbd242 -r fc35a117efd1 source/common/x86/asm-primitives.cpp<br>
--- a/source/common/x86/asm-primitives.cpp Mon Oct 28 16:13:05 2013 +0530<br>
+++ b/source/common/x86/asm-primitives.cpp Tue Oct 29 16:36:51 2013 +0530<br>
@@ -280,6 +280,11 @@<br>
p.sad_x4[LUMA_16x32] = x265_pixel_sad_x4_16x32_ssse3;<br>
p.sad_x3[LUMA_16x64] = x265_pixel_sad_x3_16x64_ssse3;<br>
p.sad_x4[LUMA_16x64] = x265_pixel_sad_x4_16x64_ssse3;<br>
+ p.sad_x3[LUMA_32x8] = x265_pixel_sad_x3_32x8_ssse3;<br>
+ p.sad_x3[LUMA_32x16] = x265_pixel_sad_x3_32x16_ssse3;<br>
+ p.sad_x3[LUMA_32x24] = x265_pixel_sad_x3_32x24_ssse3;<br>
+ p.sad_x3[LUMA_32x32] = x265_pixel_sad_x3_32x32_ssse3;<br>
+ p.sad_x3[LUMA_32x64] = x265_pixel_sad_x3_32x64_ssse3;<br>
}<br>
if (cpuMask & X265_CPU_SSE4)<br>
{<br>
@@ -310,6 +315,11 @@<br>
p.sad_x4[LUMA_16x32] = x265_pixel_sad_x4_16x32_avx;<br>
p.sad_x3[LUMA_16x64] = x265_pixel_sad_x3_16x64_avx;<br>
p.sad_x4[LUMA_16x64] = x265_pixel_sad_x4_16x64_avx;<br>
+ p.sad_x3[LUMA_32x8] = x265_pixel_sad_x3_32x8_avx;<br>
+ p.sad_x3[LUMA_32x16] = x265_pixel_sad_x3_32x16_avx;<br>
+ p.sad_x3[LUMA_32x24] = x265_pixel_sad_x3_32x24_avx;<br>
+ p.sad_x3[LUMA_32x32] = x265_pixel_sad_x3_32x32_avx;<br>
+ p.sad_x3[LUMA_32x64] = x265_pixel_sad_x3_32x64_avx;<br>
}<br>
if (cpuMask & X265_CPU_XOP)<br>
{<br>
diff -r e2f512dbd242 -r fc35a117efd1 source/common/x86/pixel.h<br>
--- a/source/common/x86/pixel.h Mon Oct 28 16:13:05 2013 +0530<br>
+++ b/source/common/x86/pixel.h Tue Oct 29 16:36:51 2013 +0530<br>
@@ -29,6 +29,11 @@<br>
#define X265_I386_PIXEL_H<br>
<br>
#define DECL_PIXELS(ret, name, suffix, args) \<br>
+ ret x265_pixel_ ## name ## _32x64_ ## suffix args; \<br>
+ ret x265_pixel_ ## name ## _32x32_ ## suffix args; \<br>
+ ret x265_pixel_ ## name ## _32x24_ ## suffix args; \<br>
+ ret x265_pixel_ ## name ## _32x16_ ## suffix args; \<br>
+ ret x265_pixel_ ## name ## _32x8_ ## suffix args; \<br>
ret x265_pixel_ ## name ## _16x64_ ## suffix args; \<br>
ret x265_pixel_ ## name ## _16x32_ ## suffix args; \<br>
ret x265_pixel_ ## name ## _16x16_ ## suffix args; \<br>
diff -r e2f512dbd242 -r fc35a117efd1 source/common/x86/sad-a.asm<br>
--- a/source/common/x86/sad-a.asm Mon Oct 28 16:13:05 2013 +0530<br>
+++ b/source/common/x86/sad-a.asm Tue Oct 29 16:36:51 2013 +0530<br>
@@ -1007,19 +1007,30 @@<br>
; SAD x3/x4 XMM<br>
;=============================================================================<br>
<br>
-%macro SAD_X3_START_1x16P_SSE2 0<br>
- mova m2, [r0]<br>
+%macro SAD_X3_START_1x16P_SSE2 1<br>
+ mova m3, [r0 + %1]<br>
+%if %1 == 0<br>
+ pxor m0, m0<br>
+ pxor m1, m1<br>
+ pxor m2, m2<br>
+%endif<br>
%if cpuflag(avx)<br>
- psadbw m0, m2, [r1]<br>
- psadbw m1, m2, [r2]<br>
- psadbw m2, [r3]<br>
+ psadbw m4, m3, [r1 + %1]<br>
+ psadbw m5, m3, [r2 + %1]<br>
+ psadbw m3, [r3 + %1]<br>
+ paddd m0, m4<br>
+ paddd m1, m5<br>
+ paddd m2, m3<br>
%else<br>
- movu m0, [r1]<br>
- movu m1, [r2]<br>
- movu m3, [r3]<br>
- psadbw m0, m2<br>
- psadbw m1, m2<br>
- psadbw m2, m3<br>
+ movu m4, [r1 + %1]<br>
+ movu m5, [r2 + %1]<br>
+ movu m6, [r3 + %1]<br>
+ psadbw m4, m3<br>
+ psadbw m5, m3<br>
+ psadbw m6, m3<br>
+ paddd m0, m4<br>
+ paddd m1, m5<br>
+ paddd m2, m6<br>
%endif<br>
%endmacro<br>
<br>
@@ -1051,7 +1062,7 @@<br>
%macro SAD_X3_4x16P_SSE2 2<br>
%if %1==0<br>
lea t0, [r4*3]<br>
- SAD_X3_START_1x16P_SSE2<br>
+ SAD_X3_START_1x16P_SSE2 0<br>
%else<br>
SAD_X3_1x16P_SSE2 FENC_STRIDE*(0+(%1&1)*4), r4*0<br>
%endif<br>
@@ -1068,6 +1079,30 @@<br>
%endif<br>
%endmacro<br>
<br>
+%macro SAD_X3_4x32P_SSE2 2<br>
+%assign y 0<br>
+%rep 2<br>
+%if %1==0<br>
+ lea t0, [r4+r4*2]<br>
+ SAD_X3_START_1x16P_SSE2 y<br>
+%else<br>
+ SAD_X3_1x16P_SSE2 (FENC_STRIDE*(0+(%1&1)*4) + y), (r4*0 + y)<br>
+%endif<br>
+ SAD_X3_1x16P_SSE2 (FENC_STRIDE*(1+(%1&1)*4) + y), (r4*1 + y)<br>
+ SAD_X3_1x16P_SSE2 (FENC_STRIDE*(2+(%1&1)*4) + y), (r4*2 + y)<br>
+ SAD_X3_1x16P_SSE2 (FENC_STRIDE*(3+(%1&1)*4) + y), (t0 + y)<br>
+%assign y y+16<br>
+%endrep<br>
+%if %1 != %2-1<br>
+%if (%1&1) != 0<br>
+ add r0, 8*FENC_STRIDE<br>
+%endif<br>
+ lea r1, [r1+4*r4]<br>
+ lea r2, [r2+4*r4]<br>
+ lea r3, [r3+4*r4]<br>
+%endif<br>
+%endmacro<br>
+<br>
%macro SAD_X3_START_2x8P_SSE2 0<br>
movq m3, [r0]<br>
movq m0, [r1]<br>
@@ -1506,7 +1541,7 @@<br>
SAD_X%1_4x%2P_SSE2 x, %3/4<br>
%assign x x+1<br>
%endrep<br>
-%if %3 == 64<br>
+%if %3 >= 24<br>
SAD_X%1_END_SSE2 1<br>
%else<br>
SAD_X%1_END_SSE2 0<br>
@@ -1544,6 +1579,11 @@<br>
%endmacro<br>
<br>
INIT_XMM ssse3<br>
+SAD_X_SSE2 3, 32, 64, 7<br>
+SAD_X_SSE2 3, 32, 32, 7<br>
+SAD_X_SSE2 3, 32, 24, 7<br>
+SAD_X_SSE2 3, 32, 16, 7<br>
+SAD_X_SSE2 3, 32, 8, 7<br>
SAD_X_SSE2 3, 16, 64, 7<br>
SAD_X_SSE2 3, 16, 32, 7<br>
SAD_X_SSE2 3, 16, 16, 7<br>
@@ -1562,6 +1602,11 @@<br>
SAD_X_SSSE3 4, 8, 4<br>
<br>
INIT_XMM avx<br>
+SAD_X_SSE2 3, 32, 64, 7<br>
+SAD_X_SSE2 3, 32, 32, 7<br>
+SAD_X_SSE2 3, 32, 24, 7<br>
+SAD_X_SSE2 3, 32, 16, 7<br>
+SAD_X_SSE2 3, 32, 8, 7<br>
SAD_X_SSE2 3, 16, 64, 7<br>
SAD_X_SSE2 3, 16, 32, 6<br>
SAD_X_SSE2 3, 16, 16, 6<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div><br><br clear="all"><div><br></div>-- <br>Steve Borho
</div></div>