<div dir="ltr">pushed</div><div class="gmail_extra"><br><div class="gmail_quote">On Wed, Jun 14, 2017 at 12:03 PM,  <span dir="ltr">&lt;<a href="mailto:praveen@multicorewareinc.com" target="_blank">praveen@multicorewareinc.com</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><span class=""># HG changeset patch<br>
# User Praveen Tiwari &lt;<a href="mailto:praveen@multicorewareinc.com">praveen@multicorewareinc.com</a>&gt;<br>
</span># Date 1497422024 -19800<br>
#      Wed Jun 14 12:03:44 2017 +0530<br>
# Node ID 65e038ecbbf63a2f449ccf52358c5f<wbr>bbec408b27<br>
<span class=""># Parent  28bbc349d17035a3c1fcdfbdca3b8e<wbr>21ae6b669b<br>
avx2: integral_init4h -> added 'INTEGRAL_FOUR_HORIZONTAL_4' macro to reduce data movement for '4' element case<br>
<br>
</span>diff -r 28bbc349d170 -r 65e038ecbbf6 source/common/x86/seaintegral.<wbr>asm<br>
<span class="">--- a/source/common/x86/<wbr>seaintegral.asm Wed Jun 07 17:06:57 2017 +0530<br>
</span>+++ b/source/common/x86/<wbr>seaintegral.asm Wed Jun 14 12:03:44 2017 +0530<br>
<span class="">@@ -148,11 +148,6 @@<br>
     jnz     .loop<br>
     RET<br>
<br>
-;----------------------------<wbr>------------------------------<wbr>-------------------<br>
-;static void integral_init4h_c(uint32_t *sum, pixel *pix, intptr_t stride)<br>
-;----------------------------<wbr>------------------------------<wbr>-------------------<br>
-INIT_YMM avx2<br>
-<br>
 %macro INTEGRAL_FOUR_HORIZONTAL_16 0<br>
     pmovzxbw       m0, [r1]<br>
     pmovzxbw       m1, [r1 + 1]<br>
</span>@@ -163,6 +158,24 @@<br>
<span class="">     paddw          m0, m1<br>
 %endmacro<br>
<br>
+%macro INTEGRAL_FOUR_HORIZONTAL_4 0<br>
+    movd       xm0, [r1]<br>
+    movd       xm1, [r1 + 1]<br>
+    pmovzxbw   xm0, xm0<br>
+    pmovzxbw   xm1, xm1<br>
+    paddw      xm0, xm1<br>
+    movd       xm1, [r1 + 2]<br>
+    pmovzxbw   xm1, xm1<br>
+    paddw      xm0, xm1<br>
+    movd       xm1, [r1 + 3]<br>
+    pmovzxbw   xm1, xm1<br>
+    paddw      xm0, xm1<br>
</span><span class="">+%endmacro<br>
+<br>
+;----------------------------<wbr>------------------------------<wbr>-------------------<br>
+;static void integral_init4h(uint32_t *sum, pixel *pix, intptr_t stride)<br>
+;----------------------------<wbr>------------------------------<wbr>-------------------<br>
+INIT_YMM avx2<br>
 cglobal integral4h, 3, 5, 3<br>
     lea            r3, [4 * r2]<br>
     sub            r0, r3<br>
</span>@@ -205,7 +218,7 @@<br>
<div class="HOEnZb"><div class="h5">     jmp             .end<br>
<br>
 .loop_4:<br>
-    INTEGRAL_FOUR_HORIZONTAL_16<br>
+    INTEGRAL_FOUR_HORIZONTAL_4<br>
     pmovzxwd       xm0, xm0<br>
     movu           xm1, [r0]<br>
     paddd          xm0, xm1<br>
______________________________<wbr>_________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/<wbr>listinfo/x265-devel</a><br>
</div></div></blockquote></div><br></div>