<div dir="ltr">This gives a compile error when HIGH_BIT_DEPTH is off. Have you added these assignments to the wrong setupPrimitives()?<br></div><div class="gmail_extra"><br><div class="gmail_quote">On Thu, Jun 25, 2015 at 2:20 PM,  <span dir="ltr">&lt;<a href="mailto:rajesh@multicorewareinc.com" target="_blank">rajesh@multicorewareinc.com</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Rajesh Paulraj&lt;<a href="mailto:rajesh@multicorewareinc.com">rajesh@multicorewareinc.com</a>&gt;<br>
# Date 1435220688 -19800<br>
#      Thu Jun 25 13:54:48 2015 +0530<br>
# Node ID c8d1630fc5ccb85aa7d98a198895bad31ccc33b0<br>
# Parent  26e8eff8eb5abc1c2fa5dd94f59f620c6040caf9<br>
asm: pixelavg_pp[8xN] avx2 code for 10bpp<br>
<br>
avx2:<br>
avg_pp[  8x4]  4.39x    145.09          636.75<br>
avg_pp[  8x8]  5.33x    215.27          1146.55<br>
avg_pp[ 8x16]  6.50x    336.88          2190.68<br>
avg_pp[ 8x32]  7.71x    579.86          4470.84<br>
<br>
sse2:<br>
avg_pp[  8x4]  2.31x    287.63          663.94<br>
avg_pp[  8x8]  3.26x    370.21          1205.26<br>
avg_pp[ 8x16]  3.99x    581.63          2323.25<br>
avg_pp[ 8x32]  4.78x    995.79          4755.58<br>
<br>
diff -r 26e8eff8eb5a -r c8d1630fc5cc source/common/x86/asm-primitives.cpp<br>
--- a/source/common/x86/asm-primitives.cpp      Thu Jun 25 13:45:55 2015 +0530<br>
+++ b/source/common/x86/asm-primitives.cpp      Thu Jun 25 13:54:48 2015 +0530<br>
@@ -1353,6 +1353,10 @@<br>
         p.cu[BLOCK_32x32].intra_pred[33]    = PFX(intra_pred_ang32_33_avx2);<br>
         p.cu[BLOCK_32x32].intra_pred[34]    = PFX(intra_pred_ang32_2_avx2);<br>
<br>
+        p.pu[LUMA_8x4].pixelavg_pp = PFX(pixel_avg_8x4_avx2);<br>
+        p.pu[LUMA_8x8].pixelavg_pp = PFX(pixel_avg_8x8_avx2);<br>
+        p.pu[LUMA_8x16].pixelavg_pp = PFX(pixel_avg_8x16_avx2);<br>
+        p.pu[LUMA_8x32].pixelavg_pp = PFX(pixel_avg_8x32_avx2);<br>
         p.pu[LUMA_16x4].pixelavg_pp = PFX(pixel_avg_16x4_avx2);<br>
         p.pu[LUMA_16x8].pixelavg_pp = PFX(pixel_avg_16x8_avx2);<br>
         p.pu[LUMA_16x12].pixelavg_pp = PFX(pixel_avg_16x12_avx2);<br>
diff -r 26e8eff8eb5a -r c8d1630fc5cc source/common/x86/mc-a.asm<br>
--- a/source/common/x86/mc-a.asm        Thu Jun 25 13:45:55 2015 +0530<br>
+++ b/source/common/x86/mc-a.asm        Thu Jun 25 13:54:48 2015 +0530<br>
@@ -4439,6 +4439,88 @@<br>
 INIT_YMM avx2<br>
 PIXEL_AVG_W18<br>
<br>
+%macro  pixel_avg_W8 0<br>
+    movu    xm0, [r2]<br>
+    movu    xm1, [r4]<br>
+    pavgw   xm0, xm1<br>
+    movu    [r0], xm0<br>
+    movu    xm2, [r2 + r3]<br>
+    movu    xm3, [r4 + r5]<br>
+    pavgw   xm2, xm3<br>
+    movu    [r0 + r1], xm2<br>
+<br>
+    movu    xm0, [r2 + r3 * 2]<br>
+    movu    xm1, [r4 + r5 * 2]<br>
+    pavgw   xm0, xm1<br>
+    movu    [r0 + r1 * 2], xm0<br>
+    movu    xm2, [r2 + r6]<br>
+    movu    xm3, [r4 + r7]<br>
+    pavgw   xm2, xm3<br>
+    movu    [r0 + r8], xm2<br>
+<br>
+    lea     r0, [r0 + 4 * r1]<br>
+    lea     r2, [r2 + 4 * r3]<br>
+    lea     r4, [r4 + 4 * r5]<br>
+%endmacro<br>
+<br>
+;-------------------------------------------------------------------------------------------------------------------------------<br>
+;void pixelavg_pp(pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int)<br>
+;-------------------------------------------------------------------------------------------------------------------------------<br>
+%if ARCH_X86_64<br>
+INIT_YMM avx2<br>
+cglobal pixel_avg_8x4, 6,10,4<br>
+    add     r1d, r1d<br>
+    add     r3d, r3d<br>
+    add     r5d, r5d<br>
+    lea     r6, [r3 * 3]<br>
+    lea     r7, [r5 * 3]<br>
+    lea     r8, [r1 * 3]<br>
+    pixel_avg_W8<br>
+    RET<br>
+<br>
+cglobal pixel_avg_8x8, 6,10,4<br>
+    add     r1d, r1d<br>
+    add     r3d, r3d<br>
+    add     r5d, r5d<br>
+    lea     r6, [r3 * 3]<br>
+    lea     r7, [r5 * 3]<br>
+    lea     r8, [r1 * 3]<br>
+    mov     r9d, 2<br>
+.loop<br>
+    pixel_avg_W8<br>
+    dec     r9d<br>
+    jnz     .loop<br>
+    RET<br>
+<br>
+cglobal pixel_avg_8x16, 6,10,4<br>
+    add     r1d, r1d<br>
+    add     r3d, r3d<br>
+    add     r5d, r5d<br>
+    lea     r6, [r3 * 3]<br>
+    lea     r7, [r5 * 3]<br>
+    lea     r8, [r1 * 3]<br>
+    mov     r9d, 4<br>
+.loop<br>
+    pixel_avg_W8<br>
+    dec     r9d<br>
+    jnz     .loop<br>
+    RET<br>
+<br>
+cglobal pixel_avg_8x32, 6,10,4<br>
+    add     r1d, r1d<br>
+    add     r3d, r3d<br>
+    add     r5d, r5d<br>
+    lea     r6, [r3 * 3]<br>
+    lea     r7, [r5 * 3]<br>
+    lea     r8, [r1 * 3]<br>
+    mov     r9d, 8<br>
+.loop<br>
+    pixel_avg_W8<br>
+    dec     r9d<br>
+    jnz     .loop<br>
+    RET<br>
+%endif<br>
+<br>
 %macro  pixel_avg_H4 0<br>
     movu    m0, [r2]<br>
     movu    m1, [r4]<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div><br></div>