<div dir="ltr">Thanks. This patch does not apply cleanly against the current tip — please rebase and re-send.</div><div class="gmail_extra"><br><div class="gmail_quote">On Tue, Feb 16, 2016 at 2:52 PM,  <span dir="ltr"><<a href="mailto:radhakrishnan@multicorewareinc.com" target="_blank">radhakrishnan@multicorewareinc.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User <a href="mailto:radhakrishnan@multicorewareinc.com">radhakrishnan@multicorewareinc.com</a><br>
# Date 1455599542 -19800<br>
#      Tue Feb 16 10:42:22 2016 +0530<br>
# Node ID a370f5d37fca01529c6f61d3611609e7d92e3562<br>
# Parent  425b583f25dbb57af86fc5c128548038954baf31<br>
arm: Implement pixelavg_pp_NxN_neon<br>
<br>
diff -r 425b583f25db -r a370f5d37fca source/common/arm/asm-primitives.cpp<br>
--- a/source/common/arm/asm-primitives.cpp      Thu Feb 11 13:15:03 2016 +0530<br>
+++ b/source/common/arm/asm-primitives.cpp      Tue Feb 16 10:42:22 2016 +0530<br>
@@ -42,6 +42,31 @@<br>
     if (cpuMask & X265_CPU_NEON)<br>
     {<br>
         p.pu[LUMA_16x16].copy_pp = PFX(blockcopy_pp_16x16_neon);<br>
+        p.pu[LUMA_4x4].pixelavg_pp = PFX(pixel_avg_pp_4x4_neon);<br>
+        p.pu[LUMA_4x8].pixelavg_pp = PFX(pixel_avg_pp_4x8_neon);<br>
+        p.pu[LUMA_4x16].pixelavg_pp = PFX(pixel_avg_pp_4x16_neon);<br>
+        p.pu[LUMA_8x4].pixelavg_pp = PFX(pixel_avg_pp_8x4_neon);<br>
+        p.pu[LUMA_8x8].pixelavg_pp = PFX(pixel_avg_pp_8x8_neon);<br>
+        p.pu[LUMA_8x16].pixelavg_pp = PFX(pixel_avg_pp_8x16_neon);<br>
+        p.pu[LUMA_8x32].pixelavg_pp = PFX(pixel_avg_pp_8x32_neon);<br>
+        p.pu[LUMA_12x16].pixelavg_pp = PFX(pixel_avg_pp_12x16_neon);<br>
+        p.pu[LUMA_16x4].pixelavg_pp = PFX(pixel_avg_pp_16x4_neon);<br>
+        p.pu[LUMA_16x8].pixelavg_pp = PFX(pixel_avg_pp_16x8_neon);<br>
+        p.pu[LUMA_16x12].pixelavg_pp = PFX(pixel_avg_pp_16x12_neon);<br>
+        p.pu[LUMA_16x16].pixelavg_pp = PFX(pixel_avg_pp_16x16_neon);<br>
+        p.pu[LUMA_16x32].pixelavg_pp = PFX(pixel_avg_pp_16x32_neon);<br>
+        p.pu[LUMA_16x64].pixelavg_pp = PFX(pixel_avg_pp_16x64_neon);<br>
+        p.pu[LUMA_24x32].pixelavg_pp = PFX(pixel_avg_pp_24x32_neon);<br>
+        p.pu[LUMA_32x8].pixelavg_pp = PFX(pixel_avg_pp_32x8_neon);<br>
+        p.pu[LUMA_32x16].pixelavg_pp = PFX(pixel_avg_pp_32x16_neon);<br>
+        p.pu[LUMA_32x24].pixelavg_pp = PFX(pixel_avg_pp_32x24_neon);<br>
+        p.pu[LUMA_32x32].pixelavg_pp = PFX(pixel_avg_pp_32x32_neon);<br>
+        p.pu[LUMA_32x64].pixelavg_pp = PFX(pixel_avg_pp_32x64_neon);<br>
+        p.pu[LUMA_48x64].pixelavg_pp = PFX(pixel_avg_pp_48x64_neon);<br>
+        p.pu[LUMA_64x16].pixelavg_pp = PFX(pixel_avg_pp_64x16_neon);<br>
+        p.pu[LUMA_64x32].pixelavg_pp = PFX(pixel_avg_pp_64x32_neon);<br>
+        p.pu[LUMA_64x48].pixelavg_pp = PFX(pixel_avg_pp_64x48_neon);<br>
+        p.pu[LUMA_64x64].pixelavg_pp = PFX(pixel_avg_pp_64x64_neon);<br>
     }<br>
     if (cpuMask & X265_CPU_ARMV6)<br>
     {<br>
diff -r 425b583f25db -r a370f5d37fca source/common/arm/mc-a.S<br>
--- a/source/common/arm/mc-a.S  Thu Feb 11 13:15:03 2016 +0530<br>
+++ b/source/common/arm/mc-a.S  Tue Feb 16 10:42:22 2016 +0530<br>
@@ -100,3 +100,234 @@<br>
     vst1.8          {q0}, [r0]<br>
     bx              lr<br>
 endfunc<br>
+<br>
+//void pixelavg_pp(pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int)<br>
+.macro pixel_avg_pp_4xN_neon h<br>
+function x265_pixel_avg_pp_4x\h\()_neon<br>
+    push            {r4}<br>
+    ldr             r4, [sp, #4]<br>
+    ldr             r12, [sp, #8]<br>
+.rept \h<br>
+    vld1.32         {d0[]}, [r2], r3<br>
+    vld1.32         {d1[]}, [r4], r12<br>
+    vrhadd.u8       d2, d0, d1<br>
+    vst1.32         {d2[0]}, [r0], r1<br>
+.endr<br>
+    pop             {r4}<br>
+    bx              lr<br>
+endfunc<br>
+.endm<br>
+<br>
+pixel_avg_pp_4xN_neon 4<br>
+pixel_avg_pp_4xN_neon 8<br>
+pixel_avg_pp_4xN_neon 16<br>
+<br>
+.macro pixel_avg_pp_8xN_neon h<br>
+function x265_pixel_avg_pp_8x\h\()_neon<br>
+    push            {r4}<br>
+    ldr             r4, [sp, #4]<br>
+    ldr             r12, [sp, #8]<br>
+.rept \h<br>
+    vld1.8          {d0}, [r2], r3<br>
+    vld1.8          {d1}, [r4], r12<br>
+    vrhadd.u8       d2, d0, d1<br>
+    vst1.8          {d2}, [r0], r1<br>
+.endr<br>
+    pop             {r4}<br>
+    bx              lr<br>
+endfunc<br>
+.endm<br>
+<br>
+pixel_avg_pp_8xN_neon 4<br>
+pixel_avg_pp_8xN_neon 8<br>
+pixel_avg_pp_8xN_neon 16<br>
+pixel_avg_pp_8xN_neon 32<br>
+<br>
+function x265_pixel_avg_pp_12x16_neon<br>
+    push            {r4, r6}<br>
+    mov             r6, #8<br>
+    ldr             r4, [sp, #8]<br>
+    ldr             r12, [sp, #12]<br>
+    sub             r1, r6<br>
+    sub             r3, r6<br>
+    sub             r12, r6<br>
+.rept 16<br>
+    vld1.32         {d0}, [r2]!<br>
+    vld1.32         {d1[0]}, [r2], r3<br>
+    vld1.32         {d2}, [r4]!<br>
+    vld1.32         {d3[0]}, [r4], r12<br>
+    vrhadd.u8       d0, d0, d2<br>
+    vrhadd.u8       d1, d1, d3<br>
+    vst1.8          {d0}, [r0]!<br>
+    vst1.32         {d1[0]}, [r0], r1<br>
+.endr<br>
+    pop            {r4, r6}<br>
+    bx              lr<br>
+endfunc<br>
+<br>
+.macro pixel_avg_pp_16xN_neon h<br>
+function x265_pixel_avg_pp_16x\h\()_neon<br>
+    push            {r4}<br>
+    ldr             r4, [sp, #4]<br>
+    ldr             r12, [sp, #8]<br>
+.rept \h<br>
+    vld1.8          {q0}, [r2], r3<br>
+    vld1.8          {q1}, [r4], r12<br>
+    vrhadd.u8       q2, q0, q1<br>
+    vst1.8          {q2}, [r0], r1<br>
+.endr<br>
+    pop             {r4}<br>
+    bx              lr<br>
+endfunc<br>
+.endm<br>
+<br>
+pixel_avg_pp_16xN_neon 4<br>
+pixel_avg_pp_16xN_neon 8<br>
+pixel_avg_pp_16xN_neon 12<br>
+pixel_avg_pp_16xN_neon 16<br>
+pixel_avg_pp_16xN_neon 32<br>
+<br>
+function x265_pixel_avg_pp_16x64_neon<br>
+    push            {r4, r6}<br>
+    ldr             r4, [sp, #8]<br>
+    ldr             r12, [sp, #12]<br>
+    mov             r6, #8<br>
+lpavg_16x64:<br>
+.rept 8<br>
+    vld1.8          {q0}, [r2], r3<br>
+    vld1.8          {q1}, [r4], r12<br>
+    vrhadd.u8       q2, q0, q1<br>
+    vst1.8          {q2}, [r0], r1<br>
+.endr<br>
+    subs            r6, r6, #1<br>
+    bne             lpavg_16x64<br>
+    pop             {r4 , r6}<br>
+    bx              lr<br>
+endfunc<br>
+<br>
+function x265_pixel_avg_pp_24x32_neon<br>
+    push            {r4, r6}<br>
+    ldr             r4, [sp, #8]<br>
+    ldr             r12, [sp, #12]<br>
+    mov             r6, #4<br>
+lpavg_24x32:<br>
+.rept 8<br>
+    vld1.8          {d0, d1, d2}, [r2], r3<br>
+    vld1.8          {d3, d4, d5}, [r4], r12<br>
+    vrhadd.u8       d0, d0, d3<br>
+    vrhadd.u8       d1, d1, d4<br>
+    vrhadd.u8       d2, d2, d5<br>
+    vst1.8          {d0, d1, d2}, [r0], r1<br>
+.endr<br>
+    subs            r6, r6, #1<br>
+    bne             lpavg_24x32<br>
+    pop             {r4, r6}<br>
+    bx              lr<br>
+endfunc<br>
+<br>
+.macro pixel_avg_pp_32xN_neon h<br>
+function x265_pixel_avg_pp_32x\h\()_neon<br>
+    push            {r4}<br>
+    ldr             r4, [sp, #4]<br>
+    ldr             r12, [sp, #8]<br>
+.rept \h<br>
+    vld1.8          {q0, q1}, [r2], r3<br>
+    vld1.8          {q2, q3}, [r4], r12<br>
+    vrhadd.u8       q0, q0, q2<br>
+    vrhadd.u8       q1, q1, q3<br>
+    vst1.8          {q0, q1}, [r0], r1<br>
+.endr<br>
+    pop             {r4}<br>
+    bx              lr<br>
+endfunc<br>
+.endm<br>
+<br>
+pixel_avg_pp_32xN_neon 8<br>
+pixel_avg_pp_32xN_neon 16<br>
+pixel_avg_pp_32xN_neon 24<br>
+<br>
+.macro pixel_avg_pp_32xN1_neon h i<br>
+function x265_pixel_avg_pp_32x\h\()_neon<br>
+    push            {r4, r6}<br>
+    ldr             r4, [sp, #8]<br>
+    ldr             r12, [sp, #12]<br>
+    mov             r6, #\i<br>
+lpavg_32x\h\():<br>
+.rept 8<br>
+    vld1.8          {q0, q1}, [r2], r3<br>
+    vld1.8          {q2, q3}, [r4], r12<br>
+    vrhadd.u8       q0, q0, q2<br>
+    vrhadd.u8       q1, q1, q3<br>
+    vst1.8          {q0, q1}, [r0], r1<br>
+.endr<br>
+    subs            r6, r6, #1<br>
+    bne             lpavg_32x\h<br>
+    pop             {r4, r6}<br>
+    bx              lr<br>
+endfunc<br>
+.endm<br>
+<br>
+pixel_avg_pp_32xN1_neon 32 4<br>
+pixel_avg_pp_32xN1_neon 64 8<br>
+<br>
+function x265_pixel_avg_pp_48x64_neon<br>
+    push            {r4, r6, r7}<br>
+    ldr             r4, [sp, #12]<br>
+    ldr             r12, [sp, #16]<br>
+    mov             r6, #8<br>
+    mov             r7, #32<br>
+    sub             r1, r7<br>
+    sub             r3, r7<br>
+    sub             r12, r7<br>
+lpavg_48x64:<br>
+.rept 8<br>
+    vld1.8          {q0, q1}, [r2]!<br>
+    vld1.8          {q2}, [r2], r3<br>
+    vld1.8          {q8, q9}, [r4]!<br>
+    vld1.8          {q10}, [r4], r12<br>
+    vrhadd.u8       q0, q0, q8<br>
+    vrhadd.u8       q1, q1, q9<br>
+    vrhadd.u8       q2, q2, q10<br>
+    vst1.8          {q0, q1}, [r0]!<br>
+    vst1.8          {q2}, [r0], r1<br>
+.endr<br>
+    subs            r6, r6, #1<br>
+    bne             lpavg_48x64<br>
+    pop             {r4, r6, r7}<br>
+    bx              lr<br>
+endfunc<br>
+<br>
+.macro pixel_avg_pp_64xN_neon h i<br>
+function x265_pixel_avg_pp_64x\h\()_neon<br>
+    push            {r4, r6, r7}<br>
+    ldr             r4, [sp, #12]<br>
+    ldr             r12, [sp, #16]<br>
+    mov             r7, #32<br>
+    mov             r6, #\i<br>
+    sub             r3, r7<br>
+    sub             r12, r7<br>
+    sub             r1, r7<br>
+lpavg_64x\h\():<br>
+.rept 4<br>
+    vld1.8          {q0, q1}, [r2]!<br>
+    vld1.8          {q2, q3}, [r2], r3<br>
+    vld1.8          {q8, q9}, [r4]!<br>
+    vld1.8          {q10, q11}, [r4], r12<br>
+    vrhadd.u8       q0, q0, q8<br>
+    vrhadd.u8       q1, q1, q9<br>
+    vrhadd.u8       q2, q2, q10<br>
+    vrhadd.u8       q3, q3, q11<br>
+    vst1.8          {q0, q1}, [r0]!<br>
+    vst1.8          {q2, q3}, [r0], r1<br>
+.endr<br>
+    subs            r6, r6, #1<br>
+    bne             lpavg_64x\h<br>
+    pop             {r4, r6, r7}<br>
+    bx              lr<br>
+endfunc<br>
+.endm<br>
+<br>
+pixel_avg_pp_64xN_neon 16 4<br>
+pixel_avg_pp_64xN_neon 32 8<br>
+pixel_avg_pp_64xN_neon 48 12<br>
+pixel_avg_pp_64xN_neon 64 16<br>
diff -r 425b583f25db -r a370f5d37fca source/common/arm/pixel.h<br>
--- a/source/common/arm/pixel.h Thu Feb 11 13:15:03 2016 +0530<br>
+++ b/source/common/arm/pixel.h Tue Feb 16 10:42:22 2016 +0530<br>
@@ -31,4 +31,29 @@<br>
 #define X265_I386_PIXEL_ARM_H<br>
 int x265_pixel_sad_4x4_armv6(const pixel* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride);<br>
 int x265_pixel_sad_4x8_armv6(const pixel* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride);<br>
+void x265_pixel_avg_pp_4x4_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);<br>
+void x265_pixel_avg_pp_4x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);<br>
+void x265_pixel_avg_pp_4x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);<br>
+void x265_pixel_avg_pp_8x4_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);<br>
+void x265_pixel_avg_pp_8x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);<br>
+void x265_pixel_avg_pp_8x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);<br>
+void x265_pixel_avg_pp_8x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);<br>
+void x265_pixel_avg_pp_12x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);<br>
+void x265_pixel_avg_pp_16x4_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);<br>
+void x265_pixel_avg_pp_16x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);<br>
+void x265_pixel_avg_pp_16x12_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);<br>
+void x265_pixel_avg_pp_16x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);<br>
+void x265_pixel_avg_pp_16x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);<br>
+void x265_pixel_avg_pp_16x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);<br>
+void x265_pixel_avg_pp_24x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);<br>
+void x265_pixel_avg_pp_32x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);<br>
+void x265_pixel_avg_pp_32x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);<br>
+void x265_pixel_avg_pp_32x24_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);<br>
+void x265_pixel_avg_pp_32x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);<br>
+void x265_pixel_avg_pp_32x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);<br>
+void x265_pixel_avg_pp_48x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);<br>
+void x265_pixel_avg_pp_64x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);<br>
+void x265_pixel_avg_pp_64x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);<br>
+void x265_pixel_avg_pp_64x48_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);<br>
+void x265_pixel_avg_pp_64x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);<br>
 #endif // ifndef X265_I386_PIXEL_ARM_H<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div><br><br clear="all"><div><br></div>-- <br><div class="gmail_signature"><div dir="ltr"><div><div>Deepthi Nandakumar<br></div>Engineering Manager, x265<br></div>Multicoreware, Inc<br></div></div>
</div>