<div style="line-height:1.7;color:#000000;font-size:14px;font-family:arial"><br><pre><br>At 2016-03-14 19:09:19,ramya@multicorewareinc.com wrote:
># HG changeset patch
># User Ramya Sriraman<ramya@multicorewareinc.com>
># Date 1457681682 -19800
># Fri Mar 11 13:04:42 2016 +0530
># Node ID c1d2fa2ca49d4027252bd52176ecbe2db0d0eddd
># Parent 0af38750a71aab5fe790993365aaaa3e209a7d5c
>arm: Implement interp_8tap_vert_pp_4xn_neon and interp_8tap_vert_pp_8xn_neon
>
>diff -r 0af38750a71a -r c1d2fa2ca49d source/common/arm/asm-primitives.cpp
>--- a/source/common/arm/asm-primitives.cpp Thu Mar 10 21:43:35 2016 +0530
>+++ b/source/common/arm/asm-primitives.cpp Fri Mar 11 13:04:42 2016 +0530
>@@ -290,6 +290,14 @@
> // planecopy
> p.planecopy_cp = PFX(pixel_planecopy_cp_neon);
>
>+ // interpolation filters
>+ p.pu[LUMA_4x4].luma_vpp = PFX(interp_8tap_vert_pp_4x4_neon);
>+ p.pu[LUMA_4x8].luma_vpp = PFX(interp_8tap_vert_pp_4x8_neon);
>+ p.pu[LUMA_4x16].luma_vpp = PFX(interp_8tap_vert_pp_4x16_neon);
>+ p.pu[LUMA_8x4].luma_vpp = PFX(interp_8tap_vert_pp_8x4_neon);
>+ p.pu[LUMA_8x8].luma_vpp = PFX(interp_8tap_vert_pp_8x8_neon);
>+ p.pu[LUMA_8x16].luma_vpp = PFX(interp_8tap_vert_pp_8x16_neon);
>+ p.pu[LUMA_8x32].luma_vpp = PFX(interp_8tap_vert_pp_8x32_neon);
> }
> if (cpuMask & X265_CPU_ARMV6)
> {
>diff -r 0af38750a71a -r c1d2fa2ca49d source/common/arm/ipfilter8.S
>--- a/source/common/arm/ipfilter8.S Thu Mar 10 21:43:35 2016 +0530
>+++ b/source/common/arm/ipfilter8.S Fri Mar 11 13:04:42 2016 +0530
>@@ -27,6 +27,8 @@
>
> .align 4
>
>+g_lumaFilter:
>+.word 0,0,0,0,0,0,64,64,0,0,0,0,0,0,0,0,-1,-1,4,4,-10,-10,58,58,17,17,-5,-5,1,1,0,0,-1,-1,4,4,-11,-11,40,40,40,40,-11,-11,4,4,-1,-1,0,0,1,1,-5,-5,17,17,58,58,-10,-10,4,4,-1,-1
> .text
>
> // filterPixelToShort(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride)
>@@ -692,3 +694,207 @@
> bgt .loop_filterP2S_48x64
> bx lr
> endfunc
>+
>+.macro LUMA_VPP_4xN h
>+function x265_interp_8tap_vert_pp_4x\h\()_neon
>+ push {r4, r5, r6}
>+ ldr r4, [sp, #4 * 3]
>+ mov r5, r4, lsl #6
>+ mov r4, #3
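>+ mul r4, r1, r4<br>How about the following instead? It computes the same 3 * r1 with a shift and a subtract, avoiding the mov/mul pair:<br>mov r4, r1, lsl #2<br>sub r4, r1<br>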
<br>The code is correct; this is only a performance concern. We could use ffmpeg's hevcdsp_qpel_neon.S as a reference.
</pre></div>