[x265] [PATCH] arm: Implement planecopy_cp NEON

Deepthi Nandakumar deepthi at multicorewareinc.com
Thu Mar 10 08:00:15 CET 2016


Your patch is still mangled. See:
https://patches.videolan.org/patch/12602/

If your patch is otherwise OK, could you change the commit message slightly
so that it is sent as a new patch altogether?

On Wed, Mar 9, 2016 at 5:12 PM, Ramya Sriraman <ramya at multicorewareinc.com>
wrote:

> # HG changeset patch
> # User Ramya Sriraman<ramya at multicorewareinc.com>
> # Date 1456985538 -19800
> #      Thu Mar 03 11:42:18 2016 +0530
> # Node ID 3aeb208f4923e8a45cfbacbb792877af52b16970
> # Parent  6d06de58c3163c19def2c28e614492e2e74283f8
> arm: Implement planecopy_cp NEON
>
> diff -r 6d06de58c316 -r 3aeb208f4923 source/common/arm/asm-primitives.cpp
> --- a/source/common/arm/asm-primitives.cpp    Wed Mar 02 19:52:25 2016
> +0530
> +++ b/source/common/arm/asm-primitives.cpp    Thu Mar 03 11:42:18 2016
> +0530
> @@ -176,6 +176,9 @@
>          p.pu[LUMA_64x48].copy_pp = PFX(blockcopy_pp_64x48_neon);
>          p.pu[LUMA_64x64].copy_pp = PFX(blockcopy_pp_64x64_neon);
>
> +        // planecopy
> +        p.planecopy_cp = PFX(pixel_planecopy_cp_neon);
> +
>          // sad
>          p.pu[LUMA_8x4].sad    = PFX(pixel_sad_8x4_neon);
>          p.pu[LUMA_8x8].sad    = PFX(pixel_sad_8x8_neon);
> diff -r 6d06de58c316 -r 3aeb208f4923 source/common/arm/pixel-util.S
> --- a/source/common/arm/pixel-util.S    Wed Mar 02 19:52:25 2016 +0530
> +++ b/source/common/arm/pixel-util.S    Thu Mar 03 11:42:18 2016 +0530
> @@ -626,3 +626,55 @@
>      pop             {r4, r5}
>      bx              lr
>  endfunc
> +
> +function x265_pixel_planecopy_cp_neon
> +    push            {r4, r5, r6, r7}
> +    ldr             r4, [sp, #4 * 4]
> +    ldr             r5, [sp, #4 * 4 + 4]
> +    ldr             r12, [sp, #4 * 4 + 8]
> +    vdup.8          q2, r12
> +    sub             r5, #1
> +
> +.loop_h:
> +    mov             r6, r0
> +    mov             r12, r2
> +    eor             r7, r7
> +.loop_w:
> +    vld1.u8         {q0}, [r6]!
> +    vshl.u8         q0, q0, q2
> +    vst1.u8         {q0}, [r12]!
> +
> +    add             r7, #16
> +    cmp             r7, r4
>
> +    blt             .loop_w
> +
> +    add             r0, r1
> +    add             r2, r3
> +
> +    subs             r5, #1
> +    bgt             .loop_h
> +
> +// handle last row
> +    mov             r5, r4
> +    lsr             r5, #3
> +
> +.loopW8:
> +    vld1.u8         d0, [r0]!
> +    vshl.u8         d0, d0, d4
> +    vst1.u8         d0, [r2]!
> +    subs            r4, r4, #8
> +    subs            r5, #1
> +    bgt             .loopW8
> +
> +    mov             r5,#8
> +    sub             r5, r4
> +    sub             r0, r5
> +    sub             r2, r5
> +    vld1.u8         d0, [r0]
> +    vshl.u8         d0, d0, d4
> +    vst1.u8         d0, [r2]
> +
> +    pop             {r4, r5, r6, r7}
> +    bx              lr
> +endfunc
> +
> diff -r 6d06de58c316 -r 3aeb208f4923 source/common/arm/pixel.h
> --- a/source/common/arm/pixel.h    Wed Mar 02 19:52:25 2016 +0530
> +++ b/source/common/arm/pixel.h    Thu Mar 03 11:42:18 2016 +0530
> @@ -163,4 +163,6 @@
>  void x265_pixel_add_ps_16x16_neon(pixel* a, intptr_t dstride, const
> pixel* b0, const int16_t* b1, intptr_t sstride0, intptr_t sstride1);
>  void x265_pixel_add_ps_32x32_neon(pixel* a, intptr_t dstride, const
> pixel* b0, const int16_t* b1, intptr_t sstride0, intptr_t sstride1);
>  void x265_pixel_add_ps_64x64_neon(pixel* a, intptr_t dstride, const
> pixel* b0, const int16_t* b1, intptr_t sstride0, intptr_t sstride1);
> +
> +void x265_pixel_planecopy_cp_neon(const uint8_t* src, intptr_t srcStride,
> pixel* dst, intptr_t dstStride, int width, int height, int shift);
>  #endif // ifndef X265_I386_PIXEL_ARM_H
>
>
>
> Thank you
> Regards
> Ramya
>
> On Tue, Mar 8, 2016 at 3:32 PM, Deepthi Nandakumar <
> deepthi at multicorewareinc.com> wrote:
>
>> This patch does not apply.
>>
>> On Thu, Mar 3, 2016 at 12:32 PM, <ramya at multicorewareinc.com> wrote:
>>
>>> # HG changeset patch
>>> # User Ramya Sriraman<ramya at multicorewareinc.com>
>>> # Date 1456985538 -19800
>>> #      Thu Mar 03 11:42:18 2016 +0530
>>> # Node ID dbccf88be30776f12c7f8c52b9da67d4607abcab
>>> # Parent  9cc9920bf82be1b43efd2a3628e28a3a78ab3b2f
>>> arm: Implement planecopy_cp NEON
>>>
>>> diff -r 9cc9920bf82b -r dbccf88be307 source/common/arm/asm-primitives.cpp
>>> --- a/source/common/arm/asm-primitives.cpp      Wed Mar 02 17:26:11 2016
>>> +0530
>>> +++ b/source/common/arm/asm-primitives.cpp      Thu Mar 03 11:42:18 2016
>>> +0530
>>> @@ -142,6 +142,9 @@
>>>          p.pu[LUMA_64x48].copy_pp = PFX(blockcopy_pp_64x48_neon);
>>>          p.pu[LUMA_64x64].copy_pp = PFX(blockcopy_pp_64x64_neon);
>>>
>>> +        // planecopy
>>> +        p.planecopy_cp = PFX(pixel_planecopy_cp_neon);
>>> +
>>>          // sad
>>>          p.pu[LUMA_8x4].sad    = PFX(pixel_sad_8x4_neon);
>>>          p.pu[LUMA_8x8].sad    = PFX(pixel_sad_8x8_neon);
>>> diff -r 9cc9920bf82b -r dbccf88be307 source/common/arm/pixel-util.S
>>> --- a/source/common/arm/pixel-util.S    Wed Mar 02 17:26:11 2016 +0530
>>> +++ b/source/common/arm/pixel-util.S    Thu Mar 03 11:42:18 2016 +0530
>>> @@ -626,3 +626,56 @@
>>>      pop             {r4, r5}
>>>      bx              lr
>>>  endfunc
>>> +
>>> +function x265_pixel_planecopy_cp_neon
>>> +    push            {r4, r5, r6}
>>> +    ldr             r4, [sp, #12]
>>> +    ldr             r5, [sp, #16]
>>> +    ldr             r12, [sp, #20]
>>> +    vdup.8          q2, r12
>>> +    sub             r5, #1
>>> +
>>> +.loop_h:
>>> +    eor             r6, r6
>>> +    eor             r12, r12
>>> +.loop_w:
>>> +    add             r12, r0, r6
>>> +    vld1.u8         {q0}, [r12]
>>> +    vshl.u8         q0, q0, q2
>>> +    add             r12, r2, r6
>>> +    vst1.u8         {q0}, [r12]
>>> +
>>> +    add             r6, #16
>>> +    cmp             r6, r4
>>> +    blt             .loop_w
>>> +
>>> +    add             r0, r1
>>> +    add             r2, r3
>>> +
>>> +    subs             r5, #1
>>> +    bgt             .loop_h
>>> +
>>> +// handle last row
>>> +    mov             r5, r4
>>> +    lsr             r5, #3
>>> +
>>> +.loopW8:
>>> +    vld1.u8         d0, [r0]!
>>> +    vshl.u8         d0, d0, d4
>>> +    vst1.u8         d0, [r2]!
>>> +    subs            r4, r4, #8
>>> +    subs            r5, #1
>>> +    bgt             .loopW8
>>> +
>>> +    mov             r5,#8
>>> +    sub             r5, r4
>>> +    sub             r0, r5
>>> +    sub             r2, r5
>>> +    vld1.u8         d0, [r0]
>>> +    vshl.u8         d0, d0, d4
>>> +    vst1.u8         d0, [r2]
>>> +
>>> +    pop             {r4, r5, r6}
>>> +    bx              lr
>>> +endfunc
>>> +
>>> diff -r 9cc9920bf82b -r dbccf88be307 source/common/arm/pixel.h
>>> --- a/source/common/arm/pixel.h Wed Mar 02 17:26:11 2016 +0530
>>> +++ b/source/common/arm/pixel.h Thu Mar 03 11:42:18 2016 +0530
>>> @@ -163,4 +163,6 @@
>>>  void x265_pixel_add_ps_16x16_neon(pixel* a, intptr_t dstride, const
>>> pixel* b0, const int16_t* b1, intptr_t sstride0, intptr_t sstride1);
>>>  void x265_pixel_add_ps_32x32_neon(pixel* a, intptr_t dstride, const
>>> pixel* b0, const int16_t* b1, intptr_t sstride0, intptr_t sstride1);
>>>  void x265_pixel_add_ps_64x64_neon(pixel* a, intptr_t dstride, const
>>> pixel* b0, const int16_t* b1, intptr_t sstride0, intptr_t sstride1);
>>> +
>>> +void x265_pixel_planecopy_cp_neon(const uint8_t* src, intptr_t
>>> srcStride, pixel* dst, intptr_t dstStride, int width, int height, int
>>> shift);
>>>  #endif // ifndef X265_I386_PIXEL_ARM_H
>>> _______________________________________________
>>> x265-devel mailing list
>>> x265-devel at videolan.org
>>> https://mailman.videolan.org/listinfo/x265-devel
>>>
>>
>>
>>
>> --
>> Deepthi Nandakumar
>> Engineering Manager, x265
>> Multicoreware, Inc
>>
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>


-- 
Deepthi Nandakumar
Engineering Manager, x265
Multicoreware, Inc
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20160310/6d8da01a/attachment.html>


More information about the x265-devel mailing list