[x265] [PATCH] asm: improve algorithm logic on saoCuOrgE3
Deepthi Nandakumar
deepthi at multicorewareinc.com
Wed Apr 15 12:28:13 CEST 2015
Min, please resend. This conflicts with Divya's patch.
On Wed, Apr 15, 2015 at 11:38 AM, Min Chen <chenm003 at 163.com> wrote:
> # HG changeset patch
> # User Min Chen <chenm003 at 163.com>
> # Date 1429078116 -28800
> # Node ID 677ecdf2ba50e52604e73a1e92ea88ab26e950c1
> # Parent dd456de98c239b86e29bf349881854a699056240
> asm: improve algorithm logic on saoCuOrgE3
> ---
> source/common/x86/loopfilter.asm | 40 ++++++++++++-------------------------
> 1 files changed, 13 insertions(+), 27 deletions(-)
>
> diff -r dd456de98c23 -r 677ecdf2ba50 source/common/x86/loopfilter.asm
> --- a/source/common/x86/loopfilter.asm Tue Apr 14 13:41:40 2015 +0800
> +++ b/source/common/x86/loopfilter.asm Wed Apr 15 14:08:36 2015 +0800
> @@ -456,19 +456,20 @@
> ;void saoCuOrgE3(pixel *rec, int8_t *upBuff1, int8_t *m_offsetEo, intptr_t stride, int startX, int endX)
>
> ;=======================================================================================================
> INIT_XMM sse4
> -cglobal saoCuOrgE3, 3, 7, 8
> +cglobal saoCuOrgE3, 3,6,8
> mov r3d, r3m
> mov r4d, r4m
> mov r5d, r5m
>
> - mov r6d, r5d
> - sub r6d, r4d
> + ; save latest 2 pixels for case startX=1 or left_endX=15
> + movh m7, [r0 + r5]
> + movhps m7, [r1 + r5 - 1]
>
> + ; move to startX+1
> inc r4d
> add r0, r4
> add r1, r4
> - movh m7, [r0 + r6 - 1]
> - mov r6, [r1 + r6 - 2]
> + sub r5d, r4d
> pxor m0, m0 ; m0 = 0
> movu m6, [pb_2] ; m6 = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
>
> @@ -508,30 +509,15 @@
> packuswb m2, m1
> movu [r0], m2
>
> - sub r5d, 16
> - jle .end
> + add r0, 16
> + add r1, 16
>
> - lea r0, [r0 + 16]
> - lea r1, [r1 + 16]
> + sub r5, 16
> + jg .loop
>
> - jnz .loop
> -
> -.end:
> - js .skip
> - sub r0, r4
> - sub r1, r4
> - movh [r0 + 16], m7
> - mov [r1 + 15], r6
> - jmp .quit
> -
> -.skip:
> - sub r0, r4
> - sub r1, r4
> - movh [r0 + 15], m7
> - mov [r1 + 14], r6
> -
> -.quit:
> -
> + ; restore last pixels (up to 2)
> + movh [r0 + r5], m7
> + movhps [r1 + r5 - 1], m7
> RET
>
>
> ;=====================================================================================
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150415/6e189fd0/attachment.html>
More information about the x265-devel mailing list