[x265] [PATCH] asm: improve algorithm logic on saoCuOrgE3
Deepthi Nandakumar
deepthi at multicorewareinc.com
Wed Apr 15 12:31:31 CEST 2015
Sorry, I just realised that Steve had already pushed this.
On Wed, Apr 15, 2015 at 3:58 PM, Deepthi Nandakumar <deepthi at multicorewareinc.com> wrote:
> Min, please resend. This conflicts with Divya's patch.
>
> On Wed, Apr 15, 2015 at 11:38 AM, Min Chen <chenm003 at 163.com> wrote:
>
>> # HG changeset patch
>> # User Min Chen <chenm003 at 163.com>
>> # Date 1429078116 -28800
>> # Node ID 677ecdf2ba50e52604e73a1e92ea88ab26e950c1
>> # Parent dd456de98c239b86e29bf349881854a699056240
>> asm: improve algorithm logic on saoCuOrgE3
>> ---
>> source/common/x86/loopfilter.asm | 40 ++++++++++++-------------------------
>> 1 files changed, 13 insertions(+), 27 deletions(-)
>>
>> diff -r dd456de98c23 -r 677ecdf2ba50 source/common/x86/loopfilter.asm
>> --- a/source/common/x86/loopfilter.asm Tue Apr 14 13:41:40 2015 +0800
>> +++ b/source/common/x86/loopfilter.asm Wed Apr 15 14:08:36 2015 +0800
>> @@ -456,19 +456,20 @@
>> ;void saoCuOrgE3(pixel *rec, int8_t *upBuff1, int8_t *m_offsetEo, intptr_t stride, int startX, int endX)
>>
>> ;=======================================================================================================
>> INIT_XMM sse4
>> -cglobal saoCuOrgE3, 3, 7, 8
>> +cglobal saoCuOrgE3, 3,6,8
>> mov r3d, r3m
>> mov r4d, r4m
>> mov r5d, r5m
>>
>> - mov r6d, r5d
>> - sub r6d, r4d
>> + ; save latest 2 pixels for case startX=1 or left_endX=15
>> + movh m7, [r0 + r5]
>> + movhps m7, [r1 + r5 - 1]
>>
>> + ; move to startX+1
>> inc r4d
>> add r0, r4
>> add r1, r4
>> - movh m7, [r0 + r6 - 1]
>> - mov r6, [r1 + r6 - 2]
>> + sub r5d, r4d
>> pxor m0, m0 ; m0 = 0
>> movu m6, [pb_2] ; m6 = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
>>
>> @@ -508,30 +509,15 @@
>> packuswb m2, m1
>> movu [r0], m2
>>
>> - sub r5d, 16
>> - jle .end
>> + add r0, 16
>> + add r1, 16
>>
>> - lea r0, [r0 + 16]
>> - lea r1, [r1 + 16]
>> + sub r5, 16
>> + jg .loop
>>
>> - jnz .loop
>> -
>> -.end:
>> - js .skip
>> - sub r0, r4
>> - sub r1, r4
>> - movh [r0 + 16], m7
>> - mov [r1 + 15], r6
>> - jmp .quit
>> -
>> -.skip:
>> - sub r0, r4
>> - sub r1, r4
>> - movh [r0 + 15], m7
>> - mov [r1 + 14], r6
>> -
>> -.quit:
>> -
>> + ; restore last pixels (up to 2)
>> + movh [r0 + r5], m7
>> + movhps [r1 + r5 - 1], m7
>> RET
>>
>>
>> ;=====================================================================================
>>
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150415/f352041c/attachment.html>
More information about the x265-devel mailing list