[x265] [PATCH REVIEW Only] chroma 4xN block, coeffIdx instead of coeff pointer
Praveen Tiwari
praveen at multicorewareinc.com
Fri Oct 11 19:59:18 CEST 2013
Oops, correction to my previous mail: the line should be mova coef2, [tab_coeff + coeffIdx * 16].
On Fri, Oct 11, 2013 at 11:21 PM, Praveen Tiwari <
praveen at multicorewareinc.com> wrote:
> I forgot to change the line mova coef2,
> [tab_coeff + 16] (I was only testing with coeffIdx 1). I will make it
> work for any index, like mova coef2, [tab_coeff + height * 16]. Please
> ignore this patch.
>
> Regards,
> Praveen
>
>
> On Fri, Oct 11, 2013 at 10:20 PM, <praveen at multicorewareinc.com> wrote:
>
>> # HG changeset patch
>> # User Praveen Tiwari
>> # Date 1381510220 -19800
>> # Node ID 5a9160e8b0bdc3117c2417bc29453077488efd8e
>> # Parent c6d89dc62e191f56f63dbcb1781a6494da50a70d
>> chroma 4XN block, coeffIdex insted of coeff pointer
>>
>> diff -r c6d89dc62e19 -r 5a9160e8b0bd source/common/x86/ipfilter8.asm
>> --- a/source/common/x86/ipfilter8.asm Fri Oct 11 01:47:53 2013 -0500
>> +++ b/source/common/x86/ipfilter8.asm Fri Oct 11 22:20:20 2013 +0530
>> @@ -26,107 +26,58 @@
>> %include "x86inc.asm"
>> %include "x86util.asm"
>>
>> -%if ARCH_X86_64 == 0
>> -
>> SECTION_RODATA 32
>> -tab_leftmask: db -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0
>> -
>> tab_Tm: db 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6
>> - db 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10
>>
>> tab_c_512: times 8 dw 512
>>
>> +tab_coeff: db 0, 64, 0, 0, 0, 64, 0, 0, 0, 64, 0, 0, 0, 64, 0, 0
>> + db -2, 58, 10, -2, -2, 58, 10, -2, -2, 58, 10, -2, -2, 58,
>> 10, -2
>> + db -4, 54, 16, -2, -4, 54, 16, -2, -4, 54, 16, -2, -4, 54,
>> 16, -2
>> + db -6, 46, 28, -4, -6, 46, 28, -4, -6, 46, 28, -4, -6, 46,
>> 28, -4
>> + db -4, 36, 36, -4, -4, 36, 36, -4, -4, 36, 36, -4, -4, 36,
>> 36, -4
>> + db -4, 28, 46, -6, -4, 28, 46, -6, -4, 28, 46, -6, -4, 28,
>> 46, -6
>> + db -2, 16, 54, -4, -2, 16, 54, -4, -2, 16, 54, -4, -2, 16,
>> 54, -4
>> + db -2, 10, 58, -2, -2, 10, 58, -2, -2, 10, 58, -2, -2, 10,
>> 58, -2
>> +
>> SECTION .text
>>
>> -%macro FILTER_H4 3
>> - movu %1, [src + col - 1]
>> - pshufb %2, %1, Tm4
>> +%macro FILTER_H4_w4 3
>> + movu %1, [srcq - 1]
>> + pshufb %2, %1, Tm0
>> pmaddubsw %2, coef2
>> - pshufb %1, %1, Tm5
>> - pmaddubsw %1, coef2
>> phaddw %2, %1
>> pmulhrsw %2, %3
>> packuswb %2, %2
>> %endmacro
>>
>> +%macro FILTER_H4_w4_CALL 0
>> + FILTER_H4_w4 x0, x1, x2
>> +
>> + movd [dstq], x1
>> +
>> + add srcq, srcstrideq
>> + add dstq, dststrideq
>> +%endmacro
>> +
>>
>> ;-----------------------------------------------------------------------------
>> -; void filterHorizontal_p_p_4(pixel *src, intptr_t srcStride, pixel
>> *dst, intptr_t dstStride, int width, int height, short const *coeff)
>> +; void interp_4tap_horiz_pp_w4(pixel *src, intptr_t srcStride, pixel
>> *dst, intptr_t dstStride, int height, int coeffIdx)
>>
>> ;-----------------------------------------------------------------------------
>> INIT_XMM sse4
>> -cglobal filterHorizontal_p_p_4, 0, 7, 8
>> -%define src r0
>> -%define dst r1
>> -%define row r2
>> -%define col r3
>> -%define width r4
>> -%define widthleft r5
>> -%define mask_offset r6
>> -%define coef2 m7
>> -%define x3 m6
>> -%define Tm5 m5
>> -%define Tm4 m4
>> -%define x2 m3
>> -%define x1 m2
>> -%define x0 m1
>> -%define leftmask m0
>> -%define tmp r0
>> -%define tmp1 r1
>> -
>> - mov tmp, r6m
>> - movu coef2, [tmp]
>> - packsswb coef2, coef2
>> - pshufd coef2, coef2, 0
>> +cglobal interp_4tap_horiz_pp_w4, 6, 6, 5, src, srcstride, dst,
>> dststride, height, coeffIdx
>> +%define coef2 m4
>> +%define Tm0 m3
>> +%define x2 m2
>> +%define x1 m1
>> +%define x0 m0
>>
>> - mova x3, [tab_c_512]
>> + mova coef2, [tab_coeff + 16]
>> + mova x2, [tab_c_512]
>> + mova Tm0, [tab_Tm]
>>
>> - mov width, r4m
>> - mov widthleft, width
>> - and width, ~7
>> - and widthleft, 7
>> - mov mask_offset, widthleft
>> - neg mask_offset
>> +.loop
>> +FILTER_H4_w4_CALL
>> +dec r4d
>> +jnz .loop
>> +RET
>>
>> - movq leftmask, [tab_leftmask + (7 + mask_offset)]
>> - mova Tm4, [tab_Tm]
>> - mova Tm5, [tab_Tm + 16]
>> -
>> - mov src, r0m
>> - mov dst, r2m
>> - mov row, r5m
>> -
>> -_loop_row:
>> - xor col, col
>> -
>> -_loop_col:
>> - FILTER_H4 x0, x1, x3
>> - movh [dst + col], x1
>> -
>> - add col, 8
>> -
>> - cmp col, width
>> - jl _loop_col
>> -
>> -_end_col:
>> - test widthleft, widthleft
>> - jz _next_row
>> -
>> - movq x2, [dst + col]
>> - FILTER_H4 x0, x1, x3
>> - pblendvb x2, x2, x1, leftmask
>> - movh [dst + col], x2
>> -
>> -_next_row:
>> - add src, r1m
>> - add dst, r3m
>> - dec row
>> -
>> - test row, row
>> - jz _end_row
>> -
>> - jmp _loop_row
>> -
>> -_end_row:
>> -
>> - RET
>> -
>> -%endif ; ARCH_X86_64 == 0
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131011/50b94e1d/attachment.html>
More information about the x265-devel
mailing list