[x265] [PATCH REVIEW Only] chroma 4XN block, coeffIdx instead of coeff pointer
Praveen Tiwari
praveen at multicorewareinc.com
Fri Oct 11 19:51:57 CEST 2013
I forgot to change the line `mova coef2, [tab_coeff + 16]` (I was only
testing with coeffIdx 1). I will make it work for an arbitrary index, like
`mova coef2, [tab_coeff + height * 16]`. Please ignore this patch.
Regards,
Praveen
On Fri, Oct 11, 2013 at 10:20 PM, <praveen at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Praveen Tiwari
> # Date 1381510220 -19800
> # Node ID 5a9160e8b0bdc3117c2417bc29453077488efd8e
> # Parent c6d89dc62e191f56f63dbcb1781a6494da50a70d
> chroma 4XN block, coeffIdx instead of coeff pointer
>
> diff -r c6d89dc62e19 -r 5a9160e8b0bd source/common/x86/ipfilter8.asm
> --- a/source/common/x86/ipfilter8.asm Fri Oct 11 01:47:53 2013 -0500
> +++ b/source/common/x86/ipfilter8.asm Fri Oct 11 22:20:20 2013 +0530
> @@ -26,107 +26,58 @@
> %include "x86inc.asm"
> %include "x86util.asm"
>
> -%if ARCH_X86_64 == 0
> -
> SECTION_RODATA 32
> -tab_leftmask: db -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0
> -
> tab_Tm: db 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6
> - db 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10
>
> tab_c_512: times 8 dw 512
>
> +tab_coeff: db 0, 64, 0, 0, 0, 64, 0, 0, 0, 64, 0, 0, 0, 64, 0, 0
> + db -2, 58, 10, -2, -2, 58, 10, -2, -2, 58, 10, -2, -2, 58,
> 10, -2
> + db -4, 54, 16, -2, -4, 54, 16, -2, -4, 54, 16, -2, -4, 54,
> 16, -2
> + db -6, 46, 28, -4, -6, 46, 28, -4, -6, 46, 28, -4, -6, 46,
> 28, -4
> + db -4, 36, 36, -4, -4, 36, 36, -4, -4, 36, 36, -4, -4, 36,
> 36, -4
> + db -4, 28, 46, -6, -4, 28, 46, -6, -4, 28, 46, -6, -4, 28,
> 46, -6
> + db -2, 16, 54, -4, -2, 16, 54, -4, -2, 16, 54, -4, -2, 16,
> 54, -4
> + db -2, 10, 58, -2, -2, 10, 58, -2, -2, 10, 58, -2, -2, 10,
> 58, -2
> +
> SECTION .text
>
> -%macro FILTER_H4 3
> - movu %1, [src + col - 1]
> - pshufb %2, %1, Tm4
> +%macro FILTER_H4_w4 3
> + movu %1, [srcq - 1]
> + pshufb %2, %1, Tm0
> pmaddubsw %2, coef2
> - pshufb %1, %1, Tm5
> - pmaddubsw %1, coef2
> phaddw %2, %1
> pmulhrsw %2, %3
> packuswb %2, %2
> %endmacro
>
> +%macro FILTER_H4_w4_CALL 0
> + FILTER_H4_w4 x0, x1, x2
> +
> + movd [dstq], x1
> +
> + add srcq, srcstrideq
> + add dstq, dststrideq
> +%endmacro
> +
>
> ;-----------------------------------------------------------------------------
> -; void filterHorizontal_p_p_4(pixel *src, intptr_t srcStride, pixel *dst,
> intptr_t dstStride, int width, int height, short const *coeff)
> +; void interp_4tap_horiz_pp_w4(pixel *src, intptr_t srcStride, pixel
> *dst, intptr_t dstStride, int height, int coeffIdx)
>
> ;-----------------------------------------------------------------------------
> INIT_XMM sse4
> -cglobal filterHorizontal_p_p_4, 0, 7, 8
> -%define src r0
> -%define dst r1
> -%define row r2
> -%define col r3
> -%define width r4
> -%define widthleft r5
> -%define mask_offset r6
> -%define coef2 m7
> -%define x3 m6
> -%define Tm5 m5
> -%define Tm4 m4
> -%define x2 m3
> -%define x1 m2
> -%define x0 m1
> -%define leftmask m0
> -%define tmp r0
> -%define tmp1 r1
> -
> - mov tmp, r6m
> - movu coef2, [tmp]
> - packsswb coef2, coef2
> - pshufd coef2, coef2, 0
> +cglobal interp_4tap_horiz_pp_w4, 6, 6, 5, src, srcstride, dst, dststride,
> height, coeffIdx
> +%define coef2 m4
> +%define Tm0 m3
> +%define x2 m2
> +%define x1 m1
> +%define x0 m0
>
> - mova x3, [tab_c_512]
> + mova coef2, [tab_coeff + 16]
> + mova x2, [tab_c_512]
> + mova Tm0, [tab_Tm]
>
> - mov width, r4m
> - mov widthleft, width
> - and width, ~7
> - and widthleft, 7
> - mov mask_offset, widthleft
> - neg mask_offset
> +.loop
> +FILTER_H4_w4_CALL
> +dec r4d
> +jnz .loop
> +RET
>
> - movq leftmask, [tab_leftmask + (7 + mask_offset)]
> - mova Tm4, [tab_Tm]
> - mova Tm5, [tab_Tm + 16]
> -
> - mov src, r0m
> - mov dst, r2m
> - mov row, r5m
> -
> -_loop_row:
> - xor col, col
> -
> -_loop_col:
> - FILTER_H4 x0, x1, x3
> - movh [dst + col], x1
> -
> - add col, 8
> -
> - cmp col, width
> - jl _loop_col
> -
> -_end_col:
> - test widthleft, widthleft
> - jz _next_row
> -
> - movq x2, [dst + col]
> - FILTER_H4 x0, x1, x3
> - pblendvb x2, x2, x1, leftmask
> - movh [dst + col], x2
> -
> -_next_row:
> - add src, r1m
> - add dst, r3m
> - dec row
> -
> - test row, row
> - jz _end_row
> -
> - jmp _loop_row
> -
> -_end_row:
> -
> - RET
> -
> -%endif ; ARCH_X86_64 == 0
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131011/a0815794/attachment-0001.html>
More information about the x265-devel
mailing list