[x265] [PATCH Review Only] chroma_hpp[8x8] for colorspace i420 in avx2: improve 530c->373c
Aasaipriya Chandran
aasaipriya at multicorewareinc.com
Fri Dec 5 08:42:05 CET 2014
Hello Chen,
Thanks for your comment. OK, I will try optimizing it.
Thanks,
Aasaipriya
On Fri, Dec 5, 2014 at 5:43 AM, chen <chenm003 at 163.com> wrote:
>
>
>
> At 2014-12-04 17:27:18,aasaipriya at multicorewareinc.com wrote:
> ># HG changeset patch
> ># User Aasaipriya Chandran <aasaipriya at multicorewareinc.com>
> ># Date 1417684329 -19800
> ># Thu Dec 04 14:42:09 2014 +0530
> ># Node ID 952f1013ee0ca5aaa25cb597990542fc8dfa8a52
> ># Parent ce4394109ae51c377593164d956f6e44b8f0d3df
> >chroma_hpp[8x8] for colorspace i420 in avx2: improve 530c->373c
> >diff -r ce4394109ae5 -r 952f1013ee0c source/common/x86/ipfilter8.asm
> >--- a/source/common/x86/ipfilter8.asm Wed Nov 26 11:53:40 2014 +0530
> >+++ b/source/common/x86/ipfilter8.asm Thu Dec 04 14:42:09 2014 +0530
> >@@ -1195,6 +1195,54 @@
> > jnz .loop
> > RET
> >
> >+INIT_YMM avx2
> >+cglobal interp_4tap_horiz_pp_8x8, 4,6,6
>
> You declare 6 mm registers, but where is m5 used?
>
> And you have more registers available here, so we can unroll the loop, unless that doesn't give any more performance.
>
>
>
> >+ mov r4d, r4m
> >+
> >+%ifdef PIC
> >+ lea r5, [tab_ChromaCoeff]
> >+ vpbroadcastd m0, [r5 + r4 * 4]
> >+%else
> >+ vpbroadcastd m0, [tab_ChromaCoeff + r4 * 4]
> >+%endif
> >+
> >+ movu m1, [tab_Tm]
> >+ vpbroadcastd m2, [pw_1]
> >+
> >+ ; register map
> >+ ; m0 - interpolate coeff
> >+ ; m1 - shuffle order table
> >+ ; m2 - constant word 1
> >+
> >+ mov r4d, 4
> >+ sub r0, 1
> >+ .loop
> >+ ; Row 0
> >+ vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
> >+ pshufb m3, m1
> >+ pmaddubsw m3, m0
> >+ pmaddwd m3, m2
> >+
> >+ ; Row 1
> >+ vbroadcasti128 m4, [r0 + r1] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
> >+ pshufb m4, m1
> >+ pmaddubsw m4, m0
> >+ pmaddwd m4, m2
> >+
> >+ packssdw m3, m4
> >+ pmulhrsw m3, [pw_512]
> >+ vextracti128 xm4, m3, 1
> >+ packuswb xm3, xm4
> >+ pshufd xm3, xm3, 11011000b
> >+
> >+ movq [r2], xm3
> >+ movhps [r2 + r3], xm3
> >+ lea r0, [r0 + r1 * 2]
> >+ lea r2, [r2 + r3 * 2]
> >+ dec r4d
> >+ jnz .loop
> >+ RET
> >+
> > ;--------------------------------------------------------------------------------------------------------------
> > ; void interp_8tap_horiz_pp_%1x%2(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
> > ;--------------------------------------------------------------------------------------------------------------
> >diff -r ce4394109ae5 -r 952f1013ee0c source/common/x86/ipfilter8.h
> >--- a/source/common/x86/ipfilter8.h Wed Nov 26 11:53:40 2014 +0530
> >+++ b/source/common/x86/ipfilter8.h Thu Dec 04 14:42:09 2014 +0530
> >@@ -573,6 +573,7 @@
> > SETUP_CHROMA_SS_FUNC_DEF(16, 64, cpu);
> >
> > CHROMA_FILTERS(_sse4);
> >+CHROMA_FILTERS(_avx2);
> > CHROMA_SP_FILTERS(_sse2);
> > CHROMA_SP_FILTERS_SSE4(_sse4);
> > CHROMA_SS_FILTERS(_sse2);
> >_______________________________________________
> >x265-devel mailing list
> >x265-devel at videolan.org
> >https://mailman.videolan.org/listinfo/x265-devel
>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20141205/22147ebd/attachment.html>
More information about the x265-devel mailing list