[x265] [PATCH Review Only] chroma_hpp[8x8] for colorspace i420 in avx2: improve 530c->373c

Aasaipriya Chandran aasaipriya at multicorewareinc.com
Fri Dec 5 08:42:05 CET 2014


Hello Chen,

Thanks for your comment. OK, I will try optimizing it.

Thanks,
Aasaipriya

On Fri, Dec 5, 2014 at 5:43 AM, chen <chenm003 at 163.com> wrote:

>
>
>
> At 2014-12-04 17:27:18,aasaipriya at multicorewareinc.com wrote:
> ># HG changeset patch
> ># User Aasaipriya Chandran <aasaipriya at multicorewareinc.com>
> ># Date 1417684329 -19800
> >#      Thu Dec 04 14:42:09 2014 +0530
> ># Node ID 952f1013ee0ca5aaa25cb597990542fc8dfa8a52
> ># Parent  ce4394109ae51c377593164d956f6e44b8f0d3df
> >chroma_hpp[8x8] for colorspace i420 in avx2: improve 530c->373c
> >diff -r ce4394109ae5 -r 952f1013ee0c source/common/x86/ipfilter8.asm
> >--- a/source/common/x86/ipfilter8.asm	Wed Nov 26 11:53:40 2014 +0530
> >+++ b/source/common/x86/ipfilter8.asm	Thu Dec 04 14:42:09 2014 +0530
> >@@ -1195,6 +1195,54 @@
> >     jnz               .loop
> >     RET
> >
> >+INIT_YMM avx2
> >+cglobal interp_4tap_horiz_pp_8x8, 4,6,6
>
> You declare 6 mm registers here, but where is m5 used?
>
> And you have more registers available here, so we can unroll the loop, unless that doesn't give any more performance.
>
>
>
> >+    mov             r4d, r4m
> >+
> >+%ifdef PIC
> >+    lea             r5, [tab_ChromaCoeff]
> >+    vpbroadcastd    m0, [r5 + r4 * 4]
> >+%else
> >+    vpbroadcastd    m0, [tab_ChromaCoeff + r4 * 4]
> >+%endif
> >+
> >+    movu            m1, [tab_Tm]
> >+    vpbroadcastd    m2, [pw_1]
> >+
> >+    ; register map
> >+    ; m0 - interpolate coeff
> >+    ; m1 - shuffle order table
> >+    ; m2 - constant word 1
> >+
> >+    mov             r4d,             4
> >+    sub             r0, 1
> >+    .loop
> >+    ; Row 0
> >+    vbroadcasti128    m3,           [r0]                        ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
> >+    pshufb            m3,           m1
> >+    pmaddubsw         m3,           m0
> >+    pmaddwd           m3,           m2
> >+
> >+    ; Row 1
> >+    vbroadcasti128    m4,           [r0 + r1]                        ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
> >+    pshufb            m4,           m1
> >+    pmaddubsw         m4,           m0
> >+    pmaddwd           m4,           m2
> >+
> >+    packssdw          m3,           m4
> >+    pmulhrsw          m3,           [pw_512]
> >+    vextracti128      xm4,          m3,    1
> >+    packuswb          xm3,          xm4
> >+    pshufd            xm3,          xm3,   11011000b
> >+
> >+    movq              [r2],         xm3
> >+    movhps            [r2 + r3],    xm3
> >+    lea               r0,           [r0 + r1 * 2]
> >+    lea               r2,           [r2 + r3 * 2]
> >+    dec r4d
> >+    jnz .loop
> >+    RET
> >+
> > ;--------------------------------------------------------------------------------------------------------------
> > ; void interp_8tap_horiz_pp_%1x%2(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
> > ;--------------------------------------------------------------------------------------------------------------
> >diff -r ce4394109ae5 -r 952f1013ee0c source/common/x86/ipfilter8.h
> >--- a/source/common/x86/ipfilter8.h	Wed Nov 26 11:53:40 2014 +0530
> >+++ b/source/common/x86/ipfilter8.h	Thu Dec 04 14:42:09 2014 +0530
> >@@ -573,6 +573,7 @@
> >     SETUP_CHROMA_SS_FUNC_DEF(16, 64, cpu);
> >
> > CHROMA_FILTERS(_sse4);
> >+CHROMA_FILTERS(_avx2);
> > CHROMA_SP_FILTERS(_sse2);
> > CHROMA_SP_FILTERS_SSE4(_sse4);
> > CHROMA_SS_FILTERS(_sse2);
> >_______________________________________________
> >x265-devel mailing list
> >x265-devel at videolan.org
> >https://mailman.videolan.org/listinfo/x265-devel
>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20141205/22147ebd/attachment.html>


More information about the x265-devel mailing list