[x265] [PATCH Review Only] asm: luma_hps[64x64 , 64x48 , 64x32 , 64x16] avx2 - improved 58533c->29321c, 43517c->22631c, 31254c->16345c, 17540c->8854c

chen chenm003 at 163.com
Thu Mar 12 23:40:02 CET 2015


 

At 2015-03-12 14:49:50,aasaipriya at multicorewareinc.com wrote:
># HG changeset patch
># User Aasaipriya Chandran <aasaipriya at multicorewareinc.com>
># Date 1426142931 -19800
>#      Thu Mar 12 12:18:51 2015 +0530
># Node ID 9e8b95e8d6ba9e036f9a1c8f0e09d478149f1859
># Parent  6831885bc285c354817fc367017b54197a807200
>asm: luma_hps[64x64 , 64x48 , 64x32 , 64x16] avx2 - improved 58533c->29321c, 43517c->22631c, 31254c->16345c, 17540c->8854c
>
>diff -r 6831885bc285 -r 9e8b95e8d6ba source/common/x86/asm-primitives.cpp
>--- a/source/common/x86/asm-primitives.cpp	Thu Mar 12 11:02:59 2015 +0530
>+++ b/source/common/x86/asm-primitives.cpp	Thu Mar 12 12:18:51 2015 +0530
>@@ -1576,6 +1576,11 @@
>         p.pu[LUMA_32x8].luma_hps = x265_interp_8tap_horiz_ps_32x8_avx2;
>         p.pu[LUMA_32x64].luma_hps = x265_interp_8tap_horiz_ps_32x64_avx2;
> 
>+        p.pu[LUMA_64x64].luma_hps = x265_interp_8tap_horiz_ps_64x64_avx2;
>+        p.pu[LUMA_64x48].luma_hps = x265_interp_8tap_horiz_ps_64x48_avx2;
>+        p.pu[LUMA_64x32].luma_hps = x265_interp_8tap_horiz_ps_64x32_avx2;
>+        p.pu[LUMA_64x16].luma_hps = x265_interp_8tap_horiz_ps_64x16_avx2;
>+
>         p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].filter_hpp = x265_interp_4tap_horiz_pp_8x8_avx2;
>         p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].filter_hpp = x265_interp_4tap_horiz_pp_4x4_avx2;
>         p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].filter_hpp = x265_interp_4tap_horiz_pp_32x32_avx2;
>diff -r 6831885bc285 -r 9e8b95e8d6ba source/common/x86/ipfilter8.asm
>--- a/source/common/x86/ipfilter8.asm	Thu Mar 12 11:02:59 2015 +0530
>+++ b/source/common/x86/ipfilter8.asm	Thu Mar 12 12:18:51 2015 +0530
>@@ -2115,6 +2115,158 @@
> IPFILTER_LUMA_PS_32xN_AVX2 32 , 24
> IPFILTER_LUMA_PS_32xN_AVX2 32 , 8
> IPFILTER_LUMA_PS_32xN_AVX2 32 , 64
>+
>+%macro IPFILTER_LUMA_PS_64xN_AVX2 2
>+INIT_YMM avx2
>+cglobal interp_8tap_horiz_ps_%1x%2, 6, 7, 8
>+%ifdef PIC
>+    lea                         r6,                [tab_LumaCoeff]
>+    vpbroadcastq                m0,                [r6 + r4 * 8]
>+%else
>+    vpbroadcastq                m0,                [tab_LumaCoeff + r4 * 8]
>+%endif
>+    mova                        m6,                [tab_Lm + 32]
>+    mova                        m1,                [tab_Lm]
>+    xor                         r4,                r4
>+    mov                         r4d,               %2                           ;height

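Why clear r4 with `xor` right before assigning a value to it? The following `mov r4d, %2` overwrites the register anyway (a 32-bit write zero-extends into the full 64-bit register), so the `xor r4, r4` is redundant and can be removed.
The rest of the patch looks fine.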
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150313/0637c139/attachment.html>


More information about the x265-devel mailing list