<div style="line-height:1.7;color:#000000;font-size:14px;font-family:arial"><div> </div><pre><br>At 2015-03-12 14:49:50,aasaipriya@multicorewareinc.com wrote:
># HG changeset patch
># User Aasaipriya Chandran &lt;aasaipriya@multicorewareinc.com&gt;
># Date 1426142931 -19800
># Thu Mar 12 12:18:51 2015 +0530
># Node ID 9e8b95e8d6ba9e036f9a1c8f0e09d478149f1859
># Parent 6831885bc285c354817fc367017b54197a807200
>asm: luma_hps[64x64 , 64x48 , 64x32 , 64x16] avx2 - improved 58533c->29321c, 43517c->22631c, 31254c->16345c, 17540c->8854c
>
>diff -r 6831885bc285 -r 9e8b95e8d6ba source/common/x86/asm-primitives.cpp
>--- a/source/common/x86/asm-primitives.cpp Thu Mar 12 11:02:59 2015 +0530
>+++ b/source/common/x86/asm-primitives.cpp Thu Mar 12 12:18:51 2015 +0530
>@@ -1576,6 +1576,11 @@
> p.pu[LUMA_32x8].luma_hps = x265_interp_8tap_horiz_ps_32x8_avx2;
> p.pu[LUMA_32x64].luma_hps = x265_interp_8tap_horiz_ps_32x64_avx2;
>
>+ p.pu[LUMA_64x64].luma_hps = x265_interp_8tap_horiz_ps_64x64_avx2;
>+ p.pu[LUMA_64x48].luma_hps = x265_interp_8tap_horiz_ps_64x48_avx2;
>+ p.pu[LUMA_64x32].luma_hps = x265_interp_8tap_horiz_ps_64x32_avx2;
>+ p.pu[LUMA_64x16].luma_hps = x265_interp_8tap_horiz_ps_64x16_avx2;
>+
> p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].filter_hpp = x265_interp_4tap_horiz_pp_8x8_avx2;
> p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].filter_hpp = x265_interp_4tap_horiz_pp_4x4_avx2;
> p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].filter_hpp = x265_interp_4tap_horiz_pp_32x32_avx2;
>diff -r 6831885bc285 -r 9e8b95e8d6ba source/common/x86/ipfilter8.asm
>--- a/source/common/x86/ipfilter8.asm Thu Mar 12 11:02:59 2015 +0530
>+++ b/source/common/x86/ipfilter8.asm Thu Mar 12 12:18:51 2015 +0530
>@@ -2115,6 +2115,158 @@
> IPFILTER_LUMA_PS_32xN_AVX2 32 , 24
> IPFILTER_LUMA_PS_32xN_AVX2 32 , 8
> IPFILTER_LUMA_PS_32xN_AVX2 32 , 64
>+
>+%macro IPFILTER_LUMA_PS_64xN_AVX2 2
>+INIT_YMM avx2
>+cglobal interp_8tap_horiz_ps_%1x%2, 6, 7, 8
>+%ifdef PIC
>+ lea r6, [tab_LumaCoeff]
>+ vpbroadcastq m0, [r6 + r4 * 8]
>+%else
>+ vpbroadcastq m0, [tab_LumaCoeff + r4 * 8]
>+%endif
>+ mova m6, [tab_Lm + 32]
>+ mova m1, [tab_Lm]
>+ xor r4, r4
>+ mov r4d, %2 ;height
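</pre><pre>Why clear r4 (xor r4, r4) right before assigning the height to it? The following mov r4d already overwrites the register, so the xor is redundant.</pre><pre>The other changes are fine.</pre></div>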