<div style="line-height:1.7;color:#000000;font-size:14px;font-family:arial"><div><br></div><pre><br>At 2015-02-24 14:47:05,"Divya Manivannan" <divya@multicorewareinc.com> wrote:
># HG changeset patch
># User Divya Manivannan <divya@multicorewareinc.com>
># Date 1424760406 -19800
>#      Tue Feb 24 12:16:46 2015 +0530
># Node ID a2db9c6435f95b5b02c056c6641d19808e5a41ff
># Parent  edc794a061474f75e57d94c927e6d1f866ebfb16
>asm-avx2: filter_vps[4x4]: improve 201c->156c
>
>diff -r edc794a06147 -r a2db9c6435f9 source/common/x86/asm-primitives.cpp
>--- a/source/common/x86/asm-primitives.cpp     Tue Feb 24 09:25:06 2015 +0530
>+++ b/source/common/x86/asm-primitives.cpp     Tue Feb 24 12:16:46 2015 +0530
>@@ -1801,6 +1801,7 @@
>         p.chroma[X265_CSP_I420].pu[CHROMA_420_2x4].filter_vpp = x265_interp_4tap_vert_pp_2x4_avx2;
>         p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].filter_vpp = x265_interp_4tap_vert_pp_8x4_avx2;

>+        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].filter_vps = x265_interp_4tap_vert_ps_4x4_avx2;
>         p.chroma[X265_CSP_I420].pu[CHROMA_420_2x4].filter_vps = x265_interp_4tap_vert_ps_2x4_avx2;
>         p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].filter_vps = x265_interp_4tap_vert_ps_8x4_avx2;
>         p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].filter_vps = x265_interp_4tap_vert_ps_8x8_avx2;
>diff -r edc794a06147 -r a2db9c6435f9 source/common/x86/const-a.asm
>--- a/source/common/x86/const-a.asm    Tue Feb 24 09:25:06 2015 +0530
>+++ b/source/common/x86/const-a.asm    Tue Feb 24 12:16:46 2015 +0530
>@@ -74,6 +74,7 @@
> const pw_32_0,     times 4 dw 32,
>                    times 4 dw 0
> const pw_2000,     times 8 dw 0x2000
>+const sw_2000,     times 16 dw 0x2000<br>This is the same as pw_2000; only the number of repeated words differs (16 instead of 8).<br>
> const pw_8000,     times 8 dw 0x8000
> const pw_3fff,     times 8 dw 0x3fff
> const pw_ppppmmmm, dw 1,1,1,1,-1,-1,-1,-1
>diff -r edc794a06147 -r a2db9c6435f9 source/common/x86/ipfilter8.asm
>--- a/source/common/x86/ipfilter8.asm  Tue Feb 24 09:25:06 2015 +0530
>+++ b/source/common/x86/ipfilter8.asm  Tue Feb 24 12:16:46 2015 +0530
>@@ -245,6 +245,7 @@
> cextern pw_1
> cextern pw_512
> cextern pw_2000
>+cextern sw_2000

> %macro FILTER_H4_w2_2 3
>     movh        %2, [srcq - 1]
>@@ -2556,8 +2557,9 @@

> RET

>+%macro FILTER_VER_CHROMA_AVX2_4x4 1
> INIT_YMM avx2
>-cglobal interp_4tap_vert_pp_4x4, 4, 6, 3
>+cglobal interp_4tap_vert_%1_4x4, 4, 6, 3
>     mov             r4d, r4m
>     shl             r4d, 6
>     sub             r0, r1
>@@ -2591,6 +2593,7 @@
>     pmaddubsw       m0, [r5]
>     pmaddubsw       m1, [r5 + mmsize]
>     paddw           m0, m1                                  ; m0 = WORD ROW[3 2 1 0]
>+%ifidn %1,pp
>     pmulhrsw        m0, [pw_512]
>     vextracti128    xm1, m0, 1
>     packuswb        xm0, xm1
>@@ -2599,7 +2602,21 @@
>     pextrd          [r2 + r3], xm0, 1
>     pextrd          [r2 + r3 * 2], xm0, 2
>     pextrd          [r2 + r5], xm0, 3
>-    RET
>+%else
>+    add             r3d, r3d
>+    psubw           m0, [sw_2000]
>+    vextracti128    xm1, m0, 1
>+    lea             r5, [r3 * 3]
>+    movq            [r2], xm0
>+    movhps          [r2 + r3], xm0
>+    movq            [r2 + r3 * 2], xm1
>+    movhps          [r2 + r5], xm1
>+%endif
>+    RET
>+%endmacro
>+
>+FILTER_VER_CHROMA_AVX2_4x4 pp
>+FILTER_VER_CHROMA_AVX2_4x4 ps

> ;-----------------------------------------------------------------------------
> ; void interp_4tap_vert_pp_%1x%2(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
>@@ -5359,8 +5376,7 @@
>     paddw           m0, m4
>     paddw           m0, m2                                  ; m0 = WORD ROW[3 2 1 0]

>-    vbroadcasti128  m3, [pw_2000]
>-    psubw           m0, m3
>+    psubw           m0, [sw_2000]
>     vextracti128    xm2, m0, 1
>     lea             r5, [r3 * 3]
>     movq            [r2], xm0
>_______________________________________________
>x265-devel mailing list
>x265-devel@videolan.org
>https://mailman.videolan.org/listinfo/x265-devel
</pre></div>