[x265] [PATCH 5 of 5] asm: filter_hpp[2x4] in avx2: 185c->161c

Divya Manivannan divya at multicorewareinc.com
Thu Mar 19 06:13:57 CET 2015


# HG changeset patch
# User Divya Manivannan <divya at multicorewareinc.com>
# Date 1426740596 -19800
#      Thu Mar 19 10:19:56 2015 +0530
# Node ID 46bcd60c8a1527f09c69c5a97664d46f7517e9e7
# Parent  ad53f152fce599c1801304a0fd1ed0c5992f834f
asm: filter_hpp[2x4] in avx2: 185c->161c

diff -r ad53f152fce5 -r 46bcd60c8a15 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Thu Mar 19 10:14:57 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Thu Mar 19 10:19:56 2015 +0530
@@ -1604,6 +1604,8 @@
         p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].filter_hpp = x265_interp_4tap_horiz_pp_32x32_avx2;
         p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].filter_hpp = x265_interp_4tap_horiz_pp_16x16_avx2;
 
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_2x4].filter_hpp = x265_interp_4tap_horiz_pp_2x4_avx2;
+
         p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].filter_hpp = x265_interp_4tap_horiz_pp_4x2_avx2;
         p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].filter_hpp = x265_interp_4tap_horiz_pp_4x8_avx2;
         p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].filter_hpp = x265_interp_4tap_horiz_pp_4x16_avx2;
diff -r ad53f152fce5 -r 46bcd60c8a15 source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm	Thu Mar 19 10:14:57 2015 +0530
+++ b/source/common/x86/ipfilter8.asm	Thu Mar 19 10:19:56 2015 +0530
@@ -285,6 +285,8 @@
 interp4_horiz_shuf1:    db 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6
                         db 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14
 
+ALIGN 32
+interp4_hpp_shuf: times 2 db 0, 1, 2, 3, 1, 2, 3, 4, 8, 9, 10, 11, 9, 10, 11, 12    ; pshufb mask, same 16-byte pattern for both 128-bit lanes: from each 8-byte half pick byte windows 0-3 and 1-4
 
 ALIGN 32
 interp8_hps_shuf: dd 0, 4, 1, 5, 2, 6, 3, 7
@@ -1561,6 +1563,39 @@
     pextrd            [r2+r0],      xm3,     3
     RET
 
+INIT_YMM avx2 
+cglobal interp_4tap_horiz_pp_2x4, 4, 6, 3                                  ; 4-tap horizontal filter producing 2 bytes for each of 4 rows; reads from r0 (row stride r1), writes to r2 (row stride r3), r4m selects the coefficient entry
+    mov               r4d,           r4m                                   ; r4 = fifth argument, used below as index into tab_ChromaCoeff
+
+%ifdef PIC
+    lea               r5,            [tab_ChromaCoeff]                     ; PIC build: take the table address first, then index it
+    vpbroadcastd      m0,            [r5 + r4 * 4]                         ; m0 = 4-byte table entry r4 replicated into every dword
+%else
+    vpbroadcastd      m0,            [tab_ChromaCoeff + r4 * 4]            ; m0 = 4-byte table entry r4 replicated into every dword
+%endif
+
+    dec               r0                                                   ; step back one byte so each 4-byte window starts at (pixel - 1)
+    lea               r4,            [r1 * 3]                              ; r4 = 3 * source stride (the coefficient index is no longer needed)
+    movq              xm1,           [r0]                                  ; row 0: 8 bytes -> low qword of xm1
+    movhps            xm1,           [r0 + r1]                             ; row 1: 8 bytes -> high qword of xm1
+    movq              xm2,           [r0 + r1 * 2]                         ; row 2: 8 bytes -> low qword of xm2
+    movhps            xm2,           [r0 + r4]                             ; row 3: 8 bytes -> high qword of xm2
+    vinserti128       m1,            m1,          xm2,          1          ; m1 = rows 0,1 in the low lane, rows 2,3 in the high lane
+    pshufb            m1,            [interp4_hpp_shuf]                    ; per row: bytes 0-3 then bytes 1-4, i.e. the 4-byte windows of the two output pixels
+    pmaddubsw         m1,            m0                                    ; unsigned row bytes * signed bytes of m0, adjacent products summed into words (2 words per pixel)
+    pmaddwd           m1,            [pw_1]                                ; add each pair of words into one dword per pixel (assumes pw_1 is words of 1; defined elsewhere)
+    vextracti128      xm2,           m1,          1                        ; xm2 = dword sums of rows 2,3
+    packssdw          xm1,           xm2                                   ; 8 words in row order: two pixels for each of rows 0..3
+    pmulhrsw          xm1,           [pw_512]                              ; rounding scale; if pw_512 is words of 512 this is (x + 32) >> 6 -- constant defined elsewhere
+    packuswb          xm1,           xm1                                   ; saturate to unsigned bytes; word i of xm1 now holds the 2 output bytes of row i
+
+    lea               r4,            [r3 * 3]                              ; r4 = 3 * destination stride
+    pextrw            [r2],          xm1,         0                        ; store row 0 (2 bytes)
+    pextrw            [r2 + r3],     xm1,         1                        ; store row 1 (2 bytes)
+    pextrw            [r2 + r3 * 2], xm1,         2                        ; store row 2 (2 bytes)
+    pextrw            [r2 + r4],     xm1,         3                        ; store row 3 (2 bytes)
+    RET
+
 INIT_YMM avx2
 cglobal interp_4tap_horiz_pp_32x32, 4,6,7
     mov             r4d, r4m


More information about the x265-devel mailing list