[x265] [PATCH] asm : chroma_hpp[4x2] for i420 avx2 - improved 138c->134c

aasaipriya at multicorewareinc.com aasaipriya at multicorewareinc.com
Mon Mar 16 06:02:55 CET 2015


# HG changeset patch
# User Aasaipriya Chandran <aasaipriya at multicorewareinc.com>
# Date 1426482163 -19800
#      Mon Mar 16 10:32:43 2015 +0530
# Node ID cdac951240a3f9a174733d5c26418d2414352e31
# Parent  6461985f33ac6fc5b205879bbb0f2a535226ca76
asm : chroma_hpp[4x2] for i420 avx2 - improved 138c->134c

diff -r 6461985f33ac -r cdac951240a3 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Sun Mar 15 11:58:32 2015 -0500
+++ b/source/common/x86/asm-primitives.cpp	Mon Mar 16 10:32:43 2015 +0530
@@ -1581,6 +1581,8 @@
         p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].filter_hpp = x265_interp_4tap_horiz_pp_32x32_avx2;
         p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].filter_hpp = x265_interp_4tap_horiz_pp_16x16_avx2;
 
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].filter_hpp = x265_interp_4tap_horiz_pp_4x2_avx2;
+
         p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].filter_hps = x265_interp_4tap_horiz_ps_32x32_avx2;
         p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].filter_hps = x265_interp_4tap_horiz_ps_16x16_avx2;
         p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].filter_hps = x265_interp_4tap_horiz_ps_4x4_avx2;
diff -r 6461985f33ac -r cdac951240a3 source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm	Sun Mar 15 11:58:32 2015 -0500
+++ b/source/common/x86/ipfilter8.asm	Mon Mar 16 10:32:43 2015 +0530
@@ -16693,3 +16693,35 @@
     movu             [r2],         xm3
 .end
    RET
+
+INIT_YMM avx2                              ; x86inc setup: the m# names below refer to 256-bit ymm registers, function gets the _avx2 suffix
+cglobal interp_4tap_horiz_pp_4x2, 4,6,4    ; 4-tap horizontal filter producing 4 pixels x 2 rows of 8-bit output; 4 args in regs, 6 GPRs, 4 vector regs
+    mov             r4d, r4m               ; fifth argument = index into tab_ChromaCoeff (presumably coeffIdx - confirm against the C prototype); 32-bit write zero-extends r4 for addressing
+%ifdef PIC
+    lea               r5,           [tab_ChromaCoeff]   ; PIC build: take the table address in a register first, then index off it
+    vpbroadcastd      m0,           [r5 + r4 * 4]       ; m0 = selected 4-byte table entry replicated into every dword
+%else
+    vpbroadcastd      m0,           [tab_ChromaCoeff + r4 * 4]   ; non-PIC build: index the table symbol directly, same result in m0
+%endif
+
+    vbroadcasti128    m1,           [tab_Tm]            ; m1 = first 16 bytes of tab_Tm copied into both 128-bit lanes
+
+    ; register map
+    ; m0 - interpolate coeff (one 4-byte tab_ChromaCoeff entry in every dword)
+    ; m1 - shuffle order table (same 16 bytes of tab_Tm in both lanes)
+    ; m2 - working data: row 0 in the low lane, row 1 in the high lane; xm3 - copy of the row 1 lane
+    ; Row 0-1: both output rows are filtered together, one row per 128-bit lane
+    movu              xm2,          [r0 - 1]            ; low lane  = 16 source bytes of row 0, starting one byte left of r0
+    vinserti128       m2,           m2,      [r0 + r1 - 1],     1   ; high lane = same window one row down (r1 used as the source row offset)
+    pshufb            m2,           m1                  ; reorder bytes per tab_Tm (table not visible here; presumably one 4-byte tap window per output pixel - confirm)
+    pmaddubsw         m2,           m0                  ; unsigned source bytes x signed coeff bytes, adjacent products summed into words
+    pmaddwd           m2,           [pw_1]              ; sum adjacent word pairs into dwords, 4 per row (assumes pw_1 is words of 1 - confirm)
+
+    packssdw          m2,           m2                  ; narrow dwords to words with signed saturation; each lane now holds its 4 results twice
+    pmulhrsw          m2,           [pw_512]            ; rounded scale; if pw_512 is words of 512 this equals (x + 32) >> 6 - confirm constant
+    vextracti128      xm3,          m2,     1           ; xm3 = high lane, i.e. the row 1 results
+    packuswb          xm2,          xm3                 ; clamp to unsigned bytes: bytes 0-7 come from row 0, bytes 8-15 from row 1
+
+    movd              [r2],         xm2                 ; dword 0 -> 4 output pixels of row 0
+    pextrd            [r2+r3],      xm2,     2          ; dword 2 (bytes 8-11) -> 4 output pixels of row 1 (r3 used as the destination row offset)
+    RET


More information about the x265-devel mailing list