[x265] [PATCH] asm : chroma_hpp[4x2] for i420 avx2 - improved 138c->134c
aasaipriya at multicorewareinc.com
aasaipriya at multicorewareinc.com
Mon Mar 16 06:02:55 CET 2015
# HG changeset patch
# User Aasaipriya Chandran <aasaipriya at multicorewareinc.com>
# Date 1426482163 -19800
# Mon Mar 16 10:32:43 2015 +0530
# Node ID cdac951240a3f9a174733d5c26418d2414352e31
# Parent 6461985f33ac6fc5b205879bbb0f2a535226ca76
asm : chroma_hpp[4x2] for i420 avx2 - improved 138c->134c
diff -r 6461985f33ac -r cdac951240a3 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Sun Mar 15 11:58:32 2015 -0500
+++ b/source/common/x86/asm-primitives.cpp Mon Mar 16 10:32:43 2015 +0530
@@ -1581,6 +1581,8 @@
p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].filter_hpp = x265_interp_4tap_horiz_pp_32x32_avx2;
p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].filter_hpp = x265_interp_4tap_horiz_pp_16x16_avx2;
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].filter_hpp = x265_interp_4tap_horiz_pp_4x2_avx2;
+
p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].filter_hps = x265_interp_4tap_horiz_ps_32x32_avx2;
p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].filter_hps = x265_interp_4tap_horiz_ps_16x16_avx2;
p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].filter_hps = x265_interp_4tap_horiz_ps_4x4_avx2;
diff -r 6461985f33ac -r cdac951240a3 source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm Sun Mar 15 11:58:32 2015 -0500
+++ b/source/common/x86/ipfilter8.asm Mon Mar 16 10:32:43 2015 +0530
@@ -16693,3 +16693,35 @@
movu [r2], xm3
.end
RET
+
+INIT_YMM avx2 ; x86inc: assemble the following for AVX2 (m# = 256-bit regs, xm# = their low 128 bits)
+cglobal interp_4tap_horiz_pp_4x2, 4,6,4 ; 4-tap horizontal filter, 2 rows x 4 output bytes; x86inc counts presumably 4 args in regs, 6 GPRs, 4 vector regs
+ mov r4d, r4m ; fetch the 5th argument (table index); the 32-bit write also zeroes the upper half of r4
+%ifdef PIC
+ lea r5, [tab_ChromaCoeff] ; PIC: take the table base first, an indexed access cannot be RIP-relative
+ vpbroadcastd m0, [r5 + r4 * 4] ; replicate the selected 4-byte entry into every dword of m0
+%else
+ vpbroadcastd m0, [tab_ChromaCoeff + r4 * 4] ; same load, table addressed directly
+%endif
+
+ vbroadcasti128 m1, [tab_Tm] ; copy the same 16-byte shuffle mask into both 128-bit lanes
+
+ ; register map
+ ; m0 - interpolate coeff (one 4-byte table entry repeated in every dword)
+ ; m1 - shuffle order table (tab_Tm, identical in both lanes)
+ ; m2 - working data: row 0 in the low lane, row 1 in the high lane; xm3 - row 1 copy for the final pack
+ ; Row 0-1: r0 = source pointer, r1 = source row stride, r2 = dest pointer, r3 = dest row stride
+ movu xm2, [r0 - 1] ; row 0: 16 unaligned bytes starting one byte left of r0 (NOTE(review): assumes the caller's buffer makes a 16-byte read safe)
+ vinserti128 m2, m2, [r0 + r1 - 1], 1 ; row 1: same window one stride down, placed in the high lane
+ pshufb m2, m1 ; reorder bytes per tab_Tm - presumably one 4-byte tap window per dword; table contents not shown here
+ pmaddubsw m2, m0 ; unsigned source byte * signed coeff byte, adjacent pairs summed into signed words
+ pmaddwd m2, [pw_1] ; word * constant with adjacent pairs summed into dwords - a plain pair sum if pw_1 is all ones (TODO confirm)
+ ; narrow to words, scale with rounding, then narrow to bytes
+ packssdw m2, m2 ; per lane: 4 dwords -> 4 signed-saturated words, duplicated in both halves of the lane
+ pmulhrsw m2, [pw_512] ; (x * c + 0x4000) >> 15 per word; equals (x + 32) >> 6 if pw_512 is all 512 (TODO confirm)
+ vextracti128 xm3, m2, 1 ; pull row 1 (high lane) down into xm3
+ packuswb xm2, xm3 ; words -> unsigned-saturated bytes: bytes 0-7 come from row 0, bytes 8-15 from row 1
+ ; each row's 4 results sit at the start of its 8-byte half, so dword 0 = row 0 and dword 2 = row 1
+ movd [r2], xm2 ; write 4 bytes of row 0
+ pextrd [r2+r3], xm2, 2 ; write 4 bytes of row 1, one dest stride below
+ RET
More information about the x265-devel mailing list.