[x265] [PATCH] asm-avx2: filter_vpp[8x32], filter_vps[8x32]: 1028c->937c, 902c->860c
Divya Manivannan
divya at multicorewareinc.com
Thu Mar 5 11:25:06 CET 2015
# HG changeset patch
# User Divya Manivannan <divya at multicorewareinc.com>
# Date 1425551071 -19800
# Thu Mar 05 15:54:31 2015 +0530
# Node ID be5f8a18fd5038212147f1446ea4f4dd309651e7
# Parent dc38f1755137e00af00e358bffa9dedc710a2916
asm-avx2: filter_vpp[8x32], filter_vps[8x32]: 1028c->937c, 902c->860c
diff -r dc38f1755137 -r be5f8a18fd50 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Thu Mar 05 15:08:04 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp Thu Mar 05 15:54:31 2015 +0530
@@ -1644,6 +1644,7 @@
p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].filter_vpp = x265_interp_4tap_vert_pp_8x4_avx2;
p.chroma[X265_CSP_I420].pu[CHROMA_420_8x6].filter_vpp = x265_interp_4tap_vert_pp_8x6_avx2;
p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].filter_vpp = x265_interp_4tap_vert_pp_8x16_avx2;
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].filter_vpp = x265_interp_4tap_vert_pp_8x32_avx2;
p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].filter_vpp = x265_interp_4tap_vert_pp_16x8_avx2;
p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].filter_vpp = x265_interp_4tap_vert_pp_16x4_avx2;
@@ -1655,6 +1656,7 @@
p.chroma[X265_CSP_I420].pu[CHROMA_420_8x6].filter_vps = x265_interp_4tap_vert_ps_8x6_avx2;
p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].filter_vps = x265_interp_4tap_vert_ps_8x8_avx2;
p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].filter_vps = x265_interp_4tap_vert_ps_8x16_avx2;
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].filter_vps = x265_interp_4tap_vert_ps_8x32_avx2;
p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].filter_vps = x265_interp_4tap_vert_ps_16x8_avx2;
p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].filter_vps = x265_interp_4tap_vert_ps_16x4_avx2;
diff -r dc38f1755137 -r be5f8a18fd50 source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm Thu Mar 05 15:08:04 2015 +0530
+++ b/source/common/x86/ipfilter8.asm Thu Mar 05 15:54:31 2015 +0530
@@ -4499,6 +4499,38 @@
FILTER_VER_CHROMA_AVX2_8x16 pp
FILTER_VER_CHROMA_AVX2_8x16 ps
+%macro FILTER_VER_CHROMA_AVX2_8x32 1 ; %1 = pp or ps; emits interp_4tap_vert_%1_8x32 (AVX2), built from two PROCESS_CHROMA_AVX2_W8_16R passes
+INIT_YMM avx2
+cglobal interp_4tap_vert_%1_8x32, 4, 7, 8
+ mov r4d, r4m ; fetch the 5th argument (not auto-loaded: cglobal loads only 4) - presumably the coefficient index, confirm against the C prototype
+ shl r4d, 6 ; r4 = argument * 64, used as a byte offset into tab_ChromaCoeffVer_32 (64 bytes per entry assumed - TODO confirm against the table definition)
+
+%ifdef PIC
+ lea r5, [tab_ChromaCoeffVer_32] ; PIC build: take the table base first, then add the offset separately
+ add r5, r4 ; r5 = address of the selected table entry
+%else
+ lea r5, [tab_ChromaCoeffVer_32 + r4] ; non-PIC build: r5 = address of the selected table entry in one step
+%endif
+
+ lea r4, [r1 * 3] ; r4 = 3 * r1 (r4 is reused; r1 looks like the source stride - verify against caller)
+ sub r0, r1 ; step r0 back by one r1 - presumably so the 4-tap window starts one row above the current row
+%ifidn %1,pp
+ mova m7, [pw_512] ; pp variant: m7 = pw_512 constant, consumed by the row macro (presumably for rounding - confirm there)
+%else
+ add r3d, r3d ; ps variant: double r3 - presumably converts a stride in 16-bit elements to bytes, confirm
+ mova m7, [pw_2000] ; ps variant: m7 = pw_2000 constant, consumed by the row macro (presumably an offset - confirm there)
+%endif
+ lea r6, [r3 * 3] ; r6 = 3 * r3 (r3 looks like the destination stride - verify against caller)
+%rep 2 ; expand the two lines below twice: 2 passes of the 16R macro for the 32-row block
+ PROCESS_CHROMA_AVX2_W8_16R %1
+ lea r2, [r2 + r3 * 4] ; advance r2 by 4 * r3 after each pass; NOTE(review): any other r0/r2 advancement must happen inside the 16R macro - confirm
+%endrep
+ RET
+%endmacro
+
+FILTER_VER_CHROMA_AVX2_8x32 pp
+FILTER_VER_CHROMA_AVX2_8x32 ps
+
%macro PROCESS_CHROMA_AVX2_W8_4R 0
movq xm1, [r0] ; m1 = row 0
movq xm2, [r0 + r1] ; m2 = row 1
More information about the x265-devel
mailing list