[x265] [PATCH] asm: filter_vsp, filter_vss for 16x24 in avx2
Divya Manivannan
divya at multicorewareinc.com
Tue Apr 28 11:17:46 CEST 2015
# HG changeset patch
# User Divya Manivannan <divya at multicorewareinc.com>
# Date 1430211431 -19800
# Tue Apr 28 14:27:11 2015 +0530
# Node ID b143e26c2846d917c0e911708907b0e3e85a368c
# Parent a5a8d76ef3df4c85510cce653d52f6dba9307ff2
asm: filter_vsp, filter_vss for 16x24 in avx2
filter_vsp[16x24]: 4357c->2865c
filter_vss[16x24]: 3545c->3171c
diff -r a5a8d76ef3df -r b143e26c2846 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Tue Apr 28 13:30:29 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp Tue Apr 28 14:27:11 2015 +0530
@@ -2286,6 +2286,7 @@
p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].filter_vss = x265_interp_4tap_vert_ss_8x12_avx2;
p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].filter_vss = x265_interp_4tap_vert_ss_6x16_avx2;
p.chroma[X265_CSP_I422].pu[CHROMA_422_2x16].filter_vss = x265_interp_4tap_vert_ss_2x16_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].filter_vss = x265_interp_4tap_vert_ss_16x24_avx2;
//i444 for chroma_vss
p.chroma[X265_CSP_I444].pu[LUMA_4x4].filter_vss = x265_interp_4tap_vert_ss_4x4_avx2;
@@ -2473,6 +2474,7 @@
p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].filter_vsp = x265_interp_4tap_vert_sp_8x12_avx2;
p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].filter_vsp = x265_interp_4tap_vert_sp_6x16_avx2;
p.chroma[X265_CSP_I422].pu[CHROMA_422_2x16].filter_vsp = x265_interp_4tap_vert_sp_2x16_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].filter_vsp = x265_interp_4tap_vert_sp_16x24_avx2;
//i444 for chroma_vsp
p.chroma[X265_CSP_I444].pu[LUMA_4x4].filter_vsp = x265_interp_4tap_vert_sp_4x4_avx2;
diff -r a5a8d76ef3df -r b143e26c2846 source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm Tue Apr 28 13:30:29 2015 +0530
+++ b/source/common/x86/ipfilter8.asm Tue Apr 28 14:27:11 2015 +0530
@@ -17422,10 +17422,10 @@
FILTER_VER_CHROMA_S_AVX2_8xN ss, 32
FILTER_VER_CHROMA_S_AVX2_8xN ss, 64
-%macro FILTER_VER_CHROMA_S_AVX2_32x24 1
-INIT_YMM avx2
-%if ARCH_X86_64 == 1
-cglobal interp_4tap_vert_%1_32x24, 4, 10, 10
+%macro FILTER_VER_CHROMA_S_AVX2_Nx24 2
+%if ARCH_X86_64 == 1
+INIT_YMM avx2
+cglobal interp_4tap_vert_%1_%2x24, 4, 10, 10
mov r4d, r4m
shl r4d, 6
add r1d, r1d
@@ -17445,7 +17445,7 @@
add r3d, r3d
%endif
lea r6, [r3 * 3]
- mov r9d, 4
+ mov r9d, %2 / 8
.loopW:
PROCESS_CHROMA_S_AVX2_W8_16R %1
%ifidn %1,sp
@@ -17457,13 +17457,13 @@
dec r9d
jnz .loopW
%ifidn %1,sp
- lea r2, [r8 + r3 * 4 - 24]
-%else
- lea r2, [r8 + r3 * 4 - 48]
-%endif
- lea r0, [r7 - 48]
+ lea r2, [r8 + r3 * 4 - %2 + 8]
+%else
+ lea r2, [r8 + r3 * 4 - 2 * %2 + 16]
+%endif
+ lea r0, [r7 - 2 * %2 + 16]
mova m7, m9
- mov r9d, 4
+ mov r9d, %2 / 8
.loop:
PROCESS_CHROMA_S_AVX2_W8_8R %1
%ifidn %1,sp
@@ -17478,8 +17478,10 @@
%endif
%endmacro
- FILTER_VER_CHROMA_S_AVX2_32x24 sp
- FILTER_VER_CHROMA_S_AVX2_32x24 ss
+ FILTER_VER_CHROMA_S_AVX2_Nx24 sp, 32
+ FILTER_VER_CHROMA_S_AVX2_Nx24 sp, 16
+ FILTER_VER_CHROMA_S_AVX2_Nx24 ss, 32
+ FILTER_VER_CHROMA_S_AVX2_Nx24 ss, 16
%macro FILTER_VER_CHROMA_S_AVX2_2x8 1
INIT_YMM avx2
More information about the x265-devel mailing list