[x265] [PATCH] asm: interp_4tap_vert_X[i422][16xN, 32xN, 12x32, 24x64] avx2 10bit code
rajesh at multicorewareinc.com
rajesh at multicorewareinc.com
Tue Jun 9 13:38:47 CEST 2015
# HG changeset patch
# User Rajesh Paulraj <rajesh at multicorewareinc.com>
# Date 1433847184 -19800
# Tue Jun 09 16:23:04 2015 +0530
# Node ID 8f9f36c1fd4799cf31a3fe99ffcf8f83d4ee2d45
# Parent 97e1a9097a80ac3c290ca7eae3fe8ddb5b3029fd
asm: interp_4tap_vert_X[i422][16xN, 32xN, 12x32, 24x64] avx2 10bit code
diff -r 97e1a9097a80 -r 8f9f36c1fd47 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Tue Jun 09 15:18:07 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp Tue Jun 09 16:23:04 2015 +0530
@@ -1758,6 +1758,50 @@
p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].filter_vsp = x265_interp_4tap_vert_sp_32x16_avx2;
p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].filter_vsp = x265_interp_4tap_vert_sp_32x24_avx2;
p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].filter_vsp = x265_interp_4tap_vert_sp_32x32_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].filter_vpp = x265_interp_4tap_vert_pp_12x32_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].filter_vps = x265_interp_4tap_vert_ps_12x32_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].filter_vss = x265_interp_4tap_vert_ss_12x32_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].filter_vsp = x265_interp_4tap_vert_sp_12x32_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].filter_vpp = x265_interp_4tap_vert_pp_16x8_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].filter_vpp = x265_interp_4tap_vert_pp_16x16_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].filter_vpp = x265_interp_4tap_vert_pp_16x24_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].filter_vpp = x265_interp_4tap_vert_pp_16x32_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].filter_vpp = x265_interp_4tap_vert_pp_16x64_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].filter_vps = x265_interp_4tap_vert_ps_16x8_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].filter_vps = x265_interp_4tap_vert_ps_16x16_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].filter_vps = x265_interp_4tap_vert_ps_16x24_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].filter_vps = x265_interp_4tap_vert_ps_16x32_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].filter_vps = x265_interp_4tap_vert_ps_16x64_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].filter_vss = x265_interp_4tap_vert_ss_16x8_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].filter_vss = x265_interp_4tap_vert_ss_16x16_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].filter_vss = x265_interp_4tap_vert_ss_16x24_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].filter_vss = x265_interp_4tap_vert_ss_16x32_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].filter_vss = x265_interp_4tap_vert_ss_16x64_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].filter_vsp = x265_interp_4tap_vert_sp_16x8_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].filter_vsp = x265_interp_4tap_vert_sp_16x16_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].filter_vsp = x265_interp_4tap_vert_sp_16x24_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].filter_vsp = x265_interp_4tap_vert_sp_16x32_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].filter_vsp = x265_interp_4tap_vert_sp_16x64_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].filter_vpp = x265_interp_4tap_vert_pp_24x64_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].filter_vps = x265_interp_4tap_vert_ps_24x64_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].filter_vss = x265_interp_4tap_vert_ss_24x64_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].filter_vsp = x265_interp_4tap_vert_sp_24x64_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].filter_vpp = x265_interp_4tap_vert_pp_32x16_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].filter_vpp = x265_interp_4tap_vert_pp_32x32_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].filter_vpp = x265_interp_4tap_vert_pp_32x48_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].filter_vpp = x265_interp_4tap_vert_pp_32x64_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].filter_vps = x265_interp_4tap_vert_ps_32x16_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].filter_vps = x265_interp_4tap_vert_ps_32x32_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].filter_vps = x265_interp_4tap_vert_ps_32x48_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].filter_vps = x265_interp_4tap_vert_ps_32x64_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].filter_vss = x265_interp_4tap_vert_ss_32x16_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].filter_vss = x265_interp_4tap_vert_ss_32x32_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].filter_vss = x265_interp_4tap_vert_ss_32x48_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].filter_vss = x265_interp_4tap_vert_ss_32x64_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].filter_vsp = x265_interp_4tap_vert_sp_32x16_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].filter_vsp = x265_interp_4tap_vert_sp_32x32_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].filter_vsp = x265_interp_4tap_vert_sp_32x48_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].filter_vsp = x265_interp_4tap_vert_sp_32x64_avx2;
p.chroma[X265_CSP_I444].pu[LUMA_48x64].filter_vpp = x265_interp_4tap_vert_pp_48x64_avx2;
p.chroma[X265_CSP_I444].pu[LUMA_48x64].filter_vps = x265_interp_4tap_vert_ps_48x64_avx2;
p.chroma[X265_CSP_I444].pu[LUMA_48x64].filter_vss = x265_interp_4tap_vert_ss_48x64_avx2;
diff -r 97e1a9097a80 -r 8f9f36c1fd47 source/common/x86/ipfilter16.asm
--- a/source/common/x86/ipfilter16.asm Tue Jun 09 15:18:07 2015 +0530
+++ b/source/common/x86/ipfilter16.asm Tue Jun 09 16:23:04 2015 +0530
@@ -4899,26 +4899,34 @@
FILTER_VER_CHROMA_W16_16xN_avx2 4, pp, 8
FILTER_VER_CHROMA_W16_16xN_avx2 8, pp, 8
FILTER_VER_CHROMA_W16_16xN_avx2 12, pp, 8
+ FILTER_VER_CHROMA_W16_16xN_avx2 24, pp, 8
FILTER_VER_CHROMA_W16_16xN_avx2 16, pp, 8
FILTER_VER_CHROMA_W16_16xN_avx2 32, pp, 8
+ FILTER_VER_CHROMA_W16_16xN_avx2 64, pp, 8
FILTER_VER_CHROMA_W16_16xN_avx2 4, ps, 8
FILTER_VER_CHROMA_W16_16xN_avx2 8, ps, 8
FILTER_VER_CHROMA_W16_16xN_avx2 12, ps, 8
+ FILTER_VER_CHROMA_W16_16xN_avx2 24, ps, 8
FILTER_VER_CHROMA_W16_16xN_avx2 16, ps, 8
FILTER_VER_CHROMA_W16_16xN_avx2 32, ps, 8
+ FILTER_VER_CHROMA_W16_16xN_avx2 64, ps, 8
FILTER_VER_CHROMA_W16_16xN_avx2 4, ss, 7
FILTER_VER_CHROMA_W16_16xN_avx2 8, ss, 7
FILTER_VER_CHROMA_W16_16xN_avx2 12, ss, 7
+ FILTER_VER_CHROMA_W16_16xN_avx2 24, ss, 7
FILTER_VER_CHROMA_W16_16xN_avx2 16, ss, 7
FILTER_VER_CHROMA_W16_16xN_avx2 32, ss, 7
+ FILTER_VER_CHROMA_W16_16xN_avx2 64, ss, 7
FILTER_VER_CHROMA_W16_16xN_avx2 4, sp, 8
FILTER_VER_CHROMA_W16_16xN_avx2 8, sp, 8
FILTER_VER_CHROMA_W16_16xN_avx2 12, sp, 8
+ FILTER_VER_CHROMA_W16_16xN_avx2 24, sp, 8
FILTER_VER_CHROMA_W16_16xN_avx2 16, sp, 8
FILTER_VER_CHROMA_W16_16xN_avx2 32, sp, 8
+ FILTER_VER_CHROMA_W16_16xN_avx2 64, sp, 8
%macro PROCESS_CHROMA_VERT_W32_2R 0
movu m1, [r0]
@@ -5100,21 +5108,29 @@
FILTER_VER_CHROMA_W16_32xN_avx2 16, pp, 15
FILTER_VER_CHROMA_W16_32xN_avx2 24, pp, 15
FILTER_VER_CHROMA_W16_32xN_avx2 32, pp, 15
+ FILTER_VER_CHROMA_W16_32xN_avx2 48, pp, 15
+ FILTER_VER_CHROMA_W16_32xN_avx2 64, pp, 15
FILTER_VER_CHROMA_W16_32xN_avx2 8, ps, 15
FILTER_VER_CHROMA_W16_32xN_avx2 16, ps, 15
FILTER_VER_CHROMA_W16_32xN_avx2 24, ps, 15
FILTER_VER_CHROMA_W16_32xN_avx2 32, ps, 15
+ FILTER_VER_CHROMA_W16_32xN_avx2 48, ps, 15
+ FILTER_VER_CHROMA_W16_32xN_avx2 64, ps, 15
FILTER_VER_CHROMA_W16_32xN_avx2 8, ss, 15
FILTER_VER_CHROMA_W16_32xN_avx2 16, ss, 15
FILTER_VER_CHROMA_W16_32xN_avx2 24, ss, 15
FILTER_VER_CHROMA_W16_32xN_avx2 32, ss, 15
+ FILTER_VER_CHROMA_W16_32xN_avx2 48, ss, 15
+ FILTER_VER_CHROMA_W16_32xN_avx2 64, ss, 15
FILTER_VER_CHROMA_W16_32xN_avx2 8, sp, 15
FILTER_VER_CHROMA_W16_32xN_avx2 16, sp, 15
FILTER_VER_CHROMA_W16_32xN_avx2 24, sp, 15
FILTER_VER_CHROMA_W16_32xN_avx2 32, sp, 15
+ FILTER_VER_CHROMA_W16_32xN_avx2 48, sp, 15
+ FILTER_VER_CHROMA_W16_32xN_avx2 64, sp, 15
;-----------------------------------------------------------------------------------------------------------------
; void interp_4tap_vert(int16_t *src, intptr_t srcStride, int16_t *dst, intptr_t dstStride, int coeffIdx)
@@ -5202,6 +5218,10 @@
FILTER_VER_CHROMA_W16_12xN_avx2 16, sp, 8
FILTER_VER_CHROMA_W16_12xN_avx2 16, ps, 8
FILTER_VER_CHROMA_W16_12xN_avx2 16, pp, 8
+ FILTER_VER_CHROMA_W16_12xN_avx2 32, ss, 7
+ FILTER_VER_CHROMA_W16_12xN_avx2 32, sp, 8
+ FILTER_VER_CHROMA_W16_12xN_avx2 32, ps, 8
+ FILTER_VER_CHROMA_W16_12xN_avx2 32, pp, 8
%macro PROCESS_CHROMA_VERT_W24_2R 0
movu m1, [r0]
@@ -5383,6 +5403,10 @@
FILTER_VER_CHROMA_W16_24xN_avx2 32, sp, 15
FILTER_VER_CHROMA_W16_24xN_avx2 32, ps, 15
FILTER_VER_CHROMA_W16_24xN_avx2 32, pp, 15
+ FILTER_VER_CHROMA_W16_24xN_avx2 64, ss, 15
+ FILTER_VER_CHROMA_W16_24xN_avx2 64, sp, 15
+ FILTER_VER_CHROMA_W16_24xN_avx2 64, ps, 15
+ FILTER_VER_CHROMA_W16_24xN_avx2 64, pp, 15
;-----------------------------------------------------------------------------------------------------------------
; void interp_4tap_vert(int16_t *src, intptr_t srcStride, int16_t *dst, intptr_t dstStride, int coeffIdx)
More information about the x265-devel mailing list