[x265] [PATCH] asm: filter_vsp and filter_vss for Nx64, 32x48 in I422

Divya Manivannan divya at multicorewareinc.com
Mon Apr 27 11:54:54 CEST 2015


# HG changeset patch
# User Divya Manivannan <divya at multicorewareinc.com>
# Date 1430121437 -19800
#      Mon Apr 27 13:27:17 2015 +0530
# Node ID c5aff04e8ec9643b921205cbd603c08d1e6e1548
# Parent  4a7176bab7423d831675f0419b6470668bdbd919
asm: filter_vsp and filter_vss for Nx64, 32x48 in I422

Performance in cycles ("c"), listed per block size as before->after this patch:

filter_vsp[32x64, 16x64, 24x64, 8x64, 32x48]: 22410c->14840c, 11660c->7343c, 16721c->11383c, 5708c->3842c, 17755c->11026c

filter_vss[32x64, 16x64, 24x64, 8x64, 32x48]: 18358c->16895c, 9262c->8129c, 15231c->12887c, 4891c->4082c, 14188c->12137c

diff -r 4a7176bab742 -r c5aff04e8ec9 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Fri Apr 24 16:07:42 2015 -0500
+++ b/source/common/x86/asm-primitives.cpp	Mon Apr 27 13:27:17 2015 +0530
@@ -2278,6 +2278,11 @@
         p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].filter_vss = x265_interp_4tap_vert_ss_32x32_avx2;
         p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].filter_vss = x265_interp_4tap_vert_ss_8x4_avx2;
         p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].filter_vss = x265_interp_4tap_vert_ss_32x16_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].filter_vss = x265_interp_4tap_vert_ss_32x64_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].filter_vss = x265_interp_4tap_vert_ss_16x64_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].filter_vss = x265_interp_4tap_vert_ss_24x64_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].filter_vss = x265_interp_4tap_vert_ss_8x64_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].filter_vss = x265_interp_4tap_vert_ss_32x48_avx2;
 
         //i444 for chroma_vss
         p.chroma[X265_CSP_I444].pu[LUMA_4x4].filter_vss = x265_interp_4tap_vert_ss_4x4_avx2;
@@ -2457,6 +2462,11 @@
         p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].filter_vsp = x265_interp_4tap_vert_sp_8x4_avx2;
         p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].filter_vsp = x265_interp_4tap_vert_sp_16x8_avx2;
         p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].filter_vsp = x265_interp_4tap_vert_sp_32x16_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].filter_vsp = x265_interp_4tap_vert_sp_32x64_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].filter_vsp = x265_interp_4tap_vert_sp_16x64_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].filter_vsp = x265_interp_4tap_vert_sp_24x64_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].filter_vsp = x265_interp_4tap_vert_sp_8x64_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].filter_vsp = x265_interp_4tap_vert_sp_32x48_avx2;
 
         //i444 for chroma_vsp
         p.chroma[X265_CSP_I444].pu[LUMA_4x4].filter_vsp = x265_interp_4tap_vert_sp_4x4_avx2;
diff -r 4a7176bab742 -r c5aff04e8ec9 source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm	Fri Apr 24 16:07:42 2015 -0500
+++ b/source/common/x86/ipfilter8.asm	Mon Apr 27 13:27:17 2015 +0530
@@ -15980,6 +15980,14 @@
     FILTER_VER_CHROMA_S_AVX2_NxN 16, 32, ss
     FILTER_VER_CHROMA_S_AVX2_NxN 24, 32, ss
     FILTER_VER_CHROMA_S_AVX2_NxN 32, 32, ss
+    FILTER_VER_CHROMA_S_AVX2_NxN 16, 64, sp
+    FILTER_VER_CHROMA_S_AVX2_NxN 24, 64, sp
+    FILTER_VER_CHROMA_S_AVX2_NxN 32, 64, sp
+    FILTER_VER_CHROMA_S_AVX2_NxN 32, 48, sp
+    FILTER_VER_CHROMA_S_AVX2_NxN 32, 48, ss
+    FILTER_VER_CHROMA_S_AVX2_NxN 16, 64, ss
+    FILTER_VER_CHROMA_S_AVX2_NxN 24, 64, ss
+    FILTER_VER_CHROMA_S_AVX2_NxN 32, 64, ss
 
 %macro PROCESS_CHROMA_S_AVX2_W8_4R 1
     movu            xm0, [r0]                       ; m0 = row 0
@@ -17160,8 +17168,10 @@
 
     FILTER_VER_CHROMA_S_AVX2_8xN sp, 16
     FILTER_VER_CHROMA_S_AVX2_8xN sp, 32
+    FILTER_VER_CHROMA_S_AVX2_8xN sp, 64
     FILTER_VER_CHROMA_S_AVX2_8xN ss, 16
     FILTER_VER_CHROMA_S_AVX2_8xN ss, 32
+    FILTER_VER_CHROMA_S_AVX2_8xN ss, 64
 
 %macro FILTER_VER_CHROMA_S_AVX2_32x24 1
 INIT_YMM avx2
diff -r 4a7176bab742 -r c5aff04e8ec9 source/common/x86/ipfilter8.h
--- a/source/common/x86/ipfilter8.h	Fri Apr 24 16:07:42 2015 -0500
+++ b/source/common/x86/ipfilter8.h	Mon Apr 27 13:27:17 2015 +0530
@@ -729,9 +729,13 @@
 CHROMA_422_FILTERS(_sse4);
 CHROMA_422_FILTERS(_avx2);
 CHROMA_422_SP_FILTERS(_sse2);
+CHROMA_422_SP_FILTERS(_avx2);
 CHROMA_422_SP_FILTERS_SSE4(_sse4);
+CHROMA_422_SP_FILTERS_SSE4(_avx2);
 CHROMA_422_SS_FILTERS(_sse2);
+CHROMA_422_SS_FILTERS(_avx2);
 CHROMA_422_SS_FILTERS_SSE4(_sse4);
+CHROMA_422_SS_FILTERS_SSE4(_avx2);
 CHROMA_422_P2S_FILTERS_SSE4(_sse4);
 CHROMA_422_P2S_FILTERS_SSSE3(_ssse3);
 CHROMA_422_P2S_FILTERS_AVX2(_avx2);


More information about the x265-devel mailing list