[x265] [PATCH 2 of 2] Adding asm function declarations and initializations for luma vss filter functions

nabajit at multicorewareinc.com nabajit at multicorewareinc.com
Fri Nov 15 13:18:38 CET 2013


# HG changeset patch
# User Nabajit Deka
# Date 1384517887 -19800
#      Fri Nov 15 17:48:07 2013 +0530
# Node ID b72e5604ca2e496d8c1e02bbff2b92c25718dc26
# Parent  351229c80f52d580d24853f64f79e42d47617f87
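Adding asm function declarations and initializations for luma vss filter functions.

ipfilter8.h: declare x265_interp_8tap_vert_ss_<W>x<H>_sse2 for the 25 luma block sizes via new SETUP_LUMA_SS_FUNC_DEF / LUMA_SS_FILTERS macros.
asm-primitives.cpp: assign those functions to p.luma_vss[LUMA_<W>x<H>] via LUMA_SS_FILTERS(_sse2).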

diff -r 351229c80f52 -r b72e5604ca2e source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Fri Nov 15 17:46:57 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp	Fri Nov 15 17:48:07 2013 +0530
@@ -264,6 +264,9 @@
 #define SETUP_LUMA_SP_FUNC_DEF(W, H, cpu) \
     p.luma_vsp[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_sp_ ## W ## x ## H ## cpu;
 
+#define SETUP_LUMA_SS_FUNC_DEF(W, H, cpu) \
+    p.luma_vss[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_ss_ ## W ## x ## H ## cpu;
+
 #define SETUP_LUMA_BLOCKCOPY_FUNC_DEF(W, H, cpu) \
     p.luma_copy_pp[LUMA_ ## W ## x ## H] = x265_blockcopy_pp_ ## W ## x ## H ## cpu;
 
@@ -321,6 +324,33 @@
     SETUP_LUMA_SP_FUNC_DEF(64, 16, cpu); \
     SETUP_LUMA_SP_FUNC_DEF(16, 64, cpu);
 
+#define LUMA_SS_FILTERS(cpu) \
+    SETUP_LUMA_SS_FUNC_DEF(4,   4, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(8,   8, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(8,   4, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(4,   8, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(16, 16, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(16,  8, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(8,  16, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(16, 12, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(12, 16, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(16,  4, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(4,  16, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(32, 32, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(32, 16, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(16, 32, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(32, 24, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(24, 32, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(32,  8, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(8,  32, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(64, 64, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(64, 32, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(32, 64, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(64, 48, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(48, 64, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(64, 16, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(16, 64, cpu);
+
 #define LUMA_BLOCKCOPY(cpu) \
     SETUP_LUMA_BLOCKCOPY_FUNC_DEF(4,   4, cpu); \
     SETUP_LUMA_BLOCKCOPY_FUNC_DEF(8,   8, cpu); \
@@ -434,6 +464,7 @@
         LUMA_BLOCKCOPY(_sse2);
 
         CHROMA_SS_FILTERS(_sse2);
+        LUMA_SS_FILTERS(_sse2);
 
         // This function pointer initialization is temporary will be removed
         // later with macro definitions.  It is used to avoid linker errors
diff -r 351229c80f52 -r b72e5604ca2e source/common/x86/ipfilter8.h
--- a/source/common/x86/ipfilter8.h	Fri Nov 15 17:46:57 2013 +0530
+++ b/source/common/x86/ipfilter8.h	Fri Nov 15 17:48:07 2013 +0530
@@ -119,6 +119,36 @@
     SETUP_LUMA_SP_FUNC_DEF(64, 16, cpu); \
     SETUP_LUMA_SP_FUNC_DEF(16, 64, cpu);
 
+#define SETUP_LUMA_SS_FUNC_DEF(W, H, cpu) \
+    void x265_interp_8tap_vert_ss_ ## W ## x ## H ## cpu(int16_t * src, intptr_t srcStride, int16_t * dst, intptr_t dstStride, int coeffIdx);
+
+#define LUMA_SS_FILTERS(cpu) \
+    SETUP_LUMA_SS_FUNC_DEF(4,   4, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(8,   8, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(8,   4, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(4,   8, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(16, 16, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(16,  8, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(8,  16, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(16, 12, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(12, 16, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(16,  4, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(4,  16, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(32, 32, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(32, 16, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(16, 32, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(32, 24, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(24, 32, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(32,  8, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(8,  32, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(64, 64, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(64, 32, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(32, 64, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(64, 48, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(48, 64, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(64, 16, cpu); \
+    SETUP_LUMA_SS_FUNC_DEF(16, 64, cpu);
+
   #define SETUP_CHROMA_SP_FUNC_DEF(W, H, cpu) \
     void x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu(int16_t * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int coeffIdx);
 
@@ -180,6 +210,7 @@
 CHROMA_SS_FILTERS(_sse2);
 LUMA_FILTERS(_sse4);
 LUMA_SP_FILTERS(_ssse3);
+LUMA_SS_FILTERS(_sse2);
 
 void x265_interp_8tap_hv_pp_8x8_ssse3(pixel * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int idxX, int idxY);
 void x265_interp_8tap_v_ss_sse2(int16_t *src, intptr_t srcStride, int16_t *dst, intptr_t dstStride, int width, int height, const int coefIdx);
@@ -194,10 +225,12 @@
 #undef SETUP_CHROMA_SS_FUNC_DEF
 #undef SETUP_LUMA_FUNC_DEF
 #undef SETUP_LUMA_SP_FUNC_DEF
+#undef SETUP_LUMA_SS_FUNC_DEF
 #undef CHROMA_FILTERS
 #undef CHROMA_SP_FILTERS
 #undef CHROMA_SS_FILTERS
 #undef LUMA_FILTERS
 #undef LUMA_SP_FILTERS
+#undef LUMA_SS_FILTERS
 
 #endif // ifndef X265_MC_H


More information about the x265-devel mailing list