[x265] [PATCH] asm: fix for illegal instruction usage in ipfilter

Murugan Vairavel murugan at multicorewareinc.com
Mon Feb 17 08:43:51 CET 2014


# HG changeset patch
# User Murugan Vairavel <murugan at multicorewareinc.com>
# Date 1392623018 -19800
#      Mon Feb 17 13:13:38 2014 +0530
# Node ID 314cd7999ba9740c25fad0df91988979650bece6
# Parent  ce96cdb390fe26aee6effa731e51303c1d9056b0
asm:  fix for illegal instruction usage in ipfilter

diff -r ce96cdb390fe -r 314cd7999ba9 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Sun Feb 16 22:47:32 2014 -0600
+++ b/source/common/x86/asm-primitives.cpp	Mon Feb 17 13:13:38 2014 +0530
@@ -292,17 +292,12 @@
     SETUP_CHROMA_FUNC_DEF_444(32, 8, cpu); \
     SETUP_CHROMA_FUNC_DEF_444(8, 32, cpu);
 
-#define CHROMA_SP_FILTERS_420(cpu) \
+#define CHROMA_SP_FILTERS_SSE4_420(cpu) \
     SETUP_CHROMA_SP_FUNC_DEF_420(4, 4, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF_420(4, 2, cpu); \
-    SETUP_CHROMA_SP_FUNC_DEF_420(8, 8, cpu); \
-    SETUP_CHROMA_SP_FUNC_DEF_420(8, 4, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF_420(4, 8, cpu); \
-    SETUP_CHROMA_SP_FUNC_DEF_420(8, 6, cpu); \
-    SETUP_CHROMA_SP_FUNC_DEF_420(8, 2, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF_420(16, 16, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF_420(16, 8, cpu); \
-    SETUP_CHROMA_SP_FUNC_DEF_420(8, 16, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF_420(16, 12, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF_420(12, 16, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF_420(16, 4, cpu); \
@@ -312,17 +307,21 @@
     SETUP_CHROMA_SP_FUNC_DEF_420(16, 32, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF_420(32, 24, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF_420(24, 32, cpu); \
-    SETUP_CHROMA_SP_FUNC_DEF_420(32, 8, cpu); \
+    SETUP_CHROMA_SP_FUNC_DEF_420(32, 8, cpu);
+
+#define CHROMA_SP_FILTERS_420(cpu) \
+    SETUP_CHROMA_SP_FUNC_DEF_420(8, 2, cpu); \
+    SETUP_CHROMA_SP_FUNC_DEF_420(8, 4, cpu); \
+    SETUP_CHROMA_SP_FUNC_DEF_420(8, 6, cpu); \
+    SETUP_CHROMA_SP_FUNC_DEF_420(8, 8, cpu); \
+    SETUP_CHROMA_SP_FUNC_DEF_420(8, 16, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF_420(8, 32, cpu);
 
-#define CHROMA_SP_FILTERS_444(cpu) \
+#define CHROMA_SP_FILTERS_SSE4_444(cpu) \
     SETUP_CHROMA_SP_FUNC_DEF_444(4, 4, cpu); \
-    SETUP_CHROMA_SP_FUNC_DEF_444(8, 8, cpu); \
-    SETUP_CHROMA_SP_FUNC_DEF_444(8, 4, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF_444(4, 8, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF_444(16, 16, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF_444(16, 8, cpu); \
-    SETUP_CHROMA_SP_FUNC_DEF_444(8, 16, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF_444(16, 12, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF_444(12, 16, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF_444(16, 4, cpu); \
@@ -332,20 +331,22 @@
     SETUP_CHROMA_SP_FUNC_DEF_444(16, 32, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF_444(32, 24, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF_444(24, 32, cpu); \
-    SETUP_CHROMA_SP_FUNC_DEF_444(32, 8, cpu); \
+    SETUP_CHROMA_SP_FUNC_DEF_444(32, 8, cpu);
+
+#define CHROMA_SP_FILTERS_444(cpu) \
+    SETUP_CHROMA_SP_FUNC_DEF_444(8, 8, cpu); \
+    SETUP_CHROMA_SP_FUNC_DEF_444(8, 4, cpu); \
+    SETUP_CHROMA_SP_FUNC_DEF_444(8, 16, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF_444(8, 32, cpu);
 
 #define CHROMA_SS_FILTERS_420(cpu) \
     SETUP_CHROMA_SS_FUNC_DEF_420(4, 4, cpu); \
     SETUP_CHROMA_SS_FUNC_DEF_420(4, 2, cpu); \
-    SETUP_CHROMA_SS_FUNC_DEF_420(2, 4, cpu); \
     SETUP_CHROMA_SS_FUNC_DEF_420(8, 8, cpu); \
     SETUP_CHROMA_SS_FUNC_DEF_420(8, 4, cpu); \
     SETUP_CHROMA_SS_FUNC_DEF_420(4, 8, cpu); \
     SETUP_CHROMA_SS_FUNC_DEF_420(8, 6, cpu); \
-    SETUP_CHROMA_SS_FUNC_DEF_420(6, 8, cpu); \
     SETUP_CHROMA_SS_FUNC_DEF_420(8, 2, cpu); \
-    SETUP_CHROMA_SS_FUNC_DEF_420(2, 8, cpu); \
     SETUP_CHROMA_SS_FUNC_DEF_420(16, 16, cpu); \
     SETUP_CHROMA_SS_FUNC_DEF_420(16, 8, cpu); \
     SETUP_CHROMA_SS_FUNC_DEF_420(8, 16, cpu); \
@@ -361,6 +362,12 @@
     SETUP_CHROMA_SS_FUNC_DEF_420(32, 8, cpu); \
     SETUP_CHROMA_SS_FUNC_DEF_420(8, 32, cpu);
 
+#define CHROMA_SS_FILTERS_SSE4_420(cpu) \
+    SETUP_CHROMA_SS_FUNC_DEF_420(2, 4, cpu); \
+    SETUP_CHROMA_SS_FUNC_DEF_420(2, 8, cpu); \
+    SETUP_CHROMA_SS_FUNC_DEF_420(6, 8, cpu);
+
+
 #define CHROMA_SS_FILTERS_444(cpu) \
     SETUP_CHROMA_SS_FUNC_DEF_444(4, 4, cpu); \
     SETUP_CHROMA_SS_FUNC_DEF_444(8, 8, cpu); \
@@ -1000,6 +1007,8 @@
 
         CHROMA_SS_FILTERS_420(_sse2);
         CHROMA_SS_FILTERS_444(_sse2);
+        CHROMA_SP_FILTERS_420(_sse2);
+        CHROMA_SP_FILTERS_444(_sse2);
         LUMA_SS_FILTERS(_sse2);
 
         // This function pointer initialization is temporary will be removed
@@ -1122,10 +1131,6 @@
         p.chroma_p2s[X265_CSP_I444] = x265_chroma_p2s_ssse3;
         p.chroma_p2s[X265_CSP_I420] = x265_chroma_p2s_ssse3;
 
-        CHROMA_SP_FILTERS_420(_ssse3);
-        CHROMA_SP_FILTERS_444(_ssse3);
-        LUMA_SP_FILTERS(_ssse3);
-
         p.dct[DST_4x4] = x265_dst4_ssse3;
     }
     if (cpuMask & X265_CPU_SSE4)
@@ -1152,6 +1157,10 @@
 
         CHROMA_FILTERS_420(_sse4);
         CHROMA_FILTERS_444(_sse4);
+        CHROMA_SS_FILTERS_SSE4_420(_sse4);
+        CHROMA_SP_FILTERS_SSE4_444(_sse4);
+        CHROMA_SP_FILTERS_SSE4_420(_sse4);
+        LUMA_SP_FILTERS(_sse4);
         LUMA_FILTERS(_sse4);
         ASSGN_SSE_SS(sse4);
         p.chroma[X265_CSP_I420].copy_sp[CHROMA_2x4] = x265_blockcopy_sp_2x4_sse4;
diff -r ce96cdb390fe -r 314cd7999ba9 source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm	Sun Feb 16 22:47:32 2014 -0600
+++ b/source/common/x86/ipfilter8.asm	Mon Feb 17 13:13:38 2014 +0530
@@ -3531,7 +3531,7 @@
 ; void interp_8tap_vert_sp_%1x%2(int16_t *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
 ;--------------------------------------------------------------------------------------------------------------
 %macro FILTER_VER_LUMA_SP 2
-INIT_XMM ssse3
+INIT_XMM sse4
 cglobal interp_8tap_vert_sp_%1x%2, 5, 7, 8 ,0-1
 
     add       r1d, r1d
@@ -3719,7 +3719,7 @@
 ; void interp_4tap_vert_sp_%1x%2(int16_t *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
 ;--------------------------------------------------------------------------------------------------------------
 %macro FILTER_VER_CHROMA_SP 2
-INIT_XMM ssse3
+INIT_XMM sse4
 cglobal interp_4tap_vert_sp_%1x%2, 5, 7, 7 ,0-1
 
     add       r1d, r1d
@@ -3882,7 +3882,7 @@
 ;--------------------------------------------------------------------------------------------------------------
 ; void interp_4tap_vert_sp_4x2(int16_t *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
 ;--------------------------------------------------------------------------------------------------------------
-INIT_XMM ssse3
+INIT_XMM sse4
 cglobal interp_4tap_vert_sp_4x2, 5, 6, 5
 
     add        r1d, r1d
@@ -4040,7 +4040,7 @@
 ; void interp_4tap_vert_sp_8x%2(int16_t *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
 ;--------------------------------------------------------------------------------------------------------------
 %macro FILTER_VER_CHROMA_SP_W8_H2 2
-INIT_XMM ssse3
+INIT_XMM sse2
 cglobal interp_4tap_vert_sp_%1x%2, 5, 6, 8
 
     add       r1d, r1d
@@ -4711,7 +4711,7 @@
 ; void interp_4tap_vertical_ss_%1x%2(int16_t *src, intptr_t srcStride, int16_t *dst, intptr_t dstStride, int coeffIdx)
 ;---------------------------------------------------------------------------------------------------------------------
 %macro FILTER_VER_CHROMA_SS_W2_4R 2
-INIT_XMM sse2
+INIT_XMM sse4
 cglobal interp_4tap_vert_ss_%1x%2, 5, 6, 5
 
     add       r1d, r1d
@@ -4803,7 +4803,7 @@
 ;-------------------------------------------------------------------------------------------------------------------
 ; void interp_4tap_vertical_ss_6x8(int16_t *src, intptr_t srcStride, int16_t *dst, intptr_t dstStride, int coeffIdx)
 ;-------------------------------------------------------------------------------------------------------------------
-INIT_XMM sse2
+INIT_XMM sse4
 cglobal interp_4tap_vert_ss_6x8, 5, 7, 6
 
     add       r1d, r1d
diff -r ce96cdb390fe -r 314cd7999ba9 source/common/x86/ipfilter8.h
--- a/source/common/x86/ipfilter8.h	Sun Feb 16 22:47:32 2014 -0600
+++ b/source/common/x86/ipfilter8.h	Mon Feb 17 13:13:38 2014 +0530
@@ -153,27 +153,32 @@
     void x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu(int16_t * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int coeffIdx);
 
 #define CHROMA_SP_FILTERS(cpu) \
+    SETUP_CHROMA_SP_FUNC_DEF(8, 2, cpu); \
+    SETUP_CHROMA_SP_FUNC_DEF(8, 4, cpu); \
+    SETUP_CHROMA_SP_FUNC_DEF(8, 6, cpu); \
+    SETUP_CHROMA_SP_FUNC_DEF(8, 8, cpu); \
+    SETUP_CHROMA_SP_FUNC_DEF(8, 16, cpu); \
+    SETUP_CHROMA_SP_FUNC_DEF(8, 32, cpu);
+
+#define CHROMA_SP_FILTERS_SSE4(cpu) \
+    SETUP_CHROMA_SP_FUNC_DEF(2, 4, cpu); \
+    SETUP_CHROMA_SP_FUNC_DEF(2, 8, cpu); \
+    SETUP_CHROMA_SP_FUNC_DEF(4, 2, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF(4, 4, cpu); \
-    SETUP_CHROMA_SP_FUNC_DEF(4, 2, cpu); \
-    SETUP_CHROMA_SP_FUNC_DEF(8, 8, cpu); \
-    SETUP_CHROMA_SP_FUNC_DEF(8, 4, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF(4, 8, cpu); \
-    SETUP_CHROMA_SP_FUNC_DEF(8, 6, cpu); \
-    SETUP_CHROMA_SP_FUNC_DEF(8, 2, cpu); \
+    SETUP_CHROMA_SP_FUNC_DEF(4, 16, cpu); \
+    SETUP_CHROMA_SP_FUNC_DEF(6, 8, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF(16, 16, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF(16, 8, cpu); \
-    SETUP_CHROMA_SP_FUNC_DEF(8, 16, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF(16, 12, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF(12, 16, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF(16, 4, cpu); \
-    SETUP_CHROMA_SP_FUNC_DEF(4, 16, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF(32, 32, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF(32, 16, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF(16, 32, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF(32, 24, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF(24, 32, cpu); \
-    SETUP_CHROMA_SP_FUNC_DEF(32, 8, cpu); \
-    SETUP_CHROMA_SP_FUNC_DEF(8, 32, cpu);
+    SETUP_CHROMA_SP_FUNC_DEF(32, 8, cpu);
 
 #define SETUP_CHROMA_SS_FUNC_DEF(W, H, cpu) \
     void x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu(int16_t * src, intptr_t srcStride, int16_t * dst, intptr_t dstStride, int coeffIdx);
@@ -181,14 +186,11 @@
 #define CHROMA_SS_FILTERS(cpu) \
     SETUP_CHROMA_SS_FUNC_DEF(4, 4, cpu); \
     SETUP_CHROMA_SS_FUNC_DEF(4, 2, cpu); \
-    SETUP_CHROMA_SS_FUNC_DEF(2, 4, cpu); \
     SETUP_CHROMA_SS_FUNC_DEF(8, 8, cpu); \
     SETUP_CHROMA_SS_FUNC_DEF(8, 4, cpu); \
     SETUP_CHROMA_SS_FUNC_DEF(4, 8, cpu); \
     SETUP_CHROMA_SS_FUNC_DEF(8, 6, cpu); \
-    SETUP_CHROMA_SS_FUNC_DEF(6, 8, cpu); \
     SETUP_CHROMA_SS_FUNC_DEF(8, 2, cpu); \
-    SETUP_CHROMA_SS_FUNC_DEF(2, 8, cpu); \
     SETUP_CHROMA_SS_FUNC_DEF(16, 16, cpu); \
     SETUP_CHROMA_SS_FUNC_DEF(16, 8, cpu); \
     SETUP_CHROMA_SS_FUNC_DEF(8, 16, cpu); \
@@ -204,19 +206,24 @@
     SETUP_CHROMA_SS_FUNC_DEF(32, 8, cpu); \
     SETUP_CHROMA_SS_FUNC_DEF(8, 32, cpu)
 
+
+#define CHROMA_SS_FILTERS_SSE4(cpu) \
+    SETUP_CHROMA_SS_FUNC_DEF(2, 4, cpu); \
+    SETUP_CHROMA_SS_FUNC_DEF(2, 8, cpu); \
+    SETUP_CHROMA_SS_FUNC_DEF(6, 8, cpu);
+
 CHROMA_FILTERS(_sse4);
-CHROMA_SP_FILTERS(_ssse3);
+CHROMA_SP_FILTERS(_sse2);
+CHROMA_SP_FILTERS_SSE4(_sse4);
 CHROMA_SS_FILTERS(_sse2);
+CHROMA_SS_FILTERS_SSE4(_sse4);
 LUMA_FILTERS(_sse4);
-LUMA_SP_FILTERS(_ssse3);
+LUMA_SP_FILTERS(_sse4);
 LUMA_SS_FILTERS(_sse2);
 
 void x265_interp_8tap_hv_pp_8x8_ssse3(pixel * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int idxX, int idxY);
 void x265_luma_p2s_ssse3(pixel *src, intptr_t srcStride, int16_t *dst, int width, int height);
 void x265_chroma_p2s_ssse3(pixel *src, intptr_t srcStride, int16_t *dst, int width, int height);
-void x265_interp_4tap_vert_sp_2x4_sse4(int16_t * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int coeffIdx);
-void x265_interp_4tap_vert_sp_2x8_sse4(int16_t * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int coeffIdx);
-void x265_interp_4tap_vert_sp_6x8_sse4(int16_t * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int coeffIdx);
 
 #undef SETUP_CHROMA_FUNC_DEF
 #undef SETUP_CHROMA_SP_FUNC_DEF
@@ -230,5 +237,7 @@
 #undef LUMA_FILTERS
 #undef LUMA_SP_FILTERS
 #undef LUMA_SS_FILTERS
+#undef CHROMA_SS_FILTERS_SSE4
+#undef CHROMA_SP_FILTERS_SSE4
 
 #endif // ifndef X265_MC_H


More information about the x265-devel mailing list