[x265] [PATCH] asm: fix for illegal instruction usage in ipfilter
Murugan Vairavel <murugan at multicorewareinc.com>
Mon Feb 17 08:43:51 CET 2014
# HG changeset patch
# User Murugan Vairavel <murugan at multicorewareinc.com>
# Date 1392623018 -19800
# Mon Feb 17 13:13:38 2014 +0530
# Node ID 314cd7999ba9740c25fad0df91988979650bece6
# Parent ce96cdb390fe26aee6effa731e51303c1d9056b0
asm: fix for illegal instruction usage in ipfilter
diff -r ce96cdb390fe -r 314cd7999ba9 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Sun Feb 16 22:47:32 2014 -0600
+++ b/source/common/x86/asm-primitives.cpp Mon Feb 17 13:13:38 2014 +0530
@@ -292,17 +292,12 @@
SETUP_CHROMA_FUNC_DEF_444(32, 8, cpu); \
SETUP_CHROMA_FUNC_DEF_444(8, 32, cpu);
-#define CHROMA_SP_FILTERS_420(cpu) \
+#define CHROMA_SP_FILTERS_SSE4_420(cpu) \
SETUP_CHROMA_SP_FUNC_DEF_420(4, 4, cpu); \
SETUP_CHROMA_SP_FUNC_DEF_420(4, 2, cpu); \
- SETUP_CHROMA_SP_FUNC_DEF_420(8, 8, cpu); \
- SETUP_CHROMA_SP_FUNC_DEF_420(8, 4, cpu); \
SETUP_CHROMA_SP_FUNC_DEF_420(4, 8, cpu); \
- SETUP_CHROMA_SP_FUNC_DEF_420(8, 6, cpu); \
- SETUP_CHROMA_SP_FUNC_DEF_420(8, 2, cpu); \
SETUP_CHROMA_SP_FUNC_DEF_420(16, 16, cpu); \
SETUP_CHROMA_SP_FUNC_DEF_420(16, 8, cpu); \
- SETUP_CHROMA_SP_FUNC_DEF_420(8, 16, cpu); \
SETUP_CHROMA_SP_FUNC_DEF_420(16, 12, cpu); \
SETUP_CHROMA_SP_FUNC_DEF_420(12, 16, cpu); \
SETUP_CHROMA_SP_FUNC_DEF_420(16, 4, cpu); \
@@ -312,17 +307,21 @@
SETUP_CHROMA_SP_FUNC_DEF_420(16, 32, cpu); \
SETUP_CHROMA_SP_FUNC_DEF_420(32, 24, cpu); \
SETUP_CHROMA_SP_FUNC_DEF_420(24, 32, cpu); \
- SETUP_CHROMA_SP_FUNC_DEF_420(32, 8, cpu); \
+ SETUP_CHROMA_SP_FUNC_DEF_420(32, 8, cpu);
+
+#define CHROMA_SP_FILTERS_420(cpu) \
+ SETUP_CHROMA_SP_FUNC_DEF_420(8, 2, cpu); \
+ SETUP_CHROMA_SP_FUNC_DEF_420(8, 4, cpu); \
+ SETUP_CHROMA_SP_FUNC_DEF_420(8, 6, cpu); \
+ SETUP_CHROMA_SP_FUNC_DEF_420(8, 8, cpu); \
+ SETUP_CHROMA_SP_FUNC_DEF_420(8, 16, cpu); \
SETUP_CHROMA_SP_FUNC_DEF_420(8, 32, cpu);
-#define CHROMA_SP_FILTERS_444(cpu) \
+#define CHROMA_SP_FILTERS_SSE4_444(cpu) \
SETUP_CHROMA_SP_FUNC_DEF_444(4, 4, cpu); \
- SETUP_CHROMA_SP_FUNC_DEF_444(8, 8, cpu); \
- SETUP_CHROMA_SP_FUNC_DEF_444(8, 4, cpu); \
SETUP_CHROMA_SP_FUNC_DEF_444(4, 8, cpu); \
SETUP_CHROMA_SP_FUNC_DEF_444(16, 16, cpu); \
SETUP_CHROMA_SP_FUNC_DEF_444(16, 8, cpu); \
- SETUP_CHROMA_SP_FUNC_DEF_444(8, 16, cpu); \
SETUP_CHROMA_SP_FUNC_DEF_444(16, 12, cpu); \
SETUP_CHROMA_SP_FUNC_DEF_444(12, 16, cpu); \
SETUP_CHROMA_SP_FUNC_DEF_444(16, 4, cpu); \
@@ -332,20 +331,22 @@
SETUP_CHROMA_SP_FUNC_DEF_444(16, 32, cpu); \
SETUP_CHROMA_SP_FUNC_DEF_444(32, 24, cpu); \
SETUP_CHROMA_SP_FUNC_DEF_444(24, 32, cpu); \
- SETUP_CHROMA_SP_FUNC_DEF_444(32, 8, cpu); \
+ SETUP_CHROMA_SP_FUNC_DEF_444(32, 8, cpu);
+
+#define CHROMA_SP_FILTERS_444(cpu) \
+ SETUP_CHROMA_SP_FUNC_DEF_444(8, 8, cpu); \
+ SETUP_CHROMA_SP_FUNC_DEF_444(8, 4, cpu); \
+ SETUP_CHROMA_SP_FUNC_DEF_444(8, 16, cpu); \
SETUP_CHROMA_SP_FUNC_DEF_444(8, 32, cpu);
#define CHROMA_SS_FILTERS_420(cpu) \
SETUP_CHROMA_SS_FUNC_DEF_420(4, 4, cpu); \
SETUP_CHROMA_SS_FUNC_DEF_420(4, 2, cpu); \
- SETUP_CHROMA_SS_FUNC_DEF_420(2, 4, cpu); \
SETUP_CHROMA_SS_FUNC_DEF_420(8, 8, cpu); \
SETUP_CHROMA_SS_FUNC_DEF_420(8, 4, cpu); \
SETUP_CHROMA_SS_FUNC_DEF_420(4, 8, cpu); \
SETUP_CHROMA_SS_FUNC_DEF_420(8, 6, cpu); \
- SETUP_CHROMA_SS_FUNC_DEF_420(6, 8, cpu); \
SETUP_CHROMA_SS_FUNC_DEF_420(8, 2, cpu); \
- SETUP_CHROMA_SS_FUNC_DEF_420(2, 8, cpu); \
SETUP_CHROMA_SS_FUNC_DEF_420(16, 16, cpu); \
SETUP_CHROMA_SS_FUNC_DEF_420(16, 8, cpu); \
SETUP_CHROMA_SS_FUNC_DEF_420(8, 16, cpu); \
@@ -361,6 +362,12 @@
SETUP_CHROMA_SS_FUNC_DEF_420(32, 8, cpu); \
SETUP_CHROMA_SS_FUNC_DEF_420(8, 32, cpu);
+#define CHROMA_SS_FILTERS_SSE4_420(cpu) \
+ SETUP_CHROMA_SS_FUNC_DEF_420(2, 4, cpu); \
+ SETUP_CHROMA_SS_FUNC_DEF_420(2, 8, cpu); \
+ SETUP_CHROMA_SS_FUNC_DEF_420(6, 8, cpu);
+
+
#define CHROMA_SS_FILTERS_444(cpu) \
SETUP_CHROMA_SS_FUNC_DEF_444(4, 4, cpu); \
SETUP_CHROMA_SS_FUNC_DEF_444(8, 8, cpu); \
@@ -1000,6 +1007,8 @@
CHROMA_SS_FILTERS_420(_sse2);
CHROMA_SS_FILTERS_444(_sse2);
+ CHROMA_SP_FILTERS_420(_sse2);
+ CHROMA_SP_FILTERS_444(_sse2);
LUMA_SS_FILTERS(_sse2);
// This function pointer initialization is temporary will be removed
@@ -1122,10 +1131,6 @@
p.chroma_p2s[X265_CSP_I444] = x265_chroma_p2s_ssse3;
p.chroma_p2s[X265_CSP_I420] = x265_chroma_p2s_ssse3;
- CHROMA_SP_FILTERS_420(_ssse3);
- CHROMA_SP_FILTERS_444(_ssse3);
- LUMA_SP_FILTERS(_ssse3);
-
p.dct[DST_4x4] = x265_dst4_ssse3;
}
if (cpuMask & X265_CPU_SSE4)
@@ -1152,6 +1157,10 @@
CHROMA_FILTERS_420(_sse4);
CHROMA_FILTERS_444(_sse4);
+ CHROMA_SS_FILTERS_SSE4_420(_sse4);
+ CHROMA_SP_FILTERS_SSE4_444(_sse4);
+ CHROMA_SP_FILTERS_SSE4_420(_sse4);
+ LUMA_SP_FILTERS(_sse4);
LUMA_FILTERS(_sse4);
ASSGN_SSE_SS(sse4);
p.chroma[X265_CSP_I420].copy_sp[CHROMA_2x4] = x265_blockcopy_sp_2x4_sse4;
diff -r ce96cdb390fe -r 314cd7999ba9 source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm Sun Feb 16 22:47:32 2014 -0600
+++ b/source/common/x86/ipfilter8.asm Mon Feb 17 13:13:38 2014 +0530
@@ -3531,7 +3531,7 @@
; void interp_8tap_vert_sp_%1x%2(int16_t *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
;--------------------------------------------------------------------------------------------------------------
%macro FILTER_VER_LUMA_SP 2
-INIT_XMM ssse3
+INIT_XMM sse4
cglobal interp_8tap_vert_sp_%1x%2, 5, 7, 8 ,0-1
add r1d, r1d
@@ -3719,7 +3719,7 @@
; void interp_4tap_vert_sp_%1x%2(int16_t *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
;--------------------------------------------------------------------------------------------------------------
%macro FILTER_VER_CHROMA_SP 2
-INIT_XMM ssse3
+INIT_XMM sse4
cglobal interp_4tap_vert_sp_%1x%2, 5, 7, 7 ,0-1
add r1d, r1d
@@ -3882,7 +3882,7 @@
;--------------------------------------------------------------------------------------------------------------
; void interp_4tap_vert_sp_4x2(int16_t *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
;--------------------------------------------------------------------------------------------------------------
-INIT_XMM ssse3
+INIT_XMM sse4
cglobal interp_4tap_vert_sp_4x2, 5, 6, 5
add r1d, r1d
@@ -4040,7 +4040,7 @@
; void interp_4tap_vert_sp_8x%2(int16_t *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
;--------------------------------------------------------------------------------------------------------------
%macro FILTER_VER_CHROMA_SP_W8_H2 2
-INIT_XMM ssse3
+INIT_XMM sse2
cglobal interp_4tap_vert_sp_%1x%2, 5, 6, 8
add r1d, r1d
@@ -4711,7 +4711,7 @@
; void interp_4tap_vertical_ss_%1x%2(int16_t *src, intptr_t srcStride, int16_t *dst, intptr_t dstStride, int coeffIdx)
;---------------------------------------------------------------------------------------------------------------------
%macro FILTER_VER_CHROMA_SS_W2_4R 2
-INIT_XMM sse2
+INIT_XMM sse4
cglobal interp_4tap_vert_ss_%1x%2, 5, 6, 5
add r1d, r1d
@@ -4803,7 +4803,7 @@
;-------------------------------------------------------------------------------------------------------------------
; void interp_4tap_vertical_ss_6x8(int16_t *src, intptr_t srcStride, int16_t *dst, intptr_t dstStride, int coeffIdx)
;-------------------------------------------------------------------------------------------------------------------
-INIT_XMM sse2
+INIT_XMM sse4
cglobal interp_4tap_vert_ss_6x8, 5, 7, 6
add r1d, r1d
diff -r ce96cdb390fe -r 314cd7999ba9 source/common/x86/ipfilter8.h
--- a/source/common/x86/ipfilter8.h Sun Feb 16 22:47:32 2014 -0600
+++ b/source/common/x86/ipfilter8.h Mon Feb 17 13:13:38 2014 +0530
@@ -153,27 +153,32 @@
void x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu(int16_t * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int coeffIdx);
#define CHROMA_SP_FILTERS(cpu) \
+ SETUP_CHROMA_SP_FUNC_DEF(8, 2, cpu); \
+ SETUP_CHROMA_SP_FUNC_DEF(8, 4, cpu); \
+ SETUP_CHROMA_SP_FUNC_DEF(8, 6, cpu); \
+ SETUP_CHROMA_SP_FUNC_DEF(8, 8, cpu); \
+ SETUP_CHROMA_SP_FUNC_DEF(8, 16, cpu); \
+ SETUP_CHROMA_SP_FUNC_DEF(8, 32, cpu);
+
+#define CHROMA_SP_FILTERS_SSE4(cpu) \
+ SETUP_CHROMA_SP_FUNC_DEF(2, 4, cpu); \
+ SETUP_CHROMA_SP_FUNC_DEF(2, 8, cpu); \
+ SETUP_CHROMA_SP_FUNC_DEF(4, 2, cpu); \
SETUP_CHROMA_SP_FUNC_DEF(4, 4, cpu); \
- SETUP_CHROMA_SP_FUNC_DEF(4, 2, cpu); \
- SETUP_CHROMA_SP_FUNC_DEF(8, 8, cpu); \
- SETUP_CHROMA_SP_FUNC_DEF(8, 4, cpu); \
SETUP_CHROMA_SP_FUNC_DEF(4, 8, cpu); \
- SETUP_CHROMA_SP_FUNC_DEF(8, 6, cpu); \
- SETUP_CHROMA_SP_FUNC_DEF(8, 2, cpu); \
+ SETUP_CHROMA_SP_FUNC_DEF(4, 16, cpu); \
+ SETUP_CHROMA_SP_FUNC_DEF(6, 8, cpu); \
SETUP_CHROMA_SP_FUNC_DEF(16, 16, cpu); \
SETUP_CHROMA_SP_FUNC_DEF(16, 8, cpu); \
- SETUP_CHROMA_SP_FUNC_DEF(8, 16, cpu); \
SETUP_CHROMA_SP_FUNC_DEF(16, 12, cpu); \
SETUP_CHROMA_SP_FUNC_DEF(12, 16, cpu); \
SETUP_CHROMA_SP_FUNC_DEF(16, 4, cpu); \
- SETUP_CHROMA_SP_FUNC_DEF(4, 16, cpu); \
SETUP_CHROMA_SP_FUNC_DEF(32, 32, cpu); \
SETUP_CHROMA_SP_FUNC_DEF(32, 16, cpu); \
SETUP_CHROMA_SP_FUNC_DEF(16, 32, cpu); \
SETUP_CHROMA_SP_FUNC_DEF(32, 24, cpu); \
SETUP_CHROMA_SP_FUNC_DEF(24, 32, cpu); \
- SETUP_CHROMA_SP_FUNC_DEF(32, 8, cpu); \
- SETUP_CHROMA_SP_FUNC_DEF(8, 32, cpu);
+ SETUP_CHROMA_SP_FUNC_DEF(32, 8, cpu);
#define SETUP_CHROMA_SS_FUNC_DEF(W, H, cpu) \
void x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu(int16_t * src, intptr_t srcStride, int16_t * dst, intptr_t dstStride, int coeffIdx);
@@ -181,14 +186,11 @@
#define CHROMA_SS_FILTERS(cpu) \
SETUP_CHROMA_SS_FUNC_DEF(4, 4, cpu); \
SETUP_CHROMA_SS_FUNC_DEF(4, 2, cpu); \
- SETUP_CHROMA_SS_FUNC_DEF(2, 4, cpu); \
SETUP_CHROMA_SS_FUNC_DEF(8, 8, cpu); \
SETUP_CHROMA_SS_FUNC_DEF(8, 4, cpu); \
SETUP_CHROMA_SS_FUNC_DEF(4, 8, cpu); \
SETUP_CHROMA_SS_FUNC_DEF(8, 6, cpu); \
- SETUP_CHROMA_SS_FUNC_DEF(6, 8, cpu); \
SETUP_CHROMA_SS_FUNC_DEF(8, 2, cpu); \
- SETUP_CHROMA_SS_FUNC_DEF(2, 8, cpu); \
SETUP_CHROMA_SS_FUNC_DEF(16, 16, cpu); \
SETUP_CHROMA_SS_FUNC_DEF(16, 8, cpu); \
SETUP_CHROMA_SS_FUNC_DEF(8, 16, cpu); \
@@ -204,19 +206,24 @@
SETUP_CHROMA_SS_FUNC_DEF(32, 8, cpu); \
SETUP_CHROMA_SS_FUNC_DEF(8, 32, cpu)
+
+#define CHROMA_SS_FILTERS_SSE4(cpu) \
+ SETUP_CHROMA_SS_FUNC_DEF(2, 4, cpu); \
+ SETUP_CHROMA_SS_FUNC_DEF(2, 8, cpu); \
+ SETUP_CHROMA_SS_FUNC_DEF(6, 8, cpu);
+
CHROMA_FILTERS(_sse4);
-CHROMA_SP_FILTERS(_ssse3);
+CHROMA_SP_FILTERS(_sse2);
+CHROMA_SP_FILTERS_SSE4(_sse4);
CHROMA_SS_FILTERS(_sse2);
+CHROMA_SS_FILTERS_SSE4(_sse4);
LUMA_FILTERS(_sse4);
-LUMA_SP_FILTERS(_ssse3);
+LUMA_SP_FILTERS(_sse4);
LUMA_SS_FILTERS(_sse2);
void x265_interp_8tap_hv_pp_8x8_ssse3(pixel * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int idxX, int idxY);
void x265_luma_p2s_ssse3(pixel *src, intptr_t srcStride, int16_t *dst, int width, int height);
void x265_chroma_p2s_ssse3(pixel *src, intptr_t srcStride, int16_t *dst, int width, int height);
-void x265_interp_4tap_vert_sp_2x4_sse4(int16_t * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int coeffIdx);
-void x265_interp_4tap_vert_sp_2x8_sse4(int16_t * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int coeffIdx);
-void x265_interp_4tap_vert_sp_6x8_sse4(int16_t * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int coeffIdx);
#undef SETUP_CHROMA_FUNC_DEF
#undef SETUP_CHROMA_SP_FUNC_DEF
@@ -230,5 +237,7 @@
#undef LUMA_FILTERS
#undef LUMA_SP_FILTERS
#undef LUMA_SS_FILTERS
+#undef CHROMA_SS_FILTERS_SSE4
+#undef CHROMA_SP_FILTERS_SSE4
#endif // ifndef X265_MC_H
More information about the x265-devel mailing list