[x265] [PATCH] Change minimum architecture to sse4 as chroma vsp functions for block sizes (2x4, 2x8 and 6x8) need faster SSE4 instructions

nabajit at multicorewareinc.com nabajit at multicorewareinc.com
Wed Nov 13 13:57:07 CET 2013


# HG changeset patch
# User Nabajit Deka
# Date 1384347420 -19800
#      Wed Nov 13 18:27:00 2013 +0530
# Node ID 8a6d7ba02c489b574054d8638d8cccb6af1faf61
# Parent  8a66a96d330125fbf5c3446b566de6ec486f4ad5
Change minimum architecture to sse4 as chroma vsp functions for block sizes (2x4, 2x8 and 6x8) need faster SSE4 instructions.

diff -r 8a66a96d3301 -r 8a6d7ba02c48 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Wed Nov 13 17:39:18 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp	Wed Nov 13 18:27:00 2013 +0530
@@ -177,14 +177,11 @@
 #define CHROMA_SP_FILTERS(cpu) \
     SETUP_CHROMA_SP_FUNC_DEF(4, 4, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF(4, 2, cpu); \
-    SETUP_CHROMA_SP_FUNC_DEF(2, 4, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF(8, 8, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF(8, 4, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF(4, 8, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF(8, 6, cpu); \
-    SETUP_CHROMA_SP_FUNC_DEF(6, 8, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF(8, 2, cpu); \
-    SETUP_CHROMA_SP_FUNC_DEF(2, 8, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF(16, 16, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF(16, 8, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF(8, 16, cpu); \
@@ -527,6 +524,10 @@
         p.chroma_copy_sp[CHROMA_2x4] = x265_blockcopy_sp_2x4_sse4;
         p.chroma_copy_sp[CHROMA_2x8] = x265_blockcopy_sp_2x8_sse4;
         p.chroma_copy_sp[CHROMA_6x8] = x265_blockcopy_sp_6x8_sse4;
+
+        p.chroma_vsp[CHROMA_2x4] = x265_interp_4tap_vert_sp_2x4_sse4;
+        p.chroma_vsp[CHROMA_2x8] = x265_interp_4tap_vert_sp_2x8_sse4;
+        p.chroma_vsp[CHROMA_6x8] = x265_interp_4tap_vert_sp_6x8_sse4;
     }
     if (cpuMask & X265_CPU_AVX)
     {
diff -r 8a66a96d3301 -r 8a6d7ba02c48 source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm	Wed Nov 13 17:39:18 2013 +0530
+++ b/source/common/x86/ipfilter8.asm	Wed Nov 13 18:27:00 2013 +0530
@@ -3153,7 +3153,7 @@
 ; void interp_4tap_vertical_sp_%1x%2(int16_t *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
 ;-------------------------------------------------------------------------------------------------------------------
 %macro FILTER_VER_CHROMA_SP_W2_4R 2
-INIT_XMM ssse3
+INIT_XMM sse4
 cglobal interp_4tap_vert_sp_%1x%2, 5, 7, 6
 
     add       r1d, r1d
@@ -3255,7 +3255,7 @@
 ;-------------------------------------------------------------------------------------------------------------------
 ; void interp_4tap_vertical_sp_6x8(int16_t *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
 ;-------------------------------------------------------------------------------------------------------------------
-INIT_XMM ssse3
+INIT_XMM sse4
 cglobal interp_4tap_vert_sp_6x8, 5, 7, 7
 
     add       r1d, r1d
diff -r 8a66a96d3301 -r 8a6d7ba02c48 source/common/x86/ipfilter8.h
--- a/source/common/x86/ipfilter8.h	Wed Nov 13 17:39:18 2013 +0530
+++ b/source/common/x86/ipfilter8.h	Wed Nov 13 18:27:00 2013 +0530
@@ -124,14 +124,11 @@
 #define CHROMA_SP_FILTERS(cpu) \
     SETUP_CHROMA_SP_FUNC_DEF(4, 4, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF(4, 2, cpu); \
-    SETUP_CHROMA_SP_FUNC_DEF(2, 4, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF(8, 8, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF(8, 4, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF(4, 8, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF(8, 6, cpu); \
-    SETUP_CHROMA_SP_FUNC_DEF(6, 8, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF(8, 2, cpu); \
-    SETUP_CHROMA_SP_FUNC_DEF(2, 8, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF(16, 16, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF(16, 8, cpu); \
     SETUP_CHROMA_SP_FUNC_DEF(8, 16, cpu); \
@@ -157,6 +154,9 @@
 void x265_interp_8tap_v_ss_sse2(int16_t *src, intptr_t srcStride, int16_t *dst, intptr_t dstStride, int width, int height, const int coefIdx);
 void x265_luma_p2s_ssse3(pixel *src, intptr_t srcStride, int16_t *dst, int width, int height);
 void x265_chroma_p2s_ssse3(pixel *src, intptr_t srcStride, int16_t *dst, int width, int height);
+void x265_interp_4tap_vert_sp_2x4_sse4(int16_t * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_sp_2x8_sse4(int16_t * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_sp_6x8_sse4(int16_t * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int coeffIdx);
 
 #undef SETUP_CHROMA_FUNC_DEF
 #undef SETUP_CHROMA_SP_FUNC_DEF


More information about the x265-devel mailing list