[x265] [PATCH 2 of 2] asm: downgrade x265_interp_8tap_hv_pp_8x8 from SSE4 to SSSE3

Min Chen chenm003 at 163.com
Thu Apr 30 13:52:47 CEST 2015


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1430394756 -28800
# Node ID 41e417d334b294833fab2ea367398c5e4119a6a4
# Parent  7da131e3412428e3cfcab577b9e5c211b920458c
asm: downgrade x265_interp_8tap_hv_pp_8x8 from SSE4 to SSSE3
---
 source/common/x86/asm-primitives.cpp |    5 ++++-
 source/common/x86/ipfilter8.asm      |    2 +-
 source/common/x86/ipfilter8.h        |    2 +-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff -r 7da131e34124 -r 41e417d334b2 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Thu Apr 30 19:52:30 2015 +0800
+++ b/source/common/x86/asm-primitives.cpp	Thu Apr 30 19:52:36 2015 +0800
@@ -1447,6 +1447,9 @@
 
         ALL_LUMA_TU(count_nonzero, count_nonzero, ssse3);
 
+        // MUST be done after LUMA_FILTERS() to overwrite default version
+        p.pu[LUMA_8x8].luma_hvpp = x265_interp_8tap_hv_pp_8x8_ssse3;
+
         p.frameInitLowres = x265_frame_init_lowres_core_ssse3;
         p.scale1D_128to64 = x265_scale1D_128to64_ssse3;
         p.scale2D_64to32 = x265_scale2D_64to32_ssse3;
@@ -1548,7 +1551,7 @@
         CHROMA_444_VSP_FILTERS_SSE4(_sse4);
 
         // MUST be done after LUMA_FILTERS() to overwrite default version
-        p.pu[LUMA_8x8].luma_hvpp = x265_interp_8tap_hv_pp_8x8_sse4;
+        p.pu[LUMA_8x8].luma_hvpp = x265_interp_8tap_hv_pp_8x8_ssse3;
 
         LUMA_CU_BLOCKCOPY(ps, sse4);
         CHROMA_420_CU_BLOCKCOPY(ps, sse4);
diff -r 7da131e34124 -r 41e417d334b2 source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm	Thu Apr 30 19:52:30 2015 +0800
+++ b/source/common/x86/ipfilter8.asm	Thu Apr 30 19:52:36 2015 +0800
@@ -3157,7 +3157,7 @@
 ;-----------------------------------------------------------------------------
 ; void interp_8tap_hv_pp_%1x%2(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int idxX, int idxY)
 ;-----------------------------------------------------------------------------
-INIT_XMM sse4
+INIT_XMM ssse3
 cglobal interp_8tap_hv_pp_8x8, 4, 7, 8, 0-15*16
 %define coef        m7
 %define stk_buf     rsp
diff -r 7da131e34124 -r 41e417d334b2 source/common/x86/ipfilter8.h
--- a/source/common/x86/ipfilter8.h	Thu Apr 30 19:52:30 2015 +0800
+++ b/source/common/x86/ipfilter8.h	Thu Apr 30 19:52:36 2015 +0800
@@ -776,7 +776,7 @@
 LUMA_FILTERS(_avx2);
 LUMA_SP_FILTERS(_avx2);
 LUMA_SS_FILTERS(_avx2);
-void x265_interp_8tap_hv_pp_8x8_sse4(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int idxX, int idxY);
+void x265_interp_8tap_hv_pp_8x8_ssse3(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int idxX, int idxY);
 void x265_interp_8tap_hv_pp_16x16_avx2(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int idxX, int idxY);
 void x265_filterPixelToShort_4x4_sse4(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride);
 void x265_filterPixelToShort_4x8_sse4(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride);



More information about the x265-devel mailing list