[x265] [PATCH 2 of 2] asm: downgrade x265_interp_8tap_hv_pp_8x8 from SSE4 to SSSE3
Min Chen
chenm003 at 163.com
Thu Apr 30 13:52:47 CEST 2015
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1430394756 -28800
# Node ID 41e417d334b294833fab2ea367398c5e4119a6a4
# Parent 7da131e3412428e3cfcab577b9e5c211b920458c
asm: downgrade x265_interp_8tap_hv_pp_8x8 from SSE4 to SSSE3
---
source/common/x86/asm-primitives.cpp | 5 ++++-
source/common/x86/ipfilter8.asm | 2 +-
source/common/x86/ipfilter8.h | 2 +-
3 files changed, 6 insertions(+), 3 deletions(-)
diff -r 7da131e34124 -r 41e417d334b2 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Thu Apr 30 19:52:30 2015 +0800
+++ b/source/common/x86/asm-primitives.cpp Thu Apr 30 19:52:36 2015 +0800
@@ -1447,6 +1447,9 @@
ALL_LUMA_TU(count_nonzero, count_nonzero, ssse3);
+ // MUST be done after LUMA_FILTERS() to overwrite default version
+ p.pu[LUMA_8x8].luma_hvpp = x265_interp_8tap_hv_pp_8x8_ssse3;
+
p.frameInitLowres = x265_frame_init_lowres_core_ssse3;
p.scale1D_128to64 = x265_scale1D_128to64_ssse3;
p.scale2D_64to32 = x265_scale2D_64to32_ssse3;
@@ -1548,7 +1551,7 @@
CHROMA_444_VSP_FILTERS_SSE4(_sse4);
// MUST be done after LUMA_FILTERS() to overwrite default version
- p.pu[LUMA_8x8].luma_hvpp = x265_interp_8tap_hv_pp_8x8_sse4;
+ p.pu[LUMA_8x8].luma_hvpp = x265_interp_8tap_hv_pp_8x8_ssse3;
LUMA_CU_BLOCKCOPY(ps, sse4);
CHROMA_420_CU_BLOCKCOPY(ps, sse4);
diff -r 7da131e34124 -r 41e417d334b2 source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm Thu Apr 30 19:52:30 2015 +0800
+++ b/source/common/x86/ipfilter8.asm Thu Apr 30 19:52:36 2015 +0800
@@ -3157,7 +3157,7 @@
;-----------------------------------------------------------------------------
; void interp_8tap_hv_pp_%1x%2(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int idxX, int idxY)
;-----------------------------------------------------------------------------
-INIT_XMM sse4
+INIT_XMM ssse3
cglobal interp_8tap_hv_pp_8x8, 4, 7, 8, 0-15*16
%define coef m7
%define stk_buf rsp
diff -r 7da131e34124 -r 41e417d334b2 source/common/x86/ipfilter8.h
--- a/source/common/x86/ipfilter8.h Thu Apr 30 19:52:30 2015 +0800
+++ b/source/common/x86/ipfilter8.h Thu Apr 30 19:52:36 2015 +0800
@@ -776,7 +776,7 @@
LUMA_FILTERS(_avx2);
LUMA_SP_FILTERS(_avx2);
LUMA_SS_FILTERS(_avx2);
-void x265_interp_8tap_hv_pp_8x8_sse4(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int idxX, int idxY);
+void x265_interp_8tap_hv_pp_8x8_ssse3(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int idxX, int idxY);
void x265_interp_8tap_hv_pp_16x16_avx2(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int idxX, int idxY);
void x265_filterPixelToShort_4x4_sse4(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride);
void x265_filterPixelToShort_4x8_sse4(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride);
More information about the x265-devel mailing list