[x265-commits] [x265] TEncSearch: fix comment for TEncSearch::predInterSearch
Steve Borho
steve at borho.org
Tue Oct 15 08:17:21 CEST 2013
details: http://hg.videolan.org/x265/rev/062c51758069
branches:
changeset: 4448:062c51758069
user: Steve Borho <steve at borho.org>
date: Mon Oct 14 21:55:04 2013 -0500
description:
TEncSearch: fix comment for TEncSearch::predInterSearch
Subject: [x265] cmake: do not query clang version, it is not used
details: http://hg.videolan.org/x265/rev/fa90c915a323
branches:
changeset: 4449:fa90c915a323
user: Steve Borho <steve at borho.org>
date: Tue Oct 15 00:37:37 2013 -0500
description:
cmake: do not query clang version, it is not used
Subject: [x265] cmake: give 16bpp vector sad primitives their own C++ file
details: http://hg.videolan.org/x265/rev/764c0e9984f0
branches:
changeset: 4450:764c0e9984f0
user: Steve Borho <steve at borho.org>
date: Tue Oct 15 00:50:51 2013 -0500
description:
cmake: give 16bpp vector sad primitives their own C++ file
diffstat:
source/CMakeLists.txt | 10 +-
source/Lib/TLibEncoder/TEncSearch.cpp | 7 +-
source/common/CMakeLists.txt | 5 +-
source/common/vec/pixel-sse41.cpp | 14 +-
source/common/vec/pixel16-sse41.cpp | 1866 +++++++++++++++++++++++++++++++++
source/common/vec/pixel16.inc | 1805 -------------------------------
6 files changed, 1881 insertions(+), 1826 deletions(-)
diffs (truncated from 3785 to 300 lines):
diff -r abae6903e0af -r 764c0e9984f0 source/CMakeLists.txt
--- a/source/CMakeLists.txt Mon Oct 14 13:12:22 2013 -0500
+++ b/source/CMakeLists.txt Tue Oct 15 00:50:51 2013 -0500
@@ -64,19 +64,19 @@ if(INTEL_CXX AND UNIX)
# treat icpc roughly like gcc
set(GCC 1)
add_definitions(-Wall -Wextra -Wshadow -no-vec)
+elseif(CLANG)
+ # treat clang roughly like gcc
+ set(GCC 1)
+ add_definitions(-Wall -Wextra -Wshadow -mstackrealign -ffast-math)
elseif(CMAKE_COMPILER_IS_GNUCXX)
+ add_definitions(-Wall -Wextra -Wshadow -mstackrealign -ffast-math)
execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
- add_definitions(-Wall -Wextra -Wshadow -mstackrealign -ffast-math)
if(NOT GCC_VERSION VERSION_LESS 4.7)
# this is necessary to avoid name conflicts in vector class
# library. if vector classes are removed/replaced this can
# likely be removed as well.
add_definitions(-fabi-version=6)
endif()
-elseif(CLANG)
- execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
- set(GCC 1)
- add_definitions(-Wall -Wextra -Wshadow -ffast-math -mstackrealign)
endif()
if (GCC)
option(WARNINGS_AS_ERRORS "Stop compiles on first warning" OFF)
diff -r abae6903e0af -r 764c0e9984f0 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Oct 14 13:12:22 2013 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Tue Oct 15 00:50:51 2013 -0500
@@ -2218,11 +2218,8 @@ void TEncSearch::xRestrictBipredMergeCan
/** search of the best candidate for inter prediction
* \param cu
- * \param fencYuv
- * \param rpcPredYuv
- * \param rpcResiYuv
- * \param rpcRecoYuv
- * \param bUseRes
+ * \param predYuv
+ * \param bUseMRG
* \returns void
*/
void TEncSearch::predInterSearch(TComDataCU* cu, TComYuv* predYuv, bool bUseMRG)
diff -r abae6903e0af -r 764c0e9984f0 source/common/CMakeLists.txt
--- a/source/common/CMakeLists.txt Mon Oct 14 13:12:22 2013 -0500
+++ b/source/common/CMakeLists.txt Tue Oct 15 00:50:51 2013 -0500
@@ -89,14 +89,14 @@ if(ENABLE_PRIMITIVES_VEC)
vec/pixel-sse3.cpp vec/pixel-ssse3.cpp vec/pixel-sse41.cpp
vec/dct-sse3.cpp vec/dct-ssse3.cpp vec/dct-sse41.cpp
vec/ipfilter-ssse3.cpp vec/ipfilter-sse41.cpp
- vec/intra-sse3.cpp vec/intra-sse41.cpp)
+ vec/pixel16-sse41.cpp vec/intra-sse3.cpp vec/intra-sse41.cpp)
if (NOT X64)
# x64 implies SSE4, so this flag would have no effect (and it issues a warning)
set_source_files_properties(vec/blockcopy-sse3.cpp
vec/pixel-sse3.cpp vec/pixel-ssse3.cpp vec/pixel-sse41.cpp
vec/dct-sse3.cpp vec/dct-ssse3.cpp vec/dct-sse41.cpp
vec/ipfilter-ssse3.cpp vec/ipfilter-sse41.cpp
- vec/intra-sse3.cpp vec/intra-sse41.cpp
+ vec/intra-sse3.cpp vec/intra-sse41.cpp vec/pixel16-sse41.cpp
PROPERTIES COMPILE_FLAGS /arch:SSE2)
endif()
if (MSVC_VERSION EQUAL 1700 OR INTEL_CXX)
@@ -129,6 +129,7 @@ if(ENABLE_PRIMITIVES_VEC)
PROPERTIES COMPILE_FLAGS "-mssse3")
set_source_files_properties(
vec/pixel-sse41.cpp vec/ipfilter-sse41.cpp vec/dct-sse41.cpp vec/intra-sse41.cpp
+ vec/pixel16-sse41.cpp
PROPERTIES COMPILE_FLAGS "-msse4.1")
endif()
if(INTEL_CXX OR CLANG OR (NOT GCC_VERSION VERSION_LESS 4.7))
diff -r abae6903e0af -r 764c0e9984f0 source/common/vec/pixel-sse41.cpp
--- a/source/common/vec/pixel-sse41.cpp Mon Oct 14 13:12:22 2013 -0500
+++ b/source/common/vec/pixel-sse41.cpp Tue Oct 15 00:50:51 2013 -0500
@@ -5527,16 +5527,10 @@ int sse_ss64(short* fenc, intptr_t strid
}
}
-#if HIGH_BIT_DEPTH
-#define INSTRSET 5
-#include "vectorclass.h"
-namespace {
-#include "pixel16.inc"
-}
-#endif
-
namespace x265 {
+extern void Setup_Vec_Pixel16Primitives_sse41(EncoderPrimitives &p);
+
#if HIGH_BIT_DEPTH
#define SETUP_PARTITION(W, H) \
p.sad[PARTITION_##W##x##H] = sad_##W<H>; \
@@ -5594,7 +5588,9 @@ void Setup_Vec_PixelPrimitives_sse41(Enc
SETUP_NONSAD(4, 4); // 4x4 SAD covered by assembly
/* 4x4 is too small for any sub partitions */
-#if !HIGH_BIT_DEPTH
+#if HIGH_BIT_DEPTH
+ Setup_Vec_Pixel16Primitives_sse41(p);
+#else
// These are the only SSE primitives uncovered by assembly
p.sad_x3[PARTITION_4x16] = sad_x3_4x16;
p.sad_x4[PARTITION_4x16] = sad_x4_4x16;
diff -r abae6903e0af -r 764c0e9984f0 source/common/vec/pixel16-sse41.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/source/common/vec/pixel16-sse41.cpp Tue Oct 15 00:50:51 2013 -0500
@@ -0,0 +1,1866 @@
+/*****************************************************************************
+ * Copyright (C) 2013 x265 project
+ *
+ * Authors: Steve Borho <steve at borho.org>
+ * Mandar Gurav <mandar at multicorewareinc.com>
+ * Mahesh Pittala <mahesh at multicorewareinc.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing at multicorewareinc.com.
+ *****************************************************************************/
+
+#include "TLibCommon/TComRom.h"
+#include "primitives.h"
+#include <assert.h>
+#include <xmmintrin.h> // SSE
+#include <smmintrin.h> // SSE4.1
+
+using namespace x265;
+
+/* intrinsics for when pixel type is short */
+#if HIGH_BIT_DEPTH
+
+#define INSTRSET 5
+#include "vectorclass.h"
+
+namespace {
+template<int ly>
+int sad_4(pixel * fenc, intptr_t fencstride, pixel * fref, intptr_t frefstride)
+{
+ Vec8s m1, n1;
+
+ Vec4i sum(0);
+ Vec8us sad(0);
+ int max_iterators = (ly >> 4) << 4;
+ int row;
+
+ for (row = 0; row < max_iterators; row += 16)
+ {
+ for (int i = 0; i < 16; i++)
+ {
+ m1.load_a(fenc);
+ n1.load(fref);
+ sad += abs(m1 - n1);
+
+ fenc += fencstride;
+ fref += frefstride;
+ }
+
+ sum += extend_low(sad);
+ sad = 0;
+ }
+
+ while (row++ < ly)
+ {
+ m1.load_a(fenc);
+ n1.load(fref);
+ sad += abs(m1 - n1);
+
+ fenc += fencstride;
+ fref += frefstride;
+ }
+
+ sum += extend_low(sad);
+
+ return horizontal_add(sum);
+}
+
+template<int ly>
+int sad_8(pixel * fenc, intptr_t fencstride, pixel * fref, intptr_t frefstride)
+{
+ Vec8s m1, n1;
+
+ Vec4i sum(0);
+ Vec8us sad(0);
+ int max_iterators = (ly >> 4) << 4;
+ int row;
+
+ for (row = 0; row < max_iterators; row += 16)
+ {
+ for (int i = 0; i < 16; i++)
+ {
+ m1.load_a(fenc);
+ n1.load(fref);
+ sad += abs(m1 - n1);
+
+ fenc += fencstride;
+ fref += frefstride;
+ }
+
+ sum += extend_low(sad) + extend_high(sad);
+ sad = 0;
+ }
+
+ while (row++ < ly)
+ {
+ m1.load_a(fenc);
+ n1.load(fref);
+ sad += abs(m1 - n1);
+
+ fenc += fencstride;
+ fref += frefstride;
+ }
+
+ sum += extend_low(sad) + extend_high(sad);
+
+ return horizontal_add(sum);
+}
+
+template<int ly>
+int sad_12(pixel * fenc, intptr_t fencstride, pixel * fref, intptr_t frefstride)
+{
+ Vec8s m1, n1;
+
+ Vec4i sum(0);
+ Vec8us sad(0);
+ int max_iterators = (ly >> 4) << 4;
+ int row;
+
+ for (row = 0; row < max_iterators; row += 16)
+ {
+ for (int i = 0; i < 16; i++)
+ {
+ m1.load_a(fenc);
+ n1.load(fref);
+ sad += abs(m1 - n1);
+
+ m1.load_a(fenc + 8);
+ m1.cutoff(4);
+ n1.load(fref + 8);
+ n1.cutoff(4);
+ sad += abs(m1 - n1);
+
+ fenc += fencstride;
+ fref += frefstride;
+ }
+
+ sum += extend_low(sad) + extend_high(sad);
+ sad = 0;
+ }
+
+ while (row++ < ly)
+ {
+ m1.load_a(fenc);
+ n1.load(fref);
+ sad += abs(m1 - n1);
+
+ m1.load_a(fenc + 8);
+ m1.cutoff(4);
+ n1.load(fref + 8);
+ n1.cutoff(4);
+ sad += abs(m1 - n1);
+
+ fenc += fencstride;
+ fref += frefstride;
+ }
+
+ sum += extend_low(sad) + extend_high(sad);
+
+ return horizontal_add(sum);
+}
+
+template<int ly>
+int sad_16(pixel * fenc, intptr_t fencstride, pixel * fref, intptr_t frefstride)
+{
+ Vec8s m1, n1;
+
+ Vec4i sum(0);
+ Vec8us sad(0);
+ int max_iterators = (ly >> 3) << 3;
+ int row;
+
+ for (row = 0; row < max_iterators; row += 8)
+ {
+ for (int i = 0; i < 8; i++)
+ {
+ m1.load_a(fenc);
More information about the x265-commits
mailing list