[x265-commits] [x265] cmake: detect clang compiler and fix some link issues
Steve Borho
steve at borho.org
Thu Oct 10 06:25:23 CEST 2013
details: http://hg.videolan.org/x265/rev/d7922b02ef3c
branches:
changeset: 4334:d7922b02ef3c
user: Steve Borho <steve at borho.org>
date: Wed Oct 09 19:03:24 2013 -0500
description:
cmake: detect clang compiler and fix some link issues
Subject: [x265] ipfilter: remove two unused functions
details: http://hg.videolan.org/x265/rev/6a08d0e9178c
branches:
changeset: 4335:6a08d0e9178c
user: Steve Borho <steve at borho.org>
date: Wed Oct 09 19:10:07 2013 -0500
description:
ipfilter: remove two unused functions
Subject: [x265] ipfilter: remove two unreferenced functions
details: http://hg.videolan.org/x265/rev/9518070da726
branches:
changeset: 4336:9518070da726
user: Steve Borho <steve at borho.org>
date: Wed Oct 09 20:10:42 2013 -0500
description:
ipfilter: remove two unreferenced functions
Subject: [x265] intra: remove an unreferenced function
details: http://hg.videolan.org/x265/rev/27a3de7a742c
branches:
changeset: 4337:27a3de7a742c
user: Steve Borho <steve at borho.org>
date: Wed Oct 09 20:10:59 2013 -0500
description:
intra: remove an unreferenced function
Subject: [x265] pixel: remove vector class headers from pixel-ssse3.cpp
details: http://hg.videolan.org/x265/rev/a574f4347855
branches:
changeset: 4338:a574f4347855
user: Steve Borho <steve at borho.org>
date: Wed Oct 09 20:12:23 2013 -0500
description:
pixel: remove vector class headers from pixel-ssse3.cpp
Subject: [x265] vector: bypass a number of functions we do not use when compiled by clang
details: http://hg.videolan.org/x265/rev/5cc9abe88e62
branches:
changeset: 4339:5cc9abe88e62
user: Steve Borho <steve at borho.org>
date: Wed Oct 09 22:11:06 2013 -0500
description:
vector: bypass a number of functions we do not use when compiled by clang
The sooner these classes go away the better
Subject: [x265] blockcopy: isolate vector class routines together (firing squad)
details: http://hg.videolan.org/x265/rev/82f20a7cb593
branches:
changeset: 4340:82f20a7cb593
user: Steve Borho <steve at borho.org>
date: Wed Oct 09 20:13:17 2013 -0500
description:
blockcopy: isolate vector class routines together (firing squad)
Subject: [x265] cmake: enable clang to compile intrinsic primitives without version checks
details: http://hg.videolan.org/x265/rev/edcc92f2b2ab
branches:
changeset: 4341:edcc92f2b2ab
user: Steve Borho <steve at borho.org>
date: Wed Oct 09 20:14:29 2013 -0500
description:
cmake: enable clang to compile intrinsic primitives without version checks
diffstat:
source/CMakeLists.txt | 23 +-
source/VectorClass/vectori128.h | 4 +
source/VectorClass/vectori256.h | 6 +
source/VectorClass/vectori256e.h | 2 +
source/common/CMakeLists.txt | 12 +-
source/common/ipfilter.cpp | 51 -----
source/common/vec/blockcopy-sse3.cpp | 90 +++++-----
source/common/vec/intra-sse3.cpp | 22 --
source/common/vec/ipfilter-sse41.cpp | 307 -----------------------------------
source/common/vec/pixel-ssse3.cpp | 6 +-
source/common/vec/vec-primitives.cpp | 4 +-
11 files changed, 82 insertions(+), 445 deletions(-)
diffs (truncated from 775 to 300 lines):
diff -r 5dceef85c58c -r edcc92f2b2ab source/CMakeLists.txt
--- a/source/CMakeLists.txt Wed Oct 09 15:57:24 2013 -0500
+++ b/source/CMakeLists.txt Wed Oct 09 20:14:29 2013 -0500
@@ -22,7 +22,6 @@ if (APPLE)
add_definitions(-DMACOS)
endif()
-# Enforce coding standards. Full warnings and warnings as errors
if("$ENV{CXX}" STREQUAL "icl")
set(MSVC 1)
endif()
@@ -34,6 +33,7 @@ if(MSVC)
string(REPLACE "/MD" "/MT" ${CompilerFlag} "${${CompilerFlag}}")
endforeach()
endif (STATIC_LINK_CRT)
+ # Enforce coding standards. Full warnings and warnings as errors
add_definitions(/W4 /WX /D_CRT_SECURE_NO_WARNINGS)
add_definitions(/Ob2) # always inline
add_definitions(/Oi) # enable intrinsics
@@ -41,6 +41,9 @@ if(MSVC)
include_directories(compat/msvc)
endif(MSVC)
+if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang")
+ set(CLANG 1)
+endif()
if("$ENV{CXX}" STREQUAL "icpc")
set(GCC 1)
add_definitions(-Wall -Wextra -Wshadow -no-vec)
@@ -54,18 +57,10 @@ elseif(CMAKE_COMPILER_IS_GNUCXX)
# likely be removed as well.
add_definitions(-fabi-version=6)
endif()
-elseif(APPLE)
- exec_program(uname ARGS -v OUTPUT_VARIABLE DARWIN_VERSION)
- string(REGEX MATCH "[0-9]+" DARWIN_VERSION ${DARWIN_VERSION})
- message(STATUS "DARWIN_VERSION=${DARWIN_VERSION}")
- if (DARWIN_VERSION GREATER 12)
- # Detect Mac OS X 10.8 llvm/gcc frankenstein's monster
- if (${CMAKE_CXX_COMPILER} STREQUAL "/usr/bin/c++")
- execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
- set(GCC 1)
- add_definitions(-Wall -Wextra -Wshadow)
- endif()
- endif()
+elseif(CLANG)
+ execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
+ set(GCC 1)
+ add_definitions(-Wall -Wextra -Wshadow -ffast-math -mstackrealign)
endif()
if (GCC)
option(WARNINGS_AS_ERRORS "Stop compiles on first warning" OFF)
@@ -153,6 +148,8 @@ endif()
set_target_properties(x265-shared PROPERTIES OUTPUT_NAME x265)
if(NOT WIN32)
set_target_properties(x265-static PROPERTIES OUTPUT_NAME x265)
+endif()
+if(UNIX AND NOT CLANG)
set_target_properties(x265-shared PROPERTIES LINK_FLAGS "-Wl,-Bsymbolic")
endif()
# WIN32 builds static: x265-static.lib shared: x265.dll + x265.lib (shim loader)
diff -r 5dceef85c58c -r edcc92f2b2ab source/VectorClass/vectori128.h
--- a/source/VectorClass/vectori128.h Wed Oct 09 15:57:24 2013 -0500
+++ b/source/VectorClass/vectori128.h Wed Oct 09 20:14:29 2013 -0500
@@ -1207,10 +1207,12 @@ static ALWAYSINLINE Vec8s & operator >>=
return a;
}
+#ifndef __clang__
// vector operator == : returns true for elements for which a == b
static ALWAYSINLINE Vec8s operator == (Vec8s const & a, Vec8s const & b) {
return _mm_cmpeq_epi16(a, b);
}
+#endif
// vector operator != : returns true for elements for which a != b
static ALWAYSINLINE Vec8s operator != (Vec8s const & a, Vec8s const & b) {
@@ -2684,10 +2686,12 @@ static ALWAYSINLINE Vec2q operator ~ (Ve
return Vec2q( ~ Vec128b(a));
}
+#ifndef __clang__
// vector operator ! : logical not, returns true for elements == 0
static ALWAYSINLINE Vec2q operator ! (Vec2q const & a) {
return a == _mm_setzero_si128();
}
+#endif
// Functions for this class
diff -r 5dceef85c58c -r edcc92f2b2ab source/VectorClass/vectori256.h
--- a/source/VectorClass/vectori256.h Wed Oct 09 15:57:24 2013 -0500
+++ b/source/VectorClass/vectori256.h Wed Oct 09 20:14:29 2013 -0500
@@ -1094,10 +1094,12 @@ static inline Vec16s & operator >>= (Vec
return a;
}
+#ifndef __clang__
// vector operator == : returns true for elements for which a == b
static inline Vec16s operator == (Vec16s const & a, Vec16s const & b) {
return _mm256_cmpeq_epi16(a, b);
}
+#endif
// vector operator != : returns true for elements for which a != b
static inline Vec16s operator != (Vec16s const & a, Vec16s const & b) {
@@ -2398,10 +2400,12 @@ static inline Vec4q operator ~ (Vec4q co
return Vec4q( ~ Vec256b(a));
}
+#ifndef __clang__
// vector operator ! : logical not, returns true for elements == 0
static inline Vec4q operator ! (Vec4q const & a) {
return a == _mm256_setzero_si256();
}
+#endif
// Functions for this class
@@ -4042,6 +4046,7 @@ static inline Vec4q lookup4(Vec4q const
return Vec4q(lookup8(Vec8i(index * 0x200000002ll + 0x100000000ll), Vec8i(table)));
}
+#ifndef __clang__
template <int n>
static inline Vec4q lookup(Vec4q const & index, void const * table) {
if (n <= 0) return 0;
@@ -4064,6 +4069,7 @@ static inline Vec4q lookup(Vec4q const &
return _mm256_i64gather_epi64((const int64_t *)table, index1, 8);
#endif
}
+#endif
/*****************************************************************************
diff -r 5dceef85c58c -r edcc92f2b2ab source/VectorClass/vectori256e.h
--- a/source/VectorClass/vectori256e.h Wed Oct 09 15:57:24 2013 -0500
+++ b/source/VectorClass/vectori256e.h Wed Oct 09 20:14:29 2013 -0500
@@ -2191,10 +2191,12 @@ static inline Vec4q operator ~ (Vec4q co
return Vec4q(~a.get_low(), ~a.get_high());
}
+#ifndef __clang__
// vector operator ! : logical not, returns true for elements == 0
static inline Vec4q operator ! (Vec4q const & a) {
return Vec4q(!a.get_low(), !a.get_high());
}
+#endif
// Functions for this class
diff -r 5dceef85c58c -r edcc92f2b2ab source/common/CMakeLists.txt
--- a/source/common/CMakeLists.txt Wed Oct 09 15:57:24 2013 -0500
+++ b/source/common/CMakeLists.txt Wed Oct 09 20:14:29 2013 -0500
@@ -113,10 +113,16 @@ if(ENABLE_PRIMITIVES_VEC)
endif()
endif()
if(GCC)
- if ("$ENV{CXX}" STREQUAL "icpc")
+ if("$ENV{CXX}" STREQUAL "icpc")
add_definitions(-wd13200) # function using MMX does not call EMMS
endif()
- if("$ENV{CXX}" STREQUAL "icpc" OR NOT GCC_VERSION VERSION_LESS 4.3)
+ if(CLANG)
+ # llvm intrinsic headers trigger this warning
+ add_definitions(-Wno-shadow)
+ # llvm reports these warnings for the vector class headers
+ add_definitions(-Wno-shift-overflow -Wno-uninitialized)
+ endif()
+ if("$ENV{CXX}" STREQUAL "icpc" OR CLANG OR NOT GCC_VERSION VERSION_LESS 4.3)
set(PRIMITIVES vec/blockcopy-sse3.cpp
vec/pixel-sse3.cpp vec/pixel-ssse3.cpp vec/pixel-sse41.cpp
vec/ipfilter-ssse3.cpp vec/ipfilter-sse41.cpp
@@ -132,7 +138,7 @@ if(ENABLE_PRIMITIVES_VEC)
vec/pixel-sse41.cpp vec/ipfilter-sse41.cpp vec/dct-sse41.cpp vec/intra-sse41.cpp
PROPERTIES COMPILE_FLAGS "-msse4.1")
endif()
- if("$ENV{CXX}" STREQUAL "icpc" OR NOT GCC_VERSION VERSION_LESS 4.7)
+ if("$ENV{CXX}" STREQUAL "icpc" OR CLANG OR NOT GCC_VERSION VERSION_LESS 4.7)
set(PRIMITIVES ${PRIMITIVES}
vec/blockcopy-avx2.cpp vec/pixel-avx2.cpp)
set_source_files_properties(
diff -r 5dceef85c58c -r edcc92f2b2ab source/common/ipfilter.cpp
--- a/source/common/ipfilter.cpp Wed Oct 09 15:57:24 2013 -0500
+++ b/source/common/ipfilter.cpp Wed Oct 09 20:14:29 2013 -0500
@@ -424,34 +424,6 @@ void filterVertical_p_p(pixel *src, intp
}
}
-void extendPicCompBorder(pixel* txt, intptr_t stride, int width, int height, int marginX, int marginY)
-{
- int x, y;
-
- for (y = 0; y < height; y++)
- {
- for (x = 0; x < marginX; x++)
- {
- txt[-marginX + x] = txt[0];
- txt[width + x] = txt[width - 1];
- }
-
- txt += stride;
- }
-
- txt -= (stride + marginX);
- for (y = 0; y < marginY; y++)
- {
- ::memcpy(txt + (y + 1) * stride, txt, sizeof(pixel) * (width + (marginX << 1)));
- }
-
- txt -= ((height - 1) * stride);
- for (y = 0; y < marginY; y++)
- {
- ::memcpy(txt - (y + 1) * stride, txt, sizeof(pixel) * (width + (marginX << 1)));
- }
-}
-
void extendCURowColBorder(pixel* txt, intptr_t stride, int width, int height, int marginX)
{
for (int y = 0; y < height; y++)
@@ -470,29 +442,6 @@ void extendCURowColBorder(pixel* txt, in
txt += stride;
}
}
-
-void weightUnidir(short *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int scale, int round, int shift, int offset)
-{
- int shiftNum = IF_INTERNAL_PREC - X265_DEPTH;
- shift = shift + shiftNum;
- round = shift ? (1 << (shift - 1)) : 0;
-
- int x, y;
- for (y = height - 1; y >= 0; y--)
- {
- for (x = width - 1; x >= 0; )
- {
- // note: luma min width is 4
- dst[x] = (pixel)Clip3(0, ((1 << X265_DEPTH) - 1), ((scale * (src[x] + IF_INTERNAL_OFFS) + round) >> shift) + offset);
- x--;
- dst[x] = (pixel)Clip3(0, ((1 << X265_DEPTH) - 1), ((scale * (src[x] + IF_INTERNAL_OFFS) + round) >> shift) + offset);
- x--;
- }
-
- src += srcStride;
- dst += dstStride;
- }
-}
}
namespace x265 {
diff -r 5dceef85c58c -r edcc92f2b2ab source/common/vec/blockcopy-sse3.cpp
--- a/source/common/vec/blockcopy-sse3.cpp Wed Oct 09 15:57:24 2013 -0500
+++ b/source/common/vec/blockcopy-sse3.cpp Wed Oct 09 20:14:29 2013 -0500
@@ -22,49 +22,13 @@
*****************************************************************************/
#include "TLibCommon/TComRom.h"
-
-#define INSTRSET 3
-#include "vectorclass.h"
-
#include "primitives.h"
-#include <string.h>
+#include <xmmintrin.h> // SSE
+#include <pmmintrin.h> // SSE3
+#include <cstring>
namespace {
-
-#if HIGH_BIT_DEPTH
-
-void blockcopy_p_p(int bx, int by, pixel *dst, intptr_t dstride, pixel *src, intptr_t sstride)
-{
- if ((bx & 7) || (((size_t)dst | (size_t)src | sstride | dstride) & 15))
- {
- // slow path, irregular memory alignments or sizes
- for (int y = 0; y < by; y++)
- {
- memcpy(dst, src, bx * sizeof(pixel));
- src += sstride;
- dst += dstride;
- }
- }
- else
- {
- // fast path, multiples of 8 pixel wide blocks
- for (int y = 0; y < by; y++)
- {
- for (int x = 0; x < bx; x += 8)
- {
- Vec8s word;
- word.load_a(src + x);
- word.store_a(dst + x);
- }
-
- src += sstride;
- dst += dstride;
- }
- }
-}
More information about the x265-commits
mailing list