[x265-commits] [x265] clang: re-disable 12x16, sse_pp_12x16 testbench fails

Steve Borho steve at borho.org
Fri Nov 1 07:27:44 CET 2013


details:   http://hg.videolan.org/x265/rev/8f4744bdf6fc
branches:  
changeset: 4798:8f4744bdf6fc
user:      Steve Borho <steve at borho.org>
date:      Thu Oct 31 13:45:25 2013 -0500
description:
clang: re-disable 12x16, sse_pp_12x16 testbench fails
Subject: [x265] disable two avx2 routines which fail unit tests

details:   http://hg.videolan.org/x265/rev/2621639c96b5
branches:  
changeset: 4799:2621639c96b5
user:      Steve Borho <steve at borho.org>
date:      Thu Oct 31 13:48:01 2013 -0500
description:
disable two avx2 routines which fail unit tests
Subject: [x265] api: introduce performance presets

details:   http://hg.videolan.org/x265/rev/0607132e6b11
branches:  
changeset: 4800:0607132e6b11
user:      Steve Borho <steve at borho.org>
date:      Wed Oct 30 18:35:53 2013 -0500
description:
api: introduce performance presets
Subject: [x265] common: lower search range for higher presets with max CTU size 32

details:   http://hg.videolan.org/x265/rev/21da3bba6e70
branches:  
changeset: 4801:21da3bba6e70
user:      Steve Borho <steve at borho.org>
date:      Thu Oct 31 14:44:26 2013 -0500
description:
common: lower search range for higher presets with max CTU size 32
Subject: [x265] asm: disable sad_x3[LUMA_32xN], they cause crashes on Haswell

details:   http://hg.videolan.org/x265/rev/51660f092aa4
branches:  
changeset: 4802:51660f092aa4
user:      Steve Borho <steve at borho.org>
date:      Thu Oct 31 14:57:56 2013 -0500
description:
asm: disable sad_x3[LUMA_32xN], they cause crashes on Haswell

Seen crashes on Windows, Mac, and Linux.  The only constant is Haswell.
Subject: [x265] asm: disable more sad_x3 functions which cause crashes on Haswell

details:   http://hg.videolan.org/x265/rev/885e41fac726
branches:  
changeset: 4803:885e41fac726
user:      Steve Borho <steve at borho.org>
date:      Thu Oct 31 15:13:39 2013 -0500
description:
asm: disable more sad_x3 functions which cause crashes on Haswell
Subject: [x265] api: add zero-latency tune target

details:   http://hg.videolan.org/x265/rev/8afb161419df
branches:  
changeset: 4804:8afb161419df
user:      Steve Borho <steve at borho.org>
date:      Thu Oct 31 16:14:13 2013 -0500
description:
api: add zero-latency tune target

This just disables lookahead and B frames at the moment.
Subject: [x265] primitives: fix compile warning exposed when ASM and intrinsics are both disabled

details:   http://hg.videolan.org/x265/rev/ad6d6ddd7037
branches:  stable
changeset: 4805:ad6d6ddd7037
user:      Steve Borho <steve at borho.org>
date:      Thu Oct 31 16:59:59 2013 -0500
description:
primitives: fix compile warning exposed when ASM and intrinsics are both disabled
Subject: [x265] common: disable MSVC warning that is exposed when compiling without primitives

details:   http://hg.videolan.org/x265/rev/01e77fde7194
branches:  stable
changeset: 4806:01e77fde7194
user:      Steve Borho <steve at borho.org>
date:      Thu Oct 31 16:53:46 2013 -0500
description:
common: disable MSVC warning that is exposed when compiling without primitives
Subject: [x265] cpu: move ASM fallback functions out of vec-primitives.cpp

details:   http://hg.videolan.org/x265/rev/e1dde58cf6e1
branches:  stable
changeset: 4807:e1dde58cf6e1
user:      Steve Borho <steve at borho.org>
date:      Thu Oct 31 16:57:05 2013 -0500
description:
cpu: move ASM fallback functions out of vec-primitives.cpp

This fixes link errors when ASM and vector primitives are both disabled
Subject: [x265] api: give structs the same name as their typedef

details:   http://hg.videolan.org/x265/rev/30a0c2c5fcbd
branches:  stable
changeset: 4808:30a0c2c5fcbd
user:      Steve Borho <steve at borho.org>
date:      Thu Oct 31 16:47:46 2013 -0500
description:
api: give structs the same name as their typedef

This allows them to be forward-decl'd.
Subject: [x265] Merge with stable

details:   http://hg.videolan.org/x265/rev/a4e9f242fdf3
branches:  
changeset: 4809:a4e9f242fdf3
user:      Steve Borho <steve at borho.org>
date:      Thu Oct 31 17:05:53 2013 -0500
description:
Merge with stable
Subject: [x265] common: fix int typecast to operate on results of float expression

details:   http://hg.videolan.org/x265/rev/ae576a38ca5b
branches:  
changeset: 4810:ae576a38ca5b
user:      Steve Borho <steve at borho.org>
date:      Thu Oct 31 16:52:00 2013 -0500
description:
common: fix int typecast to operate on results of float expression
Subject: [x265] cli: tweaks for command line help

details:   http://hg.videolan.org/x265/rev/bd53cb226710
branches:  
changeset: 4811:bd53cb226710
user:      Steve Borho <steve at borho.org>
date:      Thu Oct 31 17:33:31 2013 -0500
description:
cli: tweaks for command line help

diffstat:

 source/CMakeLists.txt                |    2 +-
 source/common/common.cpp             |  174 ++++++++++++++++++++++++++++++++++-
 source/common/cpu.cpp                |   49 +++++++++
 source/common/vec/pixel-sse41.cpp    |    2 +
 source/common/vec/vec-primitives.cpp |   49 ---------
 source/common/x86/asm-primitives.cpp |   13 +-
 source/x265.cpp                      |   36 ++++++-
 source/x265.def.in                   |    1 +
 source/x265.h                        |   39 ++++++-
 9 files changed, 295 insertions(+), 70 deletions(-)

diffs (truncated from 593 to 300 lines):

diff -r e842b2a4aeeb -r bd53cb226710 source/CMakeLists.txt
--- a/source/CMakeLists.txt	Thu Oct 31 13:19:33 2013 -0500
+++ b/source/CMakeLists.txt	Thu Oct 31 17:33:31 2013 -0500
@@ -12,7 +12,7 @@ include(CheckIncludeFiles)
 include(CheckFunctionExists)
 
 # X265_BUILD must be incremented each time the public API is changed
-set(X265_BUILD 2)
+set(X265_BUILD 3)
 configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
                "${PROJECT_BINARY_DIR}/x265.def")
 configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
diff -r e842b2a4aeeb -r bd53cb226710 source/common/common.cpp
--- a/source/common/common.cpp	Thu Oct 31 13:19:33 2013 -0500
+++ b/source/common/common.cpp	Thu Oct 31 17:33:31 2013 -0500
@@ -95,7 +95,7 @@ void x265_free(void *ptr)
  * qp to qscale. */
 int x265_exp2fix8(double x)
 {
-    int i =(int) x * (-64.f / 6.f) + 512.5f;
+    int i = (int)(x * (-64.f / 6.f) + 512.5f);
     if (i < 0) return 0;
     if (i > 1023) return 0xffff;
     return (x265_exp2_lut[i & 63] + 256) << (i >> 6) >> 8;
@@ -238,6 +238,175 @@ int x265_param_apply_profile(x265_param 
     return 0;
 }
 
+extern "C"
+int x265_param_default_preset(x265_param *param, const char *preset, const char *tune)
+{
+    x265_param_default(param);
+
+    if (preset)
+    {
+        char *end;
+        int i = strtol(preset, &end, 10);
+        if (*end == 0 && i >= 0 && i < (int)(sizeof(x265_preset_names)/sizeof(*x265_preset_names)-1))
+            preset = x265_preset_names[i];
+
+        if (!strcmp(preset, "ultrafast"))
+        {
+            param->maxCUSize = 32;
+            param->searchRange = 24;
+            param->bFrameAdaptive = 0;
+            param->bframes = 4;
+            param->tuQTMaxInterDepth = 1;
+            param->tuQTMaxIntraDepth = 1;
+            param->rdLevel = 0;
+            param->subpelRefine = 0;
+            param->maxNumMergeCand = 1;
+            param->searchMethod = 0;
+            param->bEnableRectInter = 0;
+            param->bEnableAMP = 0;
+            param->bEnableTransformSkip = 0;
+            param->bEnableEarlySkip = 1;
+            param->bEnableCbfFastMode = 1;
+            param->bEnableLoopFilter = 0;
+            param->bEnableSAO = 0;
+            param->bEnableSignHiding = 0;
+            param->bEnableWeightedPred = 0;
+        }
+        else if (!strcmp(preset, "superfast"))
+        {
+            param->maxCUSize = 32;
+            param->searchRange = 24;
+            param->bFrameAdaptive = 0;
+            param->bframes = 4;
+            param->tuQTMaxInterDepth = 1;
+            param->tuQTMaxIntraDepth = 1;
+            param->rdLevel = 0;
+            param->subpelRefine = 1;
+            param->maxNumMergeCand = 1;
+            param->searchMethod = 1;
+            param->bEnableRectInter = 0;
+            param->bEnableAMP = 0;
+            param->bEnableTransformSkip = 0;
+            param->bEnableEarlySkip = 1;
+            param->bEnableCbfFastMode = 1;
+            param->bEnableSAO = 0;
+            param->bEnableSignHiding = 0;
+            param->bEnableWeightedPred = 0;
+        }
+        else if (!strcmp(preset, "veryfast"))
+        {
+            param->bFrameAdaptive = 0;
+            param->bframes = 4;
+            param->tuQTMaxInterDepth = 1;
+            param->tuQTMaxIntraDepth = 1;
+            param->rdLevel = 0;
+            param->subpelRefine = 1;
+            param->searchMethod = 1;
+            param->maxNumMergeCand = 2;
+            param->bEnableRectInter = 0;
+            param->bEnableAMP = 0;
+            param->bEnableTransformSkip = 0;
+            param->bEnableEarlySkip = 1;
+            param->bEnableCbfFastMode = 1;
+        }
+        else if (!strcmp(preset, "faster"))
+        {
+            param->bFrameAdaptive = 0;
+            param->bframes = 4;
+            param->tuQTMaxInterDepth = 1;
+            param->tuQTMaxIntraDepth = 1;
+            param->rdLevel = 0;
+            param->subpelRefine = 1;
+            param->searchMethod = 1;
+            param->maxNumMergeCand = 2;
+            param->bEnableRectInter = 0;
+            param->bEnableAMP = 0;
+            param->bEnableTransformSkip = 0;
+            param->bEnableEarlySkip = 1;
+        }
+        else if (!strcmp(preset, "fast"))
+        {
+            param->tuQTMaxInterDepth = 1;
+            param->tuQTMaxIntraDepth = 1;
+            param->rdLevel = 0;
+            param->subpelRefine = 1;
+            param->searchMethod = 1;
+            param->maxNumMergeCand = 2;
+            param->bEnableAMP = 0;
+            param->bEnableTransformSkip = 0;
+        }
+        else if (!strcmp(preset, "medium"))
+        {
+            param->tuQTMaxInterDepth = 1;
+            param->tuQTMaxIntraDepth = 1;
+            param->rdLevel = 0;
+            param->maxNumMergeCand = 3;
+            param->bEnableTransformSkip = 0;
+        }
+        else if (!strcmp(preset, "slow"))
+        {
+            param->bFrameAdaptive = 2;
+            param->bframes = 4;
+            param->tuQTMaxInterDepth = 1;
+            param->tuQTMaxIntraDepth = 1;
+            param->rdLevel = 1;
+            param->maxNumMergeCand = 3;
+            param->bEnableTransformSkip = 0;
+        }
+        else if (!strcmp(preset, "slower"))
+        {
+            param->bFrameAdaptive = 2;
+            param->lookaheadDepth = 20;
+            param->bframes = 5;
+            param->tuQTMaxInterDepth = 2;
+            param->tuQTMaxIntraDepth = 2;
+            param->rdLevel = 2;
+            param->maxNumMergeCand = 4;
+            param->bEnableTransformSkip = 0;
+            param->maxNumReferences = 3;
+        }
+        else if (!strcmp(preset, "veryslow"))
+        {
+            param->bFrameAdaptive = 2;
+            param->lookaheadDepth = 30;
+            param->bframes = 9;
+            param->maxNumReferences = 5;
+        }
+        else if (!strcmp(preset, "placebo"))
+        {
+            param->bFrameAdaptive = 2;
+            param->lookaheadDepth = 60;
+            param->bframes = 16;
+            param->maxNumReferences = 16;
+            param->searchRange = 124;
+            // TODO: optimized esa
+        }
+        else
+            return -1;
+    }
+    if (tune)
+    {
+        if (!strcmp(tune, "psnr"))
+        {
+            // nop; currently the default
+        }
+        else if (!strcmp(tune, "ssim"))
+        {
+            // not yet supported
+        }
+        else if (!strcmp(tune, "zero-latency"))
+        {
+            param->bFrameAdaptive = 0;
+            param->bframes = 0;
+            param->lookaheadDepth = 0;
+        }
+        else
+            return -1;
+    }
+
+    return 0;
+}
+
 static inline int _confirm(x265_param *param, bool bflag, const char* message)
 {
     if (!bflag)
@@ -474,6 +643,9 @@ int x265_param_parse(x265_param *p, cons
 
     valuewasnull = !value;
 
+#if defined(_MSC_VER)
+#pragma warning(disable: 4127) // conditional expression is constant
+#endif
 #define OPT(STR) else if (!strcmp(name, STR))
     if (0) ;
     OPT("fps")
diff -r e842b2a4aeeb -r bd53cb226710 source/common/cpu.cpp
--- a/source/common/cpu.cpp	Thu Oct 31 13:19:33 2013 -0500
+++ b/source/common/cpu.cpp	Thu Oct 31 17:33:31 2013 -0500
@@ -289,3 +289,52 @@ uint32_t cpu_detect(void)
     return cpu;
 }
 }
+
+#if !ENABLE_ASM_PRIMITIVES
+#include <intrin.h>
+extern "C" {
+int x265_cpu_cpuid_test(void)
+{
+    return 0;
+}
+
+void x265_cpu_cpuid(uint32_t op, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
+{
+    int output[4];
+
+    __cpuidex(output, op, 0);
+    *eax = output[0];
+    *ebx = output[1];
+    *ecx = output[2];
+    *edx = output[3];
+}
+
+#if defined(_MSC_VER)
+#pragma warning(disable: 4100)
+#endif
+void x265_cpu_xgetbv(uint32_t op, uint32_t *eax, uint32_t *edx)
+{
+#if (defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040000) || (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1200)
+
+    // MSVC 2010 SP1 or later, or similar Intel release
+    uint64_t out = _xgetbv(op);
+
+#elif defined(__GNUC__)    // use inline assembly, Gnu/AT&T syntax
+
+    uint32_t a, d;
+    __asm("xgetbv" : "=a" (a), "=d" (d) : "c" (ctr) :);
+    *eax = a;
+    *edx = d;
+    return;
+
+#elif defined(_WIN64)      // On x64 with older compilers, this is impossible
+
+    uint64_t out = 0;
+
+#endif
+
+    *eax = (uint32_t)out;
+    *edx = (uint32_t)(out >> 32);
+}
+}
+#endif // if !ENABLE_ASM_PRIMITIVES
diff -r e842b2a4aeeb -r bd53cb226710 source/common/vec/pixel-sse41.cpp
--- a/source/common/vec/pixel-sse41.cpp	Thu Oct 31 13:19:33 2013 -0500
+++ b/source/common/vec/pixel-sse41.cpp	Thu Oct 31 17:33:31 2013 -0500
@@ -2715,7 +2715,9 @@ void Setup_Vec_PixelPrimitives_sse41(Enc
     SETUP_NONSAD(16, 4);
     SETUP_NONSAD(16, 12);
     SETUP_NONSAD(4, 16); // 4x16 SAD covered by assembly
+#if !defined(__clang__)
     SETUP_NONSAD(12, 16);
+#endif
 
     SETUP_NONSAD(8, 8); // 8x8 SAD covered by assembly
     SETUP_NONSAD(8, 4); // 8x4 SAD covered by assembly
diff -r e842b2a4aeeb -r bd53cb226710 source/common/vec/vec-primitives.cpp
--- a/source/common/vec/vec-primitives.cpp	Thu Oct 31 13:19:33 2013 -0500
+++ b/source/common/vec/vec-primitives.cpp	Thu Oct 31 17:33:31 2013 -0500
@@ -24,55 +24,6 @@
 #include "primitives.h"
 #include "x265.h"
 
-#if !ENABLE_ASM_PRIMITIVES
-#include <intrin.h>
-extern "C" {
-int x265_cpu_cpuid_test(void)
-{
-    return 0;
-}
-
-void x265_cpu_cpuid(uint32_t op, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
-{
-    int output[4];
-
-    __cpuidex(output, op, 0);
-    *eax = output[0];


More information about the x265-commits mailing list