[x265-commits] [x265] weightp: add math include for POSIX systems

Sat Feb 1 22:17:32 CET 2014

details:   http://hg.videolan.org/x265/rev/389328343ccd
branches:  
changeset: 5963:389328343ccd
user:      Steve Borho <steve at borho.org>
date:      Sat Feb 01 13:57:03 2014 -0600
description:
weightp: add math include for POSIX systems
Subject: [x265] cmake: improve handling of unknown system processor

details:   http://hg.videolan.org/x265/rev/7f1d29a897c1
branches:  
changeset: 5964:7f1d29a897c1
user:      Steve Borho <steve at borho.org>
date:      Sat Feb 01 14:01:54 2014 -0600
description:
cmake: improve handling of unknown system processor
Subject: [x265] cmake: add two more system processor names that are synonyms of x86

details:   http://hg.videolan.org/x265/rev/68f2d08654b9
branches:  
changeset: 5965:68f2d08654b9
user:      Steve Borho <steve at borho.org>
date:      Sat Feb 01 14:12:57 2014 -0600
description:
cmake: add two more system processor names that are synonyms of x86
Subject: [x265] cpu: port ARM cpu detection code from x264

details:   http://hg.videolan.org/x265/rev/0e734b111b1e
branches:  
changeset: 5966:0e734b111b1e
user:      Steve Borho <steve at borho.org>
date:      Sat Feb 01 14:47:45 2014 -0600
description:
cpu: port ARM cpu detection code from x264
Subject: [x265] cmake: reorg main file for readability, no behavior changes

details:   http://hg.videolan.org/x265/rev/2812a45ace5c
branches:  
changeset: 5967:2812a45ace5c
user:      Steve Borho <steve at borho.org>
date:      Sat Feb 01 14:48:18 2014 -0600
description:
cmake: reorg main file for readability, no behavior changes
Subject: [x265] cmake: only allow assembly to be enabled for X86, our only asm platform

details:   http://hg.videolan.org/x265/rev/2a7ff626383d
branches:  
changeset: 5968:2a7ff626383d
user:      Steve Borho <steve at borho.org>
date:      Sat Feb 01 15:09:30 2014 -0600
description:
cmake: only allow assembly to be enabled for X86, our only asm platform
Subject: [x265] cmake: use strlower on CMAKE_SYSTEM_PROCESSOR

details:   http://hg.videolan.org/x265/rev/9e2b076968e1
branches:  
changeset: 5969:9e2b076968e1
user:      Steve Borho <steve at borho.org>
date:      Sat Feb 01 15:10:02 2014 -0600
description:
cmake: use strlower on CMAKE_SYSTEM_PROCESSOR
Subject: [x265] cmake: fix warning

details:   http://hg.videolan.org/x265/rev/737ceb148a27
branches:  
changeset: 5970:737ceb148a27
user:      Steve Borho <steve at borho.org>
date:      Sat Feb 01 15:10:44 2014 -0600
description:
cmake: fix warning
Subject: [x265] reduce unused context models

details:   http://hg.videolan.org/x265/rev/33929c36a646
branches:  
changeset: 5971:33929c36a646
user:      Satoshi Nakagawa <nakagawa424 at oki.com>
date:      Wed Jan 29 23:28:15 2014 +0900
description:
reduce unused context models

diffstat:

 source/CMakeLists.txt                 |  51 ++++++++++++-------
 source/Lib/TLibCommon/ContextTables.h |  12 ++--
 source/Lib/TLibCommon/TComDataCU.cpp  |  13 -----
 source/Lib/TLibCommon/TComDataCU.h    |   2 +-
 source/Lib/TLibCommon/TComTrQuant.cpp |   1 -
 source/Lib/TLibCommon/TComTrQuant.h   |   2 +-
 source/Lib/TLibEncoder/TEncSbac.cpp   |  10 +-
 source/common/cpu.cpp                 |  90 ++++++++++++++++++++++++++++++++--
 source/common/primitives.cpp          |   3 -
 source/encoder/weightPrediction.cpp   |   1 +
 source/x265.h                         |   6 ++
 11 files changed, 136 insertions(+), 55 deletions(-)

diffs (truncated from 413 to 300 lines):

diff -r 413ad959a5c6 -r 33929c36a646 source/CMakeLists.txt

--- a/source/CMakeLists.txt	Fri Jan 31 16:53:36 2014 -0600
+++ b/source/CMakeLists.txt	Wed Jan 29 23:28:15 2014 +0900
@@ -21,15 +21,34 @@ configure_file("${PROJECT_SOURCE_DIR}/x2
 
 SET(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake" "${CMAKE_MODULE_PATH}")
 
-if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i386" OR ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86")
+# System architecture detection
+string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" SYSPROC)
+if(${SYSPROC} STREQUAL "i386" OR ${SYSPROC} STREQUAL "amd64" OR
+   ${SYSPROC} STREQUAL "x86_64" OR ${SYSPROC} STREQUAL "x86")
+    message(STATUS "Detected x86 system processor")
     set(X86 1)
     add_definitions(-DX265_ARCH_X86=1)
     if("${CMAKE_SIZEOF_VOID_P}" MATCHES 8)
         set(X64 1)
         add_definitions(-DX86_64=1)
     endif()
+elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "arm")
+    message(STATUS "Detected ARM system processor")
+    set(ARM 1)
+    add_definitions(-DX265_ARCH_ARM=1)
+else()
+    message(STATUS "CMAKE_SYSTEM_PROCESSOR value `${CMAKE_SYSTEM_PROCESSOR}` is unknown")
+    message(STATUS "Please add this value near ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE}")
 endif()
 
+if(UNIX)
+    SET(PLATFORM_LIBS pthread)
+    if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
+        SET(PLATFORM_LIBS ${PLATFORM_LIBS} rt)
+    endif(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
+endif(UNIX)
+
+# Compiler detection
 if(CMAKE_GENERATOR STREQUAL "Xcode")
   set(XCODE 1)
 endif()
@@ -98,6 +117,18 @@ if (GCC)
     endif(X64 AND NOT WIN32)
 endif(GCC)
 
+find_package(Yasm)
+if(YASM_FOUND AND X86)
+    if (YASM_VERSION_STRING VERSION_LESS "1.2.0")
+        message(STATUS "Yasm version ${YASM_VERSION_STRING} is too old. 1.2.0 or later required")
+        option(ENABLE_ASSEMBLY "Enable use of assembly coded primitives" OFF)
+    else()
+        message(STATUS "Found Yasm ${YASM_VERSION_STRING} to build assembly primitives")
+        option(ENABLE_ASSEMBLY "Enable use of assembly coded primitives" ON)
+    endif()
+endif()
+
+# Build options
 option(HIGH_BIT_DEPTH "Use 16bit pixels internally" OFF)
 if(HIGH_BIT_DEPTH)
     add_definitions(-DHIGH_BIT_DEPTH=1)
@@ -112,24 +143,6 @@ else(LOG_CU_STATISTICS)
     add_definitions(-DLOG_CU_STATISTICS=0)
 endif(LOG_CU_STATISTICS)
 
-find_package(Yasm)
-if(YASM_FOUND)
-    if (YASM_VERSION_STRING VERSION_LESS "1.2.0")
-        message(STATUS "Yasm version ${YASM_VERSION_STRING} is too old. 1.2.0 or later required")
-        option(ENABLE_ASSEMBLY "Enable use of assembly coded primitives" OFF)
-    else()
-        message(STATUS "Found Yasm ${YASM_VERSION_STRING} to build assembly primitives")
-        option(ENABLE_ASSEMBLY "Enable use of assembly coded primitives" ON)
-    endif()
-endif(YASM_FOUND)
-
-if(UNIX)
-    SET(PLATFORM_LIBS pthread)
-    if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
-        SET(PLATFORM_LIBS ${PLATFORM_LIBS} rt)
-    endif(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
-endif(UNIX)
-
 option(ENABLE_PPA "Enable PPA profiling instrumentation" OFF)
 if(ENABLE_PPA)
     add_definitions(-DENABLE_PPA)
diff -r 413ad959a5c6 -r 33929c36a646 source/Lib/TLibCommon/ContextTables.h
--- a/source/Lib/TLibCommon/ContextTables.h	Fri Jan 31 16:53:36 2014 -0600
+++ b/source/Lib/TLibCommon/ContextTables.h	Wed Jan 29 23:28:15 2014 +0900
@@ -67,7 +67,7 @@
 
 #define NUM_REF_NO_CTX                2       ///< number of context models for reference index
 #define NUM_TRANS_SUBDIV_FLAG_CTX     3       ///< number of context models for transform subdivision flags
-#define NUM_QT_CBF_CTX                4       ///< number of context models for QT CBF
+#define NUM_QT_CBF_CTX                6       ///< number of context models for QT CBF
 #define NUM_QT_ROOT_CBF_CTX           1       ///< number of context models for QT ROOT CBF
 #define NUM_DELTA_QP_CTX              3       ///< number of context models for dQP
 
@@ -111,7 +111,7 @@
 #define OFF_REF_NO_CTX                      (OFF_INTER_DIR_CTX          +     NUM_INTER_DIR_CTX)
 #define OFF_MV_RES_CTX                      (OFF_REF_NO_CTX             +     NUM_REF_NO_CTX)
 #define OFF_QT_CBF_CTX                      (OFF_MV_RES_CTX             +     NUM_MV_RES_CTX)
-#define OFF_TRANS_SUBDIV_FLAG_CTX           (OFF_QT_CBF_CTX             + 2 * NUM_QT_CBF_CTX)
+#define OFF_TRANS_SUBDIV_FLAG_CTX           (OFF_QT_CBF_CTX             +     NUM_QT_CBF_CTX)
 #define OFF_QT_ROOT_CBF_CTX                 (OFF_TRANS_SUBDIV_FLAG_CTX  +     NUM_TRANS_SUBDIV_FLAG_CTX)
 #define OFF_SIG_CG_FLAG_CTX                 (OFF_QT_ROOT_CBF_CTX        +     NUM_QT_ROOT_CBF_CTX)
 #define OFF_SIG_FLAG_CTX                    (OFF_SIG_CG_FLAG_CTX        + 2 * NUM_SIG_CG_FLAG_CTX)
@@ -264,11 +264,11 @@ static const uint8_t
 };
 
 static const uint8_t
-    INIT_QT_CBF[3][2 * NUM_QT_CBF_CTX] =
+    INIT_QT_CBF[3][NUM_QT_CBF_CTX] =
 {
-    { 153,  111,  CNU,  CNU,  149,   92,  167,  154, },
-    { 153,  111,  CNU,  CNU,  149,  107,  167,  154, },
-    { 111,  141,  CNU,  CNU,   94,  138,  182,  154, },
+    { 153,  111,  149,   92,  167,  154, },
+    { 153,  111,  149,  107,  167,  154, },
+    { 111,  141,   94,  138,  182,  154, },
 };
 
 static const uint8_t
diff -r 413ad959a5c6 -r 33929c36a646 source/Lib/TLibCommon/TComDataCU.cpp
--- a/source/Lib/TLibCommon/TComDataCU.cpp	Fri Jan 31 16:53:36 2014 -0600
+++ b/source/Lib/TLibCommon/TComDataCU.cpp	Wed Jan 29 23:28:15 2014 +0900
@@ -1240,19 +1240,6 @@ uint32_t TComDataCU::getCtxSplitFlag(uin
     return ctx;
 }
 
-uint32_t TComDataCU::getCtxQtCbf(TextType ttype, uint32_t trDepth)
-{
-    if (ttype)
-    {
-        return trDepth;
-    }
-    else
-    {
-        const uint32_t ctx = (trDepth == 0 ? 1 : 0);
-        return ctx;
-    }
-}
-
 uint32_t TComDataCU::getQuadtreeTULog2MinSizeInCU(uint32_t absPartIdx)
 {
     uint32_t log2CbSize = g_convertToBit[getWidth(absPartIdx)] + 2;
diff -r 413ad959a5c6 -r 33929c36a646 source/Lib/TLibCommon/TComDataCU.h
--- a/source/Lib/TLibCommon/TComDataCU.h	Fri Jan 31 16:53:36 2014 -0600
+++ b/source/Lib/TLibCommon/TComDataCU.h	Wed Jan 29 23:28:15 2014 +0900
@@ -467,7 +467,7 @@ public:
     // -------------------------------------------------------------------------------------------------------------------
 
     uint32_t      getCtxSplitFlag(uint32_t absPartIdx, uint32_t depth);
-    uint32_t      getCtxQtCbf(TextType ttype, uint32_t trDepth);
+    uint32_t      getCtxQtCbf(TextType ttype, uint32_t trDepth) { return ttype == TEXT_LUMA ? (trDepth == 0 ? 1 : 0) : trDepth + 2; }
 
     uint32_t      getCtxSkipFlag(uint32_t absPartIdx);
     uint32_t      getCtxInterDir(uint32_t absPartIdx);
diff -r 413ad959a5c6 -r 33929c36a646 source/Lib/TLibCommon/TComTrQuant.cpp
--- a/source/Lib/TLibCommon/TComTrQuant.cpp	Fri Jan 31 16:53:36 2014 -0600
+++ b/source/Lib/TLibCommon/TComTrQuant.cpp	Wed Jan 29 23:28:15 2014 +0900
@@ -789,7 +789,6 @@ uint32_t TComTrQuant::xRateDistOptQuant(
     else
     {
         ctxCbf    = cu->getCtxQtCbf(ttype, cu->getTransformIdx(absPartIdx));
-        ctxCbf    = (ttype ? NUM_QT_CBF_CTX : 0) + ctxCbf;
         bestCost  = blockUncodedCost + xGetICost(m_estBitsSbac->blockCbpBits[ctxCbf][0]);
         baseCost += xGetICost(m_estBitsSbac->blockCbpBits[ctxCbf][1]);
     }
diff -r 413ad959a5c6 -r 33929c36a646 source/Lib/TLibCommon/TComTrQuant.h
--- a/source/Lib/TLibCommon/TComTrQuant.h	Fri Jan 31 16:53:36 2014 -0600
+++ b/source/Lib/TLibCommon/TComTrQuant.h	Wed Jan 29 23:28:15 2014 +0900
@@ -68,7 +68,7 @@ typedef struct
     int greaterOneBits[NUM_ONE_FLAG_CTX][2];
     int levelAbsBits[NUM_ABS_FLAG_CTX][2];
 
-    int blockCbpBits[2 * NUM_QT_CBF_CTX][2];
+    int blockCbpBits[NUM_QT_CBF_CTX][2];
     int blockRootCbpBits[4][2];
 } estBitsSbacStruct;
 
diff -r 413ad959a5c6 -r 33929c36a646 source/Lib/TLibEncoder/TEncSbac.cpp
--- a/source/Lib/TLibEncoder/TEncSbac.cpp	Fri Jan 31 16:53:36 2014 -0600
+++ b/source/Lib/TLibEncoder/TEncSbac.cpp	Wed Jan 29 23:28:15 2014 +0900
@@ -222,7 +222,7 @@ void TEncSbac::resetEntropy()
     initBuffer(&m_contextModels[OFF_INTER_DIR_CTX], sliceType, qp, (UChar*)INIT_INTER_DIR, NUM_INTER_DIR_CTX);
     initBuffer(&m_contextModels[OFF_REF_NO_CTX], sliceType, qp, (UChar*)INIT_REF_PIC, NUM_REF_NO_CTX);
     initBuffer(&m_contextModels[OFF_MV_RES_CTX], sliceType, qp, (UChar*)INIT_MVD, NUM_MV_RES_CTX);
-    initBuffer(&m_contextModels[OFF_QT_CBF_CTX], sliceType, qp, (UChar*)INIT_QT_CBF, 2 * NUM_QT_CBF_CTX);
+    initBuffer(&m_contextModels[OFF_QT_CBF_CTX], sliceType, qp, (UChar*)INIT_QT_CBF, NUM_QT_CBF_CTX);
     initBuffer(&m_contextModels[OFF_TRANS_SUBDIV_FLAG_CTX], sliceType, qp, (UChar*)INIT_TRANS_SUBDIV_FLAG, NUM_TRANS_SUBDIV_FLAG_CTX);
     initBuffer(&m_contextModels[OFF_QT_ROOT_CBF_CTX], sliceType, qp, (UChar*)INIT_QT_ROOT_CBF, NUM_QT_ROOT_CBF_CTX);
     initBuffer(&m_contextModels[OFF_SIG_CG_FLAG_CTX], sliceType, qp, (UChar*)INIT_SIG_CG_FLAG, 2 * NUM_SIG_CG_FLAG_CTX);
@@ -273,7 +273,7 @@ void TEncSbac::determineCabacInitIdx()
             curCost += calcCost(&m_contextModels[OFF_INTER_DIR_CTX], curSliceType, qp, (UChar*)INIT_INTER_DIR, NUM_INTER_DIR_CTX);
             curCost += calcCost(&m_contextModels[OFF_REF_NO_CTX], curSliceType, qp, (UChar*)INIT_REF_PIC, NUM_REF_NO_CTX);
             curCost += calcCost(&m_contextModels[OFF_MV_RES_CTX], curSliceType, qp, (UChar*)INIT_MVD, NUM_MV_RES_CTX);
-            curCost += calcCost(&m_contextModels[OFF_QT_CBF_CTX], curSliceType, qp, (UChar*)INIT_QT_CBF, 2 * NUM_QT_CBF_CTX);
+            curCost += calcCost(&m_contextModels[OFF_QT_CBF_CTX], curSliceType, qp, (UChar*)INIT_QT_CBF, NUM_QT_CBF_CTX);
             curCost += calcCost(&m_contextModels[OFF_TRANS_SUBDIV_FLAG_CTX], curSliceType, qp, (UChar*)INIT_TRANS_SUBDIV_FLAG, NUM_TRANS_SUBDIV_FLAG_CTX);
             curCost += calcCost(&m_contextModels[OFF_QT_ROOT_CBF_CTX], curSliceType, qp, (UChar*)INIT_QT_ROOT_CBF, NUM_QT_ROOT_CBF_CTX);
             curCost += calcCost(&m_contextModels[OFF_SIG_CG_FLAG_CTX], curSliceType, qp, (UChar*)INIT_SIG_CG_FLAG, 2 * NUM_SIG_CG_FLAG_CTX);
@@ -1895,7 +1895,7 @@ void TEncSbac::codeQtCbf(TComDataCU* cu,
     uint32_t cbf = cu->getCbf(absPartIdx, ttype, trDepth);
     uint32_t ctx = cu->getCtxQtCbf(ttype, trDepth);
 
-    m_binIf->encodeBin(cbf, m_contextModels[OFF_QT_CBF_CTX + (ttype ? NUM_QT_CBF_CTX : 0) + ctx]);
+    m_binIf->encodeBin(cbf, m_contextModels[OFF_QT_CBF_CTX + ctx]);
     DTRACE_CABAC_VL(g_nSymbolCounter++)
     DTRACE_CABAC_T("\tparseQtCbf()")
     DTRACE_CABAC_T("\tsymbol=")
@@ -2040,7 +2040,7 @@ void TEncSbac::codeQtCbfZero(TComDataCU*
     uint32_t cbf = 0;
     uint32_t ctx = cu->getCtxQtCbf(ttype, trDepth);
 
-    m_binIf->encodeBin(cbf, m_contextModels[OFF_QT_CBF_CTX + (ttype ? NUM_QT_CBF_CTX : 0) + ctx]);
+    m_binIf->encodeBin(cbf, m_contextModels[OFF_QT_CBF_CTX + ctx]);
 }
 
 void TEncSbac::codeQtRootCbfZero(TComDataCU*)
@@ -2457,7 +2457,7 @@ void TEncSbac::estCBFBit(estBitsSbacStru
 {
     ContextModel *ctx = &m_contextModels[OFF_QT_CBF_CTX];
 
-    for (uint32_t ctxInc = 0; ctxInc < 2 * NUM_QT_CBF_CTX; ctxInc++)
+    for (uint32_t ctxInc = 0; ctxInc < NUM_QT_CBF_CTX; ctxInc++)
     {
         estBitsSbac->blockCbpBits[ctxInc][0] = sbacGetEntropyBits(ctx[ctxInc].m_state, 0);
         estBitsSbac->blockCbpBits[ctxInc][1] = sbacGetEntropyBits(ctx[ctxInc].m_state, 1);
diff -r 413ad959a5c6 -r 33929c36a646 source/common/cpu.cpp
--- a/source/common/cpu.cpp	Fri Jan 31 16:53:36 2014 -0600
+++ b/source/common/cpu.cpp	Wed Jan 29 23:28:15 2014 +0900
@@ -40,10 +40,29 @@
 #include <machine/cpu.h>
 #endif
 
+#if X265_ARCH_ARM
+#include <signal.h>
+#include <setjmp.h>
+static sigjmp_buf jmpbuf;
+static volatile sig_atomic_t canjump = 0;
+
+static void sigill_handler(int sig)
+{
+    if (!canjump)
+    {
+        signal(sig, SIG_DFL);
+        raise(sig);
+    }
+
+    canjump = 0;
+    siglongjmp(jmpbuf, 1);
+}
+#endif
+
 namespace x265 {
-#if X265_ARCH_X86
 const cpu_name_t cpu_names[] =
 {
+#if X265_ARCH_X86
 #define MMX2 X265_CPU_MMX | X265_CPU_MMX2 | X265_CPU_CMOV
     { "MMX2",        MMX2 },
     { "MMXEXT",      MMX2 },
@@ -77,9 +96,17 @@ const cpu_name_t cpu_names[] =
     { "SlowPalignr",     X265_CPU_SLOW_PALIGNR },
     { "SlowShuffle",     X265_CPU_SLOW_SHUFFLE },
     { "UnalignedStack",  X265_CPU_STACK_MOD4 },
+
+#elif X265_ARCH_ARM
+    {"ARMv6",           X265_CPU_ARMV6},
+    {"NEON",            X265_CPU_NEON},
+    {"FastNeonMRC",     X265_CPU_FAST_NEON_MRC},
+#endif
     { "", 0 },
 };
 
+#if X265_ARCH_X86
+
 extern "C" {
 /* cpu-a.asm */
 int x265_cpu_cpuid_test(void);
@@ -94,10 +121,10 @@ void x265_cpu_xgetbv(uint32_t op, uint32
 uint32_t cpu_detect(void)
 {
     uint32_t cpu = 0;
+
     uint32_t eax, ebx, ecx, edx;
     uint32_t vendor[4] = { 0 };
     uint32_t max_extended_cap, max_basic_cap;
-    int cache;
 
 #if !X86_64
     if (!x265_cpu_cpuid_test())
@@ -240,7 +267,7 @@ uint32_t cpu_detect(void)
     {
         /* cacheline size is specified in 3 places, any of which may be missing */
         x265_cpu_cpuid(1, &eax, &ebx, &ecx, &edx);
-        cache = (ebx & 0xff00) >> 5; // cflush size
+        int cache = (ebx & 0xff00) >> 5; // cflush size
         if (!cache && max_extended_cap >= 0x80000006)
         {
             x265_cpu_cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
@@ -290,8 +317,59 @@ uint32_t cpu_detect(void)