[x265-commits] [x265] cmake: more asm simplifications

Steve Borho steve at borho.org
Mon Nov 4 06:47:45 CET 2013


details:   http://hg.videolan.org/x265/rev/ad8222ed1360
branches:  
changeset: 4818:ad8222ed1360
user:      Steve Borho <steve at borho.org>
date:      Fri Nov 01 14:27:13 2013 -0500
description:
cmake: more asm simplifications
Subject: [x265] cmake: cleanup intrinc primitives

details:   http://hg.videolan.org/x265/rev/f81af999ef6c
branches:  
changeset: 4819:f81af999ef6c
user:      Steve Borho <steve at borho.org>
date:      Fri Nov 01 14:43:11 2013 -0500
description:
cmake: cleanup intrinc primitives
Subject: [x265] rc: add CRF ratecontrol

details:   http://hg.videolan.org/x265/rev/c51c35880df5
branches:  
changeset: 4820:c51c35880df5
user:      idxa<idxa at sina.com>
date:      Wed Oct 30 16:59:47 2013 +0800
description:
rc: add CRF ratecontrol

1. add a "--crf" parameter to the command line

2. modify the branching in rateControlStart, using "if(isAbr)" instead of
"switch (cfg->param.rc.rateControlMode)". The logic for classifying the
combination of multiple ratecontrol methods is very complex and is not based
only on rateControlMode, so porting x264's approach looks feasible.

3. add the CRF method to x265
Subject: [x265] fix vec/asm crash in COST_MV_X3_DIR: costs+3 is not aligned

details:   http://hg.videolan.org/x265/rev/8621008756ba
branches:  
changeset: 4821:8621008756ba
user:      Wenju He <wenju at multicorewareinc.com>
date:      Sat Nov 02 10:19:22 2013 -0500
description:
fix vec/asm crash in COST_MV_X3_DIR: costs+3 is not aligned

diffstat:

 source/common/CMakeLists.txt   |  80 ++++++++++++++++-------------------------
 source/common/common.cpp       |  14 ++++++-
 source/encoder/encoder.cpp     |   5 --
 source/encoder/motion.cpp      |   8 ++--
 source/encoder/ratecontrol.cpp |  80 +++++++++++++++++++++++++++--------------
 source/encoder/ratecontrol.h   |   3 +
 source/x265.cpp                |   2 +
 source/x265.h                  |   2 +-
 8 files changed, 107 insertions(+), 87 deletions(-)

diffs (truncated from 391 to 300 lines):

diff -r 0d79e31728a4 -r 8621008756ba source/common/CMakeLists.txt
--- a/source/common/CMakeLists.txt	Fri Nov 01 13:05:34 2013 -0500
+++ b/source/common/CMakeLists.txt	Sat Nov 02 10:19:22 2013 -0500
@@ -72,34 +72,29 @@ if(ENABLE_PRIMITIVES_VEC)
     if(HIGH_BIT_DEPTH)
         include_directories(../VectorClass)
     endif()
+    set(SSE3  vec/pixel-sse3.cpp  vec/dct-sse3.cpp  vec/blockcopy-sse3.cpp)
+    set(SSSE3 vec/pixel-ssse3.cpp vec/dct-ssse3.cpp vec/ipfilter-ssse3.cpp vec/intra-ssse3.cpp)
+    set(SSE41 vec/pixel-sse41.cpp vec/dct-sse41.cpp vec/ipfilter-sse41.cpp vec/intra-sse41.cpp vec/pixel16-sse41.cpp)
+    set(AVX2  vec/pixel-avx2.cpp)
+
     if (MSVC)
         add_definitions(/wd4127) # conditional expression is constant
         add_definitions(/wd4244) # 'argument' : conversion from 'int' to 'char', possible loss of data
+        set(PRIMITIVES ${SSE3} ${SSSE3} ${SSE41})
         if (INTEL_CXX)
-            add_definitions(/Qwd111)    # statement is unreachable
-            add_definitions(/Qwd128)    # loop is unreachable
-            add_definitions(/Qwd177)    # declared function is unused
-            add_definitions(/Qwd185)    # dynamic initialization in unreachable code
-            add_definitions(/Qwd280)    # conditional expression is constant
+            add_definitions(/Qwd111) # statement is unreachable
+            add_definitions(/Qwd128) # loop is unreachable
+            add_definitions(/Qwd177) # declared function is unused
+            add_definitions(/Qwd185) # dynamic initialization in unreachable code
+            add_definitions(/Qwd280) # conditional expression is constant
         endif()
-        set(PRIMITIVES vec/blockcopy-sse3.cpp
-            vec/pixel-sse3.cpp vec/pixel-ssse3.cpp vec/pixel-sse41.cpp
-            vec/dct-sse3.cpp vec/dct-ssse3.cpp vec/dct-sse41.cpp
-            vec/ipfilter-ssse3.cpp vec/ipfilter-sse41.cpp
-            vec/pixel16-sse41.cpp vec/intra-ssse3.cpp vec/intra-sse41.cpp)
         if (NOT X64)
             # x64 implies SSE4, so this flag would have no effect (and it issues a warning)
-            set_source_files_properties(vec/blockcopy-sse3.cpp
-                vec/pixel-sse3.cpp vec/pixel-ssse3.cpp vec/pixel-sse41.cpp
-                vec/dct-sse3.cpp vec/dct-ssse3.cpp vec/dct-sse41.cpp
-                vec/ipfilter-ssse3.cpp vec/ipfilter-sse41.cpp
-                vec/intra-ssse3.cpp vec/intra-sse41.cpp vec/pixel16-sse41.cpp
-                PROPERTIES COMPILE_FLAGS /arch:SSE2)
+            set_source_files_properties(${SSE3} ${SSSE3} ${SSE41} PROPERTIES COMPILE_FLAGS /arch:SSE2)
         endif()
         if (NOT (MSVC_VERSION LESS 1700) OR INTEL_CXX)
-            set(PRIMITIVES ${PRIMITIVES} vec/pixel-avx2.cpp)
-            set_source_files_properties(vec/pixel-avx2.cpp
-                PROPERTIES COMPILE_FLAGS /arch:AVX)
+            set(PRIMITIVES ${PRIMITIVES} ${AVX2})
+            set_source_files_properties(${AVX2} PROPERTIES COMPILE_FLAGS /arch:AVX)
         endif()
     endif()
     if(GCC)
@@ -110,26 +105,14 @@ if(ENABLE_PRIMITIVES_VEC)
             add_definitions(-Wno-shift-overflow -Wno-uninitialized)
         endif()
         if(INTEL_CXX OR CLANG OR (NOT GCC_VERSION VERSION_LESS 4.3))
-            set(PRIMITIVES vec/blockcopy-sse3.cpp
-                vec/pixel-sse3.cpp vec/pixel-ssse3.cpp vec/pixel-sse41.cpp
-                vec/ipfilter-ssse3.cpp vec/ipfilter-sse41.cpp
-                vec/dct-sse3.cpp vec/dct-ssse3.cpp vec/dct-sse41.cpp
-                vec/pixel16-sse41.cpp vec/intra-ssse3.cpp vec/intra-sse41.cpp)
-            set_source_files_properties(
-                vec/blockcopy-sse3.cpp vec/pixel-sse3.cpp vec/dct-sse3.cpp
-                PROPERTIES COMPILE_FLAGS "-msse3")
-            set_source_files_properties(
-                vec/ipfilter-ssse3.cpp vec/pixel-ssse3.cpp vec/dct-ssse3.cpp vec/intra-ssse3.cpp
-                PROPERTIES COMPILE_FLAGS "-mssse3")
-            set_source_files_properties(
-                vec/pixel-sse41.cpp vec/ipfilter-sse41.cpp vec/dct-sse41.cpp vec/intra-sse41.cpp
-                vec/pixel16-sse41.cpp
-                PROPERTIES COMPILE_FLAGS "-msse4.1")
+            set(PRIMITIVES ${SSE3} ${SSSE3} ${SSE41})
+            set_source_files_properties(${SSE3}  PROPERTIES COMPILE_FLAGS "-msse3")
+            set_source_files_properties(${SSSE3} PROPERTIES COMPILE_FLAGS "-mssse3")
+            set_source_files_properties(${SSE41} PROPERTIES COMPILE_FLAGS "-msse4.1")
         endif()
         if(INTEL_CXX OR CLANG OR (NOT GCC_VERSION VERSION_LESS 4.7))
-            set(PRIMITIVES ${PRIMITIVES} vec/pixel-avx2.cpp)
-            set_source_files_properties(vec/pixel-avx2.cpp
-                PROPERTIES COMPILE_FLAGS "-march=core-avx2")
+            set(PRIMITIVES ${PRIMITIVES} ${AVX2})
+            set_source_files_properties(${AVX2}  PROPERTIES COMPILE_FLAGS "-march=core-avx2")
         endif()
     endif(GCC)
 
@@ -138,24 +121,23 @@ if(ENABLE_PRIMITIVES_VEC)
 endif(ENABLE_PRIMITIVES_VEC)
 
 if(ENABLE_PRIMITIVES_ASM)
-    set(ASMS pixel-a.asm const-a.asm cpu-a.asm sad-a.asm mc-a.asm mc-a2.asm ipfilter8.asm pixel-util.asm)
-    if (X64)
-        add_definitions(-DARCH_X86_64=1)
-    else()
-        add_definitions(-DARCH_X86_64=0)
-        set(ASMS ${ASMS} pixel-32.asm)
+    set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h)
+    set(A_SRCS pixel-a.asm const-a.asm cpu-a.asm sad-a.asm mc-a.asm mc-a2.asm ipfilter8.asm pixel-util.asm)
+    if (NOT X64)
+        set(A_SRCS ${A_SRCS} pixel-32.asm)
     endif()
 
-    set(ASM_PRIMITIVES x86/asm-primitives.cpp x86/pixel.h x86/mc.h x86/ipfilter8.h)
     if(MSVC_IDE)
-        set(MSVC_ASMS "${ASMS}" CACHE INTERNAL "yasm sources")
-        source_group(Assembly FILES ${ASM_PRIMITIVES})
+        # MSVC requires custom build rules in the main cmake script for yasm
+        set(MSVC_ASMS "${A_SRCS}" CACHE INTERNAL "yasm sources")
+        set(A_SRCS)
     else()
-        foreach(ASM ${ASMS})
-            set(ASM_PRIMITIVES ${ASM_PRIMITIVES} x86/${ASM})
-        endforeach()
         enable_language(ASM_YASM)
     endif()
+
+    foreach(SRC ${A_SRCS} ${C_SRCS})
+        set(ASM_PRIMITIVES ${ASM_PRIMITIVES} x86/${SRC})
+    endforeach()
 endif(ENABLE_PRIMITIVES_ASM)
 
 if(GCC)
diff -r 0d79e31728a4 -r 8621008756ba source/common/common.cpp
--- a/source/common/common.cpp	Fri Nov 01 13:05:34 2013 -0500
+++ b/source/common/common.cpp	Sat Nov 02 10:19:22 2013 -0500
@@ -185,6 +185,7 @@ void x265_param_default(x265_param *para
     param->saoLcuBasedOptimization = 1;
 
     /* Rate control options */
+    param->rc.rfConstant = 28;
     param->rc.bitrate = 0;
     param->rc.rateTolerance = 1.0;
     param->rc.qCompress = 0.6;
@@ -569,7 +570,7 @@ void x265_print_params(x265_param *param
         x265_log(param, X265_LOG_INFO, "Rate Control                 : CQP-%d\n", param->rc.qp);
         break;
     case X265_RC_CRF:
-        x265_log(param, X265_LOG_INFO, "Rate Control                 : CRF-%d\n", param->rc.rateFactor);
+        x265_log(param, X265_LOG_INFO, "Rate Control                 : CRF-%f\n", param->rc.rfConstant);
         break;
     }
 
@@ -710,10 +711,21 @@ int x265_param_parse(x265_param *p, cons
         p->maxNumReferences = atoi(value);
     OPT("weightp")
         p->bEnableWeightedPred = bvalue;
+    OPT("crf")
+    {
+        p->rc.rfConstant = atof(value);
+        p->rc.rateControlMode = X265_RC_CRF;
+    }
     OPT("bitrate")
+    {
         p->rc.bitrate = atoi(value);
+        p->rc.rateControlMode = X265_RC_ABR;
+    }
     OPT("qp")
+    {
         p->rc.qp = atoi(value);
+        p->rc.rateControlMode = X265_RC_CQP;
+    }
     OPT("cbqpoffs")
         p->cbQpOffset = atoi(value);
     OPT("crqpoffs")
diff -r 0d79e31728a4 -r 8621008756ba source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Fri Nov 01 13:05:34 2013 -0500
+++ b/source/encoder/encoder.cpp	Sat Nov 02 10:19:22 2013 -0500
@@ -997,11 +997,6 @@ void Encoder::configure(x265_param *_par
     {
         _param->bEnableAMP = false;
     }
-    // if a bitrate is specified, chose ABR.  Else default to CQP
-    if (_param->rc.bitrate)
-    {
-        _param->rc.rateControlMode = X265_RC_ABR;
-    }
 
     if (!(_param->bEnableRDOQ && _param->bEnableTransformSkip))
     {
diff -r 0d79e31728a4 -r 8621008756ba source/encoder/motion.cpp
--- a/source/encoder/motion.cpp	Fri Nov 01 13:05:34 2013 -0500
+++ b/source/encoder/motion.cpp	Sat Nov 02 10:19:22 2013 -0500
@@ -405,14 +405,14 @@ me_hex2:
 #else // if 0
       /* equivalent to the above, but eliminates duplicate candidates */
         COST_MV_X3_DIR(-2, 0, -1, 2,  1, 2, costs);
-        COST_MV_X3_DIR(2, 0,  1, -2, -1, -2, costs + 3);
         bcost <<= 3;
         COPY1_IF_LT(bcost, (costs[0] << 3) + 2);
         COPY1_IF_LT(bcost, (costs[1] << 3) + 3);
         COPY1_IF_LT(bcost, (costs[2] << 3) + 4);
-        COPY1_IF_LT(bcost, (costs[3] << 3) + 5);
-        COPY1_IF_LT(bcost, (costs[4] << 3) + 6);
-        COPY1_IF_LT(bcost, (costs[5] << 3) + 7);
+        COST_MV_X3_DIR(2, 0,  1, -2, -1, -2, costs);
+        COPY1_IF_LT(bcost, (costs[0] << 3) + 5);
+        COPY1_IF_LT(bcost, (costs[1] << 3) + 6);
+        COPY1_IF_LT(bcost, (costs[2] << 3) + 7);
 
         if (bcost & 7)
         {
diff -r 0d79e31728a4 -r 8621008756ba source/encoder/ratecontrol.cpp
--- a/source/encoder/ratecontrol.cpp	Fri Nov 01 13:05:34 2013 -0500
+++ b/source/encoder/ratecontrol.cpp	Sat Nov 02 10:19:22 2013 -0500
@@ -115,9 +115,25 @@ void RateControl::calcAdaptiveQuantFrame
 RateControl::RateControl(TEncCfg * _cfg)
 {
     this->cfg = _cfg;
+    ncu = (int)((cfg->param.sourceHeight * cfg->param.sourceWidth) / pow((int)16, 2.0));
+
+    // validate for cfg->param.rc, maybe it is need to add a function like x265_parameters_valiate()
+    cfg->param.rc.rfConstant = Clip3((double)-QP_BD_OFFSET, (double)51, cfg->param.rc.rfConstant);
+    if (cfg->param.rc.rateControlMode == X265_RC_CRF)
+    {
+        cfg->param.rc.qp = (int)cfg->param.rc.rfConstant + QP_BD_OFFSET;
+        cfg->param.rc.bitrate = 0;
+
+        double baseCplx = ncu * (cfg->param.bframes ? 120 : 80);
+        double mbtree_offset = 0;//added later
+        rateFactorConstant = pow(baseCplx, 1 - cfg->param.rc.qCompress) /
+                             qp2qScale(cfg->param.rc.rfConstant + mbtree_offset + QP_BD_OFFSET);
+    }
+
+    isAbr = cfg->param.rc.rateControlMode != X265_RC_CQP; // later add 2pass option
+
     bitrate = cfg->param.rc.bitrate * 1000;
     frameDuration = 1.0 / cfg->param.frameRate;
-    ncu = (int)((cfg->param.sourceHeight * cfg->param.sourceWidth) / pow((int)16, 2.0));
     lastNonBPictType = -1;
     baseQp = cfg->param.rc.qp;
     qp = baseQp;
@@ -126,6 +142,7 @@ RateControl::RateControl(TEncCfg * _cfg)
     shortTermCplxSum = 0;
     shortTermCplxCount = 0;
     framesDone = 0;
+    lastNonBPictType = I_SLICE;
 
     if (cfg->param.rc.rateControlMode == X265_RC_ABR)
     {
@@ -137,8 +154,17 @@ RateControl::RateControl(TEncCfg * _cfg)
         /* estimated ratio that produces a reasonable QP for the first I-frame */
         cplxrSum = .01 * pow(7.0e5, cfg->param.rc.qCompress) * pow(ncu, 0.5);
         wantedBitsWindow = bitrate * frameDuration;
-        lastNonBPictType = I_SLICE;
     }
+    else if (cfg->param.rc.rateControlMode == X265_RC_CRF)
+    {
+#define ABR_INIT_QP ((int)cfg->param.rc.rfConstant + QP_BD_OFFSET)
+        accumPNorm = .01;
+        accumPQp = ABR_INIT_QP * accumPNorm;
+        /* estimated ratio that produces a reasonable QP for the first I-frame */
+        cplxrSum = .01 * pow(7.0e5, cfg->param.rc.qCompress) * pow(ncu, 0.5);
+        wantedBitsWindow = bitrate * frameDuration;
+    }
+
     ipOffset = 6.0 * X265_LOG2(cfg->param.rc.ipFactor);
     pbOffset = 6.0 * X265_LOG2(cfg->param.rc.pbFactor);
     for (int i = 0; i < 3; i++)
@@ -164,9 +190,8 @@ void RateControl::rateControlStart(TComP
     curSlice = pic->getSlice();
     sliceType = curSlice->getSliceType();
     rce->sliceType = sliceType;
-    switch (cfg->param.rc.rateControlMode)
-    {
-    case X265_RC_ABR:
+
+    if (isAbr) //ABR,CRF
     {
         lastSatd = l->getEstimatedPictureCost(pic);
         double q = qScale2qp(rateEstimateQscale(rce));
@@ -175,17 +200,10 @@ void RateControl::rateControlStart(TComP
         /* copy value of lastRceq into thread local rce struct *to be used in RateControlEnd() */
         rce->qRceq = lastRceq;
         accumPQpUpdate();
-        break;
     }
-
-    case X265_RC_CQP:
+    else //CQP
+    {
         qp = qpConstant[sliceType];
-        break;
-
-    case X265_RC_CRF:
-    default:
-        assert(!"unimplemented");
-        break;
     }
 
     if (sliceType != B_SLICE)
@@ -269,20 +287,28 @@ double RateControl::rateEstimateQscale(R
         rce->blurredComplexity = shortTermCplxSum / shortTermCplxCount;
         rce->mvBits = 0;
         rce->sliceType = sliceType;
-        q = getQScale(rce, wantedBitsWindow / cplxrSum);
 
-        /* ABR code can potentially be counterproductive in CBR, so just don't bother.
-         * Don't run it if the frame complexity is zero either. */
-        if (lastSatd)
+        if (cfg->param.rc.rateControlMode == X265_RC_CRF)
         {


More information about the x265-commits mailing list