[x265-commits] [x265] cmake: more asm simplifications
Steve Borho
steve at borho.org
Mon Nov 4 06:47:45 CET 2013
details: http://hg.videolan.org/x265/rev/ad8222ed1360
branches:
changeset: 4818:ad8222ed1360
user: Steve Borho <steve at borho.org>
date: Fri Nov 01 14:27:13 2013 -0500
description:
cmake: more asm simplifications
Subject: [x265] cmake: cleanup intrinsic primitives
details: http://hg.videolan.org/x265/rev/f81af999ef6c
branches:
changeset: 4819:f81af999ef6c
user: Steve Borho <steve at borho.org>
date: Fri Nov 01 14:43:11 2013 -0500
description:
cmake: cleanup intrinsic primitives
Subject: [x265] rc: add CRF ratecontrol
details: http://hg.videolan.org/x265/rev/c51c35880df5
branches:
changeset: 4820:c51c35880df5
user: idxa<idxa at sina.com>
date: Wed Oct 30 16:59:47 2013 +0800
description:
rc: add CRF ratecontrol
1. Add a "--crf" parameter to the command line.
2. Change the branching in rateControlStart to use "if(isAbr)" instead of
"switch (cfg->param.rc.rateControlMode)". Classifying the combination of
multiple rate-control methods is complex and does not depend only on
rateControlMode, so porting x264's approach looks feasible.
3. Add the CRF method to x265.
Subject: [x265] fix vec/asm crash in COST_MV_X3_DIR: costs+3 is not aligned
details: http://hg.videolan.org/x265/rev/8621008756ba
branches:
changeset: 4821:8621008756ba
user: Wenju He <wenju at multicorewareinc.com>
date: Sat Nov 02 10:19:22 2013 -0500
description:
fix vec/asm crash in COST_MV_X3_DIR: costs+3 is not aligned
diffstat:
source/common/CMakeLists.txt | 80 ++++++++++++++++-------------------------
source/common/common.cpp | 14 ++++++-
source/encoder/encoder.cpp | 5 --
source/encoder/motion.cpp | 8 ++--
source/encoder/ratecontrol.cpp | 80 +++++++++++++++++++++++++++--------------
source/encoder/ratecontrol.h | 3 +
source/x265.cpp | 2 +
source/x265.h | 2 +-
8 files changed, 107 insertions(+), 87 deletions(-)
diffs (truncated from 391 to 300 lines):
diff -r 0d79e31728a4 -r 8621008756ba source/common/CMakeLists.txt
--- a/source/common/CMakeLists.txt Fri Nov 01 13:05:34 2013 -0500
+++ b/source/common/CMakeLists.txt Sat Nov 02 10:19:22 2013 -0500
@@ -72,34 +72,29 @@ if(ENABLE_PRIMITIVES_VEC)
if(HIGH_BIT_DEPTH)
include_directories(../VectorClass)
endif()
+ set(SSE3 vec/pixel-sse3.cpp vec/dct-sse3.cpp vec/blockcopy-sse3.cpp)
+ set(SSSE3 vec/pixel-ssse3.cpp vec/dct-ssse3.cpp vec/ipfilter-ssse3.cpp vec/intra-ssse3.cpp)
+ set(SSE41 vec/pixel-sse41.cpp vec/dct-sse41.cpp vec/ipfilter-sse41.cpp vec/intra-sse41.cpp vec/pixel16-sse41.cpp)
+ set(AVX2 vec/pixel-avx2.cpp)
+
if (MSVC)
add_definitions(/wd4127) # conditional expression is constant
add_definitions(/wd4244) # 'argument' : conversion from 'int' to 'char', possible loss of data
+ set(PRIMITIVES ${SSE3} ${SSSE3} ${SSE41})
if (INTEL_CXX)
- add_definitions(/Qwd111) # statement is unreachable
- add_definitions(/Qwd128) # loop is unreachable
- add_definitions(/Qwd177) # declared function is unused
- add_definitions(/Qwd185) # dynamic initialization in unreachable code
- add_definitions(/Qwd280) # conditional expression is constant
+ add_definitions(/Qwd111) # statement is unreachable
+ add_definitions(/Qwd128) # loop is unreachable
+ add_definitions(/Qwd177) # declared function is unused
+ add_definitions(/Qwd185) # dynamic initialization in unreachable code
+ add_definitions(/Qwd280) # conditional expression is constant
endif()
- set(PRIMITIVES vec/blockcopy-sse3.cpp
- vec/pixel-sse3.cpp vec/pixel-ssse3.cpp vec/pixel-sse41.cpp
- vec/dct-sse3.cpp vec/dct-ssse3.cpp vec/dct-sse41.cpp
- vec/ipfilter-ssse3.cpp vec/ipfilter-sse41.cpp
- vec/pixel16-sse41.cpp vec/intra-ssse3.cpp vec/intra-sse41.cpp)
if (NOT X64)
# x64 implies SSE4, so this flag would have no effect (and it issues a warning)
- set_source_files_properties(vec/blockcopy-sse3.cpp
- vec/pixel-sse3.cpp vec/pixel-ssse3.cpp vec/pixel-sse41.cpp
- vec/dct-sse3.cpp vec/dct-ssse3.cpp vec/dct-sse41.cpp
- vec/ipfilter-ssse3.cpp vec/ipfilter-sse41.cpp
- vec/intra-ssse3.cpp vec/intra-sse41.cpp vec/pixel16-sse41.cpp
- PROPERTIES COMPILE_FLAGS /arch:SSE2)
+ set_source_files_properties(${SSE3} ${SSSE3} ${SSE41} PROPERTIES COMPILE_FLAGS /arch:SSE2)
endif()
if (NOT (MSVC_VERSION LESS 1700) OR INTEL_CXX)
- set(PRIMITIVES ${PRIMITIVES} vec/pixel-avx2.cpp)
- set_source_files_properties(vec/pixel-avx2.cpp
- PROPERTIES COMPILE_FLAGS /arch:AVX)
+ set(PRIMITIVES ${PRIMITIVES} ${AVX2})
+ set_source_files_properties(${AVX2} PROPERTIES COMPILE_FLAGS /arch:AVX)
endif()
endif()
if(GCC)
@@ -110,26 +105,14 @@ if(ENABLE_PRIMITIVES_VEC)
add_definitions(-Wno-shift-overflow -Wno-uninitialized)
endif()
if(INTEL_CXX OR CLANG OR (NOT GCC_VERSION VERSION_LESS 4.3))
- set(PRIMITIVES vec/blockcopy-sse3.cpp
- vec/pixel-sse3.cpp vec/pixel-ssse3.cpp vec/pixel-sse41.cpp
- vec/ipfilter-ssse3.cpp vec/ipfilter-sse41.cpp
- vec/dct-sse3.cpp vec/dct-ssse3.cpp vec/dct-sse41.cpp
- vec/pixel16-sse41.cpp vec/intra-ssse3.cpp vec/intra-sse41.cpp)
- set_source_files_properties(
- vec/blockcopy-sse3.cpp vec/pixel-sse3.cpp vec/dct-sse3.cpp
- PROPERTIES COMPILE_FLAGS "-msse3")
- set_source_files_properties(
- vec/ipfilter-ssse3.cpp vec/pixel-ssse3.cpp vec/dct-ssse3.cpp vec/intra-ssse3.cpp
- PROPERTIES COMPILE_FLAGS "-mssse3")
- set_source_files_properties(
- vec/pixel-sse41.cpp vec/ipfilter-sse41.cpp vec/dct-sse41.cpp vec/intra-sse41.cpp
- vec/pixel16-sse41.cpp
- PROPERTIES COMPILE_FLAGS "-msse4.1")
+ set(PRIMITIVES ${SSE3} ${SSSE3} ${SSE41})
+ set_source_files_properties(${SSE3} PROPERTIES COMPILE_FLAGS "-msse3")
+ set_source_files_properties(${SSSE3} PROPERTIES COMPILE_FLAGS "-mssse3")
+ set_source_files_properties(${SSE41} PROPERTIES COMPILE_FLAGS "-msse4.1")
endif()
if(INTEL_CXX OR CLANG OR (NOT GCC_VERSION VERSION_LESS 4.7))
- set(PRIMITIVES ${PRIMITIVES} vec/pixel-avx2.cpp)
- set_source_files_properties(vec/pixel-avx2.cpp
- PROPERTIES COMPILE_FLAGS "-march=core-avx2")
+ set(PRIMITIVES ${PRIMITIVES} ${AVX2})
+ set_source_files_properties(${AVX2} PROPERTIES COMPILE_FLAGS "-march=core-avx2")
endif()
endif(GCC)
@@ -138,24 +121,23 @@ if(ENABLE_PRIMITIVES_VEC)
endif(ENABLE_PRIMITIVES_VEC)
if(ENABLE_PRIMITIVES_ASM)
- set(ASMS pixel-a.asm const-a.asm cpu-a.asm sad-a.asm mc-a.asm mc-a2.asm ipfilter8.asm pixel-util.asm)
- if (X64)
- add_definitions(-DARCH_X86_64=1)
- else()
- add_definitions(-DARCH_X86_64=0)
- set(ASMS ${ASMS} pixel-32.asm)
+ set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h)
+ set(A_SRCS pixel-a.asm const-a.asm cpu-a.asm sad-a.asm mc-a.asm mc-a2.asm ipfilter8.asm pixel-util.asm)
+ if (NOT X64)
+ set(A_SRCS ${A_SRCS} pixel-32.asm)
endif()
- set(ASM_PRIMITIVES x86/asm-primitives.cpp x86/pixel.h x86/mc.h x86/ipfilter8.h)
if(MSVC_IDE)
- set(MSVC_ASMS "${ASMS}" CACHE INTERNAL "yasm sources")
- source_group(Assembly FILES ${ASM_PRIMITIVES})
+ # MSVC requires custom build rules in the main cmake script for yasm
+ set(MSVC_ASMS "${A_SRCS}" CACHE INTERNAL "yasm sources")
+ set(A_SRCS)
else()
- foreach(ASM ${ASMS})
- set(ASM_PRIMITIVES ${ASM_PRIMITIVES} x86/${ASM})
- endforeach()
enable_language(ASM_YASM)
endif()
+
+ foreach(SRC ${A_SRCS} ${C_SRCS})
+ set(ASM_PRIMITIVES ${ASM_PRIMITIVES} x86/${SRC})
+ endforeach()
endif(ENABLE_PRIMITIVES_ASM)
if(GCC)
diff -r 0d79e31728a4 -r 8621008756ba source/common/common.cpp
--- a/source/common/common.cpp Fri Nov 01 13:05:34 2013 -0500
+++ b/source/common/common.cpp Sat Nov 02 10:19:22 2013 -0500
@@ -185,6 +185,7 @@ void x265_param_default(x265_param *para
param->saoLcuBasedOptimization = 1;
/* Rate control options */
+ param->rc.rfConstant = 28;
param->rc.bitrate = 0;
param->rc.rateTolerance = 1.0;
param->rc.qCompress = 0.6;
@@ -569,7 +570,7 @@ void x265_print_params(x265_param *param
x265_log(param, X265_LOG_INFO, "Rate Control : CQP-%d\n", param->rc.qp);
break;
case X265_RC_CRF:
- x265_log(param, X265_LOG_INFO, "Rate Control : CRF-%d\n", param->rc.rateFactor);
+ x265_log(param, X265_LOG_INFO, "Rate Control : CRF-%f\n", param->rc.rfConstant);
break;
}
@@ -710,10 +711,21 @@ int x265_param_parse(x265_param *p, cons
p->maxNumReferences = atoi(value);
OPT("weightp")
p->bEnableWeightedPred = bvalue;
+ OPT("crf")
+ {
+ p->rc.rfConstant = atof(value);
+ p->rc.rateControlMode = X265_RC_CRF;
+ }
OPT("bitrate")
+ {
p->rc.bitrate = atoi(value);
+ p->rc.rateControlMode = X265_RC_ABR;
+ }
OPT("qp")
+ {
p->rc.qp = atoi(value);
+ p->rc.rateControlMode = X265_RC_CQP;
+ }
OPT("cbqpoffs")
p->cbQpOffset = atoi(value);
OPT("crqpoffs")
diff -r 0d79e31728a4 -r 8621008756ba source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Fri Nov 01 13:05:34 2013 -0500
+++ b/source/encoder/encoder.cpp Sat Nov 02 10:19:22 2013 -0500
@@ -997,11 +997,6 @@ void Encoder::configure(x265_param *_par
{
_param->bEnableAMP = false;
}
- // if a bitrate is specified, chose ABR. Else default to CQP
- if (_param->rc.bitrate)
- {
- _param->rc.rateControlMode = X265_RC_ABR;
- }
if (!(_param->bEnableRDOQ && _param->bEnableTransformSkip))
{
diff -r 0d79e31728a4 -r 8621008756ba source/encoder/motion.cpp
--- a/source/encoder/motion.cpp Fri Nov 01 13:05:34 2013 -0500
+++ b/source/encoder/motion.cpp Sat Nov 02 10:19:22 2013 -0500
@@ -405,14 +405,14 @@ me_hex2:
#else // if 0
/* equivalent to the above, but eliminates duplicate candidates */
COST_MV_X3_DIR(-2, 0, -1, 2, 1, 2, costs);
- COST_MV_X3_DIR(2, 0, 1, -2, -1, -2, costs + 3);
bcost <<= 3;
COPY1_IF_LT(bcost, (costs[0] << 3) + 2);
COPY1_IF_LT(bcost, (costs[1] << 3) + 3);
COPY1_IF_LT(bcost, (costs[2] << 3) + 4);
- COPY1_IF_LT(bcost, (costs[3] << 3) + 5);
- COPY1_IF_LT(bcost, (costs[4] << 3) + 6);
- COPY1_IF_LT(bcost, (costs[5] << 3) + 7);
+ COST_MV_X3_DIR(2, 0, 1, -2, -1, -2, costs);
+ COPY1_IF_LT(bcost, (costs[0] << 3) + 5);
+ COPY1_IF_LT(bcost, (costs[1] << 3) + 6);
+ COPY1_IF_LT(bcost, (costs[2] << 3) + 7);
if (bcost & 7)
{
diff -r 0d79e31728a4 -r 8621008756ba source/encoder/ratecontrol.cpp
--- a/source/encoder/ratecontrol.cpp Fri Nov 01 13:05:34 2013 -0500
+++ b/source/encoder/ratecontrol.cpp Sat Nov 02 10:19:22 2013 -0500
@@ -115,9 +115,25 @@ void RateControl::calcAdaptiveQuantFrame
RateControl::RateControl(TEncCfg * _cfg)
{
this->cfg = _cfg;
+ ncu = (int)((cfg->param.sourceHeight * cfg->param.sourceWidth) / pow((int)16, 2.0));
+
+ // validate for cfg->param.rc, maybe it is need to add a function like x265_parameters_valiate()
+ cfg->param.rc.rfConstant = Clip3((double)-QP_BD_OFFSET, (double)51, cfg->param.rc.rfConstant);
+ if (cfg->param.rc.rateControlMode == X265_RC_CRF)
+ {
+ cfg->param.rc.qp = (int)cfg->param.rc.rfConstant + QP_BD_OFFSET;
+ cfg->param.rc.bitrate = 0;
+
+ double baseCplx = ncu * (cfg->param.bframes ? 120 : 80);
+ double mbtree_offset = 0;//added later
+ rateFactorConstant = pow(baseCplx, 1 - cfg->param.rc.qCompress) /
+ qp2qScale(cfg->param.rc.rfConstant + mbtree_offset + QP_BD_OFFSET);
+ }
+
+ isAbr = cfg->param.rc.rateControlMode != X265_RC_CQP; // later add 2pass option
+
bitrate = cfg->param.rc.bitrate * 1000;
frameDuration = 1.0 / cfg->param.frameRate;
- ncu = (int)((cfg->param.sourceHeight * cfg->param.sourceWidth) / pow((int)16, 2.0));
lastNonBPictType = -1;
baseQp = cfg->param.rc.qp;
qp = baseQp;
@@ -126,6 +142,7 @@ RateControl::RateControl(TEncCfg * _cfg)
shortTermCplxSum = 0;
shortTermCplxCount = 0;
framesDone = 0;
+ lastNonBPictType = I_SLICE;
if (cfg->param.rc.rateControlMode == X265_RC_ABR)
{
@@ -137,8 +154,17 @@ RateControl::RateControl(TEncCfg * _cfg)
/* estimated ratio that produces a reasonable QP for the first I-frame */
cplxrSum = .01 * pow(7.0e5, cfg->param.rc.qCompress) * pow(ncu, 0.5);
wantedBitsWindow = bitrate * frameDuration;
- lastNonBPictType = I_SLICE;
}
+ else if (cfg->param.rc.rateControlMode == X265_RC_CRF)
+ {
+#define ABR_INIT_QP ((int)cfg->param.rc.rfConstant + QP_BD_OFFSET)
+ accumPNorm = .01;
+ accumPQp = ABR_INIT_QP * accumPNorm;
+ /* estimated ratio that produces a reasonable QP for the first I-frame */
+ cplxrSum = .01 * pow(7.0e5, cfg->param.rc.qCompress) * pow(ncu, 0.5);
+ wantedBitsWindow = bitrate * frameDuration;
+ }
+
ipOffset = 6.0 * X265_LOG2(cfg->param.rc.ipFactor);
pbOffset = 6.0 * X265_LOG2(cfg->param.rc.pbFactor);
for (int i = 0; i < 3; i++)
@@ -164,9 +190,8 @@ void RateControl::rateControlStart(TComP
curSlice = pic->getSlice();
sliceType = curSlice->getSliceType();
rce->sliceType = sliceType;
- switch (cfg->param.rc.rateControlMode)
- {
- case X265_RC_ABR:
+
+ if (isAbr) //ABR,CRF
{
lastSatd = l->getEstimatedPictureCost(pic);
double q = qScale2qp(rateEstimateQscale(rce));
@@ -175,17 +200,10 @@ void RateControl::rateControlStart(TComP
/* copy value of lastRceq into thread local rce struct *to be used in RateControlEnd() */
rce->qRceq = lastRceq;
accumPQpUpdate();
- break;
}
-
- case X265_RC_CQP:
+ else //CQP
+ {
qp = qpConstant[sliceType];
- break;
-
- case X265_RC_CRF:
- default:
- assert(!"unimplemented");
- break;
}
if (sliceType != B_SLICE)
@@ -269,20 +287,28 @@ double RateControl::rateEstimateQscale(R
rce->blurredComplexity = shortTermCplxSum / shortTermCplxCount;
rce->mvBits = 0;
rce->sliceType = sliceType;
- q = getQScale(rce, wantedBitsWindow / cplxrSum);
- /* ABR code can potentially be counterproductive in CBR, so just don't bother.
- * Don't run it if the frame complexity is zero either. */
- if (lastSatd)
+ if (cfg->param.rc.rateControlMode == X265_RC_CRF)
{
More information about the x265-commits
mailing list