[x265-commits] [x265] psy-rd: add sad_square primitive array to optimize psy-rd
Steve Borho
steve at borho.org
Fri May 16 11:40:04 CEST 2014
details: http://hg.videolan.org/x265/rev/48af10fff12b
branches:
changeset: 6868:48af10fff12b
user: Steve Borho <steve at borho.org>
date: Fri May 16 10:42:23 2014 +0530
description:
psy-rd: add sad_square primitive array to optimize psy-rd
This fixes the DC component calculation and, at the same time, makes it
more efficient.
Subject: [x265] common: make a global X265_LL macro for printing uint64_t
details: http://hg.videolan.org/x265/rev/5167067ed452
branches:
changeset: 6869:5167067ed452
user: Steve Borho <steve at borho.org>
date: Fri May 16 11:03:21 2014 +0530
description:
common: make a global X265_LL macro for printing uint64_t
Subject: [x265] nits
details: http://hg.videolan.org/x265/rev/0bd90aaaa3a7
branches:
changeset: 6870:0bd90aaaa3a7
user: Steve Borho <steve at borho.org>
date: Fri May 16 14:52:12 2014 +0530
description:
nits
Subject: [x265] cli: introduce --[no]-b-intra which enables/disables intra modes in B frames
details: http://hg.videolan.org/x265/rev/7d11f60c5dba
branches:
changeset: 6871:7d11f60c5dba
user: Sumalatha Polureddy <sumalatha at multicorewareinc.com>
date: Fri May 16 14:31:01 2014 +0530
description:
cli: introduce --[no]-b-intra which enables/disables intra modes in B frames
diffstat:
doc/reST/cli.rst | 6 ++++++
source/CMakeLists.txt | 5 +++++
source/Lib/TLibCommon/TComRdCost.h | 26 ++++++++++++++------------
source/Lib/TLibEncoder/TEncCu.cpp | 8 +++-----
source/Lib/TLibEncoder/TEncSearch.cpp | 2 +-
source/common/common.h | 8 ++++++++
source/common/param.cpp | 3 +++
source/common/primitives.h | 1 +
source/common/x86/asm-primitives.cpp | 6 ++++++
source/encoder/CMakeLists.txt | 5 -----
source/encoder/encoder.cpp | 22 +++++++---------------
source/x265.cpp | 3 +++
source/x265.h | 6 ++++++
13 files changed, 63 insertions(+), 38 deletions(-)
diffs (truncated from 339 to 300 lines):
diff -r 7533425d5060 -r 7d11f60c5dba doc/reST/cli.rst
--- a/doc/reST/cli.rst Fri May 16 08:00:23 2014 +0530
+++ b/doc/reST/cli.rst Fri May 16 14:31:01 2014 +0530
@@ -414,6 +414,12 @@ Spatial/intra options
**Values:** 0:disabled 1:RD-penalty 2:maximum
+.. option:: --b-intra, --no-b-intra
+
+ Enables the use of intra modes in B frames in very slow presets
+ (rdLevel 5 or 6). Presets slow to ultrafast do not try intra in
+ B frames regardless of this setting. Default enabled.
+
.. option:: --tskip, --no-tskip
Enable intra transform skipping (encode residual as coefficients)
diff -r 7533425d5060 -r 7d11f60c5dba source/CMakeLists.txt
--- a/source/CMakeLists.txt Fri May 16 08:00:23 2014 +0530
+++ b/source/CMakeLists.txt Fri May 16 14:31:01 2014 +0530
@@ -103,6 +103,11 @@ if(MSVC)
endif()
endif(MSVC)
+check_include_files(inttypes.h HAVE_INT_TYPES_H)
+if(HAVE_INT_TYPES_H)
+ add_definitions(-DHAVE_INT_TYPES_H=1)
+endif()
+
if(INTEL_CXX AND UNIX)
# treat icpc roughly like gcc
set(GCC 1)
diff -r 7533425d5060 -r 7d11f60c5dba source/Lib/TLibCommon/TComRdCost.h
--- a/source/Lib/TLibCommon/TComRdCost.h Fri May 16 08:00:23 2014 +0530
+++ b/source/Lib/TLibCommon/TComRdCost.h Fri May 16 14:31:01 2014 +0530
@@ -105,15 +105,14 @@ public:
/* return the difference in energy between the source block and the recon block */
inline uint32_t psyCost(int size, pixel *source, intptr_t sstride, pixel *recon, intptr_t rstride)
{
- int width, height;
- width = height = 1 << (size * 2);
- int part = partitionFromSizes(width, height);
- int dc = 2 * primitives.sad[part](source, sstride, (pixel*)zeroPel, MAX_CU_SIZE) / (width * height);
- int sEnergy = primitives.sa8d[size](source, sstride, (pixel*)zeroPel, MAX_CU_SIZE) - dc;
+ int pixelCountShift = (size + 2) * 2 - 1; // width * height / satd-scale(2)
+ int sdc = primitives.sad_square[size](source, sstride, (pixel*)zeroPel, MAX_CU_SIZE) >> pixelCountShift;
+ int sEnergy = primitives.sa8d[size](source, sstride, (pixel*)zeroPel, MAX_CU_SIZE) - sdc;
- dc = 2 * primitives.sad[part](recon, rstride, (pixel*)zeroPel, MAX_CU_SIZE) / (width * height);
- int rEnergy = primitives.sa8d[size](recon, rstride, (pixel*)zeroPel, MAX_CU_SIZE) - dc;
+ int rdc = primitives.sad_square[size](recon, rstride, (pixel*)zeroPel, MAX_CU_SIZE) >> pixelCountShift;
+ int rEnergy = primitives.sa8d[size](recon, rstride, (pixel*)zeroPel, MAX_CU_SIZE) - rdc;
+ X265_CHECK(sdc <= sEnergy && rdc <= rEnergy, "DC component of energy is more than total cost\n")
return abs(sEnergy - rEnergy);
}
@@ -123,7 +122,7 @@ public:
uint64_t tot = bits + (((psycost * m_psyRdScale) + 128) >> 8);
X265_CHECK(abs((float)((tot * m_lambdaMotionSSE + 128) >> 8) -
(float)tot * m_lambdaMotionSSE / 256.0) < 2,
- "calcPsyRdCost wrap detected dist: %d, tot %d, lambda: %d\n", distortion, (int)tot, (int)m_lambdaMotionSSE);
+ "calcPsyRdCost wrap detected tot: "X265_LL", lambda: "X265_LL"\n", tot, m_lambdaMotionSSE);
return distortion + ((tot * m_lambdaMotionSSE + 128) >> 8);
}
@@ -131,17 +130,20 @@ public:
{
X265_CHECK(abs((float)((bits * m_lambdaMotionSAD + 128) >> 8) -
(float)bits * m_lambdaMotionSAD / 256.0) < 2,
- "calcRdSADCost wrap detected dist: %d, bits %d, lambda: %d\n", sadCost, bits, (int)m_lambdaMotionSAD);
+ "calcRdSADCost wrap detected dist: %d, bits %d, lambda: "X265_LL"\n", sadCost, bits, m_lambdaMotionSAD);
return sadCost + ((bits * m_lambdaMotionSAD + 128) >> 8);
}
- inline uint32_t getCost(uint32_t bits) { return (uint32_t)((bits * m_lambdaMotionSAD + 128) >> 8); }
+ inline uint32_t getCost(uint32_t bits)
+ {
+ return (uint32_t)((bits * m_lambdaMotionSAD + 128) >> 8);
+ }
inline uint32_t scaleChromaDistCb(uint32_t dist)
{
X265_CHECK(abs((float)((dist * m_cbDistortionWeight + 128) >> 8) -
(float)dist * m_cbDistortionWeight / 256.0) < 2,
- "scaleChromaDistCb wrap detected dist: %d, lambda: %d\n", dist, (int)m_cbDistortionWeight);
+ "scaleChromaDistCb wrap detected dist: %d, lambda: "X265_LL"\n", dist, m_cbDistortionWeight);
return (uint32_t)(((dist * m_cbDistortionWeight) + 128) >> 8);
}
@@ -149,7 +151,7 @@ public:
{
X265_CHECK(abs((float)((dist * m_crDistortionWeight + 128) >> 8) -
(float)dist * m_crDistortionWeight / 256.0) < 2,
- "scaleChromaDistCr wrap detected dist: %d, lambda: %d\n", dist, (int)m_crDistortionWeight);
+ "scaleChromaDistCr wrap detected dist: %d, lambda: "X265_LL"\n", dist, m_crDistortionWeight);
return (uint32_t)(((dist * m_crDistortionWeight) + 128) >> 8);
}
};
diff -r 7533425d5060 -r 7d11f60c5dba source/Lib/TLibEncoder/TEncCu.cpp
--- a/source/Lib/TLibEncoder/TEncCu.cpp Fri May 16 08:00:23 2014 +0530
+++ b/source/Lib/TLibEncoder/TEncCu.cpp Fri May 16 14:31:01 2014 +0530
@@ -887,10 +887,10 @@ void TEncCu::xCompressCU(TComDataCU*& ou
// do normal intra modes
// speedup for inter frames
- if (slice->getSliceType() == I_SLICE ||
+ if ((slice->getSliceType() == I_SLICE ||
outBestCU->getCbf(0, TEXT_LUMA) != 0 ||
outBestCU->getCbf(0, TEXT_CHROMA_U) != 0 ||
- outBestCU->getCbf(0, TEXT_CHROMA_V) != 0) // avoid very complex intra if it is unlikely
+ outBestCU->getCbf(0, TEXT_CHROMA_V) != 0) && m_param->bIntraInBFrames) // avoid very complex intra if it is unlikely
{
xCheckRDCostIntraInInter(outBestCU, outTempCU, SIZE_2Nx2N);
outTempCU->initEstData(depth);
@@ -1386,7 +1386,6 @@ void TEncCu::xCheckRDCostIntra(TComDataC
// Encode Coefficients
bool bCodeDQP = getdQPFlag();
m_entropyCoder->encodeCoeff(outTempCU, 0, depth, outTempCU->getCUSize(0), outTempCU->getCUSize(0), bCodeDQP);
-
m_rdGoOnSbacCoder->store(m_rdSbacCoders[depth][CI_TEMP_BEST]);
outTempCU->m_totalBits = m_entropyCoder->getNumberOfWrittenBits();
@@ -1402,7 +1401,6 @@ void TEncCu::xCheckRDCostIntra(TComDataC
{
outTempCU->m_totalCost = m_rdCost->calcRdCost(outTempCU->m_totalDistortion, outTempCU->m_totalBits);
}
-
xCheckDQP(outTempCU);
xCheckBestMode(outBestCU, outTempCU, depth);
}
@@ -1444,7 +1442,7 @@ void TEncCu::xCheckRDCostIntraInInter(TC
int part = g_convertToBit[outTempCU->getCUSize(0)];
TComPicYuv *recon = outTempCU->getPic()->getPicYuvRec();
uint32_t psyRdCost = m_rdCost->psyCost(part, m_origYuv[depth]->getLumaAddr(), m_origYuv[depth]->getStride(),
- recon->getLumaAddr(outTempCU->getAddr()), recon->getStride());
+ recon->getLumaAddr(outTempCU->getAddr()), recon->getStride());
outTempCU->m_totalCost = m_rdCost->calcPsyRdCost(outTempCU->m_totalDistortion, outTempCU->m_totalBits, psyRdCost);
}
else
diff -r 7533425d5060 -r 7d11f60c5dba source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Fri May 16 08:00:23 2014 +0530
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Fri May 16 14:31:01 2014 +0530
@@ -2780,7 +2780,7 @@ void TEncSearch::encodeResAndCalcRdInter
{
int size = g_convertToBit[cu->getCUSize(0)];
uint32_t psyRdCost = m_rdCost->psyCost(size, fencYuv->getLumaAddr(), fencYuv->getStride(),
- outReconYuv->getLumaAddr(), outReconYuv->getStride());
+ outReconYuv->getLumaAddr(), outReconYuv->getStride());
bcost = m_rdCost->calcPsyRdCost(bdist, bestBits, psyRdCost);
}
else
diff -r 7533425d5060 -r 7d11f60c5dba source/common/common.h
--- a/source/common/common.h Fri May 16 08:00:23 2014 +0530
+++ b/source/common/common.h Fri May 16 14:31:01 2014 +0530
@@ -64,6 +64,14 @@ extern "C" intptr_t x265_stack_align(voi
#endif // if defined(__GNUC__)
+#if HAVE_INT_TYPES_H
+#define __STDC_FORMAT_MACROS
+#include <inttypes.h>
+#define X265_LL "%" PRIu64
+#else
+#define X265_LL "%lld"
+#endif
+
/* If compiled with CHECKED_BUILD perform run-time checks and log any that
* fail, both to stderr and to a file */
#if CHECKED_BUILD || _DEBUG
diff -r 7533425d5060 -r 7d11f60c5dba source/common/param.cpp
--- a/source/common/param.cpp Fri May 16 08:00:23 2014 +0530
+++ b/source/common/param.cpp Fri May 16 14:31:01 2014 +0530
@@ -157,6 +157,7 @@ void x265_param_default(x265_param *para
param->crQpOffset = 0;
param->rdPenalty = 0;
param->psyRd = 0.0;
+ param->bIntraInBFrames = 1;
/* Rate control options */
param->rc.vbvMaxBitrate = 0;
@@ -384,6 +385,7 @@ int x265_param_default_preset(x265_param
param->bEnableSAO = 0;
param->bEnableWeightedPred = 0;
param->bEnableWeightedBiPred = 0;
+ param->bIntraInBFrames = 0;
}
else if (!strcmp(tune, "zerolatency") ||
!strcmp(tune, "zero-latency"))
@@ -591,6 +593,7 @@ int x265_param_parse(x265_param *p, cons
OPT("rd") p->rdLevel = atoi(value);
OPT("psy-rd") p->psyRd = atof(value);
OPT("signhide") p->bEnableSignHiding = atobool(value);
+ OPT("b-intra") p->bIntraInBFrames = atobool(value);
OPT("lft") p->bEnableLoopFilter = atobool(value);
OPT("sao") p->bEnableSAO = atobool(value);
OPT("sao-lcu-bounds") p->saoLcuBoundary = atoi(value);
diff -r 7533425d5060 -r 7d11f60c5dba source/common/primitives.h
--- a/source/common/primitives.h Fri May 16 08:00:23 2014 +0530
+++ b/source/common/primitives.h Fri May 16 14:31:01 2014 +0530
@@ -190,6 +190,7 @@ struct EncoderPrimitives
pixelcmp_t satd[NUM_LUMA_PARTITIONS]; // Sum of Transformed differences (HADAMARD)
pixelcmp_t sa8d_inter[NUM_LUMA_PARTITIONS]; // sa8d primitives for motion search partitions
pixelcmp_t sa8d[NUM_SQUARE_BLOCKS]; // sa8d primitives for square intra blocks
+ pixelcmp_t sad_square[NUM_SQUARE_BLOCKS]; // sad primitives for square coding blocks
blockfill_s_t blockfill_s[NUM_SQUARE_BLOCKS]; // block fill with value
blockcpy_pp_t blockcpy_pp; // block copy pixel from pixel
diff -r 7533425d5060 -r 7d11f60c5dba source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Fri May 16 08:00:23 2014 +0530
+++ b/source/common/x86/asm-primitives.cpp Fri May 16 14:31:01 2014 +0530
@@ -1332,6 +1332,12 @@ void Setup_Assembly_Primitives(EncoderPr
primitives.sa8d[BLOCK_32x32] = primitives.sa8d_inter[LUMA_32x32];
primitives.sa8d[BLOCK_64x64] = primitives.sa8d_inter[LUMA_64x64];
+ primitives.sad_square[BLOCK_4x4] = primitives.sad[LUMA_4x4];
+ primitives.sad_square[BLOCK_8x8] = primitives.sad[LUMA_8x8];
+ primitives.sad_square[BLOCK_16x16] = primitives.sad[LUMA_16x16];
+ primitives.sad_square[BLOCK_32x32] = primitives.sad[LUMA_32x32];
+ primitives.sad_square[BLOCK_64x64] = primitives.sad[LUMA_64x64];
+
// SA8D devolves to SATD for blocks not even multiples of 8x8
primitives.sa8d_inter[LUMA_4x4] = primitives.satd[LUMA_4x4];
primitives.sa8d_inter[LUMA_4x8] = primitives.satd[LUMA_4x8];
diff -r 7533425d5060 -r 7d11f60c5dba source/encoder/CMakeLists.txt
--- a/source/encoder/CMakeLists.txt Fri May 16 08:00:23 2014 +0530
+++ b/source/encoder/CMakeLists.txt Fri May 16 14:31:01 2014 +0530
@@ -39,11 +39,6 @@ if(MSVC)
"/wd4244 /wd4389 /wd4018 /wd4800")
endif(MSVC)
-check_include_files(inttypes.h HAVE_INT_TYPES_H)
-if(HAVE_INT_TYPES_H)
- add_definitions(-DHAVE_INT_TYPES_H=1)
-endif()
-
add_library(encoder OBJECT ../x265.h
${LIBENCODER_SRC} ${LIBENCODER_HDR}
bitcost.cpp bitcost.h
diff -r 7533425d5060 -r 7d11f60c5dba source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Fri May 16 08:00:23 2014 +0530
+++ b/source/encoder/encoder.cpp Fri May 16 14:31:01 2014 +0530
@@ -41,14 +41,6 @@
#include "x265.h"
-#if HAVE_INT_TYPES_H
-#define __STDC_FORMAT_MACROS
-#include <inttypes.h>
-#define LL "%" PRIu64
-#else
-#define LL "%lld"
-#endif
-
using namespace x265;
Encoder::Encoder()
@@ -637,39 +629,39 @@ void Encoder::printSummary()
int len = 0;
if (sliceType != I_SLICE)
{
- len += sprintf(stats + len, "EncCU "LL "%% Merge "LL "%%", encCu, cntSkipCu);
+ len += sprintf(stats + len, "EncCU "X265_LL "%% Merge "X265_LL "%%", encCu, cntSkipCu);
}
if (cntInter)
{
- len += sprintf(stats + len, " Inter "LL "%%", cntInter);
+ len += sprintf(stats + len, " Inter "X265_LL "%%", cntInter);
if (param->bEnableAMP)
- len += sprintf(stats + len, "(%dx%d "LL "%% %dx%d "LL "%% %dx%d "LL "%% AMP "LL "%%)",
+ len += sprintf(stats + len, "(%dx%d "X265_LL "%% %dx%d "X265_LL "%% %dx%d "X265_LL "%% AMP "X265_LL "%%)",
cuSize, cuSize, cuInterDistribution[0],
cuSize / 2, cuSize, cuInterDistribution[2],
cuSize, cuSize / 2, cuInterDistribution[1],
cuInterDistribution[3]);
else if (param->bEnableRectInter)
- len += sprintf(stats + len, "(%dx%d "LL "%% %dx%d "LL "%% %dx%d "LL "%%)",
+ len += sprintf(stats + len, "(%dx%d "X265_LL "%% %dx%d "X265_LL "%% %dx%d "X265_LL "%%)",
cuSize, cuSize, cuInterDistribution[0],
cuSize / 2, cuSize, cuInterDistribution[2],
cuSize, cuSize / 2, cuInterDistribution[1]);
}
if (cntIntra)
{
- len += sprintf(stats + len, " Intra "LL "%%(DC "LL "%% P "LL "%% Ang "LL "%%",
+ len += sprintf(stats + len, " Intra "X265_LL "%%(DC "X265_LL "%% P "X265_LL "%% Ang "X265_LL "%%",
cntIntra, cuIntraDistribution[0],
cuIntraDistribution[1], cuIntraDistribution[2]);
if (sliceType != I_SLICE)
{
if (depth == (int)g_maxCUDepth - 1)
- len += sprintf(stats + len, " %dx%d "LL "%%", cuSize / 2, cuSize / 2, cntIntraNxN);
+ len += sprintf(stats + len, " %dx%d "X265_LL "%%", cuSize / 2, cuSize / 2, cntIntraNxN);
}
len += sprintf(stats + len, ")");
if (sliceType == I_SLICE)
{
if (depth == (int)g_maxCUDepth - 1)
- len += sprintf(stats + len, " %dx%d: "LL "%%", cuSize / 2, cuSize / 2, cntIntraNxN);
+ len += sprintf(stats + len, " %dx%d: "X265_LL "%%", cuSize / 2, cuSize / 2, cntIntraNxN);
More information about the x265-commits mailing list