[x265] [PATCH] LookaheadTLD: frameVariance is not used
ashok at multicorewareinc.com
ashok at multicorewareinc.com
Fri Dec 1 16:04:00 CET 2017
# HG changeset patch
# User Ashok Kumar Mishra <ashok at multicorewareinc.com>
# Date 1510144762 -19800
# Wed Nov 08 18:09:22 2017 +0530
# Node ID 5bf57563452b17c48486ab73f3fcfa4ce4d639ba
# Parent b1dfa312234ed72c3541831a15f307feaf79484d
LookaheadTLD: frameVariance is not used
diff -r b1dfa312234e -r 5bf57563452b source/common/frame.cpp
--- a/source/common/frame.cpp Thu Nov 30 10:06:49 2017 +0530
+++ b/source/common/frame.cpp Wed Nov 08 18:09:22 2017 +0530
@@ -84,21 +84,16 @@
m_analysisData.interData = NULL;
m_analysis2Pass.analysisFramedata = NULL;
- if (m_fencPic->create(param, !!m_param->bCopyPicToFrame) && m_lowres.create(m_fencPic, param->bframes, !!param->rc.aqMode || !!param->bAQMotion, param->rc.qgSize))
+ if (m_fencPic->create(param, !!m_param->bCopyPicToFrame) && m_lowres.create(param, m_fencPic, param->rc.qgSize))
X265_CHECK((m_reconColCount == NULL), "m_reconColCount was initialized");
m_numRows = (m_fencPic->m_picHeight + param->maxCUSize - 1) / param->maxCUSize;
m_reconRowFlag = new ThreadSafeInteger[m_numRows];
m_reconColCount = new ThreadSafeInteger[m_numRows];
if (quantOffsets)
- int32_t cuCount;
- if (param->rc.qgSize == 8)
- cuCount = m_lowres.maxBlocksInRowFullRes * m_lowres.maxBlocksInColFullRes;
- else
- cuCount = m_lowres.maxBlocksInRow * m_lowres.maxBlocksInCol;
+ int32_t cuCount = (param->rc.qgSize == 8) ? m_lowres.maxBlocksInRowFullRes * m_lowres.maxBlocksInColFullRes :
+ m_lowres.maxBlocksInRowLowRes * m_lowres.maxBlocksInColLowRes;
m_quantOffsets = new float[cuCount];
return true;
diff -r b1dfa312234e -r 5bf57563452b source/common/lowres.cpp
--- a/source/common/lowres.cpp Thu Nov 30 10:06:49 2017 +0530
+++ b/source/common/lowres.cpp Wed Nov 08 18:09:22 2017 +0530
@@ -24,50 +24,58 @@
#include "picyuv.h"
#include "lowres.h"
#include "mv.h"
using namespace X265_NS;
-bool Lowres::create(PicYuv *origPic, int _bframes, bool bAQEnabled, uint32_t qgSize)
+void TEncPicQPAdaptationLayer::create(int iWidth, int iHeight, uint32_t uiAQPartWidth, uint32_t uiAQPartHeight) // Size one AQ layer for an iWidth x iHeight picture tiled by uiAQPartWidth x uiAQPartHeight partitions.
+ m_AQPartWidth = uiAQPartWidth; // partition width; used as the horizontal step over the picture in xPreanalyze()
+ m_AQPartHeight = uiAQPartHeight; // partition height; used as the vertical step over the picture in xPreanalyze()
+ m_NumAQPartInWidth = (iWidth + m_AQPartWidth - 1) / m_AQPartWidth; // partitions per row, rounded up so a partial partition at the right edge is counted
+ m_NumAQPartInHeight = (iHeight + m_AQPartHeight - 1) / m_AQPartHeight; // partitions per column, rounded up so a partial partition at the bottom edge is counted
+ m_dActivity = new double[m_NumAQPartInWidth * m_NumAQPartInHeight]; // one activity slot per partition, left uninitialized here; m_dAvgActivity is not set here either. NOTE(review): no matching delete[] is visible in this patch - confirm destroy() releases it.
+bool Lowres::create(x265_param* param, PicYuv *origPic, uint32_t qgSize)
isLowres = true;
- bframes = _bframes;
+ bframes = param->bframes;
width = origPic->m_picWidth / 2;
lines = origPic->m_picHeight / 2;
lumaStride = width + 2 * origPic->m_lumaMarginX;
if (lumaStride & 31)
lumaStride += 32 - (lumaStride & 31);
- maxBlocksInRow = (width + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
- maxBlocksInCol = (lines + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
- maxBlocksInRowFullRes = maxBlocksInRow * 2;
- maxBlocksInColFullRes = maxBlocksInCol * 2;
- int cuCount = maxBlocksInRow * maxBlocksInCol;
- int cuCountFullRes;
- if (qgSize == 8)
- cuCountFullRes = maxBlocksInRowFullRes * maxBlocksInColFullRes;
- else
- cuCountFullRes = cuCount;
+ maxBlocksInRowLowRes = (width + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
+ maxBlocksInColLowRes = (lines + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
+ maxBlocksInRowFullRes = maxBlocksInRowLowRes * 2;
+ maxBlocksInColFullRes = maxBlocksInColLowRes * 2;
+ int cuCountLowRes = maxBlocksInRowLowRes * maxBlocksInColLowRes;
+ int cuCountFullRes = (qgSize > 8) ? maxBlocksInRowLowRes * maxBlocksInColLowRes :
+ maxBlocksInRowFullRes * maxBlocksInColFullRes;
/* rounding the width to multiple of lowres CU size */
- width = maxBlocksInRow * X265_LOWRES_CU_SIZE;
- lines = maxBlocksInCol * X265_LOWRES_CU_SIZE;
+ width = maxBlocksInRowLowRes * X265_LOWRES_CU_SIZE;
+ lines = maxBlocksInColLowRes * X265_LOWRES_CU_SIZE;
size_t planesize = lumaStride * (lines + 2 * origPic->m_lumaMarginY);
size_t padoffset = lumaStride * origPic->m_lumaMarginY + origPic->m_lumaMarginX;
- if (bAQEnabled)
+ if (!!param->rc.aqMode || !!param->bAQMotion)
CHECKED_MALLOC_ZERO(qpAqOffset, double, cuCountFullRes);
CHECKED_MALLOC_ZERO(qpAqMotionOffset, double, cuCountFullRes);
CHECKED_MALLOC_ZERO(invQscaleFactor, int, cuCountFullRes);
CHECKED_MALLOC_ZERO(qpCuTreeOffset, double, cuCountFullRes);
- CHECKED_MALLOC_ZERO(blockVariance, uint32_t, cuCountFullRes);
if (qgSize == 8)
- CHECKED_MALLOC_ZERO(invQscaleFactor8x8, int, cuCount);
+ CHECKED_MALLOC_ZERO(invQscaleFactor8x8, int, cuCountLowRes);
- CHECKED_MALLOC(propagateCost, uint16_t, cuCount);
+ CHECKED_MALLOC(propagateCost, uint16_t, cuCountLowRes);
/* allocate lowres buffers */
CHECKED_MALLOC_ZERO(buffer[0], pixel, 4 * planesize);
+ m_MaxAQDepth = g_log2Size[param->maxCUSize] - g_log2Size[qgSize] + 1;
+ if (m_MaxAQDepth > 0)
+ {
+ m_acAQLayer = new TEncPicQPAdaptationLayer[m_MaxAQDepth];
+ for (uint32_t d = 0; d < m_MaxAQDepth; d++)
+ {
+ m_acAQLayer[d].create(origPic->m_picWidth, origPic->m_picHeight, param->maxCUSize >> d, param->maxCUSize >> d);
+ }
+ }
buffer[1] = buffer[0] + planesize;
buffer[2] = buffer[1] + planesize;
buffer[3] = buffer[2] + planesize;
@@ -76,29 +84,24 @@
lowresPlane[1] = buffer[1] + padoffset;
lowresPlane[2] = buffer[2] + padoffset;
lowresPlane[3] = buffer[3] + padoffset;
- CHECKED_MALLOC(intraCost, int32_t, cuCount);
- CHECKED_MALLOC(intraMode, uint8_t, cuCount);
+ CHECKED_MALLOC(intraCost, int32_t, cuCountLowRes);
+ CHECKED_MALLOC(intraMode, uint8_t, cuCountLowRes);
for (int i = 0; i < bframes + 2; i++)
for (int j = 0; j < bframes + 2; j++)
- CHECKED_MALLOC(rowSatds[i][j], int32_t, maxBlocksInCol);
- CHECKED_MALLOC(lowresCosts[i][j], uint16_t, cuCount);
+ CHECKED_MALLOC(rowSatds[i][j], int32_t, maxBlocksInColLowRes);
+ CHECKED_MALLOC(lowresCosts[i][j], uint16_t, cuCountLowRes);
for (int i = 0; i < bframes + 1; i++)
- CHECKED_MALLOC(lowresMvs[0][i], MV, cuCount);
- CHECKED_MALLOC(lowresMvs[1][i], MV, cuCount);
- CHECKED_MALLOC(lowresMvCosts[0][i], int32_t, cuCount);
- CHECKED_MALLOC(lowresMvCosts[1][i], int32_t, cuCount);
+ CHECKED_MALLOC(lowresMvs[0][i], MV, cuCountLowRes);
+ CHECKED_MALLOC(lowresMvs[1][i], MV, cuCountLowRes);
+ CHECKED_MALLOC(lowresMvCosts[0][i], int32_t, cuCountLowRes);
+ CHECKED_MALLOC(lowresMvCosts[1][i], int32_t, cuCountLowRes);
return true;
return false;
@@ -130,10 +133,8 @@
- X265_FREE(blockVariance);
// (re) initialize lowres state
void Lowres::init(PicYuv *origPic, int poc)
diff -r b1dfa312234e -r 5bf57563452b source/common/lowres.h
--- a/source/common/lowres.h Thu Nov 30 10:06:49 2017 +0530
+++ b/source/common/lowres.h Wed Nov 08 18:09:22 2017 +0530
@@ -102,7 +102,18 @@
+struct TEncPicQPAdaptationLayer // Per-layer activity data for adaptive quantization; Lowres holds an array of these (m_acAQLayer), one per AQ depth.
+ uint32_t m_AQPartWidth; // width of one AQ partition (set by create())
+ uint32_t m_AQPartHeight; // height of one AQ partition (set by create())
+ uint32_t m_NumAQPartInWidth; // number of partitions per picture row, rounded up
+ uint32_t m_NumAQPartInHeight; // number of partitions per picture column, rounded up
+ double* m_dActivity; // array of m_NumAQPartInWidth * m_NumAQPartInHeight activity values, allocated by create()
+ double m_dAvgActivity; // mean of the m_dActivity entries; not initialized by create()
+ void create(int width, int height, uint32_t AQPartWidth, uint32_t AQPartHeight); // stores the partition size, derives the partition counts and allocates m_dActivity
+ void destroy(); // NOTE(review): declared here, but no definition is visible in this patch - confirm it frees m_dActivity
/* lowres buffers, sizes and strides */
struct Lowres : public ReferencePlanes
@@ -132,34 +143,30 @@
uint16_t* lowresCosts[X265_BFRAME_MAX + 2][X265_BFRAME_MAX + 2];
int32_t* lowresMvCosts[2][X265_BFRAME_MAX + 1];
MV* lowresMvs[2][X265_BFRAME_MAX + 1];
- uint32_t maxBlocksInRow;
- uint32_t maxBlocksInCol;
+ uint32_t maxBlocksInRowLowRes;
+ uint32_t maxBlocksInColLowRes;
uint32_t maxBlocksInRowFullRes;
uint32_t maxBlocksInColFullRes;
/* used for vbvLookahead */
int plannedType[X265_LOOKAHEAD_MAX + 1];
int64_t plannedSatd[X265_LOOKAHEAD_MAX + 1];
int indB;
int bframes;
/* rate control / adaptive quant data */
- double* qpAqOffset; // AQ QP offset values for each 16x16 CU
- double* qpCuTreeOffset; // cuTree QP offset values for each 16x16 CU
+ double* qpAqOffset; // AQ QP offset values for each 16x16 CU
+ double* qpCuTreeOffset; // cuTree QP offset values for each 16x16 CU
double* qpAqMotionOffset;
- int* invQscaleFactor; // qScale values for qp Aq Offsets
+ int* invQscaleFactor; // qScale values for qp Aq Offsets
int* invQscaleFactor8x8; // temporary buffer for qg-size 8
- uint32_t* blockVariance;
- uint64_t wp_ssd[3]; // This is different than SSDY, this is sum(pixel^2) - sum(pixel)^2 for entire frame
+ uint64_t wp_ssd[3]; // This is different than SSDY, this is sum(pixel^2) - sum(pixel)^2 for entire frame
uint64_t wp_sum[3];
- uint64_t frameVariance;
+ TEncPicQPAdaptationLayer* m_acAQLayer;
+ uint32_t m_MaxAQDepth;
/* cutree intermediate data */
uint16_t* propagateCost;
double weightedCostDelta[X265_BFRAME_MAX + 2];
ReferencePlanes weightedRef[X265_BFRAME_MAX + 2];
- bool create(PicYuv *origPic, int _bframes, bool bAqEnabled, uint32_t qgSize);
+ bool create(x265_param* param, PicYuv *origPic, uint32_t qgSize);
void destroy();
void init(PicYuv *origPic, int poc);
diff -r b1dfa312234e -r 5bf57563452b source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Thu Nov 30 10:06:49 2017 +0530
+++ b/source/encoder/encoder.cpp Wed Nov 08 18:09:22 2017 +0530
@@ -1009,10 +1009,9 @@
if (m_param->rc.qgSize == 8)
cuCount = inFrame->m_lowres.maxBlocksInRowFullRes * inFrame->m_lowres.maxBlocksInColFullRes;
- cuCount = inFrame->m_lowres.maxBlocksInRow * inFrame->m_lowres.maxBlocksInCol;
+ cuCount = inFrame->m_lowres.maxBlocksInRowLowRes * inFrame->m_lowres.maxBlocksInColLowRes;
memcpy(inFrame->m_quantOffsets, pic_in->quantOffsets, cuCount * sizeof(float));
if (m_pocLast == 0)
m_firstPts = inFrame->m_pts;
if (m_bframeDelay && m_pocLast == m_bframeDelay)
diff -r b1dfa312234e -r 5bf57563452b source/encoder/sao.cpp
--- a/source/encoder/sao.cpp Thu Nov 30 10:06:49 2017 +0530
+++ b/source/encoder/sao.cpp Wed Nov 08 18:09:22 2017 +0530
@@ -136,12 +136,9 @@
CHECKED_MALLOC(m_clipTableBase, pixel, maxY + 2 * rangeExt);
m_clipTable = &(m_clipTableBase[rangeExt]);
// Share with fast clip lookup table
for (int i = 0; i < rangeExt; i++)
m_clipTableBase[i] = 0;
for (int i = 0; i < maxY; i++)
m_clipTable[i] = (pixel)i;
diff -r b1dfa312234e -r 5bf57563452b source/encoder/search.cpp
--- a/source/encoder/search.cpp Thu Nov 30 10:06:49 2017 +0530
+++ b/source/encoder/search.cpp Wed Nov 08 18:09:22 2017 +0530
@@ -1951,17 +1951,14 @@
if (mvs[0].x == 0x7FFF)
/* this motion search was not estimated by lookahead */
return 0;
uint32_t block_x = (cu.m_cuPelX + g_zscanToPelX[pu.puAbsPartIdx] + pu.width / 2) >> 4;
uint32_t block_y = (cu.m_cuPelY + g_zscanToPelY[pu.puAbsPartIdx] + pu.height / 2) >> 4;
- uint32_t idx = block_y * m_frame->m_lowres.maxBlocksInRow + block_x;
- X265_CHECK(block_x < m_frame->m_lowres.maxBlocksInRow, "block_x is too high\n");
- X265_CHECK(block_y < m_frame->m_lowres.maxBlocksInCol, "block_y is too high\n");
+ uint32_t idx = block_y * m_frame->m_lowres.maxBlocksInRowLowRes + block_x;
+ X265_CHECK(block_x < m_frame->m_lowres.maxBlocksInRowLowRes, "block_x is too high\n");
+ X265_CHECK(block_y < m_frame->m_lowres.maxBlocksInColLowRes, "block_y is too high\n");
return mvs[idx] << 1; /* scale up lowres mv */
/* Pick between the two AMVP candidates which is the best one to use as
* MVP for the motion search, based on SAD cost */
int Search::selectMVP(const CUData& cu, const PredictionUnit& pu, const MV amvp[AMVP_NUM_CANDS], int list, int ref)
diff -r b1dfa312234e -r 5bf57563452b source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp Thu Nov 30 10:06:49 2017 +0530
+++ b/source/encoder/slicetype.cpp Wed Nov 08 18:09:22 2017 +0530
@@ -120,7 +120,93 @@
return (uint32_t)sum_ssd;
+/* Compute the per-partition activity of every AQ layer of curFrame.
+ *
+ * For each layer d in [0, m_lowres.m_MaxAQDepth) plane 0 of the source
+ * picture (presumably luma) is walked in raster order in steps of the
+ * layer's partition size; partitions on the right/bottom edge are clipped
+ * to the picture.  Every partition is split into four quadrants (top-left,
+ * top-right, bottom-left, bottom-right), the sample variance of each
+ * quadrant is computed, and the partition's activity is 1 + the smallest of
+ * the four variances.  Activities are written to the layer's m_dActivity
+ * array and their mean to the layer's m_dAvgActivity.
+ *
+ * curFrame: frame whose m_fencPic is read and whose m_lowres.m_acAQLayer[]
+ *           entries are updated in place. */
+void LookaheadTLD::xPreanalyze(Frame* curFrame)
+{
+    const uint32_t iWidth = curFrame->m_fencPic->m_picWidth;
+    const uint32_t iHeight = curFrame->m_fencPic->m_picHeight;
+    const intptr_t iStride = curFrame->m_fencPic->m_stride;
+    for (uint32_t d = 0; d < curFrame->m_lowres.m_MaxAQDepth; d++)
+    {
+        const pixel* src = curFrame->m_fencPic->m_picOrg[0];
+        /* Bind by reference: a by-value copy would make the m_dAvgActivity
+         * store at the end of this iteration land in a temporary and never
+         * reach the frame's layer. */
+        TEncPicQPAdaptationLayer& pcAQLayer = curFrame->m_lowres.m_acAQLayer[d];
+        const uint32_t uiAQPartWidth = pcAQLayer.m_AQPartWidth;
+        const uint32_t uiAQPartHeight = pcAQLayer.m_AQPartHeight;
+        double* pcAQU = pcAQLayer.m_dActivity;
+        double dSumAct = 0.0;
+        for (uint32_t y = 0; y < iHeight; y += uiAQPartHeight)
+        {
+            const uint32_t uiCurrAQPartHeight = min(uiAQPartHeight, iHeight - y);
+            for (uint32_t x = 0; x < iWidth; x += uiAQPartWidth, pcAQU++)
+            {
+                const uint32_t uiCurrAQPartWidth = min(uiAQPartWidth, iWidth - x);
+                const pixel* pBlkY = &src[x];
+                /* index 0..3 = top-left, top-right, bottom-left, bottom-right */
+                uint64_t uiSum[4] = { 0, 0, 0, 0 };
+                uint64_t uiSumSq[4] = { 0, 0, 0, 0 };
+                uint32_t by = 0;
+                /* upper half of the partition */
+                for (; by < uiCurrAQPartHeight >> 1; by++)
+                {
+                    uint32_t bx = 0;
+                    for (; bx < uiCurrAQPartWidth >> 1; bx++)
+                    {
+                        /* widen before squaring so the product is not formed in int */
+                        const uint64_t v = pBlkY[bx];
+                        uiSum[0] += v;
+                        uiSumSq[0] += v * v;
+                    }
+                    for (; bx < uiCurrAQPartWidth; bx++)
+                    {
+                        const uint64_t v = pBlkY[bx];
+                        uiSum[1] += v;
+                        uiSumSq[1] += v * v;
+                    }
+                    pBlkY += iStride;
+                }
+                /* lower half of the partition */
+                for (; by < uiCurrAQPartHeight; by++)
+                {
+                    uint32_t bx = 0;
+                    for (; bx < uiCurrAQPartWidth >> 1; bx++)
+                    {
+                        const uint64_t v = pBlkY[bx];
+                        uiSum[2] += v;
+                        uiSumSq[2] += v * v;
+                    }
+                    for (; bx < uiCurrAQPartWidth; bx++)
+                    {
+                        const uint64_t v = pBlkY[bx];
+                        uiSum[3] += v;
+                        uiSumSq[3] += v * v;
+                    }
+                    pBlkY += iStride;
+                }
+                /* The quadrant pixel count below is only exact for even
+                 * partition dimensions. */
+                assert((uiCurrAQPartWidth & 1) == 0);
+                assert((uiCurrAQPartHeight & 1) == 0);
+                const uint32_t pixelWidthOfQuadrants = uiCurrAQPartWidth >> 1;
+                const uint32_t pixelHeightOfQuadrants = uiCurrAQPartHeight >> 1;
+                const uint32_t numPixInAQPart = pixelWidthOfQuadrants * pixelHeightOfQuadrants;
+                double dMinVar = DBL_MAX;
+                if (numPixInAQPart != 0)
+                {
+                    for (int i = 0; i < 4; i++)
+                    {
+                        /* Var = E[x^2] - E[x]^2 */
+                        const double dAverage = double(uiSum[i]) / numPixInAQPart;
+                        const double dVariance = double(uiSumSq[i]) / numPixInAQPart - dAverage * dAverage;
+                        dMinVar = min(dMinVar, dVariance);
+                    }
+                }
+                else
+                {
+                    /* degenerate (1-pixel wide or tall) partition: no quadrant to measure */
+                    dMinVar = 0.0;
+                }
+                const double dActivity = 1.0 + dMinVar;
+                *pcAQU = dActivity;
+                dSumAct += dActivity;
+            }
+            src += iStride * uiCurrAQPartHeight;
+        }
+        const double dAvgAct = dSumAct / (pcAQLayer.m_NumAQPartInWidth * pcAQLayer.m_NumAQPartInHeight);
+        pcAQLayer.m_dAvgActivity = dAvgAct;
+    }
+}
void LookaheadTLD::calcAdaptiveQuantFrame(Frame *curFrame, x265_param* param)
/* Actual adaptive quantization */
@@ -156,14 +242,13 @@
double strength = 0.f;
if (param->rc.aqMode == X265_AQ_NONE || param->rc.aqStrength == 0)
- /* Need to init it anyways for CU tree */
- int cuCount = blockCount;
+// /* Need to init it anyways for CU tree */
+// int cuCount = blockCount;
if (param->rc.aqMode && param->rc.aqStrength == 0)
if (quantOffsets)
- for (int cuxy = 0; cuxy < cuCount; cuxy++)
+ for (int cuxy = 0; cuxy < blockCount; cuxy++)
curFrame->m_lowres.qpCuTreeOffset[cuxy] = curFrame->m_lowres.qpAqOffset[cuxy] = quantOffsets[cuxy];
curFrame->m_lowres.invQscaleFactor[cuxy] = x265_exp2fix8(curFrame->m_lowres.qpCuTreeOffset[cuxy]);
@@ -171,9 +256,9 @@
- memset(curFrame->m_lowres.qpCuTreeOffset, 0, cuCount * sizeof(double));
- memset(curFrame->m_lowres.qpAqOffset, 0, cuCount * sizeof(double));
- for (int cuxy = 0; cuxy < cuCount; cuxy++)
+ memset(curFrame->m_lowres.qpCuTreeOffset, 0, blockCount * sizeof(double));
+ memset(curFrame->m_lowres.qpAqOffset, 0, blockCount * sizeof(double));
+ for (int cuxy = 0; cuxy < blockCount; cuxy++)
curFrame->m_lowres.invQscaleFactor[cuxy] = 256;
@@ -188,35 +273,34 @@
+ if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE || param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED)
+ {
+ xPreanalyze(curFrame);
+ }
blockXY = 0;
double avg_adj_pow2 = 0, avg_adj = 0, qp_adj = 0;
double bias_strength = 0.f;
if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE || param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED)
double bit_depth_correction = 1.f / (1 << (2*(X265_DEPTH-8)));
- curFrame->m_lowres.frameVariance = 0;
- uint64_t rowVariance = 0;
for (blockY = 0; blockY < maxRow; blockY += loopIncr)
- rowVariance = 0;
for (blockX = 0; blockX < maxCol; blockX += loopIncr)
uint32_t energy = acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize);
- curFrame->m_lowres.blockVariance[blockXY] = energy;
- rowVariance += energy;
qp_adj = pow(energy * bit_depth_correction + 1, 0.1);
curFrame->m_lowres.qpCuTreeOffset[blockXY] = qp_adj;
avg_adj += qp_adj;
avg_adj_pow2 += qp_adj * qp_adj;
- curFrame->m_lowres.frameVariance += (rowVariance / maxCol);
- curFrame->m_lowres.frameVariance /= maxRow;
avg_adj /= blockCount;
avg_adj_pow2 /= blockCount;
strength = param->rc.aqStrength * avg_adj;
- avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - (modeTwoConst)) / avg_adj;
+ avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - modeTwoConst) / avg_adj;
bias_strength = param->rc.aqStrength;
diff -r b1dfa312234e -r 5bf57563452b source/encoder/slicetype.h
--- a/source/encoder/slicetype.h Thu Nov 30 10:06:49 2017 +0530
+++ b/source/encoder/slicetype.h Wed Nov 08 18:09:22 2017 +0530
@@ -82,14 +82,11 @@
~LookaheadTLD() { X265_FREE(wbuffer[0]); }
void calcAdaptiveQuantFrame(Frame *curFrame, x265_param* param);
void lowresIntraEstimate(Lowres& fenc, uint32_t qgSize);
void weightsAnalyse(Lowres& fenc, Lowres& ref);
+ void xPreanalyze(Frame* curFrame);
uint32_t acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, int csp, uint32_t qgSize);
uint32_t lumaSumCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, uint32_t qgSize);
uint32_t weightCostLuma(Lowres& fenc, Lowres& ref, WeightParam& wp);
More information about the x265-devel mailing list