[x265] [PATCH] SAO: perform merge distortion and cost calculation in one loop
Ashok Kumar Mishra
ashok at multicorewareinc.com
Fri Jan 22 16:01:41 CET 2016
As expected, there is not much change in performance, since SAO is a far less
time-consuming module than other modules such as look-ahead and motion
estimation.
I have modified only one function in SAO. I tested one 4K video
(tearsofsteel-4k-1000f-s214.y4m), which has 1214 frames, with the medium
preset, profiling with Intel VTune.
*Before: 2545,02 (total CPU time)*
*After: 2536,95 (total CPU time)*
Thanks
Ashok
On Mon, Jan 18, 2016 at 9:39 PM, Pradeep Ramachandran <
pradeep at multicorewareinc.com> wrote:
> Ashok,
> What is the impact on performance that you expect from these changes to
> SAO? Can you run encodes with 4K videos in ABR mode for ultrafast, medium,
> and veryslow and share the impact of performance if we expect an impact
> (which I think there will be)?
>
> Pradeep Ramachandran, PhD
> Solution Architect at www.multicorewareinc.com/
> Adjunct Faculty at www.cse.iitm.ac.in/
> pradeeprama.info/
> Ph: +91 99627 82018
>
> On Mon, Jan 18, 2016 at 9:21 PM, <ashok at multicorewareinc.com> wrote:
>
>> # HG changeset patch
>> # User Ashok Kumar Mishra<ashok at multicorewareinc.com>
>> # Date 1453132285 -19800
>> # Mon Jan 18 21:21:25 2016 +0530
>> # Node ID 0f89b83c6fe64181296a51392ff52236dc6a934e
>> # Parent add820b8cde0fc82a643fd32717def5a3e2f3888
>> SAO: perform merge distortion and cost calculation in one loop
>>
>> diff -r add820b8cde0 -r 0f89b83c6fe6 source/encoder/sao.cpp
>> --- a/source/encoder/sao.cpp Mon Jan 18 21:20:57 2016 +0530
>> +++ b/source/encoder/sao.cpp Mon Jan 18 21:21:25 2016 +0530
>> @@ -758,16 +758,6 @@
>> std::swap(m_tmpL1[2], m_tmpL2[2]);
>> }
>>
>> -void SAO::copySaoUnit(SaoCtuParam* saoUnitDst, const SaoCtuParam*
>> saoUnitSrc)
>> -{
>> - saoUnitDst->mergeMode = saoUnitSrc->mergeMode;
>> - saoUnitDst->typeIdx = saoUnitSrc->typeIdx;
>> - saoUnitDst->bandPos = saoUnitSrc->bandPos;
>> -
>> - for (int i = 0; i < SAO_NUM_OFFSET; i++)
>> - saoUnitDst->offset[i] = saoUnitSrc->offset[i];
>> -}
>> -
>> /* Calculate SAO statistics for current CTU without non-crossing slice */
>> void SAO::calcSaoStatsCu(int addr, int plane)
>> {
>> @@ -1233,158 +1223,12 @@
>> m_depthSaoRate[1 * SAO_DEPTHRATE_SIZE + m_refDepth] =
>> m_numNoSao[1] / ((double)numctus);
>> }
>>
>> -void SAO::rdoSaoUnitRow(SAOParam* saoParam, int idxY)
>> -{
>> - SaoCtuParam mergeSaoParam[NUM_MERGE_MODE][2];
>> - double lambda[3] = {m_lumaLambda, m_chromaLambda, m_chromaLambda};
>> - bool allowMerge[2]; // left, up
>> - allowMerge[1] = (idxY > 0);
>> -
>> - for (int idxX = 0; idxX < m_numCuInWidth; idxX++)
>> - {
>> - int addr = idxX + idxY * m_numCuInWidth;
>> - int addrUp = idxY ? addr - m_numCuInWidth : -1;
>> - int addrLeft = idxX ? addr - 1 : -1;
>> - allowMerge[0] = (idxX > 0);
>> -
>> - m_entropyCoder.load(m_rdContexts.cur);
>> - if (allowMerge[0])
>> - m_entropyCoder.codeSaoMerge(0);
>> - if (allowMerge[1])
>> - m_entropyCoder.codeSaoMerge(0);
>> - m_entropyCoder.store(m_rdContexts.temp);
>> -
>> - // reset stats Y, Cb, Cr
>> - X265_CHECK(sizeof(PerPlane) == (sizeof(int32_t) * (NUM_PLANE *
>> MAX_NUM_SAO_TYPE * MAX_NUM_SAO_CLASS)), "Found Padding space in struct
>> PerPlane");
>> -
>> - // TODO: Confirm the address space is continuous
>> - if (m_param->bSaoNonDeblocked)
>> - {
>> - memcpy(m_count, m_countPreDblk[addr], sizeof(m_count));
>> - memcpy(m_offsetOrg, m_offsetOrgPreDblk[addr],
>> sizeof(m_offsetOrg));
>> - }
>> - else
>> - {
>> - memset(m_count, 0, sizeof(m_count));
>> - memset(m_offsetOrg, 0, sizeof(m_offsetOrg));
>> - }
>> -
>> - saoParam->ctuParam[0][addr].reset();
>> - saoParam->ctuParam[1][addr].reset();
>> - saoParam->ctuParam[2][addr].reset();
>> -
>> - if (saoParam->bSaoFlag[0])
>> - calcSaoStatsCu(addr, 0);
>> -
>> - if (saoParam->bSaoFlag[1])
>> - {
>> - calcSaoStatsCu(addr, 1);
>> - calcSaoStatsCu(addr, 2);
>> - }
>> -
>> - double mergeDist[NUM_MERGE_MODE] = {0.0, 0.0, 0.0};
>> - saoLumaComponentParamDist(saoParam, addr, mergeDist);
>> - if (m_chromaFormat != X265_CSP_I400)
>> - saoChromaComponentParamDist(saoParam, addr, mergeDist);
>> -
>> - // merge left or merge up
>> - for (int plane = 0; plane < 3; plane++)
>> - {
>> - for (int mergeIdx = 0; mergeIdx < 2; mergeIdx++)
>> - {
>> - SaoCtuParam* mergeSrcParam = NULL;
>> - if (addrLeft >= 0 && mergeIdx == 0)
>> - mergeSrcParam =
>> &(saoParam->ctuParam[plane][addrLeft]);
>> - else if (addrUp >= 0 && mergeIdx == 1)
>> - mergeSrcParam = &(saoParam->ctuParam[plane][addrUp]);
>> - if (mergeSrcParam)
>> - {
>> - int64_t estDist = 0;
>> - int typeIdx = mergeSrcParam->typeIdx;
>> - if (typeIdx >= 0)
>> - {
>> - int bandPos = (typeIdx == SAO_BO) ?
>> mergeSrcParam->bandPos : 0;
>> - for (int classIdx = 0; classIdx <
>> SAO_NUM_OFFSET; classIdx++)
>> - {
>> - int mergeOffset =
>> mergeSrcParam->offset[classIdx];
>> - estDist +=
>> estSaoDist(m_count[plane][typeIdx][classIdx + bandPos + 1], mergeOffset,
>> m_offsetOrg[plane][typeIdx][classIdx + bandPos + 1]);
>> - }
>> - }
>> -
>> - copySaoUnit(&mergeSaoParam[plane][mergeIdx],
>> mergeSrcParam);
>> - mergeSaoParam[plane][mergeIdx].mergeMode = mergeIdx
>> ? SAO_MERGE_UP : SAO_MERGE_LEFT;
>> - mergeDist[mergeIdx + 1] += ((double)estDist /
>> lambda[plane]);
>> - }
>> - }
>> - }
>> -
>> - if (saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1])
>> - {
>> - // Cost of new SAO_params
>> - m_entropyCoder.load(m_rdContexts.cur);
>> - m_entropyCoder.resetBits();
>> - if (allowMerge[0])
>> - m_entropyCoder.codeSaoMerge(0);
>> - if (allowMerge[1])
>> - m_entropyCoder.codeSaoMerge(0);
>> - for (int plane = 0; plane < 3; plane++)
>> - {
>> - if (saoParam->bSaoFlag[plane > 0])
>> -
>> m_entropyCoder.codeSaoOffset(saoParam->ctuParam[plane][addr], plane);
>> - }
>> -
>> - uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();
>> - double bestCost = mergeDist[0] + (double)rate;
>> - m_entropyCoder.store(m_rdContexts.temp);
>> -
>> - // Cost of Merge
>> - for (int mergeIdx = 0; mergeIdx < 2; ++mergeIdx)
>> - {
>> - if (!allowMerge[mergeIdx])
>> - continue;
>> -
>> - m_entropyCoder.load(m_rdContexts.cur);
>> - m_entropyCoder.resetBits();
>> - if (allowMerge[0])
>> - m_entropyCoder.codeSaoMerge(1 - mergeIdx);
>> - if (allowMerge[1] && (mergeIdx == 1))
>> - m_entropyCoder.codeSaoMerge(1);
>> -
>> - rate = m_entropyCoder.getNumberOfWrittenBits();
>> - double mergeCost = mergeDist[mergeIdx + 1] +
>> (double)rate;
>> - if (mergeCost < bestCost)
>> - {
>> - SaoMergeMode mergeMode = mergeIdx ? SAO_MERGE_UP :
>> SAO_MERGE_LEFT;
>> - bestCost = mergeCost;
>> - m_entropyCoder.store(m_rdContexts.temp);
>> - for (int plane = 0; plane < 3; plane++)
>> - {
>> - mergeSaoParam[plane][mergeIdx].mergeMode =
>> mergeMode;
>> - if (saoParam->bSaoFlag[plane > 0])
>> -
>> copySaoUnit(&saoParam->ctuParam[plane][addr],
>> &mergeSaoParam[plane][mergeIdx]);
>> - }
>> - }
>> - }
>> -
>> - if (saoParam->ctuParam[0][addr].typeIdx < 0)
>> - m_numNoSao[0]++;
>> - if (m_chromaFormat != X265_CSP_I400 &&
>> saoParam->ctuParam[1][addr].typeIdx < 0)
>> - m_numNoSao[1]++;
>> -
>> - m_entropyCoder.load(m_rdContexts.temp);
>> - m_entropyCoder.store(m_rdContexts.cur);
>> - }
>> - }
>> -}
>> -
>> void SAO::rdoSaoUnitCu(SAOParam* saoParam, int rowBaseAddr, int idxX,
>> int addr)
>> {
>> - SaoCtuParam mergeSaoParam[NUM_MERGE_MODE][2];
>> double lambda[3] = {m_lumaLambda, m_chromaLambda, m_chromaLambda};
>> const bool allowMerge[2] = {(idxX != 0), (rowBaseAddr != 0)}; //
>> left, up
>>
>> - const int addrUp = rowBaseAddr ? addr - m_numCuInWidth : -1;
>> - const int addrLeft = idxX ? addr - 1 : -1;
>> + const int addrMerge[2] = {(idxX ? addr - 1 : -1), (rowBaseAddr ?
>> addr - m_numCuInWidth : -1)};// left, up
>>
>> bool chroma = m_param->internalCsp != X265_CSP_I400;
>> int planes = chroma ? 3 : 1;
>> @@ -1425,45 +1269,14 @@
>> calcSaoStatsCu(addr, 1);
>> calcSaoStatsCu(addr, 2);
>> saoStatsInitialOffset(1);
>> - saoStatsInitialOffset(2);
>> +// saoStatsInitialOffset(2);
>> }
>>
>> - double mergeDist[NUM_MERGE_MODE] = {0.0, 0.0, 0.0};
>> + double mergeDist[NUM_MERGE_MODE] = { 0.0 };
>> saoLumaComponentParamDist(saoParam, addr, mergeDist);
>> if (chroma)
>> saoChromaComponentParamDist(saoParam, addr, mergeDist);
>>
>> - // merge left or merge up
>> - for (int plane = 0; plane < planes; plane++)
>> - {
>> - for (int mergeIdx = 0; mergeIdx < 2; mergeIdx++)
>> - {
>> - SaoCtuParam* mergeSrcParam = NULL;
>> - if (addrLeft >= 0 && mergeIdx == 0)
>> - mergeSrcParam = &(saoParam->ctuParam[plane][addrLeft]);
>> - else if (addrUp >= 0 && mergeIdx == 1)
>> - mergeSrcParam = &(saoParam->ctuParam[plane][addrUp]);
>> - if (mergeSrcParam)
>> - {
>> - int64_t estDist = 0;
>> - int typeIdx = mergeSrcParam->typeIdx;
>> - if (typeIdx >= 0)
>> - {
>> - int bandPos = (typeIdx == SAO_BO) ?
>> mergeSrcParam->bandPos : 0;
>> - for (int classIdx = 0; classIdx < SAO_NUM_OFFSET;
>> classIdx++)
>> - {
>> - int mergeOffset =
>> mergeSrcParam->offset[classIdx];
>> - estDist +=
>> estSaoDist(m_count[plane][typeIdx][classIdx + bandPos + 1], mergeOffset,
>> m_offsetOrg[plane][typeIdx][classIdx + bandPos + 1]);
>> - }
>> - }
>> -
>> - copySaoUnit(&mergeSaoParam[plane][mergeIdx],
>> mergeSrcParam);
>> - mergeSaoParam[plane][mergeIdx].mergeMode = mergeIdx ?
>> SAO_MERGE_UP : SAO_MERGE_LEFT;
>> - mergeDist[mergeIdx + 1] += ((double)estDist /
>> lambda[plane]);
>> - }
>> - }
>> - }
>> -
>> if (saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1])
>> {
>> // Cost of new SAO_params
>> @@ -1483,12 +1296,31 @@
>> double bestCost = mergeDist[0] + (double)rate;
>> m_entropyCoder.store(m_rdContexts.temp);
>>
>> - // Cost of Merge
>> + // Cost of merge left or Up
>> for (int mergeIdx = 0; mergeIdx < 2; ++mergeIdx)
>> {
>> if (!allowMerge[mergeIdx])
>> continue;
>>
>> + for (int plane = 0; plane < 3; plane++)
>> + {
>> + int64_t estDist = 0;
>> + SaoCtuParam* mergeSrcParam =
>> &(saoParam->ctuParam[plane][addrMerge[mergeIdx]]);
>> + int typeIdx = mergeSrcParam->typeIdx;
>> + if (typeIdx >= 0)
>> + {
>> + int bandPos = (typeIdx == SAO_BO) ?
>> mergeSrcParam->bandPos : 0;
>> + for (int classIdx = 0; classIdx < SAO_NUM_OFFSET;
>> classIdx++)
>> + {
>> + int mergeOffset =
>> mergeSrcParam->offset[classIdx];
>> + estDist +=
>> estSaoDist(m_count[plane][typeIdx][classIdx + bandPos + 1], mergeOffset,
>> m_offsetOrg[plane][typeIdx][classIdx + bandPos + 1]);
>> + }
>> + }
>> +
>> + mergeDist[mergeIdx + 1] += ((double)estDist /
>> lambda[plane]);
>> + }
>> +
>> +
>> m_entropyCoder.load(m_rdContexts.cur);
>> m_entropyCoder.resetBits();
>> if (allowMerge[0])
>> @@ -1505,9 +1337,17 @@
>> m_entropyCoder.store(m_rdContexts.temp);
>> for (int plane = 0; plane < planes; plane++)
>> {
>> - mergeSaoParam[plane][mergeIdx].mergeMode = mergeMode;
>> if (saoParam->bSaoFlag[plane > 0])
>> - copySaoUnit(&saoParam->ctuParam[plane][addr],
>> &mergeSaoParam[plane][mergeIdx]);
>> + {
>> + SaoCtuParam* dstCtuParam =
>> &saoParam->ctuParam[plane][addr];
>> + SaoCtuParam* mergeSrcParam =
>> &(saoParam->ctuParam[plane][addrMerge[mergeIdx]]);
>> + dstCtuParam->mergeMode = mergeMode;
>> + dstCtuParam->typeIdx = mergeSrcParam->typeIdx;
>> + dstCtuParam->bandPos = mergeSrcParam->bandPos;
>> +
>> + for (int i = 0; i < SAO_NUM_OFFSET; i++)
>> + dstCtuParam->offset[i] =
>> mergeSrcParam->offset[i];
>> + }
>> }
>> }
>> }
>> @@ -1524,75 +1364,81 @@
>>
>> // Rounds the division of initial offsets by the number of samples in
>> // each of the statistics table entries.
>> -void SAO::saoStatsInitialOffset(int plane)
>> +void SAO::saoStatsInitialOffset(int planes)
>> {
>> // EO
>> - for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE - 1; typeIdx++)
>> + for (int plane = planes; plane <= 2*planes; plane++)
>> {
>> - for (int classIdx = 1; classIdx < SAO_EO_LEN + 1; classIdx++)
>> + for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE - 1; typeIdx++)
>> {
>> - int32_t count = m_count[plane][typeIdx][classIdx];
>> - int32_t& offsetOrg = m_offsetOrg[plane][typeIdx][classIdx];
>> - int32_t& offsetOut = m_offset[plane][typeIdx][classIdx];
>> + for (int classIdx = 1; classIdx < SAO_EO_LEN + 1; classIdx++)
>> + {
>> + int32_t count = m_count[plane][typeIdx][classIdx];
>> + int32_t& offsetOrg =
>> m_offsetOrg[plane][typeIdx][classIdx];
>> + int32_t& offsetOut = m_offset[plane][typeIdx][classIdx];
>> +
>> + if (count)
>> + {
>> + offsetOut = roundIBDI(offsetOrg, count <<
>> SAO_BIT_INC);
>> + offsetOut = x265_clip3(-OFFSET_THRESH + 1,
>> OFFSET_THRESH - 1, offsetOut);
>> +
>> + if (classIdx < 3)
>> + offsetOut = X265_MAX(offsetOut, 0);
>> + else
>> + offsetOut = X265_MIN(offsetOut, 0);
>> + }
>> + }
>> + }
>> + }
>> +
>> + // BO
>> + for (int plane = planes; plane <= 2*planes; plane++)
>> + {
>> + for (int classIdx = 1; classIdx < SAO_NUM_BO_CLASSES + 1;
>> classIdx++)
>> + {
>> + int32_t count = m_count[plane][SAO_BO][classIdx];
>> + int32_t& offsetOrg = m_offsetOrg[plane][SAO_BO][classIdx];
>> + int32_t& offsetOut = m_offset[plane][SAO_BO][classIdx];
>>
>> if (count)
>> {
>> offsetOut = roundIBDI(offsetOrg, count << SAO_BIT_INC);
>> offsetOut = x265_clip3(-OFFSET_THRESH + 1, OFFSET_THRESH
>> - 1, offsetOut);
>> -
>> - if (classIdx < 3)
>> - offsetOut = X265_MAX(offsetOut, 0);
>> - else
>> - offsetOut = X265_MIN(offsetOut, 0);
>> }
>> }
>> }
>> - // BO
>> - for (int classIdx = 1; classIdx < SAO_NUM_BO_CLASSES + 1; classIdx++)
>> - {
>> - int32_t count = m_count[plane][SAO_BO][classIdx];
>> - int32_t& offsetOrg = m_offsetOrg[plane][SAO_BO][classIdx];
>> - int32_t& offsetOut = m_offset[plane][SAO_BO][classIdx];
>> -
>> - if (count)
>> - {
>> - offsetOut = roundIBDI(offsetOrg, count << SAO_BIT_INC);
>> - offsetOut = x265_clip3(-OFFSET_THRESH + 1, OFFSET_THRESH -
>> 1, offsetOut);
>> - }
>> - }
>> }
>>
>> -inline int SAO::estIterOffset(int typeIdx, int classIdx, double lambda,
>> int offset, int32_t count, int32_t offsetOrg, int32_t*
>> currentDistortionTableBo, double* currentRdCostTableBo)
>> +inline int SAO::estIterOffset(int typeIdx, double lambda, int offset,
>> int32_t count, int32_t offsetOrg, int& distBOClasses, double& costBOClasses)
>> {
>> - int offsetOut = 0;
>> + int bestOffset = 0;
>>
>> - // Assuming sending quantized value 0 results in zero offset and
>> sending the value zero needs 1 bit. entropy coder can be used to measure
>> the exact rate here.
>> - double tempMinCost = lambda;
>> + // Assuming sending quantized value 0 results in zero offset and
>> sending the value zero needs 1 bit.
>> + // entropy coder can be used to measure the exact rate here.
>> + double bestCost = lambda;
>> while (offset != 0)
>> {
>> // Calculate the bits required for signalling the offset
>> - int tempRate = (typeIdx == SAO_BO) ? (abs(offset) + 2) :
>> (abs(offset) + 1);
>> + int rate = (typeIdx == SAO_BO) ? (abs(offset) + 2) :
>> (abs(offset) + 1);
>> if (abs(offset) == OFFSET_THRESH - 1)
>> - tempRate--;
>> + rate--;
>>
>> // Do the dequntization before distorion calculation
>> - int tempOffset = offset << SAO_BIT_INC;
>> - int64_t tempDist = estSaoDist(count, tempOffset, offsetOrg);
>> - double tempCost = ((double)tempDist + lambda *
>> (double)tempRate);
>> - if (tempCost < tempMinCost)
>> + int64_t dist = estSaoDist(count, offset << SAO_BIT_INC,
>> offsetOrg);
>> + double cost = ((double)dist + lambda * (double)rate);
>> + if (cost < bestCost)
>> {
>> - tempMinCost = tempCost;
>> - offsetOut = offset;
>> + bestCost = cost;
>> + bestOffset = offset;
>> if (typeIdx == SAO_BO)
>> {
>> - currentDistortionTableBo[classIdx - 1] = (int)tempDist;
>> - currentRdCostTableBo[classIdx - 1] = tempCost;
>> + distBOClasses = (int)dist;
>> + costBOClasses = bestCost;
>> }
>> }
>> offset = (offset > 0) ? (offset - 1) : (offset + 1);
>> }
>> -
>> - return offsetOut;
>> + return bestOffset;
>> }
>>
>> void SAO::saoLumaComponentParamDist(SAOParam* saoParam, int addr,
>> double* mergeDist)
>> @@ -1602,8 +1448,8 @@
>>
>> SaoCtuParam* lclCtuParam = &saoParam->ctuParam[0][addr];
>>
>> - int currentDistortionTableBo[MAX_NUM_SAO_CLASS];
>> - double currentRdCostTableBo[MAX_NUM_SAO_CLASS];
>> + int distBOClasses[MAX_NUM_SAO_CLASS];
>> + double costBOClasses[MAX_NUM_SAO_CLASS];
>>
>> m_entropyCoder.load(m_rdContexts.temp);
>> m_entropyCoder.resetBits();
>> @@ -1621,15 +1467,9 @@
>> int32_t& offsetOrg = m_offsetOrg[0][typeIdx][classIdx];
>> int32_t& offsetOut = m_offset[0][typeIdx][classIdx];
>>
>> - if (count)
>> - {
>> - offsetOut = estIterOffset(typeIdx, classIdx,
>> m_lumaLambda, offsetOut, count, offsetOrg, currentDistortionTableBo,
>> currentRdCostTableBo);
>> - }
>> - else
>> - {
>> - offsetOrg = 0;
>> - offsetOut = 0;
>> - }
>> + if (offsetOut)
>> + offsetOut = estIterOffset(typeIdx, m_lumaLambda,
>> offsetOut, count, offsetOrg, distBOClasses[0], costBOClasses[0]);
>> +
>> estDist += estSaoDist(count, (int)offsetOut << SAO_BIT_INC,
>> offsetOrg);
>> }
>>
>> @@ -1665,44 +1505,37 @@
>> int32_t& offsetOrg = m_offsetOrg[0][SAO_BO][classIdx];
>> int32_t& offsetOut = m_offset[0][SAO_BO][classIdx];
>>
>> - currentDistortionTableBo[classIdx - 1] = 0;
>> - currentRdCostTableBo[classIdx - 1] = m_lumaLambda;
>> + distBOClasses[classIdx - 1] = 0;
>> + costBOClasses[classIdx - 1] = m_lumaLambda;
>>
>> - if (count)
>> - {
>> - offsetOut = estIterOffset(SAO_BO, classIdx, m_lumaLambda,
>> offsetOut, count, offsetOrg, currentDistortionTableBo,
>> currentRdCostTableBo);
>> - }
>> - else
>> - {
>> - offsetOrg = 0;
>> - offsetOut = 0;
>> - }
>> + if (offsetOut)
>> + offsetOut = estIterOffset(SAO_BO, m_lumaLambda, offsetOut,
>> count, offsetOrg, distBOClasses[classIdx - 1], costBOClasses[classIdx - 1]);
>> }
>>
>> // Estimate Best Position
>> - double bestRDCostTableBo = MAX_DOUBLE;
>> - int bestClassTableBo = 0;
>> + double bestRDCostBO = MAX_DOUBLE;
>> + int bestClassBO = 0;
>>
>> for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; i++)
>> {
>> double currentRDCost = 0.0;
>> for (int j = i; j < i + SAO_BO_LEN; j++)
>> - currentRDCost += currentRdCostTableBo[j];
>> + currentRDCost += costBOClasses[j];
>>
>> - if (currentRDCost < bestRDCostTableBo)
>> + if (currentRDCost < bestRDCostBO)
>> {
>> - bestRDCostTableBo = currentRDCost;
>> - bestClassTableBo = i;
>> + bestRDCostBO = currentRDCost;
>> + bestClassBO = i;
>> }
>> }
>>
>> estDist = 0;
>> - for (int classIdx = bestClassTableBo; classIdx < bestClassTableBo +
>> SAO_BO_LEN; classIdx++)
>> - estDist += currentDistortionTableBo[classIdx];
>> + for (int classIdx = bestClassBO; classIdx < bestClassBO +
>> SAO_BO_LEN; classIdx++)
>> + estDist += distBOClasses[classIdx];
>>
>> m_entropyCoder.load(m_rdContexts.temp);
>> m_entropyCoder.resetBits();
>> - m_entropyCoder.codeSaoOffsetBO(m_offset[0][SAO_BO] +
>> (bestClassTableBo + 1), bestClassTableBo, 0);
>> + m_entropyCoder.codeSaoOffsetBO(m_offset[0][SAO_BO] + (bestClassBO +
>> 1), bestClassBO, 0);
>>
>> uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
>> double cost = (double)estDist + m_lumaLambda * (double)estRate;
>> @@ -1714,9 +1547,9 @@
>>
>> lclCtuParam->mergeMode = SAO_MERGE_NONE;
>> lclCtuParam->typeIdx = SAO_BO;
>> - lclCtuParam->bandPos = bestClassTableBo;
>> + lclCtuParam->bandPos = bestClassBO;
>> for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
>> - lclCtuParam->offset[classIdx] =
>> (int)m_offset[0][SAO_BO][classIdx + bestClassTableBo + 1];
>> + lclCtuParam->offset[classIdx] =
>> (int)m_offset[0][SAO_BO][classIdx + bestClassBO + 1];
>> }
>>
>> mergeDist[0] = ((double)bestDist / m_lumaLambda);
>> @@ -1732,9 +1565,9 @@
>>
>> SaoCtuParam* lclCtuParam[2] = { &saoParam->ctuParam[1][addr],
>> &saoParam->ctuParam[2][addr] };
>>
>> - double currentRdCostTableBo[MAX_NUM_SAO_CLASS];
>> - int currentDistortionTableBo[MAX_NUM_SAO_CLASS];
>> - int bestClassTableBo[2] = { 0, 0 };
>> + double costBOClasses[MAX_NUM_SAO_CLASS];
>> + int distBOClasses[MAX_NUM_SAO_CLASS];
>> + int bestClassBO[2] = { 0, 0 };
>>
>> m_entropyCoder.load(m_rdContexts.temp);
>> m_entropyCoder.resetBits();
>> @@ -1754,15 +1587,8 @@
>> int32_t& offsetOrg =
>> m_offsetOrg[compIdx][typeIdx][classIdx];
>> int32_t& offsetOut =
>> m_offset[compIdx][typeIdx][classIdx];
>>
>> - if (count)
>> - {
>> - offsetOut = estIterOffset(typeIdx, classIdx,
>> m_chromaLambda, offsetOut, count, offsetOrg, currentDistortionTableBo,
>> currentRdCostTableBo);
>> - }
>> - else
>> - {
>> - offsetOrg = 0;
>> - offsetOut = 0;
>> - }
>> + if (offsetOut)
>> + offsetOut = estIterOffset(typeIdx, m_chromaLambda,
>> offsetOut, count, offsetOrg, distBOClasses[0], costBOClasses[0]);
>>
>> estDist[compIdx - 1] += estSaoDist(count, (int)offsetOut
>> << SAO_BIT_INC, offsetOrg);
>> }
>> @@ -1772,9 +1598,7 @@
>> m_entropyCoder.resetBits();
>>
>> for (int compIdx = 0; compIdx < 2; compIdx++)
>> - {
>> m_entropyCoder.codeSaoOffsetEO(m_offset[compIdx +
>> 1][typeIdx] + 1, typeIdx, compIdx + 1);
>> - }
>>
>> uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
>> double cost = (double)(estDist[0] + estDist[1]) + m_chromaLambda
>> * (double)estRate;
>> @@ -1805,7 +1629,7 @@
>> // Estimate Best Position
>> for (int compIdx = 1; compIdx < 3; compIdx++)
>> {
>> - double bestRDCostTableBo = MAX_DOUBLE;
>> + double bestRDCostBO = MAX_DOUBLE;
>>
>> for (int classIdx = 1; classIdx < SAO_NUM_BO_CLASSES + 1;
>> classIdx++)
>> {
>> @@ -1813,45 +1637,36 @@
>> int32_t& offsetOrg = m_offsetOrg[compIdx][SAO_BO][classIdx];
>> int32_t& offsetOut = m_offset[compIdx][SAO_BO][classIdx];
>>
>> - currentDistortionTableBo[classIdx - 1] = 0;
>> - currentRdCostTableBo[classIdx - 1] = m_chromaLambda;
>> + distBOClasses[classIdx - 1] = 0;
>> + costBOClasses[classIdx - 1] = m_chromaLambda;
>>
>> - if (count)
>> - {
>> - offsetOut = estIterOffset(SAO_BO, classIdx,
>> m_chromaLambda, offsetOut, count, offsetOrg, currentDistortionTableBo,
>> currentRdCostTableBo);
>> - }
>> - else
>> - {
>> - offsetOrg = 0;
>> - offsetOut = 0;
>> - }
>> + if (offsetOut)
>> + offsetOut = estIterOffset(SAO_BO, m_chromaLambda,
>> offsetOut, count, offsetOrg, distBOClasses[classIdx - 1],
>> costBOClasses[classIdx - 1]);
>> }
>>
>> for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; i++)
>> {
>> double currentRDCost = 0.0;
>> for (int j = i; j < i + SAO_BO_LEN; j++)
>> - currentRDCost += currentRdCostTableBo[j];
>> + currentRDCost += costBOClasses[j];
>>
>> - if (currentRDCost < bestRDCostTableBo)
>> + if (currentRDCost < bestRDCostBO)
>> {
>> - bestRDCostTableBo = currentRDCost;
>> - bestClassTableBo[compIdx - 1] = i;
>> + bestRDCostBO = currentRDCost;
>> + bestClassBO[compIdx - 1] = i;
>> }
>> }
>>
>> estDist[compIdx - 1] = 0;
>> - for (int classIdx = bestClassTableBo[compIdx - 1]; classIdx <
>> bestClassTableBo[compIdx - 1] + SAO_BO_LEN; classIdx++)
>> - estDist[compIdx - 1] += currentDistortionTableBo[classIdx];
>> + for (int classIdx = bestClassBO[compIdx - 1]; classIdx <
>> bestClassBO[compIdx - 1] + SAO_BO_LEN; classIdx++)
>> + estDist[compIdx - 1] += distBOClasses[classIdx];
>> }
>>
>> m_entropyCoder.load(m_rdContexts.temp);
>> m_entropyCoder.resetBits();
>>
>> for (int compIdx = 0; compIdx < 2; compIdx++)
>> - {
>> - m_entropyCoder.codeSaoOffsetBO(m_offset[compIdx + 1][SAO_BO] +
>> (bestClassTableBo[compIdx] + 1), bestClassTableBo[compIdx], compIdx + 1);
>> - }
>> + m_entropyCoder.codeSaoOffsetBO(m_offset[compIdx + 1][SAO_BO] +
>> (bestClassBO[compIdx] + 1), bestClassBO[compIdx], compIdx + 1);
>>
>> uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
>> double cost = (double)(estDist[0] + estDist[1]) + m_chromaLambda *
>> (double)estRate;
>> @@ -1865,9 +1680,9 @@
>> {
>> lclCtuParam[compIdx]->mergeMode = SAO_MERGE_NONE;
>> lclCtuParam[compIdx]->typeIdx = SAO_BO;
>> - lclCtuParam[compIdx]->bandPos = bestClassTableBo[compIdx];
>> + lclCtuParam[compIdx]->bandPos = bestClassBO[compIdx];
>> for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
>> - lclCtuParam[compIdx]->offset[classIdx] =
>> (int)m_offset[compIdx + 1][SAO_BO][classIdx + bestClassTableBo[compIdx] +
>> 1];
>> + lclCtuParam[compIdx]->offset[classIdx] =
>> (int)m_offset[compIdx + 1][SAO_BO][classIdx + bestClassBO[compIdx] + 1];
>> }
>> }
>>
>> diff -r add820b8cde0 -r 0f89b83c6fe6 source/encoder/sao.h
>> --- a/source/encoder/sao.h Mon Jan 18 21:20:57 2016 +0530
>> +++ b/source/encoder/sao.h Mon Jan 18 21:21:25 2016 +0530
>> @@ -136,7 +136,7 @@
>> void processSaoUnitCuLuma(SaoCtuParam* ctuParam, int idxY, int idxX);
>> void processSaoUnitCuChroma(SaoCtuParam* ctuParam[3], int idxY, int
>> idxX);
>>
>> - void copySaoUnit(SaoCtuParam* saoUnitDst, const SaoCtuParam*
>> saoUnitSrc);
>> +// void copySaoUnit(SaoCtuParam* saoUnitDst, const SaoCtuParam*
>> saoUnitSrc);
>>
>> void calcSaoStatsCu(int addr, int plane);
>> void calcSaoStatsCu_BeforeDblk(Frame* pic, int idxX, int idxY);
>> @@ -144,10 +144,10 @@
>> void saoLumaComponentParamDist(SAOParam* saoParam, int addr, double*
>> mergeDist);
>> void saoChromaComponentParamDist(SAOParam* saoParam, int addr,
>> double* mergeDist);
>>
>> - inline int estIterOffset(int typeIdx, int classIdx, double lambda,
>> int offset, int32_t count, int32_t offsetOrg,
>> - int32_t* currentDistortionTableBo, double*
>> currentRdCostTableBo);
>> + inline int estIterOffset(int typeIdx, double lambda, int offset,
>> int32_t count, int32_t offsetOrg,
>> + int& currentDistortionTableBo, double&
>> currentRdCostTableBo);
>> void rdoSaoUnitRowEnd(const SAOParam* saoParam, int numctus);
>> - void rdoSaoUnitRow(SAOParam* saoParam, int idxY);
>> +// void rdoSaoUnitRow(SAOParam* saoParam, int idxY);
>> void rdoSaoUnitCu(SAOParam* saoParam, int rowBaseAddr, int idxX, int
>> addr);
>>
>> void saoStatsInitialOffset(int plane);
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20160122/d6e7f708/attachment-0001.html>
More information about the x265-devel
mailing list