[x265] [PATCH] [OUTPUT CHANGED]SAO: convert sao rdo cost calculation from float to int
Ashok Kumar Mishra
ashok at multicorewareinc.com
Thu May 26 14:35:22 CEST 2016
The reason for the output change is straightforward: the lambda value, the
cost calculation, and the cost comparison are now all done in fixed point.
On Thu, May 26, 2016 at 5:54 PM, Ashok Kumar Mishra <
ashok at multicorewareinc.com> wrote:
> This is an output-changing patch. Is there any other issue apart from
> the change in output?
>
> On Thu, May 26, 2016 at 5:46 PM, Deepthi Nandakumar <
> deepthi at multicorewareinc.com> wrote:
>
>>
>>
>> On Wed, May 4, 2016 at 7:39 PM, <ashok at multicorewareinc.com> wrote:
>>
>>> # HG changeset patch
>>> # User Ashok Kumar Mishra<ashok at multicorewareinc.com>
>>> # Date 1462355258 -19800
>>> # Wed May 04 15:17:38 2016 +0530
>>> # Node ID 70a0888d0703a35b0c3c3a57f96931d0767eb470
>>> # Parent 9f27620a948b67498056246b97db72bebac99218
>>> [OUTPUT CHANGED]SAO: convert sao rdo cost calculation from float to int
>>>
>>> diff -r 9f27620a948b -r 70a0888d0703 source/encoder/sao.cpp
>>> --- a/source/encoder/sao.cpp Mon Apr 25 13:39:54 2016 +0530
>>> +++ b/source/encoder/sao.cpp Wed May 04 15:17:38 2016 +0530
>>> @@ -53,7 +53,7 @@
>>> return r;
>>> }
>>>
>>> -inline int64_t estSaoDist(int32_t count, int offset, int32_t offsetOrg)
>>> +inline int64_t estSaoDist(int32_t count, int32_t offset, int32_t
>>> offsetOrg)
>>> {
>>> return (count * offset - offsetOrg * 2) * offset;
>>> }
>>> @@ -1193,7 +1193,7 @@
>>> const CUData* cu = m_frame->m_encData->getPicCTU(addr);
>>> int qp = cu->m_qp[0];
>>>
>>> - double lambda[2] = {0.0};
>>> + int64_t lambda[2] = { 0 };
>>>
>>> int qpCb = qp;
>>> if (m_param->internalCsp == X265_CSP_I420)
>>> @@ -1201,8 +1201,8 @@
>>> else
>>> qpCb = X265_MIN(qp + slice->m_pps->chromaQpOffset[0],
>>> QP_MAX_SPEC);
>>>
>>> - lambda[0] = x265_lambda2_tab[qp];
>>> - lambda[1] = x265_lambda2_tab[qpCb]; // Use Cb QP for SAO chroma
>>> + lambda[0] = (int64_t)floor(256.0 * x265_lambda2_tab[qp]);
>>> + lambda[1] = (int64_t)floor(256.0 * x265_lambda2_tab[qpCb]); // Use
>>> Cb QP for SAO chroma
>>>
>>> const bool allowMerge[2] = {(idxX != 0), (rowBaseAddr != 0)}; //
>>> left, up
>>>
>>> @@ -1250,8 +1250,8 @@
>>> m_entropyCoder.store(m_rdContexts.temp);
>>>
>>> // Estimate distortion and cost of new SAO params
>>> - double bestCost = 0.0;
>>> - double rateDist = 0.0;
>>> + int64_t bestCost = 0;
>>> + int64_t rateDist = 0;
>>> // Estimate distortion and cost of new SAO params
>>> saoLumaComponentParamDist(saoParam, addr, rateDist, lambda,
>>> bestCost);
>>> if (chroma)
>>> @@ -1265,7 +1265,7 @@
>>> if (!allowMerge[mergeIdx])
>>> continue;
>>>
>>> - double mergeDist = 0;
>>> + int64_t mergeDist = 0;
>>> for (int plane = 0; plane < planes; plane++)
>>> {
>>> int64_t estDist = 0;
>>> @@ -1280,7 +1280,7 @@
>>> estDist +=
>>> estSaoDist(m_count[plane][typeIdx][classIdx + bandPos], mergeOffset,
>>> m_offsetOrg[plane][typeIdx][classIdx + bandPos]);
>>> }
>>> }
>>> - mergeDist += ((double)estDist / lambda[!!plane]);
>>> + mergeDist += (estDist / (lambda[!!plane] >> 8));
>>> }
>>>
>>> m_entropyCoder.load(m_rdContexts.cur);
>>> @@ -1290,8 +1290,8 @@
>>> if (allowMerge[1] && (mergeIdx == 1))
>>> m_entropyCoder.codeSaoMerge(1);
>>>
>>> - int32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
>>> - double mergeCost = mergeDist + (double)estRate;
>>> + uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
>>> + int64_t mergeCost = mergeDist + estRate;
>>> if (mergeCost < bestCost)
>>> {
>>> SaoMergeMode mergeMode = mergeIdx ? SAO_MERGE_UP :
>>> SAO_MERGE_LEFT;
>>> @@ -1337,7 +1337,7 @@
>>> {
>>> for (int classIdx = 1; classIdx < SAO_NUM_OFFSET + 1;
>>> classIdx++)
>>> {
>>> - int32_t count = m_count[plane][typeIdx][classIdx];
>>> + int32_t& count = m_count[plane][typeIdx][classIdx];
>>> int32_t& offsetOrg =
>>> m_offsetOrg[plane][typeIdx][classIdx];
>>> int32_t& offsetOut = m_offset[plane][typeIdx][classIdx];
>>>
>>> @@ -1360,7 +1360,7 @@
>>> {
>>> for (int classIdx = 0; classIdx < MAX_NUM_SAO_CLASS; classIdx++)
>>> {
>>> - int32_t count = m_count[plane][SAO_BO][classIdx];
>>> + int32_t& count = m_count[plane][SAO_BO][classIdx];
>>> int32_t& offsetOrg = m_offsetOrg[plane][SAO_BO][classIdx];
>>> int32_t& offsetOut = m_offset[plane][SAO_BO][classIdx];
>>>
>>> @@ -1373,14 +1373,27 @@
>>> }
>>> }
>>>
>>> -void SAO::estIterOffset(int typeIdx, double lambda, int32_t count,
>>> int32_t offsetOrg, int& offset, int& distClasses, double& costClasses)
>>> +inline int64_t SAO::calcSaoRdoCost(int64_t distortion, uint32_t bits,
>>> int64_t lambda)
>>> +{
>>> +#if X265_DEPTH < 10
>>> + X265_CHECK(bits <= (INT64_MAX - 128) / lambda,
>>> + "calcRdCost wrap detected dist: %u, bits %u, lambda:
>>> " X265_LL "\n",
>>> + distortion, bits, lambda);
>>> +#else
>>> + X265_CHECK(bits <= (INT64_MAX - 128) / lambda2,
>>> + "calcRdCost wrap detected dist: " X265_LL ", bits
>>> %u, lambda: " X265_LL "\n",
>>> + distortion, bits, lambda);
>>>
>>
>> Fails smoke tests - did you mean lambda here?
>>
>>
>>> +#endif
>>> + return distortion + ((bits * lambda + 128) >> 8);
>>> +}
>>> +void SAO::estIterOffset(int typeIdx, int64_t lambda, int32_t count,
>>> int32_t offsetOrg, int32_t& offset, int32_t& distClasses, int64_t&
>>> costClasses)
>>> {
>>> int bestOffset = 0;
>>> distClasses = 0;
>>>
>>> // Assuming sending quantized value 0 results in zero offset and
>>> sending the value zero needs 1 bit.
>>> // entropy coder can be used to measure the exact rate here.
>>> - double bestCost = lambda;
>>> + int64_t bestCost = calcSaoRdoCost(0, 1, lambda);
>>> while (offset != 0)
>>> {
>>> // Calculate the bits required for signalling the offset
>>> @@ -1390,7 +1403,7 @@
>>>
>>> // Do the dequntization before distorion calculation
>>> int64_t dist = estSaoDist(count, offset << SAO_BIT_INC,
>>> offsetOrg);
>>> - double cost = ((double)dist + lambda * (double)rate);
>>> + int64_t cost = calcSaoRdoCost(dist, rate, lambda);
>>> if (cost < bestCost)
>>> {
>>> bestCost = cost;
>>> @@ -1404,22 +1417,23 @@
>>> offset = bestOffset;
>>> }
>>>
>>> -void SAO::saoLumaComponentParamDist(SAOParam* saoParam, int addr,
>>> double& rateDist, double* lambda, double &bestCost)
>>> +void SAO::saoLumaComponentParamDist(SAOParam* saoParam, int32_t addr,
>>> int64_t& rateDist, int64_t* lambda, int64_t &bestCost)
>>> {
>>> int64_t bestDist = 0;
>>> int bestTypeIdx = -1;
>>>
>>> SaoCtuParam* lclCtuParam = &saoParam->ctuParam[0][addr];
>>>
>>> - int distClasses[MAX_NUM_SAO_CLASS];
>>> - double costClasses[MAX_NUM_SAO_CLASS];
>>> + int32_t distClasses[MAX_NUM_SAO_CLASS];
>>> + int64_t costClasses[MAX_NUM_SAO_CLASS];
>>>
>>> // RDO SAO_NA
>>> m_entropyCoder.load(m_rdContexts.temp);
>>> m_entropyCoder.resetBits();
>>> m_entropyCoder.codeSaoType(0);
>>>
>>> - double dCostPartBest = m_entropyCoder.getNumberOfWrittenBits() *
>>> lambda[0];
>>> + uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();
>>> + int64_t costPartBest = calcSaoRdoCost(0, rate, lambda[0]);
>>>
>>> //EO distortion calculation
>>> for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE - 1; typeIdx++)
>>> @@ -1427,7 +1441,7 @@
>>> int64_t estDist = 0;
>>> for (int classIdx = 1; classIdx < SAO_NUM_OFFSET + 1;
>>> classIdx++)
>>> {
>>> - int32_t count = m_count[0][typeIdx][classIdx];
>>> + int32_t& count = m_count[0][typeIdx][classIdx];
>>> int32_t& offsetOrg = m_offsetOrg[0][typeIdx][classIdx];
>>> int32_t& offsetOut = m_offset[0][typeIdx][classIdx];
>>>
>>> @@ -1441,12 +1455,12 @@
>>> m_entropyCoder.resetBits();
>>> m_entropyCoder.codeSaoOffsetEO(m_offset[0][typeIdx] + 1,
>>> typeIdx, 0);
>>>
>>> - uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
>>> - double cost = (double)estDist + lambda[0] * (double)estRate;
>>> + uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();
>>> + int64_t cost = calcSaoRdoCost(estDist, rate, lambda[0]);
>>>
>>> - if (cost < dCostPartBest)
>>> + if (cost < costPartBest)
>>> {
>>> - dCostPartBest = cost;
>>> + costPartBest = cost;
>>> bestDist = estDist;
>>> bestTypeIdx = typeIdx;
>>> }
>>> @@ -1458,14 +1472,14 @@
>>> lclCtuParam->typeIdx = bestTypeIdx;
>>> lclCtuParam->bandPos = 0;
>>> for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
>>> - lclCtuParam->offset[classIdx] =
>>> (int)m_offset[0][bestTypeIdx][classIdx + 1];
>>> + lclCtuParam->offset[classIdx] =
>>> m_offset[0][bestTypeIdx][classIdx + 1];
>>> }
>>>
>>> //BO RDO
>>> int64_t estDist = 0;
>>> for (int classIdx = 0; classIdx < MAX_NUM_SAO_CLASS; classIdx++)
>>> {
>>> - int32_t count = m_count[0][SAO_BO][classIdx];
>>> + int32_t& count = m_count[0][SAO_BO][classIdx];
>>> int32_t& offsetOrg = m_offsetOrg[0][SAO_BO][classIdx];
>>> int32_t& offsetOut = m_offset[0][SAO_BO][classIdx];
>>>
>>> @@ -1473,12 +1487,12 @@
>>> }
>>>
>>> // Estimate Best Position
>>> - double bestRDCostBO = MAX_DOUBLE;
>>> - int bestClassBO = 0;
>>> + int64_t bestRDCostBO = MAX_INT64;
>>> + int32_t bestClassBO = 0;
>>>
>>> for (int i = 0; i < MAX_NUM_SAO_CLASS - SAO_NUM_OFFSET + 1; i++)
>>> {
>>> - double currentRDCost = 0.0;
>>> + int64_t currentRDCost = 0;
>>> for (int j = i; j < i + SAO_NUM_OFFSET; j++)
>>> currentRDCost += costClasses[j];
>>>
>>> @@ -1498,21 +1512,21 @@
>>> m_entropyCoder.codeSaoOffsetBO(m_offset[0][SAO_BO] + bestClassBO,
>>> bestClassBO, 0);
>>>
>>> uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
>>> - double cost = (double)estDist + lambda[0] * (double)estRate;
>>> + int64_t cost = calcSaoRdoCost(estDist, estRate, lambda[0]);
>>>
>>> - if (cost < dCostPartBest)
>>> + if (cost < costPartBest)
>>> {
>>> - dCostPartBest = cost;
>>> + costPartBest = cost;
>>> bestDist = estDist;
>>>
>>> lclCtuParam->mergeMode = SAO_MERGE_NONE;
>>> lclCtuParam->typeIdx = SAO_BO;
>>> lclCtuParam->bandPos = bestClassBO;
>>> for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
>>> - lclCtuParam->offset[classIdx] =
>>> (int)m_offset[0][SAO_BO][classIdx + bestClassBO];
>>> + lclCtuParam->offset[classIdx] =
>>> m_offset[0][SAO_BO][classIdx + bestClassBO];
>>> }
>>>
>>> - rateDist = ((double)bestDist / lambda[0]);
>>> + rateDist = bestDist / (lambda[0] >> 8);
>>> m_entropyCoder.load(m_rdContexts.temp);
>>> m_entropyCoder.codeSaoOffset(*lclCtuParam, 0);
>>> m_entropyCoder.store(m_rdContexts.temp);
>>> @@ -1520,26 +1534,27 @@
>>> if (m_param->internalCsp == X265_CSP_I400)
>>> {
>>> uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();
>>> - bestCost = rateDist + (double)rate;
>>> + bestCost = rateDist + rate;
>>> }
>>> }
>>>
>>> -void SAO::saoChromaComponentParamDist(SAOParam* saoParam, int addr,
>>> double& rateDist, double* lambda, double &bestCost)
>>> +void SAO::saoChromaComponentParamDist(SAOParam* saoParam, int32_t addr,
>>> int64_t& rateDist, int64_t* lambda, int64_t &bestCost)
>>> {
>>> int64_t bestDist = 0;
>>> int bestTypeIdx = -1;
>>>
>>> SaoCtuParam* lclCtuParam[2] = { &saoParam->ctuParam[1][addr],
>>> &saoParam->ctuParam[2][addr] };
>>>
>>> - double costClasses[MAX_NUM_SAO_CLASS];
>>> - int distClasses[MAX_NUM_SAO_CLASS];
>>> - int bestClassBO[2] = { 0, 0 };
>>> + int64_t costClasses[MAX_NUM_SAO_CLASS];
>>> + int32_t distClasses[MAX_NUM_SAO_CLASS];
>>> + int32_t bestClassBO[2] = { 0, 0 };
>>>
>>> m_entropyCoder.load(m_rdContexts.temp);
>>> m_entropyCoder.resetBits();
>>> m_entropyCoder.codeSaoType(0);
>>>
>>> - double dCostPartBest = m_entropyCoder.getNumberOfWrittenBits() *
>>> lambda[1];
>>> + uint32_t bits = m_entropyCoder.getNumberOfWrittenBits();
>>> + int64_t costPartBest = calcSaoRdoCost(0, bits, lambda[1]);
>>>
>>> //EO RDO
>>> for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE - 1; typeIdx++)
>>> @@ -1549,7 +1564,7 @@
>>> {
>>> for (int classIdx = 1; classIdx < SAO_NUM_OFFSET + 1;
>>> classIdx++)
>>> {
>>> - int32_t count = m_count[compIdx][typeIdx][classIdx];
>>> + int32_t& count = m_count[compIdx][typeIdx][classIdx];
>>> int32_t& offsetOrg =
>>> m_offsetOrg[compIdx][typeIdx][classIdx];
>>> int32_t& offsetOut =
>>> m_offset[compIdx][typeIdx][classIdx];
>>>
>>> @@ -1566,11 +1581,11 @@
>>> m_entropyCoder.codeSaoOffsetEO(m_offset[compIdx +
>>> 1][typeIdx] + 1, typeIdx, compIdx + 1);
>>>
>>> uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
>>> - double cost = (double)(estDist[0] + estDist[1]) + lambda[1] *
>>> (double)estRate;
>>> + int64_t cost = calcSaoRdoCost((estDist[0] + estDist[1]),
>>> estRate, lambda[1]);
>>>
>>> - if (cost < dCostPartBest)
>>> + if (cost < costPartBest)
>>> {
>>> - dCostPartBest = cost;
>>> + costPartBest = cost;
>>> bestDist = (estDist[0] + estDist[1]);
>>> bestTypeIdx = typeIdx;
>>> }
>>> @@ -1584,7 +1599,7 @@
>>> lclCtuParam[compIdx]->typeIdx = bestTypeIdx;
>>> lclCtuParam[compIdx]->bandPos = 0;
>>> for (int classIdx = 0; classIdx < SAO_NUM_OFFSET;
>>> classIdx++)
>>> - lclCtuParam[compIdx]->offset[classIdx] =
>>> (int)m_offset[compIdx + 1][bestTypeIdx][classIdx + 1];
>>> + lclCtuParam[compIdx]->offset[classIdx] =
>>> m_offset[compIdx + 1][bestTypeIdx][classIdx + 1];
>>> }
>>> }
>>>
>>> @@ -1594,11 +1609,11 @@
>>> // Estimate Best Position
>>> for (int compIdx = 1; compIdx < 3; compIdx++)
>>> {
>>> - double bestRDCostBO = MAX_DOUBLE;
>>> + int64_t bestRDCostBO = MAX_INT64;
>>>
>>> for (int classIdx = 0; classIdx < MAX_NUM_SAO_CLASS; classIdx++)
>>> {
>>> - int32_t count = m_count[compIdx][SAO_BO][classIdx];
>>> + int32_t& count = m_count[compIdx][SAO_BO][classIdx];
>>> int32_t& offsetOrg = m_offsetOrg[compIdx][SAO_BO][classIdx];
>>> int32_t& offsetOut = m_offset[compIdx][SAO_BO][classIdx];
>>>
>>> @@ -1607,7 +1622,7 @@
>>>
>>> for (int i = 0; i < MAX_NUM_SAO_CLASS - SAO_NUM_OFFSET + 1; i++)
>>> {
>>> - double currentRDCost = 0.0;
>>> + int64_t currentRDCost = 0;
>>> for (int j = i; j < i + SAO_NUM_OFFSET; j++)
>>> currentRDCost += costClasses[j];
>>>
>>> @@ -1630,11 +1645,11 @@
>>> m_entropyCoder.codeSaoOffsetBO(m_offset[compIdx + 1][SAO_BO] +
>>> bestClassBO[compIdx], bestClassBO[compIdx], compIdx + 1);
>>>
>>> uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
>>> - double cost = (double)(estDist[0] + estDist[1]) + lambda[1] *
>>> (double)estRate;
>>> + int64_t cost = calcSaoRdoCost((estDist[0] + estDist[1]), estRate,
>>> lambda[1]);
>>>
>>> - if (cost < dCostPartBest)
>>> + if (cost < costPartBest)
>>> {
>>> - dCostPartBest = cost;
>>> + costPartBest = cost;
>>> bestDist = (estDist[0] + estDist[1]);
>>>
>>> for (int compIdx = 0; compIdx < 2; compIdx++)
>>> @@ -1643,11 +1658,11 @@
>>> lclCtuParam[compIdx]->typeIdx = SAO_BO;
>>> lclCtuParam[compIdx]->bandPos = bestClassBO[compIdx];
>>> for (int classIdx = 0; classIdx < SAO_NUM_OFFSET;
>>> classIdx++)
>>> - lclCtuParam[compIdx]->offset[classIdx] =
>>> (int)m_offset[compIdx + 1][SAO_BO][classIdx + bestClassBO[compIdx]];
>>> + lclCtuParam[compIdx]->offset[classIdx] =
>>> m_offset[compIdx + 1][SAO_BO][classIdx + bestClassBO[compIdx]];
>>> }
>>> }
>>>
>>> - rateDist += ((double)bestDist / lambda[1]);
>>> + rateDist += (bestDist / (lambda[1] >> 8));
>>> m_entropyCoder.load(m_rdContexts.temp);
>>>
>>> if (saoParam->bSaoFlag[1])
>>> @@ -1657,12 +1672,12 @@
>>> m_entropyCoder.store(m_rdContexts.temp);
>>>
>>> uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();
>>> - bestCost = rateDist + (double)rate;
>>> + bestCost = rateDist + rate;
>>> }
>>> else
>>> {
>>> uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();
>>> - bestCost = rateDist + (double)rate;
>>> + bestCost = rateDist + rate;
>>> }
>>> }
>>>
>>> diff -r 9f27620a948b -r 70a0888d0703 source/encoder/sao.h
>>> --- a/source/encoder/sao.h Mon Apr 25 13:39:54 2016 +0530
>>> +++ b/source/encoder/sao.h Wed May 04 15:17:38 2016 +0530
>>> @@ -126,12 +126,13 @@
>>> void calcSaoStatsCu(int addr, int plane);
>>> void calcSaoStatsCu_BeforeDblk(Frame* pic, int idxX, int idxY);
>>>
>>> - void saoLumaComponentParamDist(SAOParam* saoParam, int addr,
>>> double& rateDist, double* lambda, double &bestCost);
>>> - void saoChromaComponentParamDist(SAOParam* saoParam, int addr,
>>> double& rateDist, double* lambda, double &bestCost);
>>> + void saoLumaComponentParamDist(SAOParam* saoParam, int addr,
>>> int64_t& rateDist, int64_t* lambda, int64_t& bestCost);
>>> + void saoChromaComponentParamDist(SAOParam* saoParam, int addr,
>>> int64_t& rateDist, int64_t* lambda, int64_t& bestCost);
>>>
>>> - void estIterOffset(int typeIdx, double lambda, int32_t count,
>>> int32_t offsetOrg, int& offset, int& distClasses, double& costClasses);
>>> + void estIterOffset(int typeIdx, int64_t lambda, int32_t count,
>>> int32_t offsetOrg, int32_t& offset, int32_t& distClasses, int64_t&
>>> costClasses);
>>> void rdoSaoUnitRowEnd(const SAOParam* saoParam, int numctus);
>>> void rdoSaoUnitCu(SAOParam* saoParam, int rowBaseAddr, int idxX,
>>> int addr);
>>> + int64_t calcSaoRdoCost(int64_t distortion, uint32_t bits, int64_t
>>> lambda);
>>>
>>> void saoStatsInitialOffset(int planes);
>>>
>>> _______________________________________________
>>> x265-devel mailing list
>>> x265-devel at videolan.org
>>> https://mailman.videolan.org/listinfo/x265-devel
>>>
>>
>>
>>
>> --
>> Deepthi Nandakumar
>> Engineering Manager, x265
>> Multicoreware, Inc
>>
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20160526/50510612/attachment-0001.html>
More information about the x265-devel
mailing list