[x265] [PATCH] SAO: fix for output mismatch in linux
Ashok Kumar Mishra
ashok at multicorewareinc.com
Fri Apr 22 15:28:40 CEST 2016
Kindly ignore this patch. Let me explain the reason for the binary mismatch on
Linux.
There is even a binary mismatch between release and debug mode on Linux. It is all
because of the double-type comparison
used to find the best band offset position among the 32 SAO bands. There is no error
and no reading of uninitialized memory. Only
sometimes the band offset changes based on the comparison of band
costs. Very few pixels change, and the
SSIM and PSNR values remain the same.
On Mon, Apr 18, 2016 at 4:34 PM, <ashok at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Ashok Kumar Mishra<ashok at multicorewareinc.com>
> # Date 1460973467 -19800
> # Mon Apr 18 15:27:47 2016 +0530
> # Node ID e038716af1b84f81e0123e75e1d016af31a0c505
> # Parent 02d79be487d7f825c961d15535a8681a201da3b1
> SAO: fix for output mismatch in linux
>
> diff -r 02d79be487d7 -r e038716af1b8 source/encoder/sao.cpp
> --- a/source/encoder/sao.cpp Sun Apr 17 21:07:28 2016 +0000
> +++ b/source/encoder/sao.cpp Mon Apr 18 15:27:47 2016 +0530
> @@ -1249,13 +1249,12 @@
> m_entropyCoder.codeSaoMerge(0);
> m_entropyCoder.store(m_rdContexts.temp);
>
> - double mergeDist[NUM_MERGE_MODE] = { 0.0 };
> + // Estimate distortion and cost of new SAO params
> double bestCost = 0.0;
> -
> - // Estimate distortion and cost of new SAO params
> - saoLumaComponentParamDist(saoParam, addr, mergeDist, lambda,
> bestCost);
> - if (chroma && saoParam->bSaoFlag[1])
> - saoChromaComponentParamDist(saoParam, addr, mergeDist, lambda,
> bestCost);
> + double rateDist = 0.0;
> + saoLumaComponentParamDist(saoParam, addr, rateDist, lambda, bestCost);
> + if (chroma)
> + saoChromaComponentParamDist(saoParam, addr, rateDist, lambda,
> bestCost);
>
> if (saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1])
> {
> @@ -1265,6 +1264,7 @@
> if (!allowMerge[mergeIdx])
> continue;
>
> + double mergeDist = 0;
> for (int plane = 0; plane < 3; plane++)
> {
> int64_t estDist = 0;
> @@ -1280,7 +1280,7 @@
> }
> }
>
> - mergeDist[mergeIdx + 1] += ((double)estDist /
> lambda[!!plane]);
> + mergeDist += ((double)estDist / lambda[!!plane]);
> }
>
> m_entropyCoder.load(m_rdContexts.cur);
> @@ -1290,8 +1290,8 @@
> if (allowMerge[1] && (mergeIdx == 1))
> m_entropyCoder.codeSaoMerge(1);
>
> - int32_t rate = m_entropyCoder.getNumberOfWrittenBits();
> - double mergeCost = mergeDist[mergeIdx + 1] + (double)rate;
> + uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
> + double mergeCost = mergeDist + (double)estRate;
> if (mergeCost < bestCost)
> {
> SaoMergeMode mergeMode = mergeIdx ? SAO_MERGE_UP :
> SAO_MERGE_LEFT;
> @@ -1374,7 +1374,7 @@
> }
> }
>
> -void SAO::estIterOffset(int typeIdx, double lambda, int32_t count,
> int32_t offsetOrg, int& offset, int& distClasses, double& costClasses)
> +inline void SAO::estIterOffset(int typeIdx, double lambda, int& offset,
> int32_t count, int32_t offsetOrg, int& distClasses, double& costClasses)
> {
> int bestOffset = 0;
> distClasses = 0;
> @@ -1405,15 +1405,13 @@
> offset = bestOffset;
> }
>
> -void SAO::saoLumaComponentParamDist(SAOParam* saoParam, int addr, double*
> mergeDist, double* lambda, double &bestCost)
> +void SAO::saoLumaComponentParamDist(SAOParam* saoParam, int addr, double&
> rateDist, double* lambda, double &bestCost)
> {
> int64_t bestDist = 0;
> int bestTypeIdx = -1;
>
> SaoCtuParam* lclCtuParam = &saoParam->ctuParam[0][addr];
>
> - int distClasses[MAX_NUM_SAO_CLASS];
> - double costClasses[MAX_NUM_SAO_CLASS];
>
> // RDO SAO_NA
> m_entropyCoder.load(m_rdContexts.temp);
> @@ -1423,6 +1421,8 @@
> double dCostPartBest = m_entropyCoder.getNumberOfWrittenBits() *
> lambda[0];
>
> //EO distortion calculation
> + int distEOClasses;
> + double costEOClasses;
> for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE - 1; typeIdx++)
> {
> int64_t estDist = 0;
> @@ -1431,11 +1431,11 @@
> int32_t count = m_count[0][typeIdx][classIdx];
> int32_t& offsetOrg = m_offsetOrg[0][typeIdx][classIdx];
> int32_t& offsetOut = m_offset[0][typeIdx][classIdx];
> + if (count)
> + estIterOffset(typeIdx, lambda[0], offsetOut, count,
> offsetOrg, distEOClasses, costEOClasses);
>
> - estIterOffset(typeIdx, lambda[0], count, offsetOrg,
> offsetOut, distClasses[classIdx], costClasses[classIdx]);
> -
> - //Calculate distortion
> - estDist += distClasses[classIdx];
> + estDist += estSaoDist(count, (int)offsetOut << SAO_BIT_INC,
> offsetOrg);
> +
> }
>
> m_entropyCoder.load(m_rdContexts.temp);
> @@ -1463,6 +1463,8 @@
> }
>
> //BO RDO
> + int distBOClasses[MAX_NUM_SAO_CLASS];
> + double costBOClasses[MAX_NUM_SAO_CLASS];
> int64_t estDist = 0;
> for (int classIdx = 0; classIdx < MAX_NUM_SAO_CLASS; classIdx++)
> {
> @@ -1470,7 +1472,7 @@
> int32_t& offsetOrg = m_offsetOrg[0][SAO_BO][classIdx];
> int32_t& offsetOut = m_offset[0][SAO_BO][classIdx];
>
> - estIterOffset(SAO_BO, lambda[0], count, offsetOrg, offsetOut,
> distClasses[classIdx], costClasses[classIdx]);
> + estIterOffset(SAO_BO, lambda[0], offsetOut, count, offsetOrg,
> distBOClasses[classIdx], costBOClasses[classIdx]);
> }
>
> // Estimate Best Position
> @@ -1481,7 +1483,7 @@
> {
> double currentRDCost = 0.0;
> for (int j = i; j < i + SAO_NUM_OFFSET; j++)
> - currentRDCost += costClasses[j];
> + currentRDCost += costBOClasses[j];
>
> if (currentRDCost < bestRDCostBO)
> {
> @@ -1492,7 +1494,7 @@
>
> estDist = 0;
> for (int classIdx = bestClassBO; classIdx < bestClassBO +
> SAO_NUM_OFFSET; classIdx++)
> - estDist += distClasses[classIdx];
> + estDist += distBOClasses[classIdx];
>
> m_entropyCoder.load(m_rdContexts.temp);
> m_entropyCoder.resetBits();
> @@ -1513,33 +1515,35 @@
> lclCtuParam->offset[classIdx] =
> (int)m_offset[0][SAO_BO][classIdx + bestClassBO];
> }
>
> - mergeDist[0] = ((double)bestDist / lambda[0]);
> + rateDist = ((double)bestDist / lambda[0]);
> m_entropyCoder.load(m_rdContexts.temp);
> m_entropyCoder.codeSaoOffset(*lclCtuParam, 0);
> m_entropyCoder.store(m_rdContexts.temp);
>
> - uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();
> - bestCost = mergeDist[0] + (double)rate;
> + if (m_param->internalCsp == X265_CSP_I400)
> + {
> + uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();
> + bestCost = rateDist + (double)rate;
> + }
> }
>
> -void SAO::saoChromaComponentParamDist(SAOParam* saoParam, int addr,
> double* mergeDist, double* lambda, double &bestCost)
> +void SAO::saoChromaComponentParamDist(SAOParam* saoParam, int addr,
> double& rateDist, double* lambda, double &bestCost)
> {
> int64_t bestDist = 0;
> int bestTypeIdx = -1;
>
> SaoCtuParam* lclCtuParam[2] = { &saoParam->ctuParam[1][addr],
> &saoParam->ctuParam[2][addr] };
>
> - double costClasses[MAX_NUM_SAO_CLASS];
> - int distClasses[MAX_NUM_SAO_CLASS];
> - int bestClassBO[2] = { 0, 0 };
>
> m_entropyCoder.load(m_rdContexts.temp);
> m_entropyCoder.resetBits();
> m_entropyCoder.codeSaoType(0);
>
> - double dCostPartBest = m_entropyCoder.getNumberOfWrittenBits() *
> lambda[1];
> + double costPartBest = m_entropyCoder.getNumberOfWrittenBits() *
> lambda[1];
>
> //EO RDO
> + double costEOClasses;
> + int distEOClasses;
> for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE - 1; typeIdx++)
> {
> int64_t estDist[2] = {0, 0};
> @@ -1551,9 +1555,10 @@
> int32_t& offsetOrg =
> m_offsetOrg[compIdx][typeIdx][classIdx];
> int32_t& offsetOut = m_offset[compIdx][typeIdx][classIdx];
>
> - estIterOffset(typeIdx, lambda[1], count, offsetOrg,
> offsetOut, distClasses[classIdx], costClasses[classIdx]);
> + if (count)
> + estIterOffset(typeIdx, lambda[1], offsetOut, count,
> offsetOrg, distEOClasses, costEOClasses);
>
> - estDist[compIdx - 1] += distClasses[classIdx];
> + estDist[compIdx - 1] += estSaoDist(count, (int)offsetOut
> << SAO_BIT_INC, offsetOrg);
> }
> }
>
> @@ -1566,9 +1571,9 @@
> uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
> double cost = (double)(estDist[0] + estDist[1]) + lambda[1] *
> (double)estRate;
>
> - if (cost < dCostPartBest)
> + if (cost < costPartBest)
> {
> - dCostPartBest = cost;
> + costPartBest = cost;
> bestDist = (estDist[0] + estDist[1]);
> bestTypeIdx = typeIdx;
> }
> @@ -1587,6 +1592,9 @@
> }
>
> // BO RDO
> + double costBOClasses[MAX_NUM_SAO_CLASS];
> + int distBOClasses[MAX_NUM_SAO_CLASS];
> + int bestClassBO[2] = { 0, 0 };
> int64_t estDist[2];
>
> // Estimate Best Position
> @@ -1600,14 +1608,14 @@
> int32_t& offsetOrg = m_offsetOrg[compIdx][SAO_BO][classIdx];
> int32_t& offsetOut = m_offset[compIdx][SAO_BO][classIdx];
>
> - estIterOffset(SAO_BO, lambda[1], count, offsetOrg, offsetOut,
> distClasses[classIdx], costClasses[classIdx]);
> + estIterOffset(SAO_BO, lambda[1], offsetOut, count, offsetOrg,
> distBOClasses[classIdx], costBOClasses[classIdx]);
> }
>
> for (int i = 0; i < MAX_NUM_SAO_CLASS - SAO_NUM_OFFSET + 1; i++)
> {
> double currentRDCost = 0.0;
> for (int j = i; j < i + SAO_NUM_OFFSET; j++)
> - currentRDCost += costClasses[j];
> + currentRDCost += costBOClasses[j];
>
> if (currentRDCost < bestRDCostBO)
> {
> @@ -1618,7 +1626,7 @@
>
> estDist[compIdx - 1] = 0;
> for (int classIdx = bestClassBO[compIdx - 1]; classIdx <
> bestClassBO[compIdx - 1] + SAO_NUM_OFFSET; classIdx++)
> - estDist[compIdx - 1] += distClasses[classIdx];
> + estDist[compIdx - 1] += distBOClasses[classIdx];
> }
>
> m_entropyCoder.load(m_rdContexts.temp);
> @@ -1630,9 +1638,9 @@
> uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
> double cost = (double)(estDist[0] + estDist[1]) + lambda[1] *
> (double)estRate;
>
> - if (cost < dCostPartBest)
> + if (cost < costPartBest)
> {
> - dCostPartBest = cost;
> + costPartBest = cost;
> bestDist = (estDist[0] + estDist[1]);
>
> for (int compIdx = 0; compIdx < 2; compIdx++)
> @@ -1645,15 +1653,20 @@
> }
> }
>
> - mergeDist[0] += ((double)bestDist / lambda[1]);
> + rateDist += ((double)bestDist / lambda[1]);
> m_entropyCoder.load(m_rdContexts.temp);
> + uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();
> + if (saoParam->bSaoFlag[1])
> + {
> + m_entropyCoder.codeSaoOffset(*lclCtuParam[0], 1);
> + m_entropyCoder.codeSaoOffset(*lclCtuParam[1], 2);
> + m_entropyCoder.store(m_rdContexts.temp);
>
> - m_entropyCoder.codeSaoOffset(*lclCtuParam[0], 1);
> - m_entropyCoder.codeSaoOffset(*lclCtuParam[1], 2);
> - m_entropyCoder.store(m_rdContexts.temp);
> -
> - uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();
> - bestCost = mergeDist[0] + (double)rate;
> + rate = m_entropyCoder.getNumberOfWrittenBits();
> + bestCost = rateDist + (double)rate;
> + }
> + else
> + bestCost = rateDist + (double)rate;
> }
>
> // NOTE: must put in namespace X265_NS since we need class SAO
> diff -r 02d79be487d7 -r e038716af1b8 source/encoder/sao.h
> --- a/source/encoder/sao.h Sun Apr 17 21:07:28 2016 +0000
> +++ b/source/encoder/sao.h Mon Apr 18 15:27:47 2016 +0530
> @@ -46,7 +46,6 @@
> class SAO
> {
> public:
> -
> enum { SAO_MAX_DEPTH = 4 };
> enum { SAO_BO_BITS = 5 };
> enum { MAX_NUM_SAO_CLASS = 32 };
> @@ -54,7 +53,6 @@
> enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
> enum { NUM_EDGETYPE = 5 };
> enum { NUM_PLANE = 3 };
> - enum { NUM_MERGE_MODE = 3 };
> enum { SAO_DEPTHRATE_SIZE = 4 };
>
> static const uint32_t s_eoTable[NUM_EDGETYPE];
> @@ -127,10 +125,11 @@
> void calcSaoStatsCu(int addr, int plane);
> void calcSaoStatsCu_BeforeDblk(Frame* pic, int idxX, int idxY);
>
> - void saoLumaComponentParamDist(SAOParam* saoParam, int addr, double*
> mergeDist, double* lambda, double &bestCost);
> - void saoChromaComponentParamDist(SAOParam* saoParam, int addr,
> double* mergeDist, double* lambda, double &bestCost);
> + void saoLumaComponentParamDist(SAOParam* saoParam, int addr, double&
> rateDist, double* lambda, double &bestCost);
> + void saoChromaComponentParamDist(SAOParam* saoParam, int addr,
> double& rateDist, double* lambda, double &bestCost);
>
> - void estIterOffset(int typeIdx, double lambda, int32_t count, int32_t
> offsetOrg, int& offset, int& distClasses, double& costClasses);
> + void estIterOffset(int typeIdx, double lambda, int& offset, int32_t
> count, int32_t offsetOrg,
> + int& distClasses, double& costClasses);
> void rdoSaoUnitRowEnd(const SAOParam* saoParam, int numctus);
> void rdoSaoUnitCu(SAOParam* saoParam, int rowBaseAddr, int idxX, int
> addr);
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20160422/f78bfb2f/attachment-0001.html>
More information about the x265-devel
mailing list