[x265] [PATCH] [OUTPUT CHANGED]SAO: convert sao rdo cost calculation from float to int

Ashok Kumar Mishra ashok at multicorewareinc.com
Thu May 26 14:24:32 CEST 2016


This is an output-changing patch. Is there any other issue apart from
the changed output?

On Thu, May 26, 2016 at 5:46 PM, Deepthi Nandakumar <
deepthi at multicorewareinc.com> wrote:

>
>
> On Wed, May 4, 2016 at 7:39 PM, <ashok at multicorewareinc.com> wrote:
>
>> # HG changeset patch
>> # User Ashok Kumar Mishra<ashok at multicorewareinc.com>
>> # Date 1462355258 -19800
>> #      Wed May 04 15:17:38 2016 +0530
>> # Node ID 70a0888d0703a35b0c3c3a57f96931d0767eb470
>> # Parent  9f27620a948b67498056246b97db72bebac99218
>> [OUTPUT CHANGED]SAO: convert sao rdo cost calculation from float to int
>>
>> diff -r 9f27620a948b -r 70a0888d0703 source/encoder/sao.cpp
>> --- a/source/encoder/sao.cpp    Mon Apr 25 13:39:54 2016 +0530
>> +++ b/source/encoder/sao.cpp    Wed May 04 15:17:38 2016 +0530
>> @@ -53,7 +53,7 @@
>>      return r;
>>  }
>>
>> -inline int64_t estSaoDist(int32_t count, int offset, int32_t offsetOrg)
>> +inline int64_t estSaoDist(int32_t count, int32_t offset, int32_t
>> offsetOrg)
>>  {
>>      return (count * offset - offsetOrg * 2) * offset;
>>  }
>> @@ -1193,7 +1193,7 @@
>>      const CUData* cu = m_frame->m_encData->getPicCTU(addr);
>>      int qp = cu->m_qp[0];
>>
>> -    double lambda[2] = {0.0};
>> +    int64_t lambda[2] = { 0 };
>>
>>      int qpCb = qp;
>>      if (m_param->internalCsp == X265_CSP_I420)
>> @@ -1201,8 +1201,8 @@
>>      else
>>          qpCb = X265_MIN(qp + slice->m_pps->chromaQpOffset[0],
>> QP_MAX_SPEC);
>>
>> -    lambda[0] = x265_lambda2_tab[qp];
>> -    lambda[1] = x265_lambda2_tab[qpCb]; // Use Cb QP for SAO chroma
>> +    lambda[0] = (int64_t)floor(256.0 * x265_lambda2_tab[qp]);
>> +    lambda[1] = (int64_t)floor(256.0 * x265_lambda2_tab[qpCb]); // Use
>> Cb QP for SAO chroma
>>
>>      const bool allowMerge[2] = {(idxX != 0), (rowBaseAddr != 0)}; //
>> left, up
>>
>> @@ -1250,8 +1250,8 @@
>>      m_entropyCoder.store(m_rdContexts.temp);
>>
>>      // Estimate distortion and cost of new SAO params
>> -    double bestCost = 0.0;
>> -    double rateDist = 0.0;
>> +    int64_t bestCost = 0;
>> +    int64_t rateDist = 0;
>>      // Estimate distortion and cost of new SAO params
>>      saoLumaComponentParamDist(saoParam, addr, rateDist, lambda,
>> bestCost);
>>      if (chroma)
>> @@ -1265,7 +1265,7 @@
>>              if (!allowMerge[mergeIdx])
>>                  continue;
>>
>> -            double mergeDist = 0;
>> +            int64_t mergeDist = 0;
>>              for (int plane = 0; plane < planes; plane++)
>>              {
>>                  int64_t estDist = 0;
>> @@ -1280,7 +1280,7 @@
>>                          estDist +=
>> estSaoDist(m_count[plane][typeIdx][classIdx + bandPos], mergeOffset,
>> m_offsetOrg[plane][typeIdx][classIdx + bandPos]);
>>                      }
>>                  }
>> -                mergeDist += ((double)estDist / lambda[!!plane]);
>> +                mergeDist += (estDist / (lambda[!!plane] >> 8));
>>              }
>>
>>              m_entropyCoder.load(m_rdContexts.cur);
>> @@ -1290,8 +1290,8 @@
>>              if (allowMerge[1] && (mergeIdx == 1))
>>                  m_entropyCoder.codeSaoMerge(1);
>>
>> -            int32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
>> -            double mergeCost = mergeDist + (double)estRate;
>> +            uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
>> +            int64_t mergeCost = mergeDist + estRate;
>>              if (mergeCost < bestCost)
>>              {
>>                  SaoMergeMode mergeMode = mergeIdx ? SAO_MERGE_UP :
>> SAO_MERGE_LEFT;
>> @@ -1337,7 +1337,7 @@
>>          {
>>              for (int classIdx = 1; classIdx < SAO_NUM_OFFSET + 1;
>> classIdx++)
>>              {
>> -                int32_t  count     = m_count[plane][typeIdx][classIdx];
>> +                int32_t&  count     = m_count[plane][typeIdx][classIdx];
>>                  int32_t& offsetOrg =
>> m_offsetOrg[plane][typeIdx][classIdx];
>>                  int32_t& offsetOut = m_offset[plane][typeIdx][classIdx];
>>
>> @@ -1360,7 +1360,7 @@
>>      {
>>          for (int classIdx = 0; classIdx < MAX_NUM_SAO_CLASS; classIdx++)
>>          {
>> -            int32_t  count     = m_count[plane][SAO_BO][classIdx];
>> +            int32_t&  count     = m_count[plane][SAO_BO][classIdx];
>>              int32_t& offsetOrg = m_offsetOrg[plane][SAO_BO][classIdx];
>>              int32_t& offsetOut = m_offset[plane][SAO_BO][classIdx];
>>
>> @@ -1373,14 +1373,27 @@
>>      }
>>  }
>>
>> -void SAO::estIterOffset(int typeIdx, double lambda, int32_t count,
>> int32_t offsetOrg, int& offset, int& distClasses, double& costClasses)
>> +inline int64_t SAO::calcSaoRdoCost(int64_t distortion, uint32_t bits,
>> int64_t lambda)
>> +{
>> +#if X265_DEPTH < 10
>> +        X265_CHECK(bits <= (INT64_MAX - 128) / lambda,
>> +                   "calcRdCost wrap detected dist: %u, bits %u, lambda:
>> " X265_LL "\n",
>> +                   distortion, bits, lambda);
>> +#else
>> +        X265_CHECK(bits <= (INT64_MAX - 128) / lambda2,
>> +                   "calcRdCost wrap detected dist: " X265_LL ", bits %u,
>> lambda: " X265_LL "\n",
>> +                   distortion, bits, lambda);
>>
>
> Fails smoke tests - did you mean lambda here?
>
>
>> +#endif
>> +        return distortion + ((bits * lambda + 128) >> 8);
>> +}
>> +void SAO::estIterOffset(int typeIdx, int64_t lambda, int32_t count,
>> int32_t offsetOrg, int32_t& offset, int32_t& distClasses, int64_t&
>> costClasses)
>>  {
>>      int bestOffset = 0;
>>      distClasses    = 0;
>>
>>      // Assuming sending quantized value 0 results in zero offset and
>> sending the value zero needs 1 bit.
>>      // entropy coder can be used to measure the exact rate here.
>> -    double bestCost = lambda;
>> +    int64_t bestCost = calcSaoRdoCost(0, 1, lambda);
>>      while (offset != 0)
>>      {
>>          // Calculate the bits required for signalling the offset
>> @@ -1390,7 +1403,7 @@
>>
>>          // Do the dequntization before distorion calculation
>>          int64_t dist = estSaoDist(count, offset << SAO_BIT_INC,
>> offsetOrg);
>> -        double cost  = ((double)dist + lambda * (double)rate);
>> +        int64_t cost  = calcSaoRdoCost(dist, rate, lambda);
>>          if (cost < bestCost)
>>          {
>>              bestCost = cost;
>> @@ -1404,22 +1417,23 @@
>>      offset = bestOffset;
>>  }
>>
>> -void SAO::saoLumaComponentParamDist(SAOParam* saoParam, int addr,
>> double& rateDist, double* lambda, double &bestCost)
>> +void SAO::saoLumaComponentParamDist(SAOParam* saoParam, int32_t addr,
>> int64_t& rateDist, int64_t* lambda, int64_t &bestCost)
>>  {
>>      int64_t bestDist = 0;
>>      int bestTypeIdx = -1;
>>
>>      SaoCtuParam* lclCtuParam = &saoParam->ctuParam[0][addr];
>>
>> -    int    distClasses[MAX_NUM_SAO_CLASS];
>> -    double costClasses[MAX_NUM_SAO_CLASS];
>> +    int32_t distClasses[MAX_NUM_SAO_CLASS];
>> +    int64_t costClasses[MAX_NUM_SAO_CLASS];
>>
>>      // RDO SAO_NA
>>      m_entropyCoder.load(m_rdContexts.temp);
>>      m_entropyCoder.resetBits();
>>      m_entropyCoder.codeSaoType(0);
>>
>> -    double dCostPartBest = m_entropyCoder.getNumberOfWrittenBits() *
>> lambda[0];
>> +    uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();
>> +    int64_t costPartBest = calcSaoRdoCost(0, rate, lambda[0]);
>>
>>      //EO distortion calculation
>>      for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE - 1; typeIdx++)
>> @@ -1427,7 +1441,7 @@
>>          int64_t estDist = 0;
>>          for (int classIdx = 1; classIdx < SAO_NUM_OFFSET + 1; classIdx++)
>>          {
>> -            int32_t  count     = m_count[0][typeIdx][classIdx];
>> +            int32_t&  count     = m_count[0][typeIdx][classIdx];
>>              int32_t& offsetOrg = m_offsetOrg[0][typeIdx][classIdx];
>>              int32_t& offsetOut = m_offset[0][typeIdx][classIdx];
>>
>> @@ -1441,12 +1455,12 @@
>>          m_entropyCoder.resetBits();
>>          m_entropyCoder.codeSaoOffsetEO(m_offset[0][typeIdx] + 1,
>> typeIdx, 0);
>>
>> -        uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
>> -        double cost = (double)estDist + lambda[0] * (double)estRate;
>> +        uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();
>> +        int64_t cost = calcSaoRdoCost(estDist, rate, lambda[0]);
>>
>> -        if (cost < dCostPartBest)
>> +        if (cost < costPartBest)
>>          {
>> -            dCostPartBest = cost;
>> +            costPartBest = cost;
>>              bestDist = estDist;
>>              bestTypeIdx = typeIdx;
>>          }
>> @@ -1458,14 +1472,14 @@
>>          lclCtuParam->typeIdx = bestTypeIdx;
>>          lclCtuParam->bandPos = 0;
>>          for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
>> -            lclCtuParam->offset[classIdx] =
>> (int)m_offset[0][bestTypeIdx][classIdx + 1];
>> +            lclCtuParam->offset[classIdx] =
>> m_offset[0][bestTypeIdx][classIdx + 1];
>>      }
>>
>>      //BO RDO
>>      int64_t estDist = 0;
>>      for (int classIdx = 0; classIdx < MAX_NUM_SAO_CLASS; classIdx++)
>>      {
>> -        int32_t  count     = m_count[0][SAO_BO][classIdx];
>> +        int32_t&  count     = m_count[0][SAO_BO][classIdx];
>>          int32_t& offsetOrg = m_offsetOrg[0][SAO_BO][classIdx];
>>          int32_t& offsetOut = m_offset[0][SAO_BO][classIdx];
>>
>> @@ -1473,12 +1487,12 @@
>>      }
>>
>>      // Estimate Best Position
>> -    double bestRDCostBO = MAX_DOUBLE;
>> -    int    bestClassBO  = 0;
>> +    int64_t bestRDCostBO = MAX_INT64;
>> +    int32_t bestClassBO  = 0;
>>
>>      for (int i = 0; i < MAX_NUM_SAO_CLASS - SAO_NUM_OFFSET + 1; i++)
>>      {
>> -        double currentRDCost = 0.0;
>> +        int64_t currentRDCost = 0;
>>          for (int j = i; j < i + SAO_NUM_OFFSET; j++)
>>              currentRDCost += costClasses[j];
>>
>> @@ -1498,21 +1512,21 @@
>>      m_entropyCoder.codeSaoOffsetBO(m_offset[0][SAO_BO] + bestClassBO,
>> bestClassBO, 0);
>>
>>      uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
>> -    double cost = (double)estDist + lambda[0] * (double)estRate;
>> +    int64_t cost = calcSaoRdoCost(estDist, estRate, lambda[0]);
>>
>> -    if (cost < dCostPartBest)
>> +    if (cost < costPartBest)
>>      {
>> -        dCostPartBest = cost;
>> +        costPartBest = cost;
>>          bestDist = estDist;
>>
>>          lclCtuParam->mergeMode = SAO_MERGE_NONE;
>>          lclCtuParam->typeIdx = SAO_BO;
>>          lclCtuParam->bandPos = bestClassBO;
>>          for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
>> -            lclCtuParam->offset[classIdx] =
>> (int)m_offset[0][SAO_BO][classIdx + bestClassBO];
>> +            lclCtuParam->offset[classIdx] = m_offset[0][SAO_BO][classIdx
>> + bestClassBO];
>>      }
>>
>> -    rateDist = ((double)bestDist / lambda[0]);
>> +    rateDist = bestDist / (lambda[0] >> 8);
>>      m_entropyCoder.load(m_rdContexts.temp);
>>      m_entropyCoder.codeSaoOffset(*lclCtuParam, 0);
>>      m_entropyCoder.store(m_rdContexts.temp);
>> @@ -1520,26 +1534,27 @@
>>      if (m_param->internalCsp == X265_CSP_I400)
>>      {
>>          uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();
>> -        bestCost = rateDist + (double)rate;
>> +        bestCost = rateDist + rate;
>>      }
>>  }
>>
>> -void SAO::saoChromaComponentParamDist(SAOParam* saoParam, int addr,
>> double& rateDist, double* lambda, double &bestCost)
>> +void SAO::saoChromaComponentParamDist(SAOParam* saoParam, int32_t addr,
>> int64_t& rateDist, int64_t* lambda, int64_t &bestCost)
>>  {
>>      int64_t bestDist = 0;
>>      int bestTypeIdx = -1;
>>
>>      SaoCtuParam* lclCtuParam[2] = { &saoParam->ctuParam[1][addr],
>> &saoParam->ctuParam[2][addr] };
>>
>> -    double costClasses[MAX_NUM_SAO_CLASS];
>> -    int    distClasses[MAX_NUM_SAO_CLASS];
>> -    int    bestClassBO[2] = { 0, 0 };
>> +    int64_t costClasses[MAX_NUM_SAO_CLASS];
>> +    int32_t distClasses[MAX_NUM_SAO_CLASS];
>> +    int32_t bestClassBO[2] = { 0, 0 };
>>
>>      m_entropyCoder.load(m_rdContexts.temp);
>>      m_entropyCoder.resetBits();
>>      m_entropyCoder.codeSaoType(0);
>>
>> -    double dCostPartBest = m_entropyCoder.getNumberOfWrittenBits() *
>> lambda[1];
>> +    uint32_t bits = m_entropyCoder.getNumberOfWrittenBits();
>> +    int64_t costPartBest = calcSaoRdoCost(0, bits, lambda[1]);
>>
>>      //EO RDO
>>      for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE - 1; typeIdx++)
>> @@ -1549,7 +1564,7 @@
>>          {
>>              for (int classIdx = 1; classIdx < SAO_NUM_OFFSET + 1;
>> classIdx++)
>>              {
>> -                int32_t  count = m_count[compIdx][typeIdx][classIdx];
>> +                int32_t& count = m_count[compIdx][typeIdx][classIdx];
>>                  int32_t& offsetOrg =
>> m_offsetOrg[compIdx][typeIdx][classIdx];
>>                  int32_t& offsetOut =
>> m_offset[compIdx][typeIdx][classIdx];
>>
>> @@ -1566,11 +1581,11 @@
>>              m_entropyCoder.codeSaoOffsetEO(m_offset[compIdx +
>> 1][typeIdx] + 1, typeIdx, compIdx + 1);
>>
>>          uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
>> -        double cost = (double)(estDist[0] + estDist[1]) + lambda[1] *
>> (double)estRate;
>> +        int64_t cost = calcSaoRdoCost((estDist[0] + estDist[1]),
>> estRate, lambda[1]);
>>
>> -        if (cost < dCostPartBest)
>> +        if (cost < costPartBest)
>>          {
>> -            dCostPartBest = cost;
>> +            costPartBest = cost;
>>              bestDist = (estDist[0] + estDist[1]);
>>              bestTypeIdx = typeIdx;
>>          }
>> @@ -1584,7 +1599,7 @@
>>              lclCtuParam[compIdx]->typeIdx = bestTypeIdx;
>>              lclCtuParam[compIdx]->bandPos = 0;
>>              for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
>> -                lclCtuParam[compIdx]->offset[classIdx] =
>> (int)m_offset[compIdx + 1][bestTypeIdx][classIdx + 1];
>> +                lclCtuParam[compIdx]->offset[classIdx] =
>> m_offset[compIdx + 1][bestTypeIdx][classIdx + 1];
>>          }
>>      }
>>
>> @@ -1594,11 +1609,11 @@
>>      // Estimate Best Position
>>      for (int compIdx = 1; compIdx < 3; compIdx++)
>>      {
>> -        double bestRDCostBO = MAX_DOUBLE;
>> +        int64_t bestRDCostBO = MAX_INT64;
>>
>>          for (int classIdx = 0; classIdx < MAX_NUM_SAO_CLASS; classIdx++)
>>          {
>> -            int32_t  count = m_count[compIdx][SAO_BO][classIdx];
>> +            int32_t&  count = m_count[compIdx][SAO_BO][classIdx];
>>              int32_t& offsetOrg = m_offsetOrg[compIdx][SAO_BO][classIdx];
>>              int32_t& offsetOut = m_offset[compIdx][SAO_BO][classIdx];
>>
>> @@ -1607,7 +1622,7 @@
>>
>>          for (int i = 0; i < MAX_NUM_SAO_CLASS - SAO_NUM_OFFSET + 1; i++)
>>          {
>> -            double currentRDCost = 0.0;
>> +            int64_t currentRDCost = 0;
>>              for (int j = i; j < i + SAO_NUM_OFFSET; j++)
>>                  currentRDCost += costClasses[j];
>>
>> @@ -1630,11 +1645,11 @@
>>          m_entropyCoder.codeSaoOffsetBO(m_offset[compIdx + 1][SAO_BO] +
>> bestClassBO[compIdx], bestClassBO[compIdx], compIdx + 1);
>>
>>      uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
>> -    double cost = (double)(estDist[0] + estDist[1]) + lambda[1] *
>> (double)estRate;
>> +    int64_t cost = calcSaoRdoCost((estDist[0] + estDist[1]), estRate,
>> lambda[1]);
>>
>> -    if (cost < dCostPartBest)
>> +    if (cost < costPartBest)
>>      {
>> -        dCostPartBest = cost;
>> +        costPartBest = cost;
>>          bestDist = (estDist[0] + estDist[1]);
>>
>>          for (int compIdx = 0; compIdx < 2; compIdx++)
>> @@ -1643,11 +1658,11 @@
>>              lclCtuParam[compIdx]->typeIdx = SAO_BO;
>>              lclCtuParam[compIdx]->bandPos = bestClassBO[compIdx];
>>              for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
>> -                lclCtuParam[compIdx]->offset[classIdx] =
>> (int)m_offset[compIdx + 1][SAO_BO][classIdx + bestClassBO[compIdx]];
>> +                lclCtuParam[compIdx]->offset[classIdx] =
>> m_offset[compIdx + 1][SAO_BO][classIdx + bestClassBO[compIdx]];
>>          }
>>      }
>>
>> -    rateDist += ((double)bestDist / lambda[1]);
>> +    rateDist += (bestDist / (lambda[1] >> 8));
>>      m_entropyCoder.load(m_rdContexts.temp);
>>
>>      if (saoParam->bSaoFlag[1])
>> @@ -1657,12 +1672,12 @@
>>          m_entropyCoder.store(m_rdContexts.temp);
>>
>>          uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();
>> -        bestCost = rateDist + (double)rate;
>> +        bestCost = rateDist + rate;
>>      }
>>      else
>>      {
>>          uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();
>> -        bestCost = rateDist + (double)rate;
>> +        bestCost = rateDist + rate;
>>      }
>>  }
>>
>> diff -r 9f27620a948b -r 70a0888d0703 source/encoder/sao.h
>> --- a/source/encoder/sao.h      Mon Apr 25 13:39:54 2016 +0530
>> +++ b/source/encoder/sao.h      Wed May 04 15:17:38 2016 +0530
>> @@ -126,12 +126,13 @@
>>      void calcSaoStatsCu(int addr, int plane);
>>      void calcSaoStatsCu_BeforeDblk(Frame* pic, int idxX, int idxY);
>>
>> -    void saoLumaComponentParamDist(SAOParam* saoParam, int addr, double&
>> rateDist, double* lambda, double &bestCost);
>> -    void saoChromaComponentParamDist(SAOParam* saoParam, int addr,
>> double& rateDist, double* lambda, double &bestCost);
>> +    void saoLumaComponentParamDist(SAOParam* saoParam, int addr,
>> int64_t& rateDist, int64_t* lambda, int64_t& bestCost);
>> +    void saoChromaComponentParamDist(SAOParam* saoParam, int addr,
>> int64_t& rateDist, int64_t* lambda, int64_t& bestCost);
>>
>> -    void estIterOffset(int typeIdx, double lambda, int32_t count,
>> int32_t offsetOrg, int& offset, int& distClasses, double& costClasses);
>> +    void estIterOffset(int typeIdx, int64_t lambda, int32_t count,
>> int32_t offsetOrg, int32_t& offset, int32_t& distClasses, int64_t&
>> costClasses);
>>      void rdoSaoUnitRowEnd(const SAOParam* saoParam, int numctus);
>>      void rdoSaoUnitCu(SAOParam* saoParam, int rowBaseAddr, int idxX, int
>> addr);
>> +    int64_t calcSaoRdoCost(int64_t distortion, uint32_t bits, int64_t
>> lambda);
>>
>>      void saoStatsInitialOffset(int planes);
>>
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>
>
>
> --
> Deepthi Nandakumar
> Engineering Manager, x265
> Multicoreware, Inc
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20160526/6afdf2a8/attachment-0001.html>


More information about the x265-devel mailing list