[x265] [PATCH] SAO: perform merge distortion and cost calculation in one loop

Ashok Kumar Mishra ashok at multicorewareinc.com
Fri Jan 22 16:01:41 CET 2016


As expected, there is not much change in performance, since SAO is a much
less time-consuming module compared to other modules such as look-ahead,
motion estimation, etc.
Also, I have modified only one function in SAO. I tested one 4K video
(tearsofsteel-4k-1000f-s214.y4m), which has 1214 frames, with the medium
preset, using Intel VTune.

*Before: 2545,02 (total CPU time)*
*After:    2536,95 (total CPU time)*

Thanks
Ashok

On Mon, Jan 18, 2016 at 9:39 PM, Pradeep Ramachandran <
pradeep at multicorewareinc.com> wrote:

> Ashok,
> What is the impact on performance that you expect from these changes to
> SAO? Can you run encodes with 4K videos in ABR mode for ultrafast, medium,
> and veryslow and share the impact of performance if we expect an impact
> (which I think there will be)?
>
> Pradeep Ramachandran, PhD
> Solution Architect at www.multicorewareinc.com/
> Adjunct Faculty at www.cse.iitm.ac.in/
> pradeeprama.info/
> Ph:   +91 99627 82018
>
> On Mon, Jan 18, 2016 at 9:21 PM, <ashok at multicorewareinc.com> wrote:
>
>> # HG changeset patch
>> # User Ashok Kumar Mishra<ashok at multicorewareinc.com>
>> # Date 1453132285 -19800
>> #      Mon Jan 18 21:21:25 2016 +0530
>> # Node ID 0f89b83c6fe64181296a51392ff52236dc6a934e
>> # Parent  add820b8cde0fc82a643fd32717def5a3e2f3888
>> SAO: perform merge distortion and cost calculation in one loop
>>
>> diff -r add820b8cde0 -r 0f89b83c6fe6 source/encoder/sao.cpp
>> --- a/source/encoder/sao.cpp    Mon Jan 18 21:20:57 2016 +0530
>> +++ b/source/encoder/sao.cpp    Mon Jan 18 21:21:25 2016 +0530
>> @@ -758,16 +758,6 @@
>>      std::swap(m_tmpL1[2], m_tmpL2[2]);
>>  }
>>
>> -void SAO::copySaoUnit(SaoCtuParam* saoUnitDst, const SaoCtuParam*
>> saoUnitSrc)
>> -{
>> -    saoUnitDst->mergeMode   = saoUnitSrc->mergeMode;
>> -    saoUnitDst->typeIdx     = saoUnitSrc->typeIdx;
>> -    saoUnitDst->bandPos     = saoUnitSrc->bandPos;
>> -
>> -    for (int i = 0; i < SAO_NUM_OFFSET; i++)
>> -        saoUnitDst->offset[i] = saoUnitSrc->offset[i];
>> -}
>> -
>>  /* Calculate SAO statistics for current CTU without non-crossing slice */
>>  void SAO::calcSaoStatsCu(int addr, int plane)
>>  {
>> @@ -1233,158 +1223,12 @@
>>          m_depthSaoRate[1 * SAO_DEPTHRATE_SIZE + m_refDepth] =
>> m_numNoSao[1] / ((double)numctus);
>>  }
>>
>> -void SAO::rdoSaoUnitRow(SAOParam* saoParam, int idxY)
>> -{
>> -    SaoCtuParam mergeSaoParam[NUM_MERGE_MODE][2];
>> -    double lambda[3] = {m_lumaLambda, m_chromaLambda, m_chromaLambda};
>> -    bool allowMerge[2]; // left, up
>> -    allowMerge[1] = (idxY > 0);
>> -
>> -    for (int idxX = 0; idxX < m_numCuInWidth; idxX++)
>> -    {
>> -        int addr     = idxX + idxY * m_numCuInWidth;
>> -        int addrUp   = idxY ? addr - m_numCuInWidth : -1;
>> -        int addrLeft = idxX ? addr - 1 : -1;
>> -        allowMerge[0] = (idxX > 0);
>> -
>> -        m_entropyCoder.load(m_rdContexts.cur);
>> -        if (allowMerge[0])
>> -            m_entropyCoder.codeSaoMerge(0);
>> -        if (allowMerge[1])
>> -            m_entropyCoder.codeSaoMerge(0);
>> -        m_entropyCoder.store(m_rdContexts.temp);
>> -
>> -        // reset stats Y, Cb, Cr
>> -        X265_CHECK(sizeof(PerPlane) == (sizeof(int32_t) * (NUM_PLANE *
>> MAX_NUM_SAO_TYPE * MAX_NUM_SAO_CLASS)), "Found Padding space in struct
>> PerPlane");
>> -
>> -        // TODO: Confirm the address space is continuous
>> -        if (m_param->bSaoNonDeblocked)
>> -        {
>> -            memcpy(m_count, m_countPreDblk[addr], sizeof(m_count));
>> -            memcpy(m_offsetOrg, m_offsetOrgPreDblk[addr],
>> sizeof(m_offsetOrg));
>> -        }
>> -        else
>> -        {
>> -            memset(m_count, 0, sizeof(m_count));
>> -            memset(m_offsetOrg, 0, sizeof(m_offsetOrg));
>> -        }
>> -
>> -        saoParam->ctuParam[0][addr].reset();
>> -        saoParam->ctuParam[1][addr].reset();
>> -        saoParam->ctuParam[2][addr].reset();
>> -
>> -        if (saoParam->bSaoFlag[0])
>> -            calcSaoStatsCu(addr, 0);
>> -
>> -        if (saoParam->bSaoFlag[1])
>> -        {
>> -            calcSaoStatsCu(addr, 1);
>> -            calcSaoStatsCu(addr, 2);
>> -        }
>> -
>> -        double mergeDist[NUM_MERGE_MODE] = {0.0, 0.0, 0.0};
>> -        saoLumaComponentParamDist(saoParam, addr, mergeDist);
>> -        if (m_chromaFormat != X265_CSP_I400)
>> -            saoChromaComponentParamDist(saoParam, addr, mergeDist);
>> -
>> -        // merge left or merge up
>> -        for (int plane = 0; plane < 3; plane++)
>> -        {
>> -            for (int mergeIdx = 0; mergeIdx < 2; mergeIdx++)
>> -            {
>> -                SaoCtuParam* mergeSrcParam = NULL;
>> -                if (addrLeft >= 0 && mergeIdx == 0)
>> -                    mergeSrcParam =
>> &(saoParam->ctuParam[plane][addrLeft]);
>> -                else if (addrUp >= 0 && mergeIdx == 1)
>> -                    mergeSrcParam = &(saoParam->ctuParam[plane][addrUp]);
>> -                if (mergeSrcParam)
>> -                {
>> -                    int64_t estDist = 0;
>> -                    int typeIdx = mergeSrcParam->typeIdx;
>> -                    if (typeIdx >= 0)
>> -                    {
>> -                        int bandPos = (typeIdx == SAO_BO) ?
>> mergeSrcParam->bandPos : 0;
>> -                        for (int classIdx = 0; classIdx <
>> SAO_NUM_OFFSET; classIdx++)
>> -                        {
>> -                            int mergeOffset =
>> mergeSrcParam->offset[classIdx];
>> -                            estDist +=
>> estSaoDist(m_count[plane][typeIdx][classIdx + bandPos + 1], mergeOffset,
>> m_offsetOrg[plane][typeIdx][classIdx + bandPos + 1]);
>> -                        }
>> -                    }
>> -
>> -                    copySaoUnit(&mergeSaoParam[plane][mergeIdx],
>> mergeSrcParam);
>> -                    mergeSaoParam[plane][mergeIdx].mergeMode = mergeIdx
>> ? SAO_MERGE_UP : SAO_MERGE_LEFT;
>> -                    mergeDist[mergeIdx + 1] += ((double)estDist /
>> lambda[plane]);
>> -                }
>> -            }
>> -        }
>> -
>> -        if (saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1])
>> -        {
>> -            // Cost of new SAO_params
>> -            m_entropyCoder.load(m_rdContexts.cur);
>> -            m_entropyCoder.resetBits();
>> -            if (allowMerge[0])
>> -                m_entropyCoder.codeSaoMerge(0);
>> -            if (allowMerge[1])
>> -                m_entropyCoder.codeSaoMerge(0);
>> -            for (int plane = 0; plane < 3; plane++)
>> -            {
>> -                if (saoParam->bSaoFlag[plane > 0])
>> -
>> m_entropyCoder.codeSaoOffset(saoParam->ctuParam[plane][addr], plane);
>> -            }
>> -
>> -            uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();
>> -            double bestCost = mergeDist[0] + (double)rate;
>> -            m_entropyCoder.store(m_rdContexts.temp);
>> -
>> -            // Cost of Merge
>> -            for (int mergeIdx = 0; mergeIdx < 2; ++mergeIdx)
>> -            {
>> -                if (!allowMerge[mergeIdx])
>> -                    continue;
>> -
>> -                m_entropyCoder.load(m_rdContexts.cur);
>> -                m_entropyCoder.resetBits();
>> -                if (allowMerge[0])
>> -                    m_entropyCoder.codeSaoMerge(1 - mergeIdx);
>> -                if (allowMerge[1] && (mergeIdx == 1))
>> -                    m_entropyCoder.codeSaoMerge(1);
>> -
>> -                rate = m_entropyCoder.getNumberOfWrittenBits();
>> -                double mergeCost = mergeDist[mergeIdx + 1] +
>> (double)rate;
>> -                if (mergeCost < bestCost)
>> -                {
>> -                    SaoMergeMode mergeMode = mergeIdx ? SAO_MERGE_UP :
>> SAO_MERGE_LEFT;
>> -                    bestCost = mergeCost;
>> -                    m_entropyCoder.store(m_rdContexts.temp);
>> -                    for (int plane = 0; plane < 3; plane++)
>> -                    {
>> -                        mergeSaoParam[plane][mergeIdx].mergeMode =
>> mergeMode;
>> -                        if (saoParam->bSaoFlag[plane > 0])
>> -
>> copySaoUnit(&saoParam->ctuParam[plane][addr],
>> &mergeSaoParam[plane][mergeIdx]);
>> -                    }
>> -                }
>> -            }
>> -
>> -            if (saoParam->ctuParam[0][addr].typeIdx < 0)
>> -                m_numNoSao[0]++;
>> -            if (m_chromaFormat != X265_CSP_I400 &&
>> saoParam->ctuParam[1][addr].typeIdx < 0)
>> -                m_numNoSao[1]++;
>> -
>> -            m_entropyCoder.load(m_rdContexts.temp);
>> -            m_entropyCoder.store(m_rdContexts.cur);
>> -        }
>> -    }
>> -}
>> -
>>  void SAO::rdoSaoUnitCu(SAOParam* saoParam, int rowBaseAddr, int idxX,
>> int addr)
>>  {
>> -    SaoCtuParam mergeSaoParam[NUM_MERGE_MODE][2];
>>      double lambda[3] = {m_lumaLambda, m_chromaLambda, m_chromaLambda};
>>      const bool allowMerge[2] = {(idxX != 0), (rowBaseAddr != 0)}; //
>> left, up
>>
>> -    const int addrUp   = rowBaseAddr ? addr - m_numCuInWidth : -1;
>> -    const int addrLeft = idxX ? addr - 1 : -1;
>> +    const int addrMerge[2] = {(idxX ? addr - 1 : -1), (rowBaseAddr ?
>> addr - m_numCuInWidth : -1)};// left, up
>>
>>      bool chroma = m_param->internalCsp != X265_CSP_I400;
>>      int planes = chroma ? 3 : 1;
>> @@ -1425,45 +1269,14 @@
>>          calcSaoStatsCu(addr, 1);
>>          calcSaoStatsCu(addr, 2);
>>          saoStatsInitialOffset(1);
>> -        saoStatsInitialOffset(2);
>> +//        saoStatsInitialOffset(2);
>>      }
>>
>> -    double mergeDist[NUM_MERGE_MODE] = {0.0, 0.0, 0.0};
>> +    double mergeDist[NUM_MERGE_MODE] = { 0.0 };
>>      saoLumaComponentParamDist(saoParam, addr, mergeDist);
>>      if (chroma)
>>          saoChromaComponentParamDist(saoParam, addr, mergeDist);
>>
>> -    // merge left or merge up
>> -    for (int plane = 0; plane < planes; plane++)
>> -    {
>> -        for (int mergeIdx = 0; mergeIdx < 2; mergeIdx++)
>> -        {
>> -            SaoCtuParam* mergeSrcParam = NULL;
>> -            if (addrLeft >= 0 && mergeIdx == 0)
>> -                mergeSrcParam = &(saoParam->ctuParam[plane][addrLeft]);
>> -            else if (addrUp >= 0 && mergeIdx == 1)
>> -                mergeSrcParam = &(saoParam->ctuParam[plane][addrUp]);
>> -            if (mergeSrcParam)
>> -            {
>> -                int64_t estDist = 0;
>> -                int typeIdx = mergeSrcParam->typeIdx;
>> -                if (typeIdx >= 0)
>> -                {
>> -                    int bandPos = (typeIdx == SAO_BO) ?
>> mergeSrcParam->bandPos : 0;
>> -                    for (int classIdx = 0; classIdx < SAO_NUM_OFFSET;
>> classIdx++)
>> -                    {
>> -                        int mergeOffset =
>> mergeSrcParam->offset[classIdx];
>> -                        estDist +=
>> estSaoDist(m_count[plane][typeIdx][classIdx + bandPos + 1], mergeOffset,
>> m_offsetOrg[plane][typeIdx][classIdx + bandPos + 1]);
>> -                    }
>> -                }
>> -
>> -                copySaoUnit(&mergeSaoParam[plane][mergeIdx],
>> mergeSrcParam);
>> -                mergeSaoParam[plane][mergeIdx].mergeMode = mergeIdx ?
>> SAO_MERGE_UP : SAO_MERGE_LEFT;
>> -                mergeDist[mergeIdx + 1] += ((double)estDist /
>> lambda[plane]);
>> -            }
>> -        }
>> -    }
>> -
>>      if (saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1])
>>      {
>>          // Cost of new SAO_params
>> @@ -1483,12 +1296,31 @@
>>          double bestCost = mergeDist[0] + (double)rate;
>>          m_entropyCoder.store(m_rdContexts.temp);
>>
>> -        // Cost of Merge
>> +        // Cost of merge left or Up
>>          for (int mergeIdx = 0; mergeIdx < 2; ++mergeIdx)
>>          {
>>              if (!allowMerge[mergeIdx])
>>                  continue;
>>
>> +            for (int plane = 0; plane < 3; plane++)
>> +            {
>> +                int64_t estDist = 0;
>> +                SaoCtuParam* mergeSrcParam =
>> &(saoParam->ctuParam[plane][addrMerge[mergeIdx]]);
>> +                int typeIdx = mergeSrcParam->typeIdx;
>> +                if (typeIdx >= 0)
>> +                {
>> +                    int bandPos = (typeIdx == SAO_BO) ?
>> mergeSrcParam->bandPos : 0;
>> +                    for (int classIdx = 0; classIdx < SAO_NUM_OFFSET;
>> classIdx++)
>> +                    {
>> +                        int mergeOffset =
>> mergeSrcParam->offset[classIdx];
>> +                        estDist +=
>> estSaoDist(m_count[plane][typeIdx][classIdx + bandPos + 1], mergeOffset,
>> m_offsetOrg[plane][typeIdx][classIdx + bandPos + 1]);
>> +                    }
>> +                }
>> +
>> +                mergeDist[mergeIdx + 1] += ((double)estDist /
>> lambda[plane]);
>> +            }
>> +
>> +
>>              m_entropyCoder.load(m_rdContexts.cur);
>>              m_entropyCoder.resetBits();
>>              if (allowMerge[0])
>> @@ -1505,9 +1337,17 @@
>>                  m_entropyCoder.store(m_rdContexts.temp);
>>                  for (int plane = 0; plane < planes; plane++)
>>                  {
>> -                    mergeSaoParam[plane][mergeIdx].mergeMode = mergeMode;
>>                      if (saoParam->bSaoFlag[plane > 0])
>> -                        copySaoUnit(&saoParam->ctuParam[plane][addr],
>> &mergeSaoParam[plane][mergeIdx]);
>> +                    {
>> +                        SaoCtuParam* dstCtuParam   =
>> &saoParam->ctuParam[plane][addr];
>> +                        SaoCtuParam* mergeSrcParam =
>> &(saoParam->ctuParam[plane][addrMerge[mergeIdx]]);
>> +                        dstCtuParam->mergeMode = mergeMode;
>> +                        dstCtuParam->typeIdx   = mergeSrcParam->typeIdx;
>> +                        dstCtuParam->bandPos   = mergeSrcParam->bandPos;
>> +
>> +                        for (int i = 0; i < SAO_NUM_OFFSET; i++)
>> +                            dstCtuParam->offset[i] =
>> mergeSrcParam->offset[i];
>> +                    }
>>                  }
>>              }
>>          }
>> @@ -1524,75 +1364,81 @@
>>
>>  // Rounds the division of initial offsets by the number of samples in
>>  // each of the statistics table entries.
>> -void SAO::saoStatsInitialOffset(int plane)
>> +void SAO::saoStatsInitialOffset(int planes)
>>  {
>>      // EO
>> -    for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE - 1; typeIdx++)
>> +    for (int plane = planes; plane <= 2*planes; plane++)
>>      {
>> -        for (int classIdx = 1; classIdx < SAO_EO_LEN + 1; classIdx++)
>> +        for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE - 1; typeIdx++)
>>          {
>> -            int32_t  count     = m_count[plane][typeIdx][classIdx];
>> -            int32_t& offsetOrg = m_offsetOrg[plane][typeIdx][classIdx];
>> -            int32_t& offsetOut = m_offset[plane][typeIdx][classIdx];
>> +            for (int classIdx = 1; classIdx < SAO_EO_LEN + 1; classIdx++)
>> +            {
>> +                int32_t  count     = m_count[plane][typeIdx][classIdx];
>> +                int32_t& offsetOrg =
>> m_offsetOrg[plane][typeIdx][classIdx];
>> +                int32_t& offsetOut = m_offset[plane][typeIdx][classIdx];
>> +
>> +                if (count)
>> +                {
>> +                    offsetOut = roundIBDI(offsetOrg, count <<
>> SAO_BIT_INC);
>> +                    offsetOut = x265_clip3(-OFFSET_THRESH + 1,
>> OFFSET_THRESH - 1, offsetOut);
>> +
>> +                    if (classIdx < 3)
>> +                        offsetOut = X265_MAX(offsetOut, 0);
>> +                    else
>> +                        offsetOut = X265_MIN(offsetOut, 0);
>> +                }
>> +            }
>> +        }
>> +    }
>> +
>> +    // BO
>> +    for (int plane = planes; plane <= 2*planes; plane++)
>> +    {
>> +        for (int classIdx = 1; classIdx < SAO_NUM_BO_CLASSES + 1;
>> classIdx++)
>> +        {
>> +            int32_t  count     = m_count[plane][SAO_BO][classIdx];
>> +            int32_t& offsetOrg = m_offsetOrg[plane][SAO_BO][classIdx];
>> +            int32_t& offsetOut = m_offset[plane][SAO_BO][classIdx];
>>
>>              if (count)
>>              {
>>                  offsetOut = roundIBDI(offsetOrg, count << SAO_BIT_INC);
>>                  offsetOut = x265_clip3(-OFFSET_THRESH + 1, OFFSET_THRESH
>> - 1, offsetOut);
>> -
>> -                if (classIdx < 3)
>> -                    offsetOut = X265_MAX(offsetOut, 0);
>> -                else
>> -                    offsetOut = X265_MIN(offsetOut, 0);
>>              }
>>          }
>>      }
>> -    // BO
>> -    for (int classIdx = 1; classIdx < SAO_NUM_BO_CLASSES + 1; classIdx++)
>> -    {
>> -        int32_t  count     = m_count[plane][SAO_BO][classIdx];
>> -        int32_t& offsetOrg = m_offsetOrg[plane][SAO_BO][classIdx];
>> -        int32_t& offsetOut = m_offset[plane][SAO_BO][classIdx];
>> -
>> -        if (count)
>> -        {
>> -            offsetOut = roundIBDI(offsetOrg, count << SAO_BIT_INC);
>> -            offsetOut = x265_clip3(-OFFSET_THRESH + 1, OFFSET_THRESH -
>> 1, offsetOut);
>> -        }
>> -    }
>>  }
>>
>> -inline int SAO::estIterOffset(int typeIdx, int classIdx, double lambda,
>> int offset, int32_t count, int32_t offsetOrg, int32_t*
>> currentDistortionTableBo, double* currentRdCostTableBo)
>> +inline int SAO::estIterOffset(int typeIdx, double lambda, int offset,
>> int32_t count, int32_t offsetOrg, int& distBOClasses, double& costBOClasses)
>>  {
>> -    int offsetOut = 0;
>> +    int bestOffset = 0;
>>
>> -    // Assuming sending quantized value 0 results in zero offset and
>> sending the value zero needs 1 bit. entropy coder can be used to measure
>> the exact rate here.
>> -    double tempMinCost = lambda;
>> +    // Assuming sending quantized value 0 results in zero offset and
>> sending the value zero needs 1 bit.
>> +    // entropy coder can be used to measure the exact rate here.
>> +    double bestCost = lambda;
>>      while (offset != 0)
>>      {
>>          // Calculate the bits required for signalling the offset
>> -        int tempRate = (typeIdx == SAO_BO) ? (abs(offset) + 2) :
>> (abs(offset) + 1);
>> +        int rate = (typeIdx == SAO_BO) ? (abs(offset) + 2) :
>> (abs(offset) + 1);
>>          if (abs(offset) == OFFSET_THRESH - 1)
>> -            tempRate--;
>> +            rate--;
>>
>>          // Do the dequntization before distorion calculation
>> -        int tempOffset = offset << SAO_BIT_INC;
>> -        int64_t tempDist  = estSaoDist(count, tempOffset, offsetOrg);
>> -        double tempCost   = ((double)tempDist + lambda *
>> (double)tempRate);
>> -        if (tempCost < tempMinCost)
>> +        int64_t dist = estSaoDist(count, offset << SAO_BIT_INC,
>> offsetOrg);
>> +        double cost  = ((double)dist + lambda * (double)rate);
>> +        if (cost < bestCost)
>>          {
>> -            tempMinCost = tempCost;
>> -            offsetOut = offset;
>> +            bestCost = cost;
>> +            bestOffset = offset;
>>              if (typeIdx == SAO_BO)
>>              {
>> -                currentDistortionTableBo[classIdx - 1] = (int)tempDist;
>> -                currentRdCostTableBo[classIdx - 1] = tempCost;
>> +                distBOClasses = (int)dist;
>> +                costBOClasses = bestCost;
>>              }
>>          }
>>          offset = (offset > 0) ? (offset - 1) : (offset + 1);
>>      }
>> -
>> -    return offsetOut;
>> +    return bestOffset;
>>  }
>>
>>  void SAO::saoLumaComponentParamDist(SAOParam* saoParam, int addr,
>> double* mergeDist)
>> @@ -1602,8 +1448,8 @@
>>
>>      SaoCtuParam* lclCtuParam = &saoParam->ctuParam[0][addr];
>>
>> -    int    currentDistortionTableBo[MAX_NUM_SAO_CLASS];
>> -    double currentRdCostTableBo[MAX_NUM_SAO_CLASS];
>> +    int    distBOClasses[MAX_NUM_SAO_CLASS];
>> +    double costBOClasses[MAX_NUM_SAO_CLASS];
>>
>>      m_entropyCoder.load(m_rdContexts.temp);
>>      m_entropyCoder.resetBits();
>> @@ -1621,15 +1467,9 @@
>>              int32_t& offsetOrg = m_offsetOrg[0][typeIdx][classIdx];
>>              int32_t& offsetOut = m_offset[0][typeIdx][classIdx];
>>
>> -            if (count)
>> -            {
>> -                offsetOut = estIterOffset(typeIdx, classIdx,
>> m_lumaLambda, offsetOut, count, offsetOrg, currentDistortionTableBo,
>> currentRdCostTableBo);
>> -            }
>> -            else
>> -            {
>> -                offsetOrg = 0;
>> -                offsetOut = 0;
>> -            }
>> +            if (offsetOut)
>> +                offsetOut = estIterOffset(typeIdx, m_lumaLambda,
>> offsetOut, count, offsetOrg, distBOClasses[0], costBOClasses[0]);
>> +
>>              estDist += estSaoDist(count, (int)offsetOut << SAO_BIT_INC,
>> offsetOrg);
>>          }
>>
>> @@ -1665,44 +1505,37 @@
>>          int32_t& offsetOrg = m_offsetOrg[0][SAO_BO][classIdx];
>>          int32_t& offsetOut = m_offset[0][SAO_BO][classIdx];
>>
>> -        currentDistortionTableBo[classIdx - 1] = 0;
>> -        currentRdCostTableBo[classIdx - 1] = m_lumaLambda;
>> +        distBOClasses[classIdx - 1] = 0;
>> +        costBOClasses[classIdx - 1] = m_lumaLambda;
>>
>> -        if (count)
>> -        {
>> -            offsetOut = estIterOffset(SAO_BO, classIdx, m_lumaLambda,
>> offsetOut, count, offsetOrg, currentDistortionTableBo,
>> currentRdCostTableBo);
>> -        }
>> -        else
>> -        {
>> -            offsetOrg = 0;
>> -            offsetOut = 0;
>> -        }
>> +        if (offsetOut)
>> +            offsetOut = estIterOffset(SAO_BO, m_lumaLambda, offsetOut,
>> count, offsetOrg, distBOClasses[classIdx - 1], costBOClasses[classIdx - 1]);
>>      }
>>
>>      // Estimate Best Position
>> -    double bestRDCostTableBo = MAX_DOUBLE;
>> -    int    bestClassTableBo  = 0;
>> +    double bestRDCostBO = MAX_DOUBLE;
>> +    int    bestClassBO  = 0;
>>
>>      for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; i++)
>>      {
>>          double currentRDCost = 0.0;
>>          for (int j = i; j < i + SAO_BO_LEN; j++)
>> -            currentRDCost += currentRdCostTableBo[j];
>> +            currentRDCost += costBOClasses[j];
>>
>> -        if (currentRDCost < bestRDCostTableBo)
>> +        if (currentRDCost < bestRDCostBO)
>>          {
>> -            bestRDCostTableBo = currentRDCost;
>> -            bestClassTableBo  = i;
>> +            bestRDCostBO = currentRDCost;
>> +            bestClassBO  = i;
>>          }
>>      }
>>
>>      estDist = 0;
>> -    for (int classIdx = bestClassTableBo; classIdx < bestClassTableBo +
>> SAO_BO_LEN; classIdx++)
>> -        estDist += currentDistortionTableBo[classIdx];
>> +    for (int classIdx = bestClassBO; classIdx < bestClassBO +
>> SAO_BO_LEN; classIdx++)
>> +        estDist += distBOClasses[classIdx];
>>
>>      m_entropyCoder.load(m_rdContexts.temp);
>>      m_entropyCoder.resetBits();
>> -    m_entropyCoder.codeSaoOffsetBO(m_offset[0][SAO_BO] +
>> (bestClassTableBo + 1), bestClassTableBo, 0);
>> +    m_entropyCoder.codeSaoOffsetBO(m_offset[0][SAO_BO] + (bestClassBO +
>> 1), bestClassBO, 0);
>>
>>      uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
>>      double cost = (double)estDist + m_lumaLambda * (double)estRate;
>> @@ -1714,9 +1547,9 @@
>>
>>          lclCtuParam->mergeMode = SAO_MERGE_NONE;
>>          lclCtuParam->typeIdx = SAO_BO;
>> -        lclCtuParam->bandPos = bestClassTableBo;
>> +        lclCtuParam->bandPos = bestClassBO;
>>          for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
>> -            lclCtuParam->offset[classIdx] =
>> (int)m_offset[0][SAO_BO][classIdx + bestClassTableBo + 1];
>> +            lclCtuParam->offset[classIdx] =
>> (int)m_offset[0][SAO_BO][classIdx + bestClassBO + 1];
>>      }
>>
>>      mergeDist[0] = ((double)bestDist / m_lumaLambda);
>> @@ -1732,9 +1565,9 @@
>>
>>      SaoCtuParam* lclCtuParam[2] = { &saoParam->ctuParam[1][addr],
>> &saoParam->ctuParam[2][addr] };
>>
>> -    double currentRdCostTableBo[MAX_NUM_SAO_CLASS];
>> -    int    currentDistortionTableBo[MAX_NUM_SAO_CLASS];
>> -    int    bestClassTableBo[2] = { 0, 0 };
>> +    double costBOClasses[MAX_NUM_SAO_CLASS];
>> +    int    distBOClasses[MAX_NUM_SAO_CLASS];
>> +    int    bestClassBO[2] = { 0, 0 };
>>
>>      m_entropyCoder.load(m_rdContexts.temp);
>>      m_entropyCoder.resetBits();
>> @@ -1754,15 +1587,8 @@
>>                  int32_t& offsetOrg =
>> m_offsetOrg[compIdx][typeIdx][classIdx];
>>                  int32_t& offsetOut =
>> m_offset[compIdx][typeIdx][classIdx];
>>
>> -                if (count)
>> -                {
>> -                    offsetOut = estIterOffset(typeIdx, classIdx,
>> m_chromaLambda, offsetOut, count, offsetOrg, currentDistortionTableBo,
>> currentRdCostTableBo);
>> -                }
>> -                else
>> -                {
>> -                    offsetOrg = 0;
>> -                    offsetOut = 0;
>> -                }
>> +                if (offsetOut)
>> +                    offsetOut = estIterOffset(typeIdx, m_chromaLambda,
>> offsetOut, count, offsetOrg, distBOClasses[0], costBOClasses[0]);
>>
>>                  estDist[compIdx - 1] += estSaoDist(count, (int)offsetOut
>> << SAO_BIT_INC, offsetOrg);
>>              }
>> @@ -1772,9 +1598,7 @@
>>          m_entropyCoder.resetBits();
>>
>>          for (int compIdx = 0; compIdx < 2; compIdx++)
>> -        {
>>              m_entropyCoder.codeSaoOffsetEO(m_offset[compIdx +
>> 1][typeIdx] + 1, typeIdx, compIdx + 1);
>> -        }
>>
>>          uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
>>          double cost = (double)(estDist[0] + estDist[1]) + m_chromaLambda
>> * (double)estRate;
>> @@ -1805,7 +1629,7 @@
>>      // Estimate Best Position
>>      for (int compIdx = 1; compIdx < 3; compIdx++)
>>      {
>> -        double bestRDCostTableBo = MAX_DOUBLE;
>> +        double bestRDCostBO = MAX_DOUBLE;
>>
>>          for (int classIdx = 1; classIdx < SAO_NUM_BO_CLASSES + 1;
>> classIdx++)
>>          {
>> @@ -1813,45 +1637,36 @@
>>              int32_t& offsetOrg = m_offsetOrg[compIdx][SAO_BO][classIdx];
>>              int32_t& offsetOut = m_offset[compIdx][SAO_BO][classIdx];
>>
>> -            currentDistortionTableBo[classIdx - 1] = 0;
>> -            currentRdCostTableBo[classIdx - 1] = m_chromaLambda;
>> +            distBOClasses[classIdx - 1] = 0;
>> +            costBOClasses[classIdx - 1] = m_chromaLambda;
>>
>> -            if (count)
>> -            {
>> -                offsetOut = estIterOffset(SAO_BO, classIdx,
>> m_chromaLambda, offsetOut, count, offsetOrg, currentDistortionTableBo,
>> currentRdCostTableBo);
>> -            }
>> -            else
>> -            {
>> -                offsetOrg = 0;
>> -                offsetOut = 0;
>> -            }
>> +            if (offsetOut)
>> +                offsetOut = estIterOffset(SAO_BO, m_chromaLambda,
>> offsetOut, count, offsetOrg, distBOClasses[classIdx - 1],
>> costBOClasses[classIdx - 1]);
>>          }
>>
>>          for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; i++)
>>          {
>>              double currentRDCost = 0.0;
>>              for (int j = i; j < i + SAO_BO_LEN; j++)
>> -                currentRDCost += currentRdCostTableBo[j];
>> +                currentRDCost += costBOClasses[j];
>>
>> -            if (currentRDCost < bestRDCostTableBo)
>> +            if (currentRDCost < bestRDCostBO)
>>              {
>> -                bestRDCostTableBo = currentRDCost;
>> -                bestClassTableBo[compIdx - 1]  = i;
>> +                bestRDCostBO = currentRDCost;
>> +                bestClassBO[compIdx - 1]  = i;
>>              }
>>          }
>>
>>          estDist[compIdx - 1] = 0;
>> -        for (int classIdx = bestClassTableBo[compIdx - 1]; classIdx <
>> bestClassTableBo[compIdx - 1] + SAO_BO_LEN; classIdx++)
>> -            estDist[compIdx - 1] += currentDistortionTableBo[classIdx];
>> +        for (int classIdx = bestClassBO[compIdx - 1]; classIdx <
>> bestClassBO[compIdx - 1] + SAO_BO_LEN; classIdx++)
>> +            estDist[compIdx - 1] += distBOClasses[classIdx];
>>      }
>>
>>      m_entropyCoder.load(m_rdContexts.temp);
>>      m_entropyCoder.resetBits();
>>
>>      for (int compIdx = 0; compIdx < 2; compIdx++)
>> -    {
>> -        m_entropyCoder.codeSaoOffsetBO(m_offset[compIdx + 1][SAO_BO] +
>> (bestClassTableBo[compIdx] + 1), bestClassTableBo[compIdx], compIdx + 1);
>> -    }
>> +        m_entropyCoder.codeSaoOffsetBO(m_offset[compIdx + 1][SAO_BO] +
>> (bestClassBO[compIdx] + 1), bestClassBO[compIdx], compIdx + 1);
>>
>>      uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
>>      double cost = (double)(estDist[0] + estDist[1]) + m_chromaLambda *
>> (double)estRate;
>> @@ -1865,9 +1680,9 @@
>>          {
>>              lclCtuParam[compIdx]->mergeMode = SAO_MERGE_NONE;
>>              lclCtuParam[compIdx]->typeIdx = SAO_BO;
>> -            lclCtuParam[compIdx]->bandPos = bestClassTableBo[compIdx];
>> +            lclCtuParam[compIdx]->bandPos = bestClassBO[compIdx];
>>              for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
>> -                lclCtuParam[compIdx]->offset[classIdx] =
>> (int)m_offset[compIdx + 1][SAO_BO][classIdx + bestClassTableBo[compIdx] +
>> 1];
>> +                lclCtuParam[compIdx]->offset[classIdx] =
>> (int)m_offset[compIdx + 1][SAO_BO][classIdx + bestClassBO[compIdx] + 1];
>>          }
>>      }
>>
>> diff -r add820b8cde0 -r 0f89b83c6fe6 source/encoder/sao.h
>> --- a/source/encoder/sao.h      Mon Jan 18 21:20:57 2016 +0530
>> +++ b/source/encoder/sao.h      Mon Jan 18 21:21:25 2016 +0530
>> @@ -136,7 +136,7 @@
>>      void processSaoUnitCuLuma(SaoCtuParam* ctuParam, int idxY, int idxX);
>>      void processSaoUnitCuChroma(SaoCtuParam* ctuParam[3], int idxY, int
>> idxX);
>>
>> -    void copySaoUnit(SaoCtuParam* saoUnitDst, const SaoCtuParam*
>> saoUnitSrc);
>> +//    void copySaoUnit(SaoCtuParam* saoUnitDst, const SaoCtuParam*
>> saoUnitSrc);
>>
>>      void calcSaoStatsCu(int addr, int plane);
>>      void calcSaoStatsCu_BeforeDblk(Frame* pic, int idxX, int idxY);
>> @@ -144,10 +144,10 @@
>>      void saoLumaComponentParamDist(SAOParam* saoParam, int addr, double*
>> mergeDist);
>>      void saoChromaComponentParamDist(SAOParam* saoParam, int addr,
>> double* mergeDist);
>>
>> -    inline int estIterOffset(int typeIdx, int classIdx, double lambda,
>> int offset, int32_t count, int32_t offsetOrg,
>> -                             int32_t* currentDistortionTableBo, double*
>> currentRdCostTableBo);
>> +    inline int estIterOffset(int typeIdx, double lambda, int offset,
>> int32_t count, int32_t offsetOrg,
>> +                             int& currentDistortionTableBo, double&
>> currentRdCostTableBo);
>>      void rdoSaoUnitRowEnd(const SAOParam* saoParam, int numctus);
>> -    void rdoSaoUnitRow(SAOParam* saoParam, int idxY);
>> +//    void rdoSaoUnitRow(SAOParam* saoParam, int idxY);
>>      void rdoSaoUnitCu(SAOParam* saoParam, int rowBaseAddr, int idxX, int
>> addr);
>>
>>      void saoStatsInitialOffset(int plane);
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20160122/d6e7f708/attachment-0001.html>


More information about the x265-devel mailing list