[x265] [PATCH RFC] psyrd: bug fix in use of psyrdcost for PU/TU decision in inter

sumalatha at multicorewareinc.com sumalatha at multicorewareinc.com
Wed Jun 11 13:13:53 CEST 2014


# HG changeset patch
# User Sumalatha Polureddy <sumalatha at multicorewareinc.com>
# Date 1402483864 -19800
# Node ID a407bf9959c957d8a3c355594a060abab8a182fc
# Parent  dbe573edb3459f57ab058318e4227f6b19045c2d
psyrd: fix use of the psy-rd cost in PU/TU decisions for inter prediction

diff -r dbe573edb345 -r a407bf9959c9 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp	Tue Jun 10 17:47:15 2014 +0530
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp	Wed Jun 11 16:21:04 2014 +0530
@@ -2777,7 +2777,18 @@
         m_entropyCoder->resetBits();
         m_entropyCoder->encodeQtRootCbfZero(cu);
         uint32_t zeroResiBits = m_entropyCoder->getNumberOfWrittenBits();
-        uint64_t zeroCost = m_rdCost->calcRdCost(zeroDistortion, zeroResiBits);
+        uint64_t zeroCost = 0;
+        uint32_t zeroPsyEnergyY = 0;
+        if (m_rdCost->psyRdEnabled())
+        {
+            int size = g_convertToBit[cuSize];
+            zeroPsyEnergyY = m_rdCost->psyCost(size, fencYuv->getLumaAddr(), fencYuv->getStride(),
+                (pixel*)RDCost::zeroPel, MAX_CU_SIZE); // need to check whether zero distortion is similar to psyenergy of fenc
+            zeroCost = m_rdCost->calcPsyRdCost(zeroDistortion, zeroResiBits, zeroPsyEnergyY);
+        }
+        else
+            zeroCost = m_rdCost->calcRdCost(zeroDistortion, zeroResiBits);
+
         if (cu->isLosslessCoded(0))
         {
             zeroCost = cost + 1;
@@ -2785,6 +2796,7 @@
         if (zeroCost < cost)
         {
             distortion = zeroDistortion;
+            cu->m_psyEnergy = zeroPsyEnergyY;
 
             const uint32_t qpartnum = cu->getPic()->getNumPartInCU() >> (depth << 1);
             ::memset(cu->getTransformIdx(), 0, qpartnum * sizeof(uint8_t));
@@ -2807,7 +2819,10 @@
 
         bits = xSymbolBitsInter(cu);
 
-        cost = m_rdCost->calcRdCost(distortion, bits);
+        if (m_rdCost->psyRdEnabled())
+            cost = m_rdCost->calcPsyRdCost(distortion, bits, cu->m_psyEnergy);
+        else
+            cost = m_rdCost->calcRdCost(distortion, bits);
 
         if (cost < bestCost)
         {
@@ -3258,6 +3273,7 @@
             m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiY, strideResiY,  coeffCurY, trSize, scalingListType, false, lastPos[TEXT_LUMA][0]); //this is for inter mode only
 
             const uint32_t nonZeroDistY = primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, curResiY, strideResiY);
+            uint32_t nonZeroPsyEnergyY = 0;
             if (m_rdCost->psyRdEnabled())
             {
                 pixel*   pred = predYuv->getLumaAddr(absPartIdx);
@@ -3267,22 +3283,20 @@
                 uint32_t stride = fencYuv->getStride();
                 //===== reconstruction =====
                 primitives.luma_add_ps[partSize](reconIPred, reconIPredStride, pred, curResiY, stride, strideResiY);
+                int size = g_convertToBit[trSize];
+                nonZeroPsyEnergyY = m_rdCost->psyCost(size, fencYuv->getLumaAddr(absPartIdx), fencYuv->getStride(),
+                    cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder), cu->getPic()->getPicYuvRec()->getStride());
             }
             if (cu->isLosslessCoded(0))
             {
                 distY = nonZeroDistY;
+                psyEnergyY = nonZeroPsyEnergyY;
             }
             else
             {
                 uint64_t singleCostY = 0;
                 if (m_rdCost->psyRdEnabled())
-                {
-                    int size = g_convertToBit[trSize];
-                    uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
-                    psyEnergyY = m_rdCost->psyCost(size, fencYuv->getLumaAddr(absPartIdx), fencYuv->getStride(),
-                        cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder), cu->getPic()->getPicYuvRec()->getStride());
-                    singleCostY = m_rdCost->calcPsyRdCost(nonZeroDistY, singleBitsComp[TEXT_LUMA][0], psyEnergyY);
-                }
+                    singleCostY = m_rdCost->calcPsyRdCost(nonZeroDistY, singleBitsComp[TEXT_LUMA][0], nonZeroPsyEnergyY);
                 else
                     singleCostY = m_rdCost->calcRdCost(nonZeroDistY, singleBitsComp[TEXT_LUMA][0]);
                 m_entropyCoder->resetBits();
@@ -3307,6 +3321,7 @@
                 else
                 {
                     distY = nonZeroDistY;
+                    psyEnergyY = nonZeroPsyEnergyY;
                     if (checkTransformSkipY)
                     {
                         minCost[TEXT_LUMA][0] = singleCostY;
@@ -3370,6 +3385,7 @@
                     uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth,
                                                                  curResiU, strideResiC);
                     const uint32_t nonZeroDistU = m_rdCost->scaleChromaDistCb(dist);
+                    uint32_t  nonZeroPsyEnergyU = 0;
                     if (m_rdCost->psyRdEnabled())
                     {
                         pixel*   pred = predYuv->getCbAddr(absPartIdxC);
@@ -3379,21 +3395,21 @@
                         uint32_t stride = fencYuv->getCStride();
                         //===== reconstruction =====
                         primitives.luma_add_ps[partSizeC](reconIPred, reconIPredStride, pred, curResiU, stride, strideResiC);
+                        int size = g_convertToBit[trSizeC];
+                        nonZeroPsyEnergyU = m_rdCost->psyCost(size, fencYuv->getCbAddr(absPartIdxC), fencYuv->getCStride(),
+                            cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder), cu->getPic()->getPicYuvRec()->getCStride());
                     }
                     if (cu->isLosslessCoded(0))
                     {
                         distU = nonZeroDistU;
+                        psyEnergyU = nonZeroPsyEnergyU;
                     }
                     else
                     {
                         uint64_t singleCostU = 0;
                         if (m_rdCost->psyRdEnabled())
                         {
-                            int size = g_convertToBit[trSizeC];
-                            uint32_t zorder = cu->getZorderIdxInCU() + absPartIdxC;
-                            psyEnergyU = m_rdCost->psyCost(size, fencYuv->getCbAddr(absPartIdxC), fencYuv->getCStride(),
-                                cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder), cu->getPic()->getPicYuvRec()->getCStride());
-                            singleCostU = m_rdCost->calcPsyRdCost(nonZeroDistU, singleBitsComp[TEXT_CHROMA_U][0], psyEnergyU);
+                            singleCostU = m_rdCost->calcPsyRdCost(nonZeroDistU, singleBitsComp[TEXT_CHROMA_U][0], nonZeroPsyEnergyU);
                         }
                         else
                             singleCostU = m_rdCost->calcRdCost(nonZeroDistU, singleBitsComp[TEXT_CHROMA_U][0]);
@@ -3419,6 +3435,7 @@
                         else
                         {
                             distU = nonZeroDistU;
+                            psyEnergyU = nonZeroPsyEnergyU;
                             if (checkTransformSkipUV)
                             {
                                 minCost[TEXT_CHROMA_U][tuIterator.m_section] = singleCostU;
@@ -3462,6 +3479,7 @@
                     uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth,
                                                                  curResiV, strideResiC);
                     const uint32_t nonZeroDistV = m_rdCost->scaleChromaDistCr(dist);
+                    uint32_t nonZeroPsyEnergyV = 0;
 
                     if (m_rdCost->psyRdEnabled())
                     {
@@ -3472,21 +3490,21 @@
                         uint32_t stride = fencYuv->getCStride();
                         //===== reconstruction =====
                         primitives.luma_add_ps[partSizeC](reconIPred, reconIPredStride, pred, curResiV, stride, strideResiC);
+                        int size = g_convertToBit[trSizeC];
+                        nonZeroPsyEnergyV = m_rdCost->psyCost(size, fencYuv->getCrAddr(absPartIdxC), fencYuv->getCStride(),
+                            cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder), cu->getPic()->getPicYuvRec()->getCStride());
                     }
                     if (cu->isLosslessCoded(0))
                     {
                         distV = nonZeroDistV;
+                        psyEnergyV = nonZeroPsyEnergyV;
                     }
                     else
                     {
                         uint64_t singleCostV = 0;
                         if (m_rdCost->psyRdEnabled())
                         {
-                            int size = g_convertToBit[trSizeC];
-                            uint32_t zorder = cu->getZorderIdxInCU() + absPartIdxC;
-                            psyEnergyV = m_rdCost->psyCost(size, fencYuv->getCrAddr(absPartIdxC), fencYuv->getCStride(),
-                                cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder), cu->getPic()->getPicYuvRec()->getCStride());
-                            singleCostV = m_rdCost->calcPsyRdCost(nonZeroDistV, singleBitsComp[TEXT_CHROMA_V][tuIterator.m_section], psyEnergyV);
+                            singleCostV = m_rdCost->calcPsyRdCost(nonZeroDistV, singleBitsComp[TEXT_CHROMA_V][tuIterator.m_section], nonZeroPsyEnergyV);
                         }
                         else
                             singleCostV = m_rdCost->calcRdCost(nonZeroDistV, singleBitsComp[TEXT_CHROMA_V][tuIterator.m_section]);
@@ -3512,6 +3530,7 @@
                         else
                         {
                             distV = nonZeroDistV;
+                            psyEnergyV = nonZeroPsyEnergyV;
                             if (checkTransformSkipUV)
                             {
                                 minCost[TEXT_CHROMA_V][tuIterator.m_section] = singleCostV;
@@ -3548,6 +3567,7 @@
         if (checkTransformSkipY)
         {
             uint32_t nonZeroDistY = 0, absSumTransformSkipY;
+            uint32_t nonZeroPsyEnergyY = 0;
             uint64_t singleCostY = MAX_INT64;
 
             coeff_t bestCoeffY[MAX_TS_SIZE * MAX_TS_SIZE];
@@ -3599,9 +3619,9 @@
                     //===== reconstruction =====
                     primitives.luma_add_ps[partSize](reconIPred, reconIPredStride, pred, curResiY, stride, strideResiY);
                     int size = g_convertToBit[trSize];
-                    psyEnergyY = m_rdCost->psyCost(size, fencYuv->getLumaAddr(absPartIdx), fencYuv->getStride(),
+                    nonZeroPsyEnergyY = m_rdCost->psyCost(size, fencYuv->getLumaAddr(absPartIdx), fencYuv->getStride(),
                         cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder), cu->getPic()->getPicYuvRec()->getStride());
-                    singleCostY = m_rdCost->calcPsyRdCost(nonZeroDistY, skipSingleBitsY, psyEnergyY);
+                    singleCostY = m_rdCost->calcPsyRdCost(nonZeroDistY, skipSingleBitsY, nonZeroPsyEnergyY);
                 }
                 else
                     singleCostY = m_rdCost->calcRdCost(nonZeroDistY, skipSingleBitsY);
@@ -3616,7 +3636,7 @@
             else
             {
                 singleDistComp[TEXT_LUMA][0] = nonZeroDistY;
-                singlePsyEnergyComp[TEXT_LUMA][0] = psyEnergyY;
+                singlePsyEnergyComp[TEXT_LUMA][0] = nonZeroPsyEnergyY;
                 absSum[TEXT_LUMA][0] = absSumTransformSkipY;
                 bestTransformMode[TEXT_LUMA][0] = 1;
             }
@@ -3627,6 +3647,7 @@
         if (bCodeChroma && checkTransformSkipUV)
         {
             uint32_t nonZeroDistU = 0, nonZeroDistV = 0, absSumTransformSkipU, absSumTransformSkipV;
+            uint32_t nonZeroPsyEnergyU = 0, nonZeroPsyEnergyV = 0;
             uint64_t singleCostU = MAX_INT64;
             uint64_t singleCostV = MAX_INT64;
 
@@ -3704,9 +3725,9 @@
                         //===== reconstruction =====
                         primitives.luma_add_ps[partSizeC](reconIPred, reconIPredStride, pred, curResiU, stride, strideResiC);
                         int size = g_convertToBit[trSizeC];
-                        psyEnergyU = m_rdCost->psyCost(size, fencYuv->getCbAddr(absPartIdxC), fencYuv->getCStride(),
+                        nonZeroPsyEnergyU = m_rdCost->psyCost(size, fencYuv->getCbAddr(absPartIdxC), fencYuv->getCStride(),
                             cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder), cu->getPic()->getPicYuvRec()->getCStride());
-                        singleCostU = m_rdCost->calcPsyRdCost(nonZeroDistU, singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section], psyEnergyU);
+                        singleCostU = m_rdCost->calcPsyRdCost(nonZeroDistU, singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section], nonZeroPsyEnergyU);
                     }
                     else
                         singleCostU = m_rdCost->calcRdCost(nonZeroDistU, singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section]);
@@ -3722,7 +3743,7 @@
                 else
                 {
                     singleDistComp[TEXT_CHROMA_U][tuIterator.m_section] = nonZeroDistU;
-                    singlePsyEnergyComp[TEXT_CHROMA_U][tuIterator.m_section] = psyEnergyU;
+                    singlePsyEnergyComp[TEXT_CHROMA_U][tuIterator.m_section] = nonZeroPsyEnergyU;
                     absSum[TEXT_CHROMA_U][tuIterator.m_section] = absSumTransformSkipU;
                     bestTransformMode[TEXT_CHROMA_U][tuIterator.m_section] = 1;
                 }
@@ -3743,7 +3764,6 @@
                     uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth,
                                                                  curResiV, strideResiC);
                     nonZeroDistV = m_rdCost->scaleChromaDistCr(dist);
-                    singleCostV = m_rdCost->calcRdCost(nonZeroDistV, singleBitsComp[TEXT_CHROMA_V][tuIterator.m_section]);
                     if (m_rdCost->psyRdEnabled())
                     {
                         pixel*   pred = predYuv->getCrAddr(absPartIdxC);
@@ -3754,9 +3774,9 @@
                         //===== reconstruction =====
                         primitives.luma_add_ps[partSizeC](reconIPred, reconIPredStride, pred, curResiV, stride, strideResiC);
                         int size = g_convertToBit[trSizeC];
-                        psyEnergyV = m_rdCost->psyCost(size, fencYuv->getCrAddr(absPartIdxC), fencYuv->getCStride(),
+                        nonZeroPsyEnergyV = m_rdCost->psyCost(size, fencYuv->getCrAddr(absPartIdxC), fencYuv->getCStride(),
                             cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder), cu->getPic()->getPicYuvRec()->getCStride());
-                        singleCostV = m_rdCost->calcPsyRdCost(nonZeroDistV, singleBitsComp[TEXT_CHROMA_V][tuIterator.m_section], psyEnergyV);
+                        singleCostV = m_rdCost->calcPsyRdCost(nonZeroDistV, singleBitsComp[TEXT_CHROMA_V][tuIterator.m_section], nonZeroPsyEnergyV);
                     }
                     else
                         singleCostV = m_rdCost->calcRdCost(nonZeroDistV, singleBitsComp[TEXT_CHROMA_V][tuIterator.m_section]);
@@ -3771,7 +3791,7 @@
                 else
                 {
                     singleDistComp[TEXT_CHROMA_V][tuIterator.m_section] = nonZeroDistV;
-                    singlePsyEnergyComp[TEXT_CHROMA_V][tuIterator.m_section] = psyEnergyV;
+                    singlePsyEnergyComp[TEXT_CHROMA_V][tuIterator.m_section] = nonZeroPsyEnergyV;
                     absSum[TEXT_CHROMA_V][tuIterator.m_section] = absSumTransformSkipV;
                     bestTransformMode[TEXT_CHROMA_V][tuIterator.m_section] = 1;
                 }


More information about the x265-devel mailing list