[x265] fix bug in 73c6c9086577 for rdLevel=0

Thu Oct 2 02:29:45 CEST 2014

# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1412209540 -32400
#      Thu Oct 02 09:25:40 2014 +0900
# Node ID 2efc3c19dd26944506c2c5e801abc96b1c048b40
# Parent  d0fa09e9cca540c6eab84308dea481f8368b1cb1
fix bug in 73c6c9086577 for rdLevel=0

diff -r d0fa09e9cca5 -r 2efc3c19dd26 source/Lib/TLibCommon/TComDataCU.cpp

--- a/source/Lib/TLibCommon/TComDataCU.cpp	Wed Oct 01 09:39:36 2014 +0530
+++ b/source/Lib/TLibCommon/TComDataCU.cpp	Thu Oct 02 09:25:40 2014 +0900
@@ -454,19 +454,15 @@
     m_cuAboveRight  = cu->getCUAboveRight();
 }
 
-void TComDataCU::copyToSubCU(TComDataCU* cu, CU* cuData, uint32_t partUnitIdx, uint32_t depth)
+void TComDataCU::copyFromPic(TComDataCU* ctu, CU* cuData)
 {
-    X265_CHECK(partUnitIdx < 4, "part unit should be less than 4\n");
+    m_pic              = ctu->m_pic;
+    m_slice            = ctu->m_slice;
+    m_cuAddr           = ctu->getAddr();
+    m_absIdxInCTU      = cuData->encodeIdx;
 
-    uint32_t partOffset = cuData->numPartitions * partUnitIdx;
-
-    m_pic              = cu->m_pic;
-    m_slice            = cu->m_slice;
-    m_cuAddr           = cu->getAddr();
-    m_absIdxInCTU      = cuData->encodeIdx + partOffset;
-
-    m_cuPelX           = cu->getCUPelX() + ((partUnitIdx &  1) << (g_maxLog2CUSize - depth));
-    m_cuPelY           = cu->getCUPelY() + ((partUnitIdx >> 1) << (g_maxLog2CUSize - depth));
+    m_cuPelX           = ctu->getCUPelX() + g_zscanToPelX[m_absIdxInCTU];
+    m_cuPelY           = ctu->getCUPelY() + g_zscanToPelY[m_absIdxInCTU];
 
     m_psyEnergy        = 0;
     m_totalPsyCost     = MAX_INT64;
@@ -478,18 +474,17 @@
     m_coeffBits        = 0;
     m_numPartitions    = cuData->numPartitions;
 
-    TComDataCU* otherCU = m_pic->getCU(m_cuAddr);
     int sizeInChar  = sizeof(char) * m_numPartitions;
 
-    memcpy(m_skipFlag, otherCU->getSkipFlag() + m_absIdxInCTU, sizeof(*m_skipFlag) * m_numPartitions);
-    memcpy(m_qp, otherCU->getQP() + m_absIdxInCTU, sizeInChar);
+    memcpy(m_skipFlag, ctu->getSkipFlag() + m_absIdxInCTU, sizeof(*m_skipFlag) * m_numPartitions);
+    memcpy(m_qp, ctu->getQP() + m_absIdxInCTU, sizeInChar);
 
-    memcpy(m_partSizes, otherCU->getPartitionSize() + m_absIdxInCTU, sizeof(*m_partSizes) * m_numPartitions);
-    memcpy(m_predModes, otherCU->getPredictionMode() + m_absIdxInCTU, sizeof(*m_predModes) * m_numPartitions);
+    memcpy(m_partSizes, ctu->getPartitionSize() + m_absIdxInCTU, sizeof(*m_partSizes) * m_numPartitions);
+    memcpy(m_predModes, ctu->getPredictionMode() + m_absIdxInCTU, sizeof(*m_predModes) * m_numPartitions);
 
-    memcpy(m_lumaIntraDir, otherCU->getLumaIntraDir() + m_absIdxInCTU, sizeInChar);
-    memcpy(m_depth, otherCU->getDepth() + m_absIdxInCTU, sizeInChar);
-    memcpy(m_log2CUSize, otherCU->getLog2CUSize() + m_absIdxInCTU, sizeInChar);
+    memcpy(m_lumaIntraDir, ctu->getLumaIntraDir() + m_absIdxInCTU, sizeInChar);
+    memcpy(m_depth, ctu->getDepth() + m_absIdxInCTU, sizeInChar);
+    memcpy(m_log2CUSize, ctu->getLog2CUSize() + m_absIdxInCTU, sizeInChar);
 }
 
 // --------------------------------------------------------------------------------------------------------------------
@@ -2411,6 +2406,8 @@
 void TComDataCU::loadCTUData(uint32_t maxCUSize)
 {
     // Initialize the coding blocks inside the CTB
+    int picWidth  = m_pic->m_origPicYuv->m_picWidth;
+    int picHeight = m_pic->m_origPicYuv->m_picHeight;
     for (uint32_t log2CUSize = g_log2Size[maxCUSize], rangeCUIdx = 0; log2CUSize >= MIN_LOG2_CU_SIZE; log2CUSize--)
     {
         uint32_t blockSize  = 1 << log2CUSize;
@@ -2425,8 +2422,8 @@
                 uint32_t child_idx = rangeCUIdx + sbWidth * sbWidth + (depth_idx << 2);
                 uint32_t px = m_cuPelX + sb_x * blockSize;
                 uint32_t py = m_cuPelY + sb_y * blockSize;
-                int32_t present_flag = px < m_pic->m_origPicYuv->m_picWidth && py < m_pic->m_origPicYuv->m_picHeight;
-                int32_t split_mandatory_flag = present_flag && !last_level_flag && (px + blockSize > m_pic->m_origPicYuv->m_picWidth || py + blockSize > m_pic->m_origPicYuv->m_picHeight);
+                int32_t present_flag = px < picWidth && py < picHeight;
+                int32_t split_mandatory_flag = present_flag && !last_level_flag && (px + blockSize > picWidth || py + blockSize > picHeight);
                 
                 /* Offset of the luma CU in the X, Y direction in terms of pixels from the CTU origin */
                 uint32_t xOffset = (sb_x * blockSize) >> 3;
diff -r d0fa09e9cca5 -r 2efc3c19dd26 source/Lib/TLibCommon/TComDataCU.h
--- a/source/Lib/TLibCommon/TComDataCU.h	Wed Oct 01 09:39:36 2014 +0530
+++ b/source/Lib/TLibCommon/TComDataCU.h	Thu Oct 02 09:25:40 2014 +0900
@@ -276,7 +276,7 @@
     void          initSubCU(TComDataCU* cu, CU* cuData, uint32_t partUnitIdx, uint32_t depth, int qp);
     void          loadCTUData(uint32_t maxCUSize);
 
-    void          copyToSubCU(TComDataCU* ctu, CU* cuData, uint32_t partUnitIdx, uint32_t depth);
+    void          copyFromPic(TComDataCU* ctu, CU* cuData);
     void          copyPartFrom(TComDataCU* cu, CU* cuData, uint32_t partUnitIdx, uint32_t depth, bool isRDObasedAnalysis = true);
 
     void          copyToPic(uint32_t depth);
diff -r d0fa09e9cca5 -r 2efc3c19dd26 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Wed Oct 01 09:39:36 2014 +0530
+++ b/source/encoder/analysis.cpp	Thu Oct 02 09:25:40 2014 +0900
@@ -1178,10 +1178,10 @@
             TComDataCU* aboveLeft = outTempCU->getCUAboveLeft();
             TComDataCU* aboveRight = outTempCU->getCUAboveRight();
             TComDataCU* left = outTempCU->getCULeft();
-            TComDataCU* rootCU = pic->getPicSym()->getCU(cuAddr);
+            TComDataCU* ctu = pic->getPicSym()->getCU(cuAddr);
 
-            totalCostCU += rootCU->m_avgCost[depth] * rootCU->m_count[depth];
-            totalCountCU += rootCU->m_count[depth];
+            totalCostCU += ctu->m_avgCost[depth] * ctu->m_count[depth];
+            totalCountCU += ctu->m_count[depth];
             if (above)
             {
                 totalCostNeigh += above->m_avgCost[depth] * above->m_count[depth];
@@ -1250,10 +1250,10 @@
                         tempavgCost = m_rdCost.m_psyRd ? subBestPartCU->m_totalPsyCost : subBestPartCU->m_totalRDCost;
                     else
                         tempavgCost = subBestPartCU->m_totalRDCost;
-                    TComDataCU* rootCU = pic->getPicSym()->getCU(cuAddr);
-                    uint64_t temp = rootCU->m_avgCost[nextDepth] * rootCU->m_count[nextDepth];
-                    rootCU->m_count[nextDepth] += 1;
-                    rootCU->m_avgCost[nextDepth] = (temp + tempavgCost) / rootCU->m_count[nextDepth];
+                    TComDataCU* ctu = pic->getPicSym()->getCU(cuAddr);
+                    uint64_t temp = ctu->m_avgCost[nextDepth] * ctu->m_count[nextDepth];
+                    ctu->m_count[nextDepth] += 1;
+                    ctu->m_avgCost[nextDepth] = (temp + tempavgCost) / ctu->m_count[nextDepth];
                 }
 
                 /* Adding costs from best SUbCUs */
@@ -1319,10 +1319,10 @@
             if (!depth)
             {
                 uint64_t tempavgCost = m_rdCost.m_psyRd ? outBestCU->m_totalPsyCost : outBestCU->m_totalRDCost;
-                TComDataCU* rootCU = pic->getPicSym()->getCU(cuAddr);
-                uint64_t temp = rootCU->m_avgCost[depth] * rootCU->m_count[depth];
-                rootCU->m_count[depth] += 1;
-                rootCU->m_avgCost[depth] = (temp + tempavgCost) / rootCU->m_count[depth];
+                TComDataCU* ctu = pic->getPicSym()->getCU(cuAddr);
+                uint64_t temp = ctu->m_avgCost[depth] * ctu->m_count[depth];
+                ctu->m_count[depth] += 1;
+                ctu->m_avgCost[depth] = (temp + tempavgCost) / ctu->m_count[depth];
             }
 
             uint64_t tempCost = m_rdCost.m_psyRd ? outTempCU->m_totalPsyCost : outTempCU->m_totalRDCost;
@@ -1346,7 +1346,7 @@
     outBestCU->copyToPic(depth);
 
     if (!m_param->rdLevel && !depth)
-        encodeResidue(outBestCU, outBestCU, cu, 0, 0);
+        encodeResidue(pic->getPicSym()->getCU(cuAddr), cu, 0, 0);
     else if (m_param->rdLevel)
     {
         /* Copy Yuv data to picture Yuv */
@@ -2227,32 +2227,36 @@
         cu->m_totalRDCost = m_rdCost.calcRdCost(cu->m_totalDistortion, cu->m_totalBits);
 }
 
-void Analysis::encodeResidue(TComDataCU* ctu, TComDataCU* cu, CU* cuData, uint32_t absPartIdx, uint32_t depth)
+void Analysis::encodeResidue(TComDataCU* ctu, CU* cuData, uint32_t absPartIdx, uint32_t depth)
 {
-    Frame* pic = cu->m_pic;
+    Frame* pic = ctu->m_pic;
+    uint32_t cuAddr = ctu->getAddr();
 
     if (depth < ctu->getDepth(absPartIdx) && depth < g_maxCUDepth)
     {
-        Slice* slice = cu->m_slice;
+        Slice* slice = ctu->m_slice;
         uint32_t nextDepth = depth + 1;
-        TComDataCU* subTempPartCU = m_tempCU[nextDepth];
         uint32_t qNumParts = (NUM_CU_PARTITIONS >> (depth << 1)) >> 2;
         uint32_t xmax = slice->m_sps->picWidthInLumaSamples  - ctu->getCUPelX();
         uint32_t ymax = slice->m_sps->picHeightInLumaSamples - ctu->getCUPelY();
         for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++, absPartIdx += qNumParts)
         {
-            CU *child_cu = cu->m_cuLocalData + cuData->childIdx + partUnitIdx;
-            if (g_zscanToPelX[absPartIdx] < xmax && g_zscanToPelY[absPartIdx] < ymax)
-            {
-                subTempPartCU->copyToSubCU(cu, child_cu, partUnitIdx, nextDepth);
-                encodeResidue(ctu, subTempPartCU, child_cu, absPartIdx, nextDepth);
-            }
+            CU *child_cu = ctu->m_cuLocalData + cuData->childIdx + partUnitIdx;
+            if (child_cu->flags & CU::PRESENT)
+                encodeResidue(ctu, child_cu, absPartIdx, nextDepth);
         }
 
         return;
     }
 
-    uint32_t cuAddr = cu->getAddr();
+    TComDataCU* cu;
+    if (depth)
+    {
+        cu = m_tempCU[depth];
+        cu->copyFromPic(ctu, cuData);
+    }
+    else
+        cu = ctu;
 
     m_quant.setQPforQuant(cu);
 
diff -r d0fa09e9cca5 -r 2efc3c19dd26 source/encoder/analysis.h
--- a/source/encoder/analysis.h	Wed Oct 01 09:39:36 2014 +0530
+++ b/source/encoder/analysis.h	Thu Oct 02 09:25:40 2014 +0900
@@ -153,7 +153,7 @@
 
     void checkBestMode(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth);
     void encodeIntraInInter(TComDataCU* cu, CU* cuData, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* outResiYuv, TComYuv* outReconYuv);
-    void encodeResidue(TComDataCU* ctu, TComDataCU* cu, CU* cuData, uint32_t absPartIdx, uint32_t depth);
+    void encodeResidue(TComDataCU* ctu, CU* cuData, uint32_t absPartIdx, uint32_t depth);
     void checkDQP(TComDataCU* cu);
     void deriveTestModeAMP(TComDataCU* bestCU, PartSize parentSize, bool &bTestAMP_Hor, bool &bTestAMP_Ver,
                            bool &bTestMergeAMP_Hor, bool &bTestMergeAMP_Ver);
diff -r d0fa09e9cca5 -r 2efc3c19dd26 source/encoder/entropy.cpp
--- a/source/encoder/entropy.cpp	Wed Oct 01 09:39:36 2014 +0530
+++ b/source/encoder/entropy.cpp	Thu Oct 02 09:25:40 2014 +0900
@@ -481,16 +481,16 @@
     }
 }
 
-void Entropy::encodeCTU(TComDataCU* cu)
+void Entropy::encodeCTU(TComDataCU* ctu)
 {
-    bool bEncodeDQP = cu->m_slice->m_pps->bUseDQP;
-    encodeCU(cu, 0, 0, bEncodeDQP, cu->m_cuLocalData);
+    bool bEncodeDQP = ctu->m_slice->m_pps->bUseDQP;
+    encodeCU(ctu, 0, 0, bEncodeDQP, ctu->m_cuLocalData);
 }
 
 /* encode a CU block recursively */
-void Entropy::encodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool& bEncodeDQP, CU* cuData)
+void Entropy::encodeCU(TComDataCU* ctu, uint32_t absPartIdx, uint32_t depth, bool& bEncodeDQP, CU* cuData)
 {
-    Slice* slice = cu->m_slice;
+    Slice* slice = ctu->m_slice;
 
     if (depth <= slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
         bEncodeDQP = true;
@@ -503,59 +503,58 @@
         uint32_t qNumParts = (NUM_CU_PARTITIONS >> (depth << 1)) >> 2;
         for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++, absPartIdx += qNumParts)
         {
-            CU *childCU = cu->m_cuLocalData + cuData->childIdx + partUnitIdx;
-            int cuPresentFlagChild = !(childCU->flags & CU::PRESENT);
-            if (!cuPresentFlagChild)
-                encodeCU(cu, absPartIdx, depth + 1, bEncodeDQP, childCU);
+            CU *childCU = ctu->m_cuLocalData + cuData->childIdx + partUnitIdx;
+            if (childCU->flags & CU::PRESENT)
+                encodeCU(ctu, absPartIdx, depth + 1, bEncodeDQP, childCU);
         }
         return;
     }
 
     // We need to split, so don't try these modes.
     if (cuSplitFlag) 
-        codeSplitFlag(cu, absPartIdx, depth);
+        codeSplitFlag(ctu, absPartIdx, depth);
 
-    if (depth < cu->getDepth(absPartIdx) && depth < g_maxCUDepth)
+    if (depth < ctu->getDepth(absPartIdx) && depth < g_maxCUDepth)
     {
         uint32_t qNumParts = (NUM_CU_PARTITIONS >> (depth << 1)) >> 2;
 
         for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++, absPartIdx += qNumParts)
         {
-            CU *childCU = cu->m_cuLocalData + cuData->childIdx + partUnitIdx;
-            encodeCU(cu, absPartIdx, depth + 1, bEncodeDQP, childCU);
+            CU *childCU = ctu->m_cuLocalData + cuData->childIdx + partUnitIdx;
+            encodeCU(ctu, absPartIdx, depth + 1, bEncodeDQP, childCU);
         }
         return;
     }
 
     if (slice->m_pps->bTransquantBypassEnabled)
-        codeCUTransquantBypassFlag(cu->getCUTransquantBypass(absPartIdx));
+        codeCUTransquantBypassFlag(ctu->getCUTransquantBypass(absPartIdx));
 
     if (!slice->isIntra())
-        codeSkipFlag(cu, absPartIdx);
+        codeSkipFlag(ctu, absPartIdx);
 
-    if (cu->isSkipped(absPartIdx))
+    if (ctu->isSkipped(absPartIdx))
     {
-        codeMergeIndex(cu, absPartIdx);
-        finishCU(cu, absPartIdx, depth);
+        codeMergeIndex(ctu, absPartIdx);
+        finishCU(ctu, absPartIdx, depth);
         return;
     }
 
     if (!slice->isIntra())
-        codePredMode(cu->getPredictionMode(absPartIdx));
+        codePredMode(ctu->getPredictionMode(absPartIdx));
 
-    codePartSize(cu, absPartIdx, depth);
+    codePartSize(ctu, absPartIdx, depth);
 
     // prediction Info ( Intra : direction mode, Inter : Mv, reference idx )
-    codePredInfo(cu, absPartIdx);
+    codePredInfo(ctu, absPartIdx);
 
     uint32_t tuDepthRange[2];
-    cu->getQuadtreeTULog2MinSizeInCU(tuDepthRange, absPartIdx);
+    ctu->getQuadtreeTULog2MinSizeInCU(tuDepthRange, absPartIdx);
 
     // Encode Coefficients, allow codeCoeff() to modify m_bEncodeDQP
-    codeCoeff(cu, absPartIdx, depth, bEncodeDQP, tuDepthRange);
+    codeCoeff(ctu, absPartIdx, depth, bEncodeDQP, tuDepthRange);
 
     // --- write terminating bit ---
-    finishCU(cu, absPartIdx, depth);
+    finishCU(ctu, absPartIdx, depth);
 }
 
 /* finish encoding a cu and handle end-of-slice conditions */
diff -r d0fa09e9cca5 -r 2efc3c19dd26 source/encoder/entropy.h
--- a/source/encoder/entropy.h	Wed Oct 01 09:39:36 2014 +0530
+++ b/source/encoder/entropy.h	Thu Oct 02 09:25:40 2014 +0900
@@ -193,7 +193,7 @@
     void encodeBinsEP(uint32_t binValues, int numBins);
     void encodeBinTrm(uint32_t binValue);
 
-    void encodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool& bEncodeDQP, CU *cuData);
+    void encodeCU(TComDataCU* ctu, uint32_t absPartIdx, uint32_t depth, bool& bEncodeDQP, CU *cuData);
     void finishCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth);
 
     void writeOut();