[x265] refine picture boundary check

Satoshi Nakagawa nakagawa424 at oki.com
Sat May 3 16:36:46 CEST 2014


# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1399127183 -32400
#      Sat May 03 23:26:23 2014 +0900
# Node ID e4a50243560679581e0bfedf3918ac015802be28
# Parent  d72770a77ff8355f17325f2afbfed23d42a1db3c
refine picture boundary check

- clean up slice end check
- fix split bits for TOPSKIP

diff -r d72770a77ff8 -r e4a502435606 source/Lib/TLibCommon/TComYuv.cpp
--- a/source/Lib/TLibCommon/TComYuv.cpp	Thu May 01 17:41:42 2014 -0500
+++ b/source/Lib/TLibCommon/TComYuv.cpp	Sat May 03 23:26:23 2014 +0900
@@ -100,22 +100,16 @@
     ::memset(m_bufV, 0, (m_cwidth * m_cheight) * sizeof(pixel));
 }
 
-void TComYuv::copyToPicYuv(TComPicYuv* destPicYuv, uint32_t cuAddr, uint32_t absZOrderIdx, uint32_t depth, uint32_t partIdx)
+void TComYuv::copyToPicYuv(TComPicYuv* destPicYuv, uint32_t cuAddr, uint32_t absZOrderIdx)
 {
-    int width = m_width >> depth;
-    int part = partitionFromSizes(width, m_height >> depth);
-    pixel* srcY = getLumaAddr(partIdx, width);
     pixel* dstY = destPicYuv->getLumaAddr(cuAddr, absZOrderIdx);
 
-    primitives.luma_copy_pp[part](dstY, destPicYuv->getStride(), srcY, getStride());
+    primitives.luma_copy_pp[m_part](dstY, destPicYuv->getStride(), m_bufY, getStride());
 
-    width = m_cwidth >> depth;
-    pixel* srcU = getCbAddr(partIdx, width);
-    pixel* srcV = getCrAddr(partIdx, width);
     pixel* dstU = destPicYuv->getCbAddr(cuAddr, absZOrderIdx);
     pixel* dstV = destPicYuv->getCrAddr(cuAddr, absZOrderIdx);
-    primitives.chroma[m_csp].copy_pp[part](dstU, destPicYuv->getCStride(), srcU, getCStride());
-    primitives.chroma[m_csp].copy_pp[part](dstV, destPicYuv->getCStride(), srcV, getCStride());
+    primitives.chroma[m_csp].copy_pp[m_part](dstU, destPicYuv->getCStride(), m_bufU, getCStride());
+    primitives.chroma[m_csp].copy_pp[m_part](dstV, destPicYuv->getCStride(), m_bufV, getCStride());
 }
 
 void TComYuv::copyFromPicYuv(TComPicYuv* srcPicYuv, uint32_t cuAddr, uint32_t absZOrderIdx)
diff -r d72770a77ff8 -r e4a502435606 source/Lib/TLibCommon/TComYuv.h
--- a/source/Lib/TLibCommon/TComYuv.h	Thu May 01 17:41:42 2014 -0500
+++ b/source/Lib/TLibCommon/TComYuv.h	Sat May 03 23:26:23 2014 +0900
@@ -136,7 +136,7 @@
     // ------------------------------------------------------------------------------------------------------------------
 
     //  Copy YUV buffer to picture buffer
-    void    copyToPicYuv(TComPicYuv* destPicYuv, uint32_t cuAddr, uint32_t absZOrderIdx, uint32_t depth, uint32_t partIdx);
+    void    copyToPicYuv(TComPicYuv* destPicYuv, uint32_t cuAddr, uint32_t absZOrderIdx);
 
     //  Copy YUV buffer from picture buffer
     void    copyFromPicYuv(TComPicYuv* srcPicYuv, uint32_t cuAddr, uint32_t absZOrderIdx);
diff -r d72770a77ff8 -r e4a502435606 source/Lib/TLibEncoder/TEncCu.cpp
--- a/source/Lib/TLibEncoder/TEncCu.cpp	Thu May 01 17:41:42 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncCu.cpp	Sat May 03 23:26:23 2014 +0900
@@ -381,7 +381,7 @@
 
     if (m_bestCU[0]->getSlice()->getSliceType() == I_SLICE)
     {
-        xCompressIntraCU(m_bestCU[0], m_tempCU[0], 0);
+        xCompressIntraCU(m_bestCU[0], m_tempCU[0], 0, false);
 #if LOG_CU_STATISTICS
         int i = 0, part;
         do
@@ -414,10 +414,10 @@
 
             /* At the start of analysis, the best CU is a null pointer
             On return, it points to the CU encode with best chosen mode*/
-            xCompressInterCU(outBestCU, m_tempCU[0], cu, 0, 0, 4);
+            xCompressInterCU(outBestCU, m_tempCU[0], cu, 0, false, 0, 4);
         }
         else
-            xCompressCU(m_bestCU[0], m_tempCU[0], 0);
+            xCompressCU(m_bestCU[0], m_tempCU[0], 0, false);
 #if LOG_CU_STATISTICS
         int i = 0, part;
         do
@@ -473,7 +473,7 @@
     }
 
     // Encode CU data
-    xEncodeCU(cu, 0, 0);
+    xEncodeCU(cu, 0, 0, false);
 }
 
 // ====================================================================================================================
@@ -552,7 +552,7 @@
  *- for loop of QP value to compress the current CU with all possible QP
 */
 
-void TEncCu::xCompressIntraCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth)
+void TEncCu::xCompressIntraCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth, bool bInsidePicture)
 {
     //PPAScopeEvent(TEncCu_xCompressIntraCU + depth);
 
@@ -569,29 +569,19 @@
         m_origYuv[0]->copyPartToYuv(m_origYuv[depth], outBestCU->getZorderIdxInCU());
     }
 
-    // variable for Early CU determination
-    bool bSubBranch = true;
+    TComSlice* slice = outTempCU->getSlice();
+    if (!bInsidePicture)
+    {
+        uint32_t lpelx = outBestCU->getCUPelX();
+        uint32_t tpely = outBestCU->getCUPelY();
+        uint32_t rpelx = lpelx + outBestCU->getCUSize(0);
+        uint32_t bpely = tpely + outBestCU->getCUSize(0);
+        bInsidePicture = (rpelx <= slice->getSPS()->getPicWidthInLumaSamples() &&
+                          bpely <= slice->getSPS()->getPicHeightInLumaSamples());
+    }
 
-    // variable for Cbf fast mode PU decision
-    bool bBoundary = false;
-
-    uint32_t lpelx = outBestCU->getCUPelX();
-    uint32_t rpelx = lpelx + outBestCU->getCUSize(0) - 1;
-    uint32_t tpelx = outBestCU->getCUPelY();
-    uint32_t bpely = tpelx + outBestCU->getCUSize(0) - 1;
-
-    // If slice start or slice end is within this cu...
-    TComSlice * slice = outTempCU->getPic()->getSlice();
-    bool bSliceEnd = (slice->getSliceCurEndCUAddr() > outTempCU->getSCUAddr() && slice->getSliceCurEndCUAddr() < outTempCU->getSCUAddr() + outTempCU->getTotalNumPart());
-    bool bInsidePicture = (rpelx < outBestCU->getSlice()->getSPS()->getPicWidthInLumaSamples()) && (bpely < outBestCU->getSlice()->getSPS()->getPicHeightInLumaSamples());
-
-    //Data for splitting
-    uint8_t nextDepth = depth + 1;
-    uint32_t partUnitIdx = 0;
-    TComDataCU* subBestPartCU[4], *subTempPartCU[4];
-
-    //We need to split; so dont try these modes
-    if (!bSliceEnd && bInsidePicture)
+    // We need to split, so don't try these modes.
+    if (bInsidePicture)
     {
         outTempCU->initEstData(depth);
 
@@ -600,7 +590,7 @@
 
         if (depth == g_maxCUDepth - g_addCUDepth)
         {
-            if (outTempCU->getCUSize(0) > (1 << outTempCU->getSlice()->getSPS()->getQuadtreeTULog2MinSize()))
+            if (outTempCU->getCUSize(0) > (1 << slice->getSPS()->getQuadtreeTULog2MinSize()))
             {
                 xCheckRDCostIntra(outBestCU, outTempCU, SIZE_NxN);
             }
@@ -610,34 +600,26 @@
         m_entropyCoder->encodeSplitFlag(outBestCU, 0, depth);
         outBestCU->m_totalBits += m_entropyCoder->getNumberOfWrittenBits(); // split bits
         outBestCU->m_totalCost  = m_rdCost->calcRdCost(outBestCU->m_totalDistortion, outBestCU->m_totalBits);
-
-        // Early CU determination
-        if (outBestCU->isSkipped(0))
-            bSubBranch = false;
-        else
-            bSubBranch = true;
-    }
-    else if (!(bSliceEnd && bInsidePicture))
-    {
-        bBoundary = true;
     }
 
     outTempCU->initEstData(depth);
 
     // further split
-    if (bSubBranch && depth < g_maxCUDepth - g_addCUDepth)
+    if (depth < g_maxCUDepth - g_addCUDepth)
     {
+        uint8_t     nextDepth     = depth + 1;
+        TComDataCU* subBestPartCU = m_bestCU[nextDepth];
+        TComDataCU* subTempPartCU = m_tempCU[nextDepth];
+        uint32_t partUnitIdx = 0;
         for (; partUnitIdx < 4; partUnitIdx++)
         {
-            subBestPartCU[partUnitIdx] = m_bestCU[nextDepth];
-            subTempPartCU[partUnitIdx] = m_tempCU[nextDepth];
+            subBestPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth); // clear sub partition datas or init.
 
-            subBestPartCU[partUnitIdx]->initSubCU(outTempCU, partUnitIdx, nextDepth);     // clear sub partition datas or init.
-            subTempPartCU[partUnitIdx]->initSubCU(outTempCU, partUnitIdx, nextDepth);     // clear sub partition datas or init.
-
-            bool bInSlice = subBestPartCU[partUnitIdx]->getSCUAddr() < slice->getSliceCurEndCUAddr();
-            if (bInSlice && (subBestPartCU[partUnitIdx]->getCUPelX() < slice->getSPS()->getPicWidthInLumaSamples()) && (subBestPartCU[partUnitIdx]->getCUPelY() < slice->getSPS()->getPicHeightInLumaSamples()))
+            if (bInsidePicture ||
+                ((subBestPartCU->getCUPelX() < slice->getSPS()->getPicWidthInLumaSamples()) &&
+                 (subBestPartCU->getCUPelY() < slice->getSPS()->getPicHeightInLumaSamples())))
             {
+                subTempPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth); // clear sub partition datas or init.
                 if (0 == partUnitIdx) //initialize RD with previous depth buffer
                 {
                     m_rdSbacCoders[nextDepth][CI_CURR_BEST]->load(m_rdSbacCoders[depth][CI_CURR_BEST]);
@@ -647,33 +629,32 @@
                     m_rdSbacCoders[nextDepth][CI_CURR_BEST]->load(m_rdSbacCoders[nextDepth][CI_NEXT_BEST]);
                 }
 
-                xCompressIntraCU(subBestPartCU[partUnitIdx], subTempPartCU[partUnitIdx], nextDepth);
-                outTempCU->copyPartFrom(subBestPartCU[partUnitIdx], partUnitIdx, nextDepth); // Keep best part data to current temporary data.
-                xCopyYuv2Tmp(subBestPartCU[partUnitIdx]->getTotalNumPart() * partUnitIdx, nextDepth);
+                xCompressIntraCU(subBestPartCU, subTempPartCU, nextDepth, bInsidePicture);
+                outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth); // Keep best part data to current temporary data.
+                xCopyYuv2Tmp(subBestPartCU->getTotalNumPart() * partUnitIdx, nextDepth);
             }
-            else if (bInSlice)
+            else
             {
-                subBestPartCU[partUnitIdx]->copyToPic(nextDepth);
-                outTempCU->copyPartFrom(subBestPartCU[partUnitIdx], partUnitIdx, nextDepth);
+                subBestPartCU->copyToPic(nextDepth);
+                outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth);
             }
         }
 
-        if (!bBoundary)
+        if (bInsidePicture)
         {
             m_entropyCoder->resetBits();
             m_entropyCoder->encodeSplitFlag(outTempCU, 0, depth);
-
             outTempCU->m_totalBits += m_entropyCoder->getNumberOfWrittenBits(); // split bits
         }
         outTempCU->m_totalCost = m_rdCost->calcRdCost(outTempCU->m_totalDistortion, outTempCU->m_totalBits);
 
-        if ((g_maxCUSize >> depth) == outTempCU->getSlice()->getPPS()->getMinCuDQPSize() && outTempCU->getSlice()->getPPS()->getUseDQP())
+        if ((g_maxCUSize >> depth) == slice->getPPS()->getMinCuDQPSize() && slice->getPPS()->getUseDQP())
         {
             bool hasResidual = false;
-            for (uint32_t uiBlkIdx = 0; uiBlkIdx < outTempCU->getTotalNumPart(); uiBlkIdx++)
+            for (uint32_t blkIdx = 0; blkIdx < outTempCU->getTotalNumPart(); blkIdx++)
             {
-                if (outTempCU->getCbf(uiBlkIdx, TEXT_LUMA) || outTempCU->getCbf(uiBlkIdx, TEXT_CHROMA_U) |
-                    outTempCU->getCbf(uiBlkIdx, TEXT_CHROMA_V))
+                if (outTempCU->getCbf(blkIdx, TEXT_LUMA) || outTempCU->getCbf(blkIdx, TEXT_CHROMA_U) ||
+                    outTempCU->getCbf(blkIdx, TEXT_CHROMA_V))
                 {
                     hasResidual = true;
                     break;
@@ -694,15 +675,14 @@
         }
 
         m_rdSbacCoders[nextDepth][CI_NEXT_BEST]->store(m_rdSbacCoders[depth][CI_TEMP_BEST]);
-        xCheckBestMode(outBestCU, outTempCU, depth); // RD compare current prediction with split prediction.
+        xCheckBestMode(outBestCU, outTempCU, depth); // RD compare current CU against split
     }
-
     outBestCU->copyToPic(depth); // Copy Best data to Picture for next partition prediction.
 
+    if (!bInsidePicture) return;
+
     // Copy Yuv data to picture Yuv
-    xCopyYuv2Pic(outBestCU->getPic(), outBestCU->getAddr(), outBestCU->getZorderIdxInCU(), depth, depth, outBestCU, lpelx, tpelx);
-
-    if (bBoundary || (bSliceEnd && bInsidePicture)) return;
+    xCopyYuv2Pic(pic, outBestCU->getAddr(), outBestCU->getZorderIdxInCU(), depth);
 
     // Assert if Best prediction mode is NONE
     // Selected mode's RD-cost must be not MAX_INT64.
@@ -711,19 +691,20 @@
     assert(outBestCU->m_totalCost != MAX_INT64);
 }
 
-void TEncCu::xCompressCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth, PartSize parentSize)
+void TEncCu::xCompressCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth, bool bInsidePicture, PartSize parentSize)
 {
     //PPAScopeEvent(TEncCu_xCompressCU + depth);
 
     TComPic* pic = outBestCU->getPic();
 
-    // get Original YUV data from picture
     if (depth == 0)
     {
+        // get original YUV data from picture
         m_origYuv[depth]->copyFromPicYuv(pic->getPicYuvOrg(), outBestCU->getAddr(), outBestCU->getZorderIdxInCU());
     }
     else
     {
+        // copy partition YUV from depth 0 CTU cache
         m_origYuv[0]->copyPartToYuv(m_origYuv[depth], outBestCU->getZorderIdxInCU());
     }
 
@@ -734,26 +715,24 @@
     bool doNotBlockPu = true;
     bool earlyDetectionSkipMode = false;
 
-    bool bBoundary = false;
-    uint32_t lpelx = outBestCU->getCUPelX();
-    uint32_t rpelx = lpelx + outBestCU->getCUSize(0) - 1;
-    uint32_t tpely = outBestCU->getCUPelY();
-    uint32_t bpely = tpely + outBestCU->getCUSize(0) - 1;
-
-    // If slice start or slice end is within this cu...
-    TComSlice* slice = outTempCU->getPic()->getSlice();
-    bool bSliceEnd = (slice->getSliceCurEndCUAddr() > outTempCU->getSCUAddr() &&
-                      slice->getSliceCurEndCUAddr() < outTempCU->getSCUAddr() + outTempCU->getTotalNumPart());
-    bool bInsidePicture = (rpelx < outBestCU->getSlice()->getSPS()->getPicWidthInLumaSamples()) &&
-        (bpely < outBestCU->getSlice()->getSPS()->getPicHeightInLumaSamples());
+    TComSlice* slice = outTempCU->getSlice();
+    if (!bInsidePicture)
+    {
+        uint32_t lpelx = outBestCU->getCUPelX();
+        uint32_t tpely = outBestCU->getCUPelY();
+        uint32_t rpelx = lpelx + outBestCU->getCUSize(0);
+        uint32_t bpely = tpely + outBestCU->getCUSize(0);
+        bInsidePicture = (rpelx <= slice->getSPS()->getPicWidthInLumaSamples() &&
+                          bpely <= slice->getSPS()->getPicHeightInLumaSamples());
+    }
 
     // We need to split, so don't try these modes.
-    if (!bSliceEnd && bInsidePicture)
+    if (bInsidePicture)
     {
         outTempCU->initEstData(depth);
 
         // do inter modes, SKIP and 2Nx2N
-        if (outBestCU->getSlice()->getSliceType() != I_SLICE)
+        if (slice->getSliceType() != I_SLICE)
         {
             // 2Nx2N
             if (m_param->bEnableEarlySkip)
@@ -783,7 +762,7 @@
             outTempCU->initEstData(depth);
 
             // do inter modes, NxN, 2NxN, and Nx2N
-            if (outBestCU->getSlice()->getSliceType() != I_SLICE)
+            if (slice->getSliceType() != I_SLICE)
             {
                 // 2Nx2N, NxN
                 if (!(outBestCU->getCUSize(0) == 8))
@@ -819,7 +798,7 @@
                 }
 
                 // Try AMP (SIZE_2NxnU, SIZE_2NxnD, SIZE_nLx2N, SIZE_nRx2N)
-                if (pic->getSlice()->getSPS()->getAMPAcc(depth))
+                if (slice->getSPS()->getAMPAcc(depth))
                 {
                     bool bTestAMP_Hor = false, bTestAMP_Ver = false;
                     bool bTestMergeAMP_Hor = false, bTestMergeAMP_Ver = false;
@@ -910,7 +889,7 @@
 
             // do normal intra modes
             // speedup for inter frames
-            if (outBestCU->getSlice()->getSliceType() == I_SLICE ||
+            if (slice->getSliceType() == I_SLICE ||
                 outBestCU->getCbf(0, TEXT_LUMA) != 0   ||
                 outBestCU->getCbf(0, TEXT_CHROMA_U) != 0   ||
                 outBestCU->getCbf(0, TEXT_CHROMA_V) != 0) // avoid very complex intra if it is unlikely
@@ -920,7 +899,7 @@
 
                 if (depth == g_maxCUDepth - g_addCUDepth)
                 {
-                    if (outTempCU->getCUSize(0) > (1 << outTempCU->getSlice()->getSPS()->getQuadtreeTULog2MinSize()))
+                    if (outTempCU->getCUSize(0) > (1 << slice->getSPS()->getQuadtreeTULog2MinSize()))
                     {
                         xCheckRDCostIntraInInter(outBestCU, outTempCU, SIZE_NxN);
                         outTempCU->initEstData(depth);
@@ -928,9 +907,9 @@
                 }
             }
             // test PCM
-            if (pic->getSlice()->getSPS()->getUsePCM()
-                && outTempCU->getCUSize(0) <= (1 << pic->getSlice()->getSPS()->getPCMLog2MaxSize())
-                && outTempCU->getCUSize(0) >= (1 << pic->getSlice()->getSPS()->getPCMLog2MinSize()))
+            if (slice->getSPS()->getUsePCM()
+                && outTempCU->getCUSize(0) <= (1 << slice->getSPS()->getPCMLog2MaxSize())
+                && outTempCU->getCUSize(0) >= (1 << slice->getSPS()->getPCMLog2MinSize()))
             {
                 uint32_t rawbits = (2 * X265_DEPTH + X265_DEPTH) * outBestCU->getCUSize(0) * outBestCU->getCUSize(0) / 2;
                 uint32_t bestbits = outBestCU->m_totalBits;
@@ -952,10 +931,6 @@
         else
             bSubBranch = true;
     }
-    else if (!(bSliceEnd && bInsidePicture))
-    {
-        bBoundary = true;
-    }
 
     // copy original YUV samples to PCM buffer
     if (outBestCU->isLosslessCoded(0) && (outBestCU->getIPCMFlag(0) == false))
@@ -975,12 +950,12 @@
         for (; partUnitIdx < 4; partUnitIdx++)
         {
             subBestPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth); // clear sub partition datas or init.
-            subTempPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth); // clear sub partition datas or init.
 
-            bool bInSlice = subBestPartCU->getSCUAddr() < slice->getSliceCurEndCUAddr();
-            if (bInSlice && (subBestPartCU->getCUPelX() < slice->getSPS()->getPicWidthInLumaSamples()) &&
-                (subBestPartCU->getCUPelY() < slice->getSPS()->getPicHeightInLumaSamples()))
+            if (bInsidePicture ||
+                ((subBestPartCU->getCUPelX() < slice->getSPS()->getPicWidthInLumaSamples()) &&
+                 (subBestPartCU->getCUPelY() < slice->getSPS()->getPicHeightInLumaSamples())))
             {
+                subTempPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth); // clear sub partition datas or init.
                 if (0 == partUnitIdx) //initialize RD with previous depth buffer
                 {
                     m_rdSbacCoders[nextDepth][CI_CURR_BEST]->load(m_rdSbacCoders[depth][CI_CURR_BEST]);
@@ -990,27 +965,26 @@
                     m_rdSbacCoders[nextDepth][CI_CURR_BEST]->load(m_rdSbacCoders[nextDepth][CI_NEXT_BEST]);
                 }
 
-                xCompressCU(subBestPartCU, subTempPartCU, nextDepth);
+                xCompressCU(subBestPartCU, subTempPartCU, nextDepth, bInsidePicture);
                 outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth); // Keep best part data to current temporary data.
                 xCopyYuv2Tmp(subBestPartCU->getTotalNumPart() * partUnitIdx, nextDepth);
             }
-            else if (bInSlice)
+            else
             {
                 subBestPartCU->copyToPic(nextDepth);
                 outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth);
             }
         }
 
-        if (!bBoundary)
+        if (bInsidePicture)
         {
             m_entropyCoder->resetBits();
             m_entropyCoder->encodeSplitFlag(outTempCU, 0, depth);
-
             outTempCU->m_totalBits += m_entropyCoder->getNumberOfWrittenBits(); // split bits
         }
         outTempCU->m_totalCost = m_rdCost->calcRdCost(outTempCU->m_totalDistortion, outTempCU->m_totalBits);
 
-        if ((g_maxCUSize >> depth) == outTempCU->getSlice()->getPPS()->getMinCuDQPSize() && outTempCU->getSlice()->getPPS()->getUseDQP())
+        if ((g_maxCUSize >> depth) == slice->getPPS()->getMinCuDQPSize() && slice->getPPS()->getUseDQP())
         {
             bool hasResidual = false;
             for (uint32_t blkIdx = 0; blkIdx < outTempCU->getTotalNumPart(); blkIdx++)
@@ -1023,8 +997,7 @@
                 }
             }
 
-            uint32_t targetPartIdx;
-            targetPartIdx = 0;
+            uint32_t targetPartIdx = 0;
             if (hasResidual)
             {
                 bool foundNonZeroCbf = false;
@@ -1041,10 +1014,11 @@
         xCheckBestMode(outBestCU, outTempCU, depth); // RD compare current CU against split
     }
     outBestCU->copyToPic(depth); // Copy Best data to Picture for next partition prediction.
+
+    if (!bInsidePicture) return;
+
     // Copy Yuv data to picture Yuv
-    xCopyYuv2Pic(outBestCU->getPic(), outBestCU->getAddr(), outBestCU->getZorderIdxInCU(), depth, depth, outBestCU, lpelx, tpely);
-
-    if (bBoundary || (bSliceEnd && bInsidePicture)) return;
+    xCopyYuv2Pic(pic, outBestCU->getAddr(), outBestCU->getZorderIdxInCU(), depth);
 
     // Assert if Best prediction mode is NONE
     // Selected mode's RD-cost must be not MAX_INT64.
@@ -1131,58 +1105,67 @@
  * \param depth
  * \returns void
  */
-void TEncCu::xEncodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth)
+void TEncCu::xEncodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool bInsidePicture)
 {
     TComPic* pic = cu->getPic();
 
-    bool bBoundary = false;
-    uint32_t lpelx = cu->getCUPelX() + g_rasterToPelX[g_zscanToRaster[absPartIdx]];
-    uint32_t rpelx = lpelx + (g_maxCUSize >> depth) - 1;
-    uint32_t tpely = cu->getCUPelY() + g_rasterToPelY[g_zscanToRaster[absPartIdx]];
-    uint32_t bpely = tpely + (g_maxCUSize >> depth) - 1;
-
-    TComSlice* slice = cu->getPic()->getSlice();
-
-    // If slice start is within this cu...
+    TComSlice* slice = cu->getSlice();
+    if (!bInsidePicture)
+    {
+        uint32_t lpelx = cu->getCUPelX() + g_rasterToPelX[g_zscanToRaster[absPartIdx]];
+        uint32_t tpely = cu->getCUPelY() + g_rasterToPelY[g_zscanToRaster[absPartIdx]];
+        uint32_t rpelx = lpelx + (g_maxCUSize >> depth);
+        uint32_t bpely = tpely + (g_maxCUSize >> depth);
+        bInsidePicture = (rpelx <= slice->getSPS()->getPicWidthInLumaSamples() &&
+                          bpely <= slice->getSPS()->getPicHeightInLumaSamples());
+    }
 
     // We need to split, so don't try these modes.
-    if ((rpelx < slice->getSPS()->getPicWidthInLumaSamples()) && (bpely < slice->getSPS()->getPicHeightInLumaSamples()))
+    if (bInsidePicture)
     {
         m_entropyCoder->encodeSplitFlag(cu, absPartIdx, depth);
     }
-    else
-    {
-        bBoundary = true;
-    }
 
-    if ((g_maxCUSize >> depth) >= cu->getSlice()->getPPS()->getMinCuDQPSize() && cu->getSlice()->getPPS()->getUseDQP())
+    if ((g_maxCUSize >> depth) >= slice->getPPS()->getMinCuDQPSize() && slice->getPPS()->getUseDQP())
     {
         setdQPFlag(true);
     }
 
-    if (((depth < cu->getDepth(absPartIdx)) && (depth < (g_maxCUDepth - g_addCUDepth))) || bBoundary)
+    if (!bInsidePicture)
     {
         uint32_t qNumParts = (pic->getNumPartInCU() >> (depth << 1)) >> 2;
 
         for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++, absPartIdx += qNumParts)
         {
-            lpelx = cu->getCUPelX() + g_rasterToPelX[g_zscanToRaster[absPartIdx]];
-            tpely = cu->getCUPelY() + g_rasterToPelY[g_zscanToRaster[absPartIdx]];
-            bool bInSlice = cu->getSCUAddr() + absPartIdx < slice->getSliceCurEndCUAddr();
-            if (bInSlice && (lpelx < slice->getSPS()->getPicWidthInLumaSamples()) && (tpely < slice->getSPS()->getPicHeightInLumaSamples()))
+            uint32_t lpelx = cu->getCUPelX() + g_rasterToPelX[g_zscanToRaster[absPartIdx]];
+            uint32_t tpely = cu->getCUPelY() + g_rasterToPelY[g_zscanToRaster[absPartIdx]];
+            if ((lpelx < slice->getSPS()->getPicWidthInLumaSamples()) &&
+                (tpely < slice->getSPS()->getPicHeightInLumaSamples()))
             {
-                xEncodeCU(cu, absPartIdx, depth + 1);
+                xEncodeCU(cu, absPartIdx, depth + 1, bInsidePicture);
             }
         }
 
         return;
     }
 
-    if (cu->getSlice()->getPPS()->getTransquantBypassEnableFlag())
+    if ((depth < cu->getDepth(absPartIdx)) && (depth < (g_maxCUDepth - g_addCUDepth)))
+    {
+        uint32_t qNumParts = (pic->getNumPartInCU() >> (depth << 1)) >> 2;
+
+        for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++, absPartIdx += qNumParts)
+        {
+            xEncodeCU(cu, absPartIdx, depth + 1, bInsidePicture);
+        }
+
+        return;
+    }
+
+    if (slice->getPPS()->getTransquantBypassEnableFlag())
     {
         m_entropyCoder->encodeCUTransquantBypassFlag(cu, absPartIdx);
     }
-    if (!cu->getSlice()->isIntra())
+    if (!slice->isIntra())
     {
         m_entropyCoder->encodeSkipFlag(cu, absPartIdx);
     }
@@ -1542,40 +1525,9 @@
     }
 }
 
-void TEncCu::xCopyYuv2Pic(TComPic* outPic, uint32_t cuAddr, uint32_t absPartIdx, uint32_t depth, uint32_t srcDepth, TComDataCU* cu, uint32_t lpelx, uint32_t tpely)
+void TEncCu::xCopyYuv2Pic(TComPic* outPic, uint32_t cuAddr, uint32_t absPartIdx, uint32_t depth)
 {
-    uint32_t rpelx = lpelx + (g_maxCUSize >> depth) - 1;
-    uint32_t bpely = tpely + (g_maxCUSize >> depth) - 1;
-    TComSlice* slice = cu->getPic()->getSlice();
-    bool bSliceEnd = slice->getSliceCurEndCUAddr() > (cu->getAddr()) * cu->getPic()->getNumPartInCU() + absPartIdx &&
-        slice->getSliceCurEndCUAddr() < (cu->getAddr()) * cu->getPic()->getNumPartInCU() + absPartIdx + (cu->getPic()->getNumPartInCU() >> (depth << 1));
-
-    if (!bSliceEnd && (rpelx < slice->getSPS()->getPicWidthInLumaSamples()) && (bpely < slice->getSPS()->getPicHeightInLumaSamples()))
-    {
-        uint32_t absPartIdxInRaster = g_zscanToRaster[absPartIdx];
-        uint32_t srcBlkWidth = outPic->getNumPartInCUSize() >> (srcDepth);
-        uint32_t blkWidth    = outPic->getNumPartInCUSize() >> (depth);
-        uint32_t partIdxX = ((absPartIdxInRaster % outPic->getNumPartInCUSize()) % srcBlkWidth) / blkWidth;
-        uint32_t partIdxY = ((absPartIdxInRaster / outPic->getNumPartInCUSize()) % srcBlkWidth) / blkWidth;
-        uint32_t partIdx = partIdxY * (srcBlkWidth / blkWidth) + partIdxX;
-        m_bestRecoYuv[srcDepth]->copyToPicYuv(outPic->getPicYuvRec(), cuAddr, absPartIdx, depth - srcDepth, partIdx);
-    }
-    else
-    {
-        uint32_t qNumParts = (cu->getPic()->getNumPartInCU() >> (depth << 1)) >> 2;
-
-        for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++, absPartIdx += qNumParts)
-        {
-            uint32_t subCULPelX = lpelx + (g_maxCUSize >> (depth + 1)) * (partUnitIdx &  1);
-            uint32_t subCUTPelY = tpely + (g_maxCUSize >> (depth + 1)) * (partUnitIdx >> 1);
-
-            bool bInSlice = cu->getAddr() * cu->getPic()->getNumPartInCU() + absPartIdx < slice->getSliceCurEndCUAddr();
-            if (bInSlice && (subCULPelX < slice->getSPS()->getPicWidthInLumaSamples()) && (subCUTPelY < slice->getSPS()->getPicHeightInLumaSamples()))
-            {
-                xCopyYuv2Pic(outPic, cuAddr, absPartIdx, depth + 1, srcDepth, cu, subCULPelX, subCUTPelY); // Copy Yuv data to picture Yuv
-            }
-        }
-    }
+    m_bestRecoYuv[depth]->copyToPicYuv(outPic->getPicYuvRec(), cuAddr, absPartIdx);
 }
 
 void TEncCu::xCopyYuv2Tmp(uint32_t partUnitIdx, uint32_t nextDepth)
diff -r d72770a77ff8 -r e4a502435606 source/Lib/TLibEncoder/TEncCu.h
--- a/source/Lib/TLibEncoder/TEncCu.h	Thu May 01 17:41:42 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncCu.h	Sat May 03 23:26:23 2014 +0900
@@ -158,10 +158,10 @@
 protected:
 
     void finishCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth);
-    void xCompressCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth, PartSize parentSize = SIZE_NONE);
-    void xCompressIntraCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth);
-    void xCompressInterCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, TComDataCU*& cu, uint32_t depth, uint32_t partitionIndex, uint8_t minDepth);
-    void xEncodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth);
+    void xCompressCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth, bool bInsidePicture, PartSize parentSize = SIZE_NONE);
+    void xCompressIntraCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth, bool bInsidePicture);
+    void xCompressInterCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, TComDataCU*& cu, uint32_t depth, bool bInsidePicture, uint32_t partitionIndex, uint8_t minDepth);
+    void xEncodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool bInsidePicture);
     void xCheckBestMode(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth);
 
     void xCheckRDCostMerge2Nx2N(TComDataCU*& outBestCU, TComDataCU*& outTempCU, bool *earlyDetectionSkipMode,
@@ -177,8 +177,7 @@
     void xCheckDQP(TComDataCU* cu);
 
     void xCheckIntraPCM(TComDataCU*& outBestCU, TComDataCU*& outTempCU);
-    void xCopyYuv2Pic(TComPic* outPic, uint32_t cuAddr, uint32_t absPartIdx, uint32_t depth, uint32_t uiSrcDepth, TComDataCU* cu,
-                      uint32_t lpelx, uint32_t tpely);
+    void xCopyYuv2Pic(TComPic* outPic, uint32_t cuAddr, uint32_t absPartIdx, uint32_t depth);
     void xCopyYuv2Tmp(uint32_t uhPartUnitIdx, uint32_t depth);
 
     bool getdQPFlag()        { return m_bEncodeDQP; }
diff -r d72770a77ff8 -r e4a502435606 source/encoder/compress.cpp
--- a/source/encoder/compress.cpp	Thu May 01 17:41:42 2014 -0500
+++ b/source/encoder/compress.cpp	Sat May 03 23:26:23 2014 +0900
@@ -333,7 +333,7 @@
     x265_emms();
 }
 
-void TEncCu::xCompressInterCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, TComDataCU*& cu, uint32_t depth, uint32_t PartitionIndex, uint8_t minDepth)
+void TEncCu::xCompressInterCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, TComDataCU*& cu, uint32_t depth, bool bInsidePicture, uint32_t PartitionIndex, uint8_t minDepth)
 {
     TComPic* pic = outTempCU->getPic();
 
@@ -350,32 +350,30 @@
 
     // variables for fast encoder decision
     bool bSubBranch = true;
-    bool bBoundary = false;
-    uint32_t lpelx = outTempCU->getCUPelX();
-    uint32_t rpelx = lpelx + outTempCU->getCUSize(0) - 1;
-    uint32_t tpely = outTempCU->getCUPelY();
-    uint32_t bpely = tpely + outTempCU->getCUSize(0) - 1;
-    TComDataCU* subTempPartCU, * subBestPartCU;
     int qp = outTempCU->getQP(0);
 
-    // If slice start or slice end is within this cu...
-    TComSlice * slice = outTempCU->getPic()->getSlice();
-    bool bSliceEnd = slice->getSliceCurEndCUAddr() > outTempCU->getSCUAddr() &&
-        slice->getSliceCurEndCUAddr() < outTempCU->getSCUAddr() + outTempCU->getTotalNumPart();
-    bool bInsidePicture = (rpelx < outTempCU->getSlice()->getSPS()->getPicWidthInLumaSamples()) &&
-        (bpely < outTempCU->getSlice()->getSPS()->getPicHeightInLumaSamples());
+    TComSlice* slice = outTempCU->getSlice();
+    if (!bInsidePicture)
+    {
+        uint32_t lpelx = outTempCU->getCUPelX();
+        uint32_t tpely = outTempCU->getCUPelY();
+        uint32_t rpelx = lpelx + outTempCU->getCUSize(0);
+        uint32_t bpely = tpely + outTempCU->getCUSize(0);
+        bInsidePicture = (rpelx <= slice->getSPS()->getPicWidthInLumaSamples() &&
+                          bpely <= slice->getSPS()->getPicHeightInLumaSamples());
+    }
 
     if (depth == 0 && m_param->rdLevel == 0)
     {
-        m_origYuv[depth]->copyToPicYuv(cu->getPic()->getPicYuvRec(), cu->getAddr(), 0, 0, 0);
+        m_origYuv[depth]->copyToPicYuv(pic->getPicYuvRec(), cu->getAddr(), 0);
     }
     // We need to split, so don't try these modes.
     TComYuv* tempYuv = NULL;
 #if TOPSKIP
     if (depth == 0)
     {
-        TComDataCU* colocated0 = outTempCU->getSlice()->getNumRefIdx(REF_PIC_LIST_0) > 0 ? outTempCU->getSlice()->getRefPic(REF_PIC_LIST_0, 0)->getCU(outTempCU->getAddr()) : NULL;
-        TComDataCU* colocated1 = outTempCU->getSlice()->getNumRefIdx(REF_PIC_LIST_1) > 0 ? outTempCU->getSlice()->getRefPic(REF_PIC_LIST_1, 0)->getCU(outTempCU->getAddr()) : NULL;
+        TComDataCU* colocated0 = slice->getNumRefIdx(REF_PIC_LIST_0) > 0 ? slice->getRefPic(REF_PIC_LIST_0, 0)->getCU(outTempCU->getAddr()) : NULL;
+        TComDataCU* colocated1 = slice->getNumRefIdx(REF_PIC_LIST_1) > 0 ? slice->getRefPic(REF_PIC_LIST_1, 0)->getCU(outTempCU->getAddr()) : NULL;
         char currentQP = outTempCU->getQP(0);
         char previousQP = colocated0->getQP(0);
         uint8_t delta = 0, minDepth0 = 4, minDepth1 = 4;
@@ -407,17 +405,17 @@
     if (!(depth < minDepth)) //topskip
 #endif // if TOPSKIP
     {
-        if (!bSliceEnd && bInsidePicture)
+        if (bInsidePicture)
         {
             /* Initialise all Mode-CUs based on parentCU */
             if (depth == 0)
             {
-                m_interCU_2Nx2N[depth]->initCU(cu->getPic(), cu->getAddr());
-                m_interCU_Nx2N[depth]->initCU(cu->getPic(), cu->getAddr());
-                m_interCU_2NxN[depth]->initCU(cu->getPic(), cu->getAddr());
-                m_intraInInterCU[depth]->initCU(cu->getPic(), cu->getAddr());
-                m_mergeCU[depth]->initCU(cu->getPic(), cu->getAddr());
-                m_bestMergeCU[depth]->initCU(cu->getPic(), cu->getAddr());
+                m_interCU_2Nx2N[depth]->initCU(pic, cu->getAddr());
+                m_interCU_Nx2N[depth]->initCU(pic, cu->getAddr());
+                m_interCU_2NxN[depth]->initCU(pic, cu->getAddr());
+                m_intraInInterCU[depth]->initCU(pic, cu->getAddr());
+                m_mergeCU[depth]->initCU(pic, cu->getAddr());
+                m_bestMergeCU[depth]->initCU(pic, cu->getAddr());
             }
             else
             {
@@ -452,24 +450,24 @@
                 {
                     xComputeCostInter(m_interCU_Nx2N[depth], m_modePredYuv[1][depth], SIZE_Nx2N);
                     xComputeCostInter(m_interCU_2NxN[depth], m_modePredYuv[2][depth], SIZE_2NxN);
+                    if (m_interCU_Nx2N[depth]->m_totalCost < outBestCU->m_totalCost)
+                    {
+                        outBestCU = m_interCU_Nx2N[depth];
+
+                        tempYuv = m_modePredYuv[1][depth];
+                        m_modePredYuv[1][depth] = m_bestPredYuv[depth];
+                        m_bestPredYuv[depth] = tempYuv;
+                    }
+                    if (m_interCU_2NxN[depth]->m_totalCost < outBestCU->m_totalCost)
+                    {
+                        outBestCU = m_interCU_2NxN[depth];
+
+                        tempYuv = m_modePredYuv[2][depth];
+                        m_modePredYuv[2][depth] = m_bestPredYuv[depth];
+                        m_bestPredYuv[depth] = tempYuv;
+                    }
                 }
 
-                if (m_interCU_Nx2N[depth]->m_totalCost < outBestCU->m_totalCost)
-                {
-                    outBestCU = m_interCU_Nx2N[depth];
-
-                    tempYuv = m_modePredYuv[1][depth];
-                    m_modePredYuv[1][depth] = m_bestPredYuv[depth];
-                    m_bestPredYuv[depth] = tempYuv;
-                }
-                if (m_interCU_2NxN[depth]->m_totalCost < outBestCU->m_totalCost)
-                {
-                    outBestCU = m_interCU_2NxN[depth];
-
-                    tempYuv = m_modePredYuv[2][depth];
-                    m_modePredYuv[2][depth] = m_bestPredYuv[depth];
-                    m_bestPredYuv[depth] = tempYuv;
-                }
                 if (m_param->rdLevel > 2)
                 {
                     //calculate the motion compensation for chroma for the best mode selected
@@ -495,7 +493,7 @@
                 }
 
                 /* Check for Intra in inter frames only if its a P-slice*/
-                if (outBestCU->getSlice()->getSliceType() == P_SLICE)
+                if (slice->getSliceType() == P_SLICE)
                 {
                     /*compute intra cost */
 
@@ -558,7 +556,7 @@
                         xEncodeIntraInInter(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth],  m_bestRecoYuv[depth]);
                     }
                 }
-                if (m_param->rdLevel == 1)
+                else if (m_param->rdLevel == 1)
                 {
                     if (m_bestMergeCU[depth]->m_sa8dCost < outBestCU->m_totalCost)
                     {
@@ -587,7 +585,7 @@
                         m_search->generateCoeffRecon(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth], m_bestRecoYuv[depth], false);
                     }
                 }
-                if (m_param->rdLevel == 0)
+                else if (m_param->rdLevel == 0)
                 {
                     if (outBestCU->getPredictionMode(0) == MODE_INTER)
                     {
@@ -627,10 +625,6 @@
                 outBestCU->m_totalCost  = m_rdCost->calcRdCost(outBestCU->m_totalDistortion, outBestCU->m_totalBits);
             }
         }
-        else if (!(bSliceEnd && bInsidePicture))
-        {
-            bBoundary = true;
-        }
     }
 
     // further split
@@ -651,7 +645,7 @@
             TComDataCU* aboveLeft = outTempCU->getCUAboveLeft();
             TComDataCU* aboveRight = outTempCU->getCUAboveRight();
             TComDataCU* left = outTempCU->getCULeft();
-            TComDataCU* rootCU = outTempCU->getPic()->getPicSym()->getCU(outTempCU->getAddr());
+            TComDataCU* rootCU = pic->getPicSym()->getCU(outTempCU->getAddr());
 
             totalCostCU += rootCU->m_avgCost[depth] * rootCU->m_count[depth];
             totalCountCU += rootCU->m_count[depth];
@@ -689,22 +683,23 @@
 
                 /* Copy Yuv data to picture Yuv */
                 if (m_param->rdLevel != 0)
-                    xCopyYuv2Pic(outBestCU->getPic(), outBestCU->getAddr(), outBestCU->getZorderIdxInCU(), depth, depth, outBestCU, lpelx, tpely);
+                    xCopyYuv2Pic(pic, outBestCU->getAddr(), outBestCU->getZorderIdxInCU(), depth);
                 return;
             }
         }
 #endif // if EARLY_EXIT
         outTempCU->initEstData(depth, qp);
-        uint8_t nextDepth = (uint8_t)(depth + 1);
-        subTempPartCU = m_tempCU[nextDepth];
+        uint8_t     nextDepth = (uint8_t)(depth + 1);
+        TComDataCU* subBestPartCU;
+        TComDataCU* subTempPartCU = m_tempCU[nextDepth];
         for (uint32_t nextDepth_partIndex = 0; nextDepth_partIndex < 4; nextDepth_partIndex++)
         {
             subBestPartCU = NULL;
             subTempPartCU->initSubCU(outTempCU, nextDepth_partIndex, nextDepth, qp); // clear sub partition datas or init.
 
-            bool bInSlice = subTempPartCU->getSCUAddr() < slice->getSliceCurEndCUAddr();
-            if (bInSlice && (subTempPartCU->getCUPelX() < slice->getSPS()->getPicWidthInLumaSamples()) &&
-                (subTempPartCU->getCUPelY() < slice->getSPS()->getPicHeightInLumaSamples()))
+            if (bInsidePicture ||
+                ((subTempPartCU->getCUPelX() < slice->getSPS()->getPicWidthInLumaSamples()) &&
+                 (subTempPartCU->getCUPelY() < slice->getSPS()->getPicHeightInLumaSamples())))
             {
                 if (0 == nextDepth_partIndex) //initialize RD with previous depth buffer
                 {
@@ -714,12 +709,12 @@
                 {
                     m_rdSbacCoders[nextDepth][CI_CURR_BEST]->load(m_rdSbacCoders[nextDepth][CI_NEXT_BEST]);
                 }
-                xCompressInterCU(subBestPartCU, subTempPartCU, outTempCU, nextDepth, nextDepth_partIndex, minDepth);
+                xCompressInterCU(subBestPartCU, subTempPartCU, outTempCU, nextDepth, bInsidePicture, nextDepth_partIndex, minDepth);
 #if EARLY_EXIT
                 if (subBestPartCU->getPredictionMode(0) != MODE_INTRA)
                 {
                     uint64_t tempavgCost = subBestPartCU->m_totalCost;
-                    TComDataCU* rootCU = outTempCU->getPic()->getPicSym()->getCU(outTempCU->getAddr());
+                    TComDataCU* rootCU = pic->getPicSym()->getCU(outTempCU->getAddr());
                     uint64_t temp = rootCU->m_avgCost[depth + 1] * rootCU->m_count[depth + 1];
                     rootCU->m_count[depth + 1] += 1;
                     rootCU->m_avgCost[depth + 1] = (temp + tempavgCost) / rootCU->m_count[depth + 1];
@@ -728,18 +723,18 @@
                 /* Adding costs from best SUbCUs */
                 outTempCU->copyPartFrom(subBestPartCU, nextDepth_partIndex, nextDepth, true); // Keep best part data to current temporary data.
                 if (m_param->rdLevel != 0)
-                    xCopyYuv2Tmp(subBestPartCU->getTotalNumPart() * nextDepth_partIndex, nextDepth);
-                if (m_param->rdLevel == 0)
+                    m_bestRecoYuv[nextDepth]->copyToPartYuv(m_tmpRecoYuv[depth], subBestPartCU->getTotalNumPart() * nextDepth_partIndex);
+                else
                     m_bestPredYuv[nextDepth]->copyToPartYuv(m_tmpPredYuv[depth], subBestPartCU->getTotalNumPart() * nextDepth_partIndex);
             }
-            else if (bInSlice)
+            else
             {
                 subTempPartCU->copyToPic((uint8_t)nextDepth);
                 outTempCU->copyPartFrom(subTempPartCU, nextDepth_partIndex, nextDepth, false);
             }
         }
 
-        if (!bBoundary)
+        if (bInsidePicture)
         {
             if (m_param->rdLevel > 1)
             {
@@ -753,13 +748,13 @@
         else
             outTempCU->m_totalCost = m_rdCost->calcRdSADCost(outTempCU->m_totalDistortion, outTempCU->m_totalBits);
 
-        if ((g_maxCUSize >> depth) == outTempCU->getSlice()->getPPS()->getMinCuDQPSize() && outTempCU->getSlice()->getPPS()->getUseDQP())
+        if ((g_maxCUSize >> depth) == slice->getPPS()->getMinCuDQPSize() && slice->getPPS()->getUseDQP())
         {
             bool hasResidual = false;
-            for (uint32_t uiBlkIdx = 0; uiBlkIdx < outTempCU->getTotalNumPart(); uiBlkIdx++)
+            for (uint32_t blkIdx = 0; blkIdx < outTempCU->getTotalNumPart(); blkIdx++)
             {
-                if (outTempCU->getCbf(uiBlkIdx, TEXT_LUMA) || outTempCU->getCbf(uiBlkIdx, TEXT_CHROMA_U) ||
-                    outTempCU->getCbf(uiBlkIdx, TEXT_CHROMA_V))
+                if (outTempCU->getCbf(blkIdx, TEXT_LUMA) || outTempCU->getCbf(blkIdx, TEXT_CHROMA_U) ||
+                    outTempCU->getCbf(blkIdx, TEXT_CHROMA_V))
                 {
                     hasResidual = true;
                     break;
@@ -789,7 +784,7 @@
             if (depth == 0)
             {
                 uint64_t tempavgCost = outBestCU->m_totalCost;
-                TComDataCU* rootCU = outTempCU->getPic()->getPicSym()->getCU(outTempCU->getAddr());
+                TComDataCU* rootCU = pic->getPicSym()->getCU(outTempCU->getAddr());
                 uint64_t temp = rootCU->m_avgCost[depth] * rootCU->m_count[depth];
                 rootCU->m_count[depth] += 1;
                 rootCU->m_avgCost[depth] = (temp + tempavgCost) / rootCU->m_count[depth];
@@ -828,10 +823,11 @@
     else if (m_param->rdLevel != 0)
     {
         /* Copy Yuv data to picture Yuv */
-        xCopyYuv2Pic(outBestCU->getPic(), outBestCU->getAddr(), outBestCU->getZorderIdxInCU(), depth, depth, outBestCU, lpelx, tpely);
+        if (bInsidePicture)
+            xCopyYuv2Pic(pic, outBestCU->getAddr(), outBestCU->getZorderIdxInCU(), depth);
     }
 
-    if (bBoundary || (bSliceEnd && bInsidePicture)) return;
+    if (!bInsidePicture) return;
 
     /* Assert if Best prediction mode is NONE
      * Selected mode's RD-cost must be not MAX_INT64 */
@@ -845,7 +841,7 @@
     uint8_t nextDepth = (uint8_t)(depth + 1);
     TComDataCU* subTempPartCU = m_tempCU[nextDepth];
     TComPic* pic = cu->getPic();
-    TComSlice* slice = cu->getPic()->getSlice();
+    TComSlice* slice = cu->getSlice();
 
     if (((depth < lcu->getDepth(absPartIdx)) && (depth < (g_maxCUDepth - g_addCUDepth))))
     {
@@ -854,8 +850,8 @@
         {
             uint32_t lpelx = lcu->getCUPelX() + g_rasterToPelX[g_zscanToRaster[absPartIdx]];
             uint32_t tpely = lcu->getCUPelY() + g_rasterToPelY[g_zscanToRaster[absPartIdx]];
-            bool bInSlice = lcu->getSCUAddr() + absPartIdx < slice->getSliceCurEndCUAddr();
-            if (bInSlice && (lpelx < slice->getSPS()->getPicWidthInLumaSamples()) && (tpely < slice->getSPS()->getPicHeightInLumaSamples()))
+            if ((lpelx < slice->getSPS()->getPicWidthInLumaSamples()) &&
+                (tpely < slice->getSPS()->getPicHeightInLumaSamples()))
             {
                 subTempPartCU->copyToSubCU(cu, partUnitIdx, depth + 1);
                 encodeResidue(lcu, subTempPartCU, absPartIdx, depth + 1);
@@ -927,13 +923,13 @@
                 reco = m_bestRecoYuv[depth]->getCrAddr();
                 reco = m_bestRecoYuv[depth]->getCrAddr();
                 primitives.chroma[m_param->internalCsp].add_ps[part](reco, dststride, pred, res, src1stride, src2stride);
-                m_bestRecoYuv[depth]->copyToPicYuv(lcu->getPic()->getPicYuvRec(), lcu->getAddr(), absPartIdx, 0, 0);
+                m_bestRecoYuv[depth]->copyToPicYuv(pic->getPicYuvRec(), lcu->getAddr(), absPartIdx);
                 return;
             }
         }
 
         //Generate Recon
-        TComPicYuv* rec = lcu->getPic()->getPicYuvRec();
+        TComPicYuv* rec = pic->getPicYuvRec();
         int part = partitionFromSizes(cu->getCUSize(0), cu->getCUSize(0));
         pixel* src = m_bestPredYuv[0]->getLumaAddr(absPartIdx);
         pixel* dst = rec->getLumaAddr(cu->getAddr(), absPartIdx);
@@ -956,7 +952,7 @@
         m_origYuv[0]->copyPartToYuv(m_origYuv[depth], absPartIdx);
         m_search->generateCoeffRecon(cu, m_origYuv[depth], m_modePredYuv[5][depth], m_tmpResiYuv[depth],  m_tmpRecoYuv[depth], false);
         xCheckDQP(cu);
-        m_tmpRecoYuv[depth]->copyToPicYuv(cu->getPic()->getPicYuvRec(), lcu->getAddr(), absPartIdx, 0, 0);
+        m_tmpRecoYuv[depth]->copyToPicYuv(pic->getPicYuvRec(), lcu->getAddr(), absPartIdx);
         cu->copyCodedToPic(depth);
     }
 }



More information about the x265-devel mailing list