<div dir="ltr">Please ignore the above patch.</div><div class="gmail_extra"><br><div class="gmail_quote">On Mon, Sep 22, 2014 at 9:06 AM,  <span dir="ltr"><<a href="mailto:santhoshini@multicorewareinc.com" target="_blank">santhoshini@multicorewareinc.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Santhoshini Sekar <<a href="mailto:santhoshini@multicorewareinc.com">santhoshini@multicorewareinc.com</a>><br>
# Date 1411356953 -19800<br>
#      Mon Sep 22 09:05:53 2014 +0530<br>
# Node ID f70fd79cb3e1a0cb60b1c7ea5aac9a52922703c3<br>
# Parent  c8f53398f8ceb9e536c2f1569fe4a2a2756aa014<br>
TComDataCU: replace getZorderIdxInCU() with encodeIdx of CU structure<br>
<br>
diff -r c8f53398f8ce -r f70fd79cb3e1 source/Lib/TLibCommon/TComDataCU.cpp<br>
--- a/source/Lib/TLibCommon/TComDataCU.cpp      Sat Sep 20 15:41:08 2014 +0100<br>
+++ b/source/Lib/TLibCommon/TComDataCU.cpp      Mon Sep 22 09:05:53 2014 +0530<br>
@@ -387,7 +387,7 @@<br>
 }<br>
<br>
 // initialize Sub partition<br>
-void TComDataCU::initSubCU(TComDataCU* cu, uint32_t partUnitIdx, uint32_t depth, int qp)<br>
+void TComDataCU::initSubCU(TComDataCU* cu, uint32_t partUnitIdx, uint32_t depth, int qp, CU* cuData)<br>
 {<br>
     X265_CHECK(partUnitIdx < 4, "part unit should be less than 4\n");<br>
     uint8_t log2CUSize = g_maxLog2CUSize - depth;<br>
@@ -396,7 +396,7 @@<br>
     m_pic              = cu->m_pic;<br>
     m_slice            = cu->m_slice;<br>
     m_cuAddr           = cu->getAddr();<br>
-    m_absIdxInLCU      = cu->getZorderIdxInCU() + partOffset;<br>
+    m_absIdxInLCU      = cuData->encodeIdx * 4 + partOffset;<br>
<br>
     m_cuPelX           = cu->getCUPelX() + ((partUnitIdx &  1) << log2CUSize);<br>
     m_cuPelY           = cu->getCUPelY() + ((partUnitIdx >> 1) << log2CUSize);<br>
@@ -453,7 +453,7 @@<br>
     m_cuAboveRight  = cu->getCUAboveRight();<br>
 }<br>
<br>
-void TComDataCU::copyToSubCU(TComDataCU* cu, uint32_t partUnitIdx, uint32_t depth)<br>
+void TComDataCU::copyToSubCU(TComDataCU* cu, uint32_t partUnitIdx, uint32_t depth, CU* cuData)<br>
 {<br>
     X265_CHECK(partUnitIdx < 4, "part unit should be less than 4\n");<br>
<br>
@@ -462,7 +462,7 @@<br>
     m_pic              = cu->m_pic;<br>
     m_slice            = cu->m_slice;<br>
     m_cuAddr           = cu->getAddr();<br>
-    m_absIdxInLCU      = cu->getZorderIdxInCU() + partOffset;<br>
+    m_absIdxInLCU      = cuData->encodeIdx * 4 + partOffset;<br>
<br>
     m_cuPelX           = cu->getCUPelX() + ((partUnitIdx &  1) << (g_maxLog2CUSize - depth));<br>
     m_cuPelY           = cu->getCUPelY() + ((partUnitIdx >> 1) << (g_maxLog2CUSize - depth));<br>
@@ -1067,9 +1067,9 @@<br>
     }<br>
     else<br>
     {<br>
-        if (getZorderIdxInCU() > 0)<br>
+        if (m_CULocalData->encodeIdx *4 > 0)<br>
         {<br>
-            return m_pic->getCU(getAddr())->getLastCodedQP(getZorderIdxInCU());<br>
+            return m_pic->getCU(getAddr())->getLastCodedQP(m_CULocalData->encodeIdx *4);<br>
         }<br>
         else if (getAddr() > 0 && !(m_slice->m_pps->bEntropyCodingSyncEnabled &&<br>
                                     getAddr() % m_pic->getFrameWidthInCU() == 0))<br>
diff -r c8f53398f8ce -r f70fd79cb3e1 source/Lib/TLibCommon/TComDataCU.h<br>
--- a/source/Lib/TLibCommon/TComDataCU.h        Sat Sep 20 15:41:08 2014 +0100<br>
+++ b/source/Lib/TLibCommon/TComDataCU.h        Mon Sep 22 09:05:53 2014 +0530<br>
@@ -273,9 +273,9 @@<br>
<br>
     void          initCU(Frame* pic, uint32_t cuAddr);<br>
     void          initEstData();<br>
-    void          initSubCU(TComDataCU* cu, uint32_t partUnitIdx, uint32_t depth, int qp);<br>
+    void          initSubCU(TComDataCU* cu, uint32_t partUnitIdx, uint32_t depth, int qp, CU* cuData);<br>
<br>
-    void          copyToSubCU(TComDataCU* lcu, uint32_t partUnitIdx, uint32_t depth);<br>
+    void          copyToSubCU(TComDataCU* lcu, uint32_t partUnitIdx, uint32_t depth, CU* cuData);<br>
     void          copyPartFrom(TComDataCU* cu, uint32_t partUnitIdx, uint32_t depth, bool isRDObasedAnalysis = true);<br>
<br>
     void          copyToPic(uint32_t depth);<br>
@@ -288,8 +288,6 @@<br>
<br>
     uint32_t&     getAddr()                        { return m_cuAddr; }<br>
<br>
-    uint32_t&     getZorderIdxInCU()               { return m_absIdxInLCU; }<br>
-<br>
     uint32_t      getSCUAddr() const               { return (m_cuAddr << g_maxFullDepth * 2) + m_absIdxInLCU; }<br>
<br>
<br>
diff -r c8f53398f8ce -r f70fd79cb3e1 source/Lib/TLibCommon/TComPattern.cpp<br>
--- a/source/Lib/TLibCommon/TComPattern.cpp     Sat Sep 20 15:41:08 2014 +0100<br>
+++ b/source/Lib/TLibCommon/TComPattern.cpp     Mon Sep 22 09:05:53 2014 +0530<br>
@@ -50,7 +50,7 @@<br>
 // ====================================================================================================================<br>
<br>
 void TComPattern::initAdiPattern(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, pixel* adiBuf,<br>
-                                 pixel* refAbove, pixel* refLeft, pixel* refAboveFlt, pixel* refLeftFlt, int dirMode)<br>
+                                 pixel* refAbove, pixel* refLeft, pixel* refAboveFlt, pixel* refLeftFlt, int dirMode, CU* cuData)<br>
 {<br>
     pixel* roiOrigin;<br>
     pixel* adiTemp;<br>
@@ -63,7 +63,7 @@<br>
     uint32_t tuSize = intraNeighbors.tuSize;<br>
     uint32_t tuSize2 = tuSize << 1;<br>
<br>
-    roiOrigin = cu->m_pic->getPicYuvRec()->getLumaAddr(cu->getAddr(), cu->getZorderIdxInCU() + zOrderIdxInPart);<br>
+    roiOrigin = cu->m_pic->getPicYuvRec()->getLumaAddr(cu->getAddr(), cuData->encodeIdx * 4 + zOrderIdxInPart);<br>
     adiTemp   = adiBuf;<br>
<br>
     fillReferenceSamples(roiOrigin, picStride, adiTemp, intraNeighbors);<br>
@@ -163,7 +163,7 @@<br>
     }<br>
 }<br>
<br>
-void TComPattern::initAdiPatternChroma(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, pixel* adiBuf, uint32_t chromaId)<br>
+void TComPattern::initAdiPatternChroma(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, pixel* adiBuf, uint32_t chromaId, CU* cuData)<br>
 {<br>
     pixel*  roiOrigin;<br>
     pixel*  adiTemp;<br>
@@ -175,7 +175,7 @@<br>
     initIntraNeighbors(cu, zOrderIdxInPart, partDepth, false, &intraNeighbors);<br>
     uint32_t tuSize = intraNeighbors.tuSize;<br>
<br>
-    roiOrigin = cu->m_pic->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), cu->getZorderIdxInCU() + zOrderIdxInPart);<br>
+    roiOrigin = cu->m_pic->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), cuData->encodeIdx * 4 + zOrderIdxInPart);<br>
     adiTemp   = getAdiChromaBuf(chromaId, tuSize, adiBuf);<br>
<br>
     fillReferenceSamples(roiOrigin, picStride, adiTemp, intraNeighbors);<br>
diff -r c8f53398f8ce -r f70fd79cb3e1 source/Lib/TLibCommon/TComPattern.h<br>
--- a/source/Lib/TLibCommon/TComPattern.h       Sat Sep 20 15:41:08 2014 +0100<br>
+++ b/source/Lib/TLibCommon/TComPattern.h       Mon Sep 22 09:05:53 2014 +0530<br>
@@ -53,6 +53,7 @@<br>
<br>
 class TComDataCU;<br>
<br>
+struct CU;<br>
 struct IntraNeighbors<br>
 {<br>
     int  numIntraNeighbor;<br>
@@ -84,11 +85,12 @@<br>
     /// set parameters from pixel buffers for accessing neighboring pixels<br>
     static void initAdiPattern(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, pixel* adiBuf,<br>
                                pixel* refAbove, pixel* refLeft,<br>
-                               pixel* refAboveFlt, pixel* refLeftFlt, int dirMode);<br>
+                               pixel* refAboveFlt, pixel* refLeftFlt, int dirMode,<br>
+                               CU* cuData);<br>
<br>
     /// set chroma parameters from CU data for accessing ADI data<br>
     static void initAdiPatternChroma(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth,<br>
-                                     pixel* adiBuf, uint32_t chromaId);<br>
+                                     pixel* adiBuf, uint32_t chromaId, CU* cuData);<br>
<br>
     static void initIntraNeighbors(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, bool isLuma, IntraNeighbors *IntraNeighbors);<br>
<br>
diff -r c8f53398f8ce -r f70fd79cb3e1 source/encoder/analysis.cpp<br>
--- a/source/encoder/analysis.cpp       Sat Sep 20 15:41:08 2014 +0100<br>
+++ b/source/encoder/analysis.cpp       Mon Sep 22 09:05:53 2014 +0530<br>
@@ -424,7 +424,7 @@<br>
     //PPAScopeEvent(CompressIntraCU + depth);<br>
     Frame* pic = outBestCU->m_pic;<br>
     uint32_t cuAddr = outBestCU->getAddr();<br>
-    uint32_t absPartIdx = outBestCU->getZorderIdxInCU();<br>
+    uint32_t absPartIdx = cu->encodeIdx * 4;<br>
<br>
     if (depth == 0)<br>
         // get original YUV data from picture<br>
@@ -469,10 +469,10 @@<br>
         {<br>
             CU *child_cu = cuPicsym->m_CULocalData + cu->childIdx + partUnitIdx;<br>
             int qp = outTempCU->getQP(0);<br>
-            subBestPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp); // clear sub partition datas or init.<br>
+            subBestPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp, cu); // clear sub partition datas or init.<br>
             if (child_cu->flags & CU::PRESENT)<br>
             {<br>
-                subTempPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp); // clear sub partition datas or init.<br>
+                subTempPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp, cu); // clear sub partition datas or init.<br>
                 if (0 == partUnitIdx) //initialize RD with previous depth buffer<br>
                     m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[depth][CI_CURR_BEST]);<br>
                 else<br>
@@ -560,15 +560,15 @@<br>
     int32_t ctuToDepthIndex = g_maxCUDepth - 1;<br>
<br>
     if (depth)<br>
-        m_origYuv[0]->copyPartToYuv(m_origYuv[depth], outBestCU->getZorderIdxInCU());<br>
+        m_origYuv[0]->copyPartToYuv(m_origYuv[depth], cu->encodeIdx * 4);<br>
     else<br>
-        m_origYuv[depth]->copyFromPicYuv(pic->getPicYuvOrg(), outBestCU->getAddr(), outBestCU->getZorderIdxInCU());<br>
+        m_origYuv[depth]->copyFromPicYuv(pic->getPicYuvOrg(), outBestCU->getAddr(), cu->encodeIdx * 4);<br>
<br>
     Slice* slice = outTempCU->m_slice;<br>
     int32_t cu_split_flag = !(cu->flags & CU::LEAF);<br>
     int32_t cu_unsplit_flag = !(cu->flags & CU::SPLIT_MANDATORY);<br>
<br>
-    if (cu_unsplit_flag && ((zOrder == outBestCU->getZorderIdxInCU()) && (depth == sharedDepth[zOrder])))<br>
+    if (cu_unsplit_flag && ((zOrder == cu->encodeIdx * 4) && (depth == sharedDepth[zOrder])))<br>
     {<br>
         m_quant.setQPforQuant(outTempCU);<br>
         checkIntra(outBestCU, outTempCU, (PartSize)sharedPartSizes[zOrder], cu, &sharedModes[zOrder]);<br>
@@ -602,10 +602,10 @@<br>
         {<br>
             CU *child_cu = cuPicsym->m_CULocalData + cu->childIdx + partUnitIdx;<br>
             int qp = outTempCU->getQP(0);<br>
-            subBestPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp); // clear sub partition datas or init.<br>
+            subBestPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp, cu); // clear sub partition datas or init.<br>
             if (child_cu->flags & CU::PRESENT)<br>
             {<br>
-                subTempPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp); // clear sub partition datas or init.<br>
+                subTempPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp, cu); // clear sub partition datas or init.<br>
<br>
                 if (partUnitIdx) // initialize RD with previous depth buffer<br>
                     m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[nextDepth][CI_NEXT_BEST]);<br>
@@ -668,7 +668,7 @@<br>
     outBestCU->copyToPic(depth);<br>
     if (!cu_unsplit_flag)<br>
         return;<br>
-    m_bestRecoYuv[depth]->copyToPicYuv(pic->getPicYuvRec(), outBestCU->getAddr(), outBestCU->getZorderIdxInCU());<br>
+    m_bestRecoYuv[depth]->copyToPicYuv(pic->getPicYuvRec(), outBestCU->getAddr(), cu->encodeIdx * 4);<br>
<br>
 #if CHECKED_BUILD || _DEBUG<br>
     X265_CHECK(outBestCU->getPartitionSize(0) != SIZE_NONE, "no best partition size\n");<br>
@@ -696,11 +696,11 @@<br>
     outTempCU->getQuadtreeTULog2MinSizeInCU(tuDepthRange, 0);<br>
<br>
     if (sharedModes)<br>
-        sharedEstIntraPredQT(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth], tuDepthRange, sharedModes);<br>
+        sharedEstIntraPredQT(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth], tuDepthRange, sharedModes, cu);<br>
     else<br>
-        estIntraPredQT(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth], tuDepthRange);<br>
+        estIntraPredQT(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth], tuDepthRange, cu);<br>
<br>
-    estIntraPredChromaQT(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth]);<br>
+    estIntraPredChromaQT(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth], cu);<br>
<br>
     m_entropyCoder->resetBits();<br>
     if (outTempCU->m_slice->m_pps->bTransquantBypassEnabled)<br>
@@ -731,11 +731,11 @@<br>
     checkBestMode(outBestCU, outTempCU, depth);<br>
 }<br>
<br>
-void Analysis::compressInterCU_rd0_4(TComDataCU*& outBestCU, TComDataCU*& outTempCU, TComDataCU* cu, uint32_t depth, TComDataCU* cuPicsym, CU *cu_t, int bInsidePicture, uint32_t PartitionIndex, uint32_t minDepth)<br>
+void Analysis::compressInterCU_rd0_4(TComDataCU*& outBestCU, TComDataCU*& outTempCU, TComDataCU* cu, uint32_t depth, TComDataCU* cuPicsym, CU *cuData, int bInsidePicture, uint32_t PartitionIndex, uint32_t minDepth)<br>
 {<br>
     Frame* pic = outTempCU->m_pic;<br>
     uint32_t cuAddr = outTempCU->getAddr();<br>
-    uint32_t absPartIdx = outTempCU->getZorderIdxInCU();<br>
+    uint32_t absPartIdx = cuData->encodeIdx * 4;<br>
<br>
     if (depth == 0)<br>
         // get original YUV data from picture<br>
@@ -753,8 +753,8 @@<br>
 #endif<br>
<br>
     Slice* slice = outTempCU->m_slice;<br>
-    int cu_split_flag = !(cu_t->flags & CU::LEAF);<br>
-    int cu_unsplit_flag = !(cu_t->flags & CU::SPLIT_MANDATORY);<br>
+    int cu_split_flag = !(cuData->flags & CU::LEAF);<br>
+    int cu_unsplit_flag = !(cuData->flags & CU::SPLIT_MANDATORY);<br>
<br>
     if (depth == 0 && m_param->rdLevel == 0)<br>
     {<br>
@@ -810,16 +810,16 @@<br>
             }<br>
             else<br>
             {<br>
-                m_interCU_2Nx2N[depth]->initSubCU(cu, PartitionIndex, depth, qp);<br>
-                m_interCU_2NxN[depth]->initSubCU(cu, PartitionIndex, depth, qp);<br>
-                m_interCU_Nx2N[depth]->initSubCU(cu, PartitionIndex, depth, qp);<br>
-                m_intraInInterCU[depth]->initSubCU(cu, PartitionIndex, depth, qp);<br>
-                m_mergeCU[depth]->initSubCU(cu, PartitionIndex, depth, qp);<br>
-                m_bestMergeCU[depth]->initSubCU(cu, PartitionIndex, depth, qp);<br>
+                m_interCU_2Nx2N[depth]->initSubCU(cu, PartitionIndex, depth, qp, cuData);<br>
+                m_interCU_2NxN[depth]->initSubCU(cu, PartitionIndex, depth, qp, cuData);<br>
+                m_interCU_Nx2N[depth]->initSubCU(cu, PartitionIndex, depth, qp, cuData);<br>
+                m_intraInInterCU[depth]->initSubCU(cu, PartitionIndex, depth, qp, cuData);<br>
+                m_mergeCU[depth]->initSubCU(cu, PartitionIndex, depth, qp, cuData);<br>
+                m_bestMergeCU[depth]->initSubCU(cu, PartitionIndex, depth, qp, cuData);<br>
             }<br>
<br>
             /* Compute  Merge Cost */<br>
-            checkMerge2Nx2N_rd0_4(m_bestMergeCU[depth], m_mergeCU[depth], m_modePredYuv[3][depth], m_bestMergeRecoYuv[depth]);<br>
+            checkMerge2Nx2N_rd0_4(m_bestMergeCU[depth], m_mergeCU[depth], m_modePredYuv[3][depth], m_bestMergeRecoYuv[depth], cuData);<br>
             bool earlyskip = false;<br>
             if (m_param->rdLevel >= 1)<br>
                 earlyskip = (m_param->bEnableEarlySkip && m_bestMergeCU[depth]->isSkipped(0));<br>
@@ -828,7 +828,7 @@<br>
             {<br>
                 /* Compute 2Nx2N mode costs */<br>
                 {<br>
-                    checkInter_rd0_4(m_interCU_2Nx2N[depth], m_modePredYuv[0][depth], SIZE_2Nx2N);<br>
+                    checkInter_rd0_4(m_interCU_2Nx2N[depth], m_modePredYuv[0][depth], SIZE_2Nx2N, cuData);<br>
                     /* Choose best mode; initialise outBestCU to 2Nx2N */<br>
                     outBestCU = m_interCU_2Nx2N[depth];<br>
                     std::swap(m_bestPredYuv[depth], m_modePredYuv[0][depth]);<br>
@@ -837,8 +837,8 @@<br>
                 /* Compute Rect costs */<br>
                 if (m_param->bEnableRectInter)<br>
                 {<br>
-                    checkInter_rd0_4(m_interCU_Nx2N[depth], m_modePredYuv[1][depth], SIZE_Nx2N);<br>
-                    checkInter_rd0_4(m_interCU_2NxN[depth], m_modePredYuv[2][depth], SIZE_2NxN);<br>
+                    checkInter_rd0_4(m_interCU_Nx2N[depth], m_modePredYuv[1][depth], SIZE_Nx2N, cuData);<br>
+                    checkInter_rd0_4(m_interCU_2NxN[depth], m_modePredYuv[2][depth], SIZE_2NxN, cuData);<br>
                     if (m_interCU_Nx2N[depth]->m_sa8dCost < outBestCU->m_sa8dCost)<br>
                     {<br>
                         outBestCU = m_interCU_Nx2N[depth];<br>
@@ -857,12 +857,12 @@<br>
                     int numPart = outBestCU->getNumPartInter();<br>
                     for (int partIdx = 0; partIdx < numPart; partIdx++)<br>
                     {<br>
-                        prepMotionCompensation(outBestCU, partIdx);<br>
+                        prepMotionCompensation(outBestCU, partIdx, cuData);<br>
                         motionCompensation(outBestCU, m_bestPredYuv[depth], REF_PIC_LIST_X, false, true);<br>
                     }<br>
<br>
                     encodeResAndCalcRdInterCU(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth],<br>
-                                              m_bestResiYuv[depth], m_bestRecoYuv[depth]);<br>
+                        m_bestResiYuv[depth], m_bestRecoYuv[depth], cuData);<br>
                     uint64_t bestMergeCost = m_rdCost.m_psyRd ? m_bestMergeCU[depth]->m_totalPsyCost : m_bestMergeCU[depth]->m_totalRDCost;<br>
                     uint64_t bestCost = m_rdCost.m_psyRd ? outBestCU->m_totalPsyCost : outBestCU->m_totalRDCost;<br>
                     if (bestMergeCost < bestCost)<br>
@@ -890,12 +890,12 @@<br>
                     }<br>
                     if (bdoIntra)<br>
                     {<br>
-                        checkIntraInInter_rd0_4(m_intraInInterCU[depth], SIZE_2Nx2N);<br>
+                        checkIntraInInter_rd0_4(m_intraInInterCU[depth], SIZE_2Nx2N, cuData);<br>
                         uint64_t intraInInterCost, bestCost;<br>
                         if (m_param->rdLevel > 2)<br>
                         {<br>
                             encodeIntraInInter(m_intraInInterCU[depth], m_origYuv[depth], m_modePredYuv[5][depth],<br>
-                                               m_tmpResiYuv[depth], m_tmpRecoYuv[depth]);<br>
+                                               m_tmpResiYuv[depth], m_tmpRecoYuv[depth], cuData);<br>
                             intraInInterCost = m_rdCost.m_psyRd ? m_intraInInterCU[depth]->m_totalPsyCost : m_intraInInterCU[depth]->m_totalRDCost;<br>
                             bestCost = m_rdCost.m_psyRd ? outBestCU->m_totalPsyCost : outBestCU->m_totalRDCost;<br>
                         }<br>
@@ -927,17 +927,17 @@<br>
                         int numPart = outBestCU->getNumPartInter();<br>
                         for (int partIdx = 0; partIdx < numPart; partIdx++)<br>
                         {<br>
-                            prepMotionCompensation(outBestCU, partIdx);<br>
+                            prepMotionCompensation(outBestCU, partIdx, cuData);<br>
                             motionCompensation(outBestCU, m_bestPredYuv[depth], REF_PIC_LIST_X, false, true);<br>
                         }<br>
<br>
                         encodeResAndCalcRdInterCU(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth],<br>
-                                                  m_bestResiYuv[depth], m_bestRecoYuv[depth]);<br>
+                                                  m_bestResiYuv[depth], m_bestRecoYuv[depth], cuData);<br>
                         m_rdEntropyCoders[depth][CI_TEMP_BEST].store(m_rdEntropyCoders[depth][CI_NEXT_BEST]);<br>
                     }<br>
                     else if (outBestCU->getPredictionMode(0) == MODE_INTRA)<br>
                     {<br>
-                        encodeIntraInInter(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth],  m_bestRecoYuv[depth]);<br>
+                        encodeIntraInInter(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth],  m_bestRecoYuv[depth], cuData);<br>
                         m_rdEntropyCoders[depth][CI_TEMP_BEST].store(m_rdEntropyCoders[depth][CI_NEXT_BEST]);<br>
                     }<br>
                 }<br>
@@ -954,15 +954,15 @@<br>
                         int numPart = outBestCU->getNumPartInter();<br>
                         for (int partIdx = 0; partIdx < numPart; partIdx++)<br>
                         {<br>
-                            prepMotionCompensation(outBestCU, partIdx);<br>
+                            prepMotionCompensation(outBestCU, partIdx, cuData);<br>
                             motionCompensation(outBestCU, m_bestPredYuv[depth], REF_PIC_LIST_X, false, true);<br>
                         }<br>
<br>
                         m_tmpResiYuv[depth]->subtract(m_origYuv[depth], m_bestPredYuv[depth], outBestCU->getLog2CUSize(0));<br>
-                        generateCoeffRecon(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth], m_bestRecoYuv[depth]);<br>
+                        generateCoeffRecon(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth], m_bestRecoYuv[depth], cuData);<br>
                     }<br>
                     else<br>
-                        generateCoeffRecon(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth], m_bestRecoYuv[depth]);<br>
+                        generateCoeffRecon(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth], m_bestRecoYuv[depth], cuData);<br>
                 }<br>
                 else if (m_param->rdLevel == 0)<br>
                 {<br>
@@ -971,7 +971,7 @@<br>
                         int numPart = outBestCU->getNumPartInter();<br>
                         for (int partIdx = 0; partIdx < numPart; partIdx++)<br>
                         {<br>
-                            prepMotionCompensation(outBestCU, partIdx);<br>
+                            prepMotionCompensation(outBestCU, partIdx, cuData);<br>
                             motionCompensation(outBestCU, m_bestPredYuv[depth], REF_PIC_LIST_X, false, true);<br>
                         }<br>
                     }<br>
@@ -1081,10 +1081,10 @@<br>
         TComDataCU* subTempPartCU = m_tempCU[nextDepth];<br>
         for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++)<br>
         {<br>
-            CU *child_cu = cuPicsym->m_CULocalData + cu_t->childIdx + partUnitIdx;<br>
+            CU *child_cu = cuPicsym->m_CULocalData + cuData->childIdx + partUnitIdx;<br>
<br>
             TComDataCU* subBestPartCU = NULL;<br>
-            subTempPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp); // clear sub partition datas or init.<br>
+            subTempPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp, cuData); // clear sub partition datas or init.<br>
<br>
             if (child_cu->flags & CU::PRESENT)<br>
             {<br>
@@ -1202,7 +1202,7 @@<br>
     outBestCU->copyToPic(depth);<br>
<br>
     if (m_param->rdLevel == 0 && depth == 0)<br>
-        encodeResidue(outBestCU, outBestCU, 0, 0);<br>
+        encodeResidue(outBestCU, outBestCU, 0, 0, cuData);<br>
     else if (m_param->rdLevel != 0)<br>
     {<br>
         /* Copy Yuv data to picture Yuv */<br>
@@ -1244,7 +1244,7 @@<br>
<br>
     Frame* pic = outBestCU->m_pic;<br>
     uint32_t cuAddr = outBestCU->getAddr();<br>
-    uint32_t absPartIdx = outBestCU->getZorderIdxInCU();<br>
+    uint32_t absPartIdx = cu->encodeIdx * 4;<br>
<br>
     if (depth == 0)<br>
         // get original YUV data from picture<br>
@@ -1270,14 +1270,14 @@<br>
         if (slice->m_sliceType != I_SLICE)<br>
         {<br>
             // by Merge for inter_2Nx2N<br>
-            checkMerge2Nx2N_rd5_6(outBestCU, outTempCU, &earlyDetectionSkipMode, m_bestPredYuv[depth], m_bestRecoYuv[depth]);<br>
+            checkMerge2Nx2N_rd5_6(outBestCU, outTempCU, &earlyDetectionSkipMode, m_bestPredYuv[depth], m_bestRecoYuv[depth], cu);<br>
<br>
             outTempCU->initEstData();<br>
<br>
             if (!m_param->bEnableEarlySkip)<br>
             {<br>
                 // 2Nx2N, NxN<br>
-                checkInter_rd5_6(outBestCU, outTempCU, SIZE_2Nx2N);<br>
+                checkInter_rd5_6(outBestCU, outTempCU, SIZE_2Nx2N, cu);<br>
                 outTempCU->initEstData();<br>
                 if (m_param->bEnableCbfFastMode)<br>
                     doNotBlockPu = outBestCU->getQtRootCbf(0) != 0;<br>
@@ -1296,7 +1296,7 @@<br>
                 {<br>
                     if (depth == g_maxCUDepth && doNotBlockPu)<br>
                     {<br>
-                        checkInter_rd5_6(outBestCU, outTempCU, SIZE_NxN);<br>
+                        checkInter_rd5_6(outBestCU, outTempCU, SIZE_NxN, cu);<br>
                         outTempCU->initEstData();<br>
                     }<br>
                 }<br>
@@ -1306,14 +1306,14 @@<br>
                     // 2NxN, Nx2N<br>
                     if (doNotBlockPu)<br>
                     {<br>
-                        checkInter_rd5_6(outBestCU, outTempCU, SIZE_Nx2N);<br>
+                        checkInter_rd5_6(outBestCU, outTempCU, SIZE_Nx2N, cu);<br>
                         outTempCU->initEstData();<br>
                         if (m_param->bEnableCbfFastMode && outBestCU->getPartitionSize(0) == SIZE_Nx2N)<br>
                             doNotBlockPu = outBestCU->getQtRootCbf(0) != 0;<br>
                     }<br>
                     if (doNotBlockPu)<br>
                     {<br>
-                        checkInter_rd5_6(outBestCU, outTempCU, SIZE_2NxN);<br>
+                        checkInter_rd5_6(outBestCU, outTempCU, SIZE_2NxN, cu);<br>
                         outTempCU->initEstData();<br>
                         if (m_param->bEnableCbfFastMode && outBestCU->getPartitionSize(0) == SIZE_2NxN)<br>
                             doNotBlockPu = outBestCU->getQtRootCbf(0) != 0;<br>
@@ -1333,14 +1333,14 @@<br>
                     {<br>
                         if (doNotBlockPu)<br>
                         {<br>
-                            checkInter_rd5_6(outBestCU, outTempCU, SIZE_2NxnU);<br>
+                            checkInter_rd5_6(outBestCU, outTempCU, SIZE_2NxnU, cu);<br>
                             outTempCU->initEstData();<br>
                             if (m_param->bEnableCbfFastMode && outBestCU->getPartitionSize(0) == SIZE_2NxnU)<br>
                                 doNotBlockPu = outBestCU->getQtRootCbf(0) != 0;<br>
                         }<br>
                         if (doNotBlockPu)<br>
                         {<br>
-                            checkInter_rd5_6(outBestCU, outTempCU, SIZE_2NxnD);<br>
+                            checkInter_rd5_6(outBestCU, outTempCU, SIZE_2NxnD, cu);<br>
                             outTempCU->initEstData();<br>
                             if (m_param->bEnableCbfFastMode && outBestCU->getPartitionSize(0) == SIZE_2NxnD)<br>
                                 doNotBlockPu = outBestCU->getQtRootCbf(0) != 0;<br>
@@ -1350,14 +1350,14 @@<br>
                     {<br>
                         if (doNotBlockPu)<br>
                         {<br>
-                            checkInter_rd5_6(outBestCU, outTempCU, SIZE_2NxnU, true);<br>
+                            checkInter_rd5_6(outBestCU, outTempCU, SIZE_2NxnU, cu, true);<br>
                             outTempCU->initEstData();<br>
                             if (m_param->bEnableCbfFastMode && outBestCU->getPartitionSize(0) == SIZE_2NxnU)<br>
                                 doNotBlockPu = outBestCU->getQtRootCbf(0) != 0;<br>
                         }<br>
                         if (doNotBlockPu)<br>
                         {<br>
-                            checkInter_rd5_6(outBestCU, outTempCU, SIZE_2NxnD, true);<br>
+                            checkInter_rd5_6(outBestCU, outTempCU, SIZE_2NxnD, cu, true);<br>
                             outTempCU->initEstData();<br>
                             if (m_param->bEnableCbfFastMode && outBestCU->getPartitionSize(0) == SIZE_2NxnD)<br>
                                 doNotBlockPu = outBestCU->getQtRootCbf(0) != 0;<br>
@@ -1369,14 +1369,14 @@<br>
                     {<br>
                         if (doNotBlockPu)<br>
                         {<br>
-                            checkInter_rd5_6(outBestCU, outTempCU, SIZE_nLx2N);<br>
+                            checkInter_rd5_6(outBestCU, outTempCU, SIZE_nLx2N, cu);<br>
                             outTempCU->initEstData();<br>
                             if (m_param->bEnableCbfFastMode && outBestCU->getPartitionSize(0) == SIZE_nLx2N)<br>
                                 doNotBlockPu = outBestCU->getQtRootCbf(0) != 0;<br>
                         }<br>
                         if (doNotBlockPu)<br>
                         {<br>
-                            checkInter_rd5_6(outBestCU, outTempCU, SIZE_nRx2N);<br>
+                            checkInter_rd5_6(outBestCU, outTempCU, SIZE_nRx2N, cu);<br>
                             outTempCU->initEstData();<br>
                         }<br>
                     }<br>
@@ -1384,14 +1384,14 @@<br>
                     {<br>
                         if (doNotBlockPu)<br>
                         {<br>
-                            checkInter_rd5_6(outBestCU, outTempCU, SIZE_nLx2N, true);<br>
+                            checkInter_rd5_6(outBestCU, outTempCU, SIZE_nLx2N, cu, true);<br>
                             outTempCU->initEstData();<br>
                             if (m_param->bEnableCbfFastMode && outBestCU->getPartitionSize(0) == SIZE_nLx2N)<br>
                                 doNotBlockPu = outBestCU->getQtRootCbf(0) != 0;<br>
                         }<br>
                         if (doNotBlockPu)<br>
                         {<br>
-                            checkInter_rd5_6(outBestCU, outTempCU, SIZE_nRx2N, true);<br>
+                            checkInter_rd5_6(outBestCU, outTempCU, SIZE_nRx2N, cu, true);<br>
                             outTempCU->initEstData();<br>
                         }<br>
                     }<br>
@@ -1404,14 +1404,14 @@<br>
                  outBestCU->getCbf(0, TEXT_CHROMA_U) != 0   ||<br>
                  outBestCU->getCbf(0, TEXT_CHROMA_V) != 0)  && doIntra)<br>
             {<br>
-                checkIntraInInter_rd5_6(outBestCU, outTempCU, SIZE_2Nx2N);<br>
+                checkIntraInInter_rd5_6(outBestCU, outTempCU, SIZE_2Nx2N, cu);<br>
                 outTempCU->initEstData();<br>
<br>
                 if (depth == g_maxCUDepth)<br>
                 {<br>
                     if (cu->log2CUSize > slice->m_sps->quadtreeTULog2MinSize)<br>
                     {<br>
-                        checkIntraInInter_rd5_6(outBestCU, outTempCU, SIZE_NxN);<br>
+                        checkIntraInInter_rd5_6(outBestCU, outTempCU, SIZE_NxN, cu);<br>
                         outTempCU->initEstData();<br>
                     }<br>
                 }<br>
@@ -1445,11 +1445,11 @@<br>
             CU *child_cu = cuPicsym->m_CULocalData + cu->childIdx + partUnitIdx;<br>
<br>
             int qp = outTempCU->getQP(0);<br>
-            subBestPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp); // clear sub partition datas or init.<br>
+            subBestPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp, cu); // clear sub partition datas or init.<br>
<br>
             if (child_cu->flags & CU::PRESENT)<br>
             {<br>
-                subTempPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp); // clear sub partition datas or init.<br>
+                subTempPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp, cu); // clear sub partition datas or init.<br>
<br>
                 if (0 == partUnitIdx) //initialize RD with previous depth buffer<br>
                     m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[depth][CI_CURR_BEST]);<br>
@@ -1525,7 +1525,7 @@<br>
 #endif<br>
 }<br>
<br>
-void Analysis::checkMerge2Nx2N_rd0_4(TComDataCU*& outBestCU, TComDataCU*& outTempCU, TComYuv*& bestPredYuv, TComYuv*& yuvReconBest)<br>
+void Analysis::checkMerge2Nx2N_rd0_4(TComDataCU*& outBestCU, TComDataCU*& outTempCU, TComYuv*& bestPredYuv, TComYuv*& yuvReconBest, CU* cuData)<br>
 {<br>
     X265_CHECK(outTempCU->m_slice->m_sliceType != I_SLICE, "Evaluating merge in I slice\n");<br>
     TComMvField mvFieldNeighbours[MRG_MAX_NUM_CANDS][2]; // double length for mv of both lists<br>
@@ -1561,7 +1561,7 @@<br>
<br>
             // do MC only for Luma part<br>
             /* Set CU parameters for motion compensation */<br>
-            prepMotionCompensation(outTempCU, 0);<br>
+            prepMotionCompensation(outTempCU, 0, cuData);<br>
             motionCompensation(outTempCU, m_tmpPredYuv[depth], REF_PIC_LIST_X, true, false);<br>
             uint32_t bitsCand = getTUBits(mergeCand, maxNumMergeCand);<br>
             outTempCU->m_totalBits = bitsCand;<br>
@@ -1600,7 +1600,7 @@<br>
             int numPart = outBestCU->getNumPartInter();<br>
             for (int partIdx = 0; partIdx < numPart; partIdx++)<br>
             {<br>
-                prepMotionCompensation(outBestCU, partIdx);<br>
+                prepMotionCompensation(outBestCU, partIdx, cuData);<br>
                 motionCompensation(outBestCU, bestPredYuv, REF_PIC_LIST_X, false, true);<br>
             }<br>
<br>
@@ -1615,7 +1615,7 @@<br>
             }<br>
<br>
             // Encode with residue<br>
-            encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth], bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth]);<br>
+            encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth], bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth], cuData);<br>
<br>
             uint64_t tempCost = m_rdCost.m_psyRd ? outTempCU->m_totalPsyCost : outTempCU->m_totalRDCost;<br>
             uint64_t bestCost = m_rdCost.m_psyRd ? outBestCU->m_totalPsyCost : outBestCU->m_totalRDCost;<br>
@@ -1629,7 +1629,7 @@<br>
     }<br>
 }<br>
<br>
-void Analysis::checkMerge2Nx2N_rd5_6(TComDataCU*& outBestCU, TComDataCU*& outTempCU, bool *earlyDetectionSkipMode, TComYuv*& outBestPredYuv, TComYuv*& rpcYuvReconBest)<br>
+void Analysis::checkMerge2Nx2N_rd5_6(TComDataCU*& outBestCU, TComDataCU*& outTempCU, bool *earlyDetectionSkipMode, TComYuv*& outBestPredYuv, TComYuv*& rpcYuvReconBest, CU* cuData)<br>
 {<br>
     X265_CHECK(outTempCU->m_slice->m_sliceType != I_SLICE, "I slice not expected\n");<br>
     TComMvField mvFieldNeighbours[MRG_MAX_NUM_CANDS][2]; // double length for mv of both lists<br>
@@ -1674,7 +1674,7 @@<br>
                     outTempCU->getCUMvField(REF_PIC_LIST_1)->setAllMvField(mvFieldNeighbours[mergeCand][1], SIZE_2Nx2N, 0, 0); // interprets depth relative to outTempCU level<br>
<br>
                     // do MC<br>
-                    prepMotionCompensation(outTempCU, 0);<br>
+                    prepMotionCompensation(outTempCU, 0, cuData);<br>
                     motionCompensation(outTempCU, m_tmpPredYuv[depth], REF_PIC_LIST_X, true, true);<br>
                     // estimate residual and encode everything<br>
                     if (noResidual)<br>
@@ -1688,7 +1688,8 @@<br>
                                                   m_tmpPredYuv[depth],<br>
                                                   m_tmpResiYuv[depth],<br>
                                                   m_bestResiYuv[depth],<br>
-                                                  m_tmpRecoYuv[depth]);<br>
+                                                  m_tmpRecoYuv[depth],<br>
+                                                  cuData);<br>
<br>
<br>
                     /* Todo: Fix the satd cost estimates. Why is merge being chosen in high motion areas: estimated distortion is too low? */<br>
@@ -1733,7 +1734,7 @@<br>
     }<br>
 }<br>
<br>
-void Analysis::checkInter_rd0_4(TComDataCU* outTempCU, TComYuv* outPredYuv, PartSize partSize, bool bUseMRG)<br>
+void Analysis::checkInter_rd0_4(TComDataCU* outTempCU, TComYuv* outPredYuv, PartSize partSize, CU* cuData, bool bUseMRG)<br>
 {<br>
     uint32_t depth = outTempCU->getDepth(0);<br>
<br>
@@ -1743,7 +1744,7 @@<br>
<br>
     // do motion compensation only for Luma since luma cost alone is calculated<br>
     outTempCU->m_totalBits = 0;<br>
-    if (predInterSearch(outTempCU, outPredYuv, bUseMRG, false))<br>
+    if (predInterSearch(outTempCU, outPredYuv, bUseMRG, false, cuData))<br>
     {<br>
         int sizeIdx = outTempCU->getLog2CUSize(0) - 2;<br>
         uint32_t distortion = primitives.sa8d[sizeIdx](m_origYuv[depth]->getLumaAddr(), m_origYuv[depth]->getStride(),<br>
@@ -1758,7 +1759,7 @@<br>
     }<br>
 }<br>
<br>
-void Analysis::checkInter_rd5_6(TComDataCU*& outBestCU, TComDataCU*& outTempCU, PartSize partSize, bool bUseMRG)<br>
+void Analysis::checkInter_rd5_6(TComDataCU*& outBestCU, TComDataCU*& outTempCU, PartSize partSize, CU* cuData, bool bUseMRG)<br>
 {<br>
     uint32_t depth = outTempCU->getDepth(0);<br>
<br>
@@ -1767,15 +1768,15 @@<br>
     outTempCU->setPredModeSubParts(MODE_INTER, 0, depth);<br>
     outTempCU->setCUTransquantBypassSubParts(!!m_param->bLossless, 0, depth);<br>
<br>
-    if (predInterSearch(outTempCU, m_tmpPredYuv[depth], bUseMRG, true))<br>
+    if (predInterSearch(outTempCU, m_tmpPredYuv[depth], bUseMRG, true, cuData))<br>
     {<br>
-        encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth]);<br>
+        encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth], cuData);<br>
         checkDQP(outTempCU);<br>
         checkBestMode(outBestCU, outTempCU, depth);<br>
     }<br>
 }<br>
<br>
-void Analysis::checkIntraInInter_rd0_4(TComDataCU* cu, PartSize partSize)<br>
+void Analysis::checkIntraInInter_rd0_4(TComDataCU* cu, PartSize partSize, CU* cuData)<br>
 {<br>
     uint32_t depth = cu->getDepth(0);<br>
<br>
@@ -1789,7 +1790,7 @@<br>
     const uint32_t partOffset  = 0;<br>
<br>
     // Reference sample smoothing<br>
-    TComPattern::initAdiPattern(cu, partOffset, initTrDepth, m_predBuf, m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt, ALL_IDX);<br>
+    TComPattern::initAdiPattern(cu, partOffset, initTrDepth, m_predBuf, m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt, ALL_IDX, cuData);<br>
<br>
     pixel* fenc     = m_origYuv[depth]->getLumaAddr();<br>
     uint32_t stride = m_modePredYuv[5][depth]->getStride();<br>
@@ -1941,7 +1942,7 @@<br>
     cu->setLumaIntraDirSubParts(bmode, partOffset, depth + initTrDepth);<br>
 }<br>
<br>
-void Analysis::checkIntraInInter_rd5_6(TComDataCU*& outBestCU, TComDataCU*& outTempCU, PartSize partSize)<br>
+void Analysis::checkIntraInInter_rd5_6(TComDataCU*& outBestCU, TComDataCU*& outTempCU, PartSize partSize, CU* cuData)<br>
 {<br>
     uint32_t depth = outTempCU->getDepth(0);<br>
<br>
@@ -1956,9 +1957,9 @@<br>
     uint32_t tuDepthRange[2];<br>
     outTempCU->getQuadtreeTULog2MinSizeInCU(tuDepthRange, 0);<br>
<br>
-    estIntraPredQT(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth], tuDepthRange);<br>
+    estIntraPredQT(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth], tuDepthRange, cuData);<br>
<br>
-    estIntraPredChromaQT(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth]);<br>
+    estIntraPredChromaQT(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth], cuData);<br>
<br>
     m_entropyCoder->resetBits();<br>
     if (outTempCU->m_slice->m_pps->bTransquantBypassEnabled)<br>
@@ -1994,7 +1995,7 @@<br>
     checkBestMode(outBestCU, outTempCU, depth);<br>
 }<br>
<br>
-void Analysis::encodeIntraInInter(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv,  ShortYuv* outResiYuv, TComYuv* outReconYuv)<br>
+void Analysis::encodeIntraInInter(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv,  ShortYuv* outResiYuv, TComYuv* outReconYuv, CU* cuData)<br>
 {<br>
     uint64_t puCost = 0;<br>
     uint32_t puBits = 0;<br>
@@ -2009,7 +2010,7 @@<br>
     uint32_t tuDepthRange[2];<br>
     cu->getQuadtreeTULog2MinSizeInCU(tuDepthRange, 0);<br>
<br>
-    uint32_t puDistY = xRecurIntraCodingQT(cu, initTrDepth, 0, fencYuv, predYuv, outResiYuv, false, puCost, puBits, tuDepthRange);<br>
+    uint32_t puDistY = xRecurIntraCodingQT(cu, initTrDepth, 0, fencYuv, predYuv, outResiYuv, false, puCost, puBits, tuDepthRange, cuData);<br>
     xSetIntraResultQT(cu, initTrDepth, 0, outReconYuv);<br>
<br>
     //=== update PU data ====<br>
@@ -2018,7 +2019,7 @@<br>
     //===== set distortion (rate and r-d costs are determined later) =====<br>
     cu->m_totalDistortion = puDistY;<br>
<br>
-    estIntraPredChromaQT(cu, fencYuv, predYuv, outResiYuv, outReconYuv);<br>
+    estIntraPredChromaQT(cu, fencYuv, predYuv, outResiYuv, outReconYuv, cuData);<br>
     m_entropyCoder->resetBits();<br>
     if (cu->m_slice->m_pps->bTransquantBypassEnabled)<br>
         m_entropyCoder->codeCUTransquantBypassFlag(cu->getCUTransquantBypass(0));<br>
@@ -2050,7 +2051,7 @@<br>
         cu->m_totalRDCost = m_rdCost.calcRdCost(cu->m_totalDistortion, cu->m_totalBits);<br>
 }<br>
<br>
-void Analysis::encodeResidue(TComDataCU* lcu, TComDataCU* cu, uint32_t absPartIdx, uint32_t depth)<br>
+void Analysis::encodeResidue(TComDataCU* lcu, TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, CU* cuData)<br>
 {<br>
     Frame* pic = cu->m_pic;<br>
<br>
@@ -2063,10 +2064,11 @@<br>
         uint32_t xmax = slice->m_sps->picWidthInLumaSamples  - lcu->getCUPelX();<br>
         uint32_t ymax = slice->m_sps->picHeightInLumaSamples - lcu->getCUPelY();        for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++, absPartIdx += qNumParts)<br>
         {<br>
+            CU *child_cu = cu->m_CULocalData + cuData->childIdx + partUnitIdx;<br>
             if (g_zscanToPelX[absPartIdx] < xmax && g_zscanToPelY[absPartIdx] < ymax)<br>
             {<br>
-                subTempPartCU->copyToSubCU(cu, partUnitIdx, nextDepth);<br>
-                encodeResidue(lcu, subTempPartCU, absPartIdx, nextDepth);<br>
+                subTempPartCU->copyToSubCU(cu, partUnitIdx, nextDepth, child_cu);<br>
+                encodeResidue(lcu, subTempPartCU, absPartIdx, nextDepth, child_cu);<br>
             }<br>
         }<br>
<br>
@@ -2108,7 +2110,7 @@<br>
             uint32_t tuDepthRange[2];<br>
             cu->getQuadtreeTULog2MinSizeInCU(tuDepthRange, 0);<br>
             // Residual encoding<br>
-            residualTransformQuantInter(cu, 0, m_origYuv[0], m_tmpResiYuv[depth], cu->getDepth(0), tuDepthRange);<br>
+            residualTransformQuantInter(cu, 0, m_origYuv[0], m_tmpResiYuv[depth], cu->getDepth(0), tuDepthRange, cuData);<br>
             checkDQP(cu);<br>
<br>
             if (lcu->getMergeFlag(absPartIdx) && cu->getPartitionSize(0) == SIZE_2Nx2N && !cu->getQtRootCbf(0))<br>
@@ -2168,7 +2170,7 @@<br>
     else<br>
     {<br>
         m_origYuv[0]->copyPartToYuv(m_origYuv[depth], absPartIdx);<br>
-        generateCoeffRecon(cu, m_origYuv[depth], m_modePredYuv[5][depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth]);<br>
+        generateCoeffRecon(cu, m_origYuv[depth], m_modePredYuv[5][depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth], cuData);<br>
         checkDQP(cu);<br>
         m_tmpRecoYuv[depth]->copyToPicYuv(pic->getPicYuvRec(), cuAddr, absPartIdx);<br>
         cu->copyCodedToPic(depth);<br>
diff -r c8f53398f8ce -r f70fd79cb3e1 source/encoder/analysis.h<br>
--- a/source/encoder/analysis.h Sat Sep 20 15:41:08 2014 +0100<br>
+++ b/source/encoder/analysis.h Mon Sep 22 09:05:53 2014 +0530<br>
@@ -113,21 +113,21 @@<br>
     void checkIntra(TComDataCU*& outBestCU, TComDataCU*& outTempCU, PartSize partSize, CU *cu, uint8_t* sharedModes);<br>
     void compressSharedIntraCTU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth, TComDataCU* cuPicsym, CU *cu, uint8_t* sharedDepth, char* sharedPartSizes, uint8_t* sharedModes, uint32_t &zOrder);<br>
<br>
-    void compressInterCU_rd0_4(TComDataCU*& outBestCU, TComDataCU*& outTempCU, TComDataCU* cu, uint32_t depth, TComDataCU* cuPicsym, CU *cu_t,<br>
+    void compressInterCU_rd0_4(TComDataCU*& outBestCU, TComDataCU*& outTempCU, TComDataCU* cu, uint32_t depth, TComDataCU* cuPicsym, CU *cuData,<br>
                                int bInsidePicture, uint32_t partitionIndex, uint32_t minDepth);<br>
     void compressInterCU_rd5_6(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth, TComDataCU* cuPicsym, CU *cu,<br>
                                PartSize parentSize = SIZE_NONE);<br>
-    void checkMerge2Nx2N_rd0_4(TComDataCU*& outBestCU, TComDataCU*& outTempCU, TComYuv*& bestPredYuv, TComYuv*& tmpPredYuv);<br>
+    void checkMerge2Nx2N_rd0_4(TComDataCU*& outBestCU, TComDataCU*& outTempCU, TComYuv*& bestPredYuv, TComYuv*& tmpPredYuv, CU* cuData);<br>
     void checkMerge2Nx2N_rd5_6(TComDataCU*& outBestCU, TComDataCU*& outTempCU, bool *earlyDetectionSkipMode,<br>
-                               TComYuv*& outBestPredYuv, TComYuv*& rpcYuvReconBest);<br>
-    void checkInter_rd0_4(TComDataCU* outTempCU, TComYuv* outPredYUV, PartSize partSize, bool bUseMRG = false);<br>
-    void checkInter_rd5_6(TComDataCU*& outBestCU, TComDataCU*& outTempCU, PartSize partSize, bool bUseMRG = false);<br>
-    void checkIntraInInter_rd0_4(TComDataCU* cu, PartSize partSize);<br>
-    void checkIntraInInter_rd5_6(TComDataCU*& outBestCU, TComDataCU*& outTempCU, PartSize partSize);<br>
+                               TComYuv*& outBestPredYuv, TComYuv*& rpcYuvReconBest, CU* cuData);<br>
+    void checkInter_rd0_4(TComDataCU* outTempCU, TComYuv* outPredYUV, PartSize partSize, CU* cuData, bool bUseMRG = false);<br>
+    void checkInter_rd5_6(TComDataCU*& outBestCU, TComDataCU*& outTempCU, PartSize partSize, CU* cuData, bool bUseMRG = false);<br>
+    void checkIntraInInter_rd0_4(TComDataCU* cu, PartSize partSize, CU* cuData);<br>
+    void checkIntraInInter_rd5_6(TComDataCU*& outBestCU, TComDataCU*& outTempCU, PartSize partSize, CU* cuData);<br>
<br>
     void checkBestMode(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth);<br>
-    void encodeIntraInInter(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* outResiYuv, TComYuv* outReconYuv);<br>
-    void encodeResidue(TComDataCU* lcu, TComDataCU* cu, uint32_t absPartIdx, uint32_t depth);<br>
+    void encodeIntraInInter(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* outResiYuv, TComYuv* outReconYuv, CU* cuData);<br>
+    void encodeResidue(TComDataCU* lcu, TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, CU* cuData);<br>
     void checkDQP(TComDataCU* cu);<br>
     void deriveTestModeAMP(TComDataCU* bestCU, PartSize parentSize, bool &bTestAMP_Hor, bool &bTestAMP_Ver,<br>
                            bool &bTestMergeAMP_Hor, bool &bTestMergeAMP_Ver);<br>
diff -r c8f53398f8ce -r f70fd79cb3e1 source/encoder/predict.cpp<br>
--- a/source/encoder/predict.cpp        Sat Sep 20 15:41:08 2014 +0100<br>
+++ b/source/encoder/predict.cpp        Mon Sep 22 09:05:53 2014 +0530<br>
@@ -170,12 +170,12 @@<br>
     return false;<br>
 }<br>
<br>
-void Predict::prepMotionCompensation(TComDataCU* cu, int partIdx)<br>
+void Predict::prepMotionCompensation(TComDataCU* cu, int partIdx, CU* cuData)<br>
 {<br>
     m_slice = cu->m_slice;<br>
     cu->getPartIndexAndSize(partIdx, m_partAddr, m_width, m_height);<br>
     m_cuAddr = cu->getAddr();<br>
-    m_zOrderIdxinCU = cu->getZorderIdxInCU();<br>
+    m_zOrderIdxinCU = cuData->encodeIdx * 4;<br>
<br>
     m_mvField[0] = cu->getCUMvField(REF_PIC_LIST_0);<br>
     m_mvField[1] = cu->getCUMvField(REF_PIC_LIST_1);<br>
diff -r c8f53398f8ce -r f70fd79cb3e1 source/encoder/predict.h<br>
--- a/source/encoder/predict.h  Sat Sep 20 15:41:08 2014 +0100<br>
+++ b/source/encoder/predict.h  Mon Sep 22 09:05:53 2014 +0530<br>
@@ -88,7 +88,7 @@<br>
     void initTempBuff(int csp);<br>
<br>
     // prepMotionCompensation needs to be called to prepare MC with CU-relevant data */<br>
-    void prepMotionCompensation(TComDataCU* cu, int partIdx);<br>
+    void prepMotionCompensation(TComDataCU* cu, int partIdx, CU* cuData);<br>
     void motionCompensation(TComDataCU* cu, TComYuv* predYuv, int picList, bool bLuma, bool bChroma);<br>
<br>
     // Angular Intra<br>
diff -r c8f53398f8ce -r f70fd79cb3e1 source/encoder/search.cpp<br>
--- a/source/encoder/search.cpp Sat Sep 20 15:41:08 2014 +0100<br>
+++ b/source/encoder/search.cpp Mon Sep 22 09:05:53 2014 +0530<br>
@@ -288,14 +288,13 @@<br>
<br>
 /* returns distortion */<br>
 uint32_t Search::xIntraCodingLumaBlk(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize, TComYuv* fencYuv, TComYuv* predYuv,<br>
-                                     ShortYuv* resiYuv, int16_t* reconQt, uint32_t reconQtStride, coeff_t* coeff, uint32_t& cbf)<br>
+                                     ShortYuv* resiYuv, int16_t* reconQt, uint32_t reconQtStride, coeff_t* coeff, uint32_t& cbf, CU* cuData)<br>
 {<br>
     uint32_t stride       = fencYuv->getStride();<br>
     pixel*   fenc         = fencYuv->getLumaAddr(absPartIdx);<br>
     pixel*   pred         = predYuv->getLumaAddr(absPartIdx);<br>
     int16_t* residual     = resiYuv->getLumaAddr(absPartIdx);<br>
-<br>
-    uint32_t zorder           = cu->getZorderIdxInCU() + absPartIdx;<br>
+    uint32_t zorder           = cuData->encodeIdx * 4 + absPartIdx;<br>
     pixel*   reconIPred       = cu->m_pic->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);<br>
     uint32_t reconIPredStride = cu->m_pic->getPicYuvRec()->getStride();<br>
     bool     useTransformSkip = !!cu->getTransformSkip(absPartIdx, TEXT_LUMA);<br>
@@ -338,7 +337,7 @@<br>
 }<br>
<br>
 uint32_t Search::xIntraCodingChromaBlk(TComDataCU* cu, uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, int16_t* reconQt,<br>
-                                       uint32_t reconQtStride, coeff_t* coeff, uint32_t& cbf, uint32_t chromaId, uint32_t log2TrSizeC)<br>
+                                       uint32_t reconQtStride, coeff_t* coeff, uint32_t& cbf, uint32_t chromaId, uint32_t log2TrSizeC, CU* cuData)<br>
 {<br>
     TextType ttype        = (TextType)chromaId;<br>
     uint32_t stride       = fencYuv->getCStride();<br>
@@ -346,7 +345,7 @@<br>
     pixel*   pred         = predYuv->getChromaAddr(chromaId, absPartIdx);<br>
     int16_t* residual     = resiYuv->getChromaAddr(chromaId, absPartIdx);<br>
<br>
-    uint32_t zorder           = cu->getZorderIdxInCU() + absPartIdx;<br>
+    uint32_t zorder           = cuData->encodeIdx * 4 + absPartIdx;<br>
     pixel*   reconIPred       = cu->m_pic->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder);<br>
     uint32_t reconIPredStride = cu->m_pic->getPicYuvRec()->getCStride();<br>
     bool     useTransformSkipC = !!cu->getTransformSkip(absPartIdx, ttype);<br>
@@ -394,7 +393,7 @@<br>
<br>
 /* returns distortion. TODO reorder params */<br>
 uint32_t Search::xRecurIntraCodingQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv,<br>
-                                     ShortYuv* resiYuv, bool bAllowRQTSplit, uint64_t& rdCost, uint32_t& rdBits, uint32_t depthRange[2])<br>
+                                     ShortYuv* resiYuv, bool bAllowRQTSplit, uint64_t& rdCost, uint32_t& rdBits, uint32_t depthRange[2], CU* cuData)<br>
 {<br>
     uint32_t fullDepth   = cu->getDepth(0) + trDepth;<br>
     uint32_t log2TrSize  = g_maxLog2CUSize - fullDepth;<br>
@@ -454,7 +453,7 @@<br>
<br>
         // init availability pattern<br>
         uint32_t lumaPredMode = cu->getLumaIntraDir(absPartIdx);<br>
-        TComPattern::initAdiPattern(cu, absPartIdx, trDepth, m_predBuf, m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt, lumaPredMode);<br>
+        TComPattern::initAdiPattern(cu, absPartIdx, trDepth, m_predBuf, m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt, lumaPredMode, cuData);<br>
<br>
         // get prediction signal<br>
         predIntraLumaAng(lumaPredMode, pred, stride, log2TrSize);<br>
@@ -496,11 +495,11 @@<br>
                     cu->setCUTransquantBypassSubParts(bIsLossLess, absPartIdx, fullDepth);<br>
<br>
                 // code luma block with given intra prediction mode and store Cbf<br>
-                singleDistYTmp = xIntraCodingLumaBlk(cu, absPartIdx, log2TrSize, fencYuv, predYuv, resiYuv, recon, reconStride, coeff, singleCbfYTmp);<br>
+                singleDistYTmp = xIntraCodingLumaBlk(cu, absPartIdx, log2TrSize, fencYuv, predYuv, resiYuv, recon, reconStride, coeff, singleCbfYTmp, cuData);<br>
                 singlePsyEnergyYTmp = 0;<br>
                 if (m_rdCost.m_psyRd)<br>
                 {<br>
-                    uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;<br>
+                    uint32_t zorder = cuData->encodeIdx * 4 + absPartIdx;<br>
                     singlePsyEnergyYTmp = m_rdCost.psyCost(log2TrSize - 2, fencYuv->getLumaAddr(absPartIdx), fencYuv->getStride(),<br>
                         cu->m_pic->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder), cu->m_pic->getPicYuvRec()->getStride());<br>
                 }<br>
@@ -540,7 +539,7 @@<br>
<br>
             if (bestModeId == firstCheckId)<br>
             {<br>
-                xLoadIntraResultQT(cu, absPartIdx, log2TrSize, reconQt, reconQtStride);<br>
+                xLoadIntraResultQT(cu, absPartIdx, log2TrSize, reconQt, reconQtStride, cuData);<br>
                 cu->setCbfSubParts(singleCbfY << trDepth, TEXT_LUMA, absPartIdx, fullDepth);<br>
                 m_entropyCoder->load(m_rdEntropyCoders[fullDepth][CI_TEMP_BEST]);<br>
             }<br>
@@ -557,10 +556,10 @@<br>
<br>
             // code luma block with given intra prediction mode and store Cbf<br>
             cu->setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, fullDepth);<br>
-            singleDistY = xIntraCodingLumaBlk(cu, absPartIdx, log2TrSize, fencYuv, predYuv, resiYuv, reconQt, reconQtStride, coeffY, singleCbfY);<br>
+            singleDistY = xIntraCodingLumaBlk(cu, absPartIdx, log2TrSize, fencYuv, predYuv, resiYuv, reconQt, reconQtStride, coeffY, singleCbfY, cuData);<br>
             if (m_rdCost.m_psyRd)<br>
             {<br>
-                uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;<br>
+                uint32_t zorder = cuData->encodeIdx * 4 + absPartIdx;<br>
                 singlePsyEnergyY = m_rdCost.psyCost(log2TrSize - 2, fencYuv->getLumaAddr(absPartIdx), fencYuv->getStride(),<br>
                     cu->m_pic->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder), cu->m_pic->getPicYuvRec()->getStride());<br>
             }<br>
@@ -600,7 +599,7 @@<br>
         for (uint32_t part = 0; part < 4; part++, absPartIdxSub += qPartsDiv)<br>
         {<br>
             cu->m_psyEnergy = 0;<br>
-            splitDistY += xRecurIntraCodingQT(cu, trDepth + 1, absPartIdxSub, fencYuv, predYuv, resiYuv, bAllowRQTSplit, splitCost, splitBits, depthRange);<br>
+            splitDistY += xRecurIntraCodingQT(cu, trDepth + 1, absPartIdxSub, fencYuv, predYuv, resiYuv, bAllowRQTSplit, splitCost, splitBits, depthRange, cuData);<br>
             splitPsyEnergyY += cu->m_psyEnergy;<br>
             splitCbfY |= cu->getCbf(absPartIdxSub, TEXT_LUMA, trDepth + 1);<br>
         }<br>
@@ -641,7 +640,7 @@<br>
<br>
         // set reconstruction for next intra prediction blocks<br>
         uint32_t qtLayer   = log2TrSize - 2;<br>
-        uint32_t zorder    = cu->getZorderIdxInCU() + absPartIdx;<br>
+        uint32_t zorder    = cuData->encodeIdx * 4 + absPartIdx;<br>
         int16_t* reconQt   = m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);<br>
         X265_CHECK(m_qtTempShortYuv[qtLayer].m_width == MAX_CU_SIZE, "width is not max CU size\n");<br>
         const uint32_t reconQtStride = MAX_CU_SIZE;<br>
@@ -659,7 +658,7 @@<br>
 }<br>
<br>
 void Search::residualTransformQuantIntra(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv,<br>
-                                         ShortYuv* resiYuv, TComYuv* reconYuv, uint32_t depthRange[2])<br>
+                                         ShortYuv* resiYuv, TComYuv* reconYuv, uint32_t depthRange[2], CU* cuData)<br>
 {<br>
     uint32_t fullDepth   = cu->getDepth(0) +  trDepth;<br>
     uint32_t log2TrSize  = g_maxLog2CUSize - fullDepth;<br>
@@ -689,14 +688,14 @@<br>
         uint32_t coeffOffsetY = absPartIdx << LOG2_UNIT_SIZE * 2;<br>
         coeff_t* coeff        = cu->getCoeffY() + coeffOffsetY;<br>
<br>
-        uint32_t zorder           = cu->getZorderIdxInCU() + absPartIdx;<br>
+        uint32_t zorder           = cuData->encodeIdx * 4 + absPartIdx;<br>
         pixel*   reconIPred       = cu->m_pic->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);<br>
         uint32_t reconIPredStride = cu->m_pic->getPicYuvRec()->getStride();<br>
<br>
         bool     useTransformSkip = !!cu->getTransformSkip(absPartIdx, TEXT_LUMA);<br>
<br>
         // init availability pattern<br>
-        TComPattern::initAdiPattern(cu, absPartIdx, trDepth, m_predBuf, m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt, lumaPredMode);<br>
+        TComPattern::initAdiPattern(cu, absPartIdx, trDepth, m_predBuf, m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt, lumaPredMode, cuData);<br>
         // get prediction signal<br>
         predIntraLumaAng(lumaPredMode, pred, stride, log2TrSize);<br>
<br>
@@ -745,7 +744,7 @@<br>
<br>
         for (uint32_t part = 0; part < 4; part++, absPartIdxSub += qPartsDiv)<br>
         {<br>
-            residualTransformQuantIntra(cu, trDepth + 1, absPartIdxSub, fencYuv, predYuv, resiYuv, reconYuv, depthRange);<br>
+            residualTransformQuantIntra(cu, trDepth + 1, absPartIdxSub, fencYuv, predYuv, resiYuv, reconYuv, depthRange, cuData);<br>
             splitCbfY |= cu->getCbf(absPartIdxSub, TEXT_LUMA, trDepth + 1);<br>
         }<br>
<br>
@@ -781,24 +780,24 @@<br>
     }<br>
 }<br>
<br>
-void Search::xLoadIntraResultQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize, int16_t* reconQt, uint32_t reconQtStride)<br>
+void Search::xLoadIntraResultQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize, int16_t* reconQt, uint32_t reconQtStride, CU* cuData)<br>
 {<br>
     // copy reconstruction<br>
     int sizeIdx = log2TrSize - 2;<br>
-    uint32_t zorder           = cu->getZorderIdxInCU() + absPartIdx;<br>
+    uint32_t zorder           = cuData->encodeIdx * 4 + absPartIdx;<br>
     pixel*   reconIPred       = cu->m_pic->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);<br>
     uint32_t reconIPredStride = cu->m_pic->getPicYuvRec()->getStride();<br>
     primitives.square_copy_sp[sizeIdx](reconIPred, reconIPredStride, reconQt, reconQtStride);<br>
 }<br>
<br>
 void Search::xLoadIntraResultChromaQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSizeC, uint32_t chromaId,<br>
-                                      int16_t* reconQt, uint32_t reconQtStride)<br>
+                                      int16_t* reconQt, uint32_t reconQtStride, CU* cuData)<br>
 {<br>
     X265_CHECK(chromaId == 1 || chromaId == 2, "invalid chroma id");<br>
<br>
     // copy reconstruction<br>
     int sizeIdxC = log2TrSizeC - 2;<br>
-    uint32_t zorder           = cu->getZorderIdxInCU() + absPartIdx;<br>
+    uint32_t zorder           = cuData->encodeIdx * 4 + absPartIdx;<br>
     pixel*   reconIPred       = cu->m_pic->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder);<br>
     uint32_t reconIPredStride = cu->m_pic->getPicYuvRec()->getCStride();<br>
     primitives.square_copy_sp[sizeIdxC](reconIPred, reconIPredStride, reconQt, reconQtStride);<br>
@@ -841,7 +840,7 @@<br>
 }<br>
<br>
 /* returns distortion */<br>
-uint32_t Search::xRecurIntraChromaCodingQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv)<br>
+uint32_t Search::xRecurIntraChromaCodingQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, CU* cuData)<br>
 {<br>
     uint32_t fullDepth = cu->getDepth(0) + trDepth;<br>
     uint32_t trMode    = cu->getTransformIdx(absPartIdx);<br>
@@ -899,7 +898,7 @@<br>
                 pixel*   pred        = predYuv->getChromaAddr(chromaId, absPartIdxC);<br>
<br>
                 // init availability pattern<br>
-                TComPattern::initAdiPatternChroma(cu, absPartIdxC, trDepthC, m_predBuf, chromaId);<br>
+                TComPattern::initAdiPatternChroma(cu, absPartIdxC, trDepthC, m_predBuf, chromaId, cuData);<br>
                 pixel* chromaPred = TComPattern::getAdiChromaBuf(chromaId, tuSize, m_predBuf);<br>
<br>
                 uint32_t chromaPredMode = cu->getChromaIntraDir(absPartIdxC);<br>
@@ -943,7 +942,7 @@<br>
<br>
                         cu->setTransformSkipPartRange(chromaModeId, (TextType)chromaId, absPartIdxC, tuIterator.absPartIdxStep);<br>
<br>
-                        singleDistCTmp = xIntraCodingChromaBlk(cu, absPartIdxC, fencYuv, predYuv, resiYuv, recon, reconStride, coeff, singleCbfCTmp, chromaId, log2TrSizeC);<br>
+                        singleDistCTmp = xIntraCodingChromaBlk(cu, absPartIdxC, fencYuv, predYuv, resiYuv, recon, reconStride, coeff, singleCbfCTmp, chromaId, log2TrSizeC, cuData);<br>
                         cu->setCbfPartRange(singleCbfCTmp << trDepth, (TextType)chromaId, absPartIdxC, tuIterator.absPartIdxStep);<br>
<br>
                         if (chromaModeId == 1 && !singleCbfCTmp)<br>
@@ -954,7 +953,7 @@<br>
                             uint32_t bitsTmp = singleCbfCTmp ? xGetIntraBitsChroma(cu, absPartIdxC, log2TrSizeC, chromaId, coeff) : 0;<br>
                             if (m_rdCost.m_psyRd)<br>
                             {<br>
-                                uint32_t zorder = cu->getZorderIdxInCU() + absPartIdxC;<br>
+                                uint32_t zorder = cuData->encodeIdx * 4 + absPartIdxC;<br>
                                 singlePsyEnergyTmp = m_rdCost.psyCost(log2TrSizeC - 2, fencYuv->getChromaAddr(chromaId, absPartIdxC), fencYuv->getCStride(),<br>
                                     cu->m_pic->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder), cu->m_pic->getPicYuvRec()->getCStride());<br>
                                 singleCostTmp = m_rdCost.calcPsyRdCost(singleDistCTmp, bitsTmp, singlePsyEnergyTmp);<br>
@@ -979,7 +978,7 @@<br>
<br>
                     if (bestModeId == firstCheckId)<br>
                     {<br>
-                        xLoadIntraResultChromaQT(cu, absPartIdxC, log2TrSizeC, chromaId, reconQt, reconQtStride);<br>
+                        xLoadIntraResultChromaQT(cu, absPartIdxC, log2TrSizeC, chromaId, reconQt, reconQtStride, cuData);<br>
                         cu->setCbfPartRange(singleCbfC << trDepth, (TextType)chromaId, absPartIdxC, tuIterator.absPartIdxStep);<br>
                         m_entropyCoder->load(m_rdEntropyCoders[fullDepth][CI_TEMP_BEST]);<br>
                     }<br>
@@ -1000,10 +999,10 @@<br>
                 else<br>
                 {<br>
                     cu->setTransformSkipPartRange(0, (TextType)chromaId, absPartIdxC, tuIterator.absPartIdxStep);<br>
-                    outDist += xIntraCodingChromaBlk(cu, absPartIdxC, fencYuv, predYuv, resiYuv, reconQt, reconQtStride, coeffC, singleCbfC, chromaId, log2TrSizeC);<br>
+                    outDist += xIntraCodingChromaBlk(cu, absPartIdxC, fencYuv, predYuv, resiYuv, reconQt, reconQtStride, coeffC, singleCbfC, chromaId, log2TrSizeC, cuData);<br>
                     if (m_rdCost.m_psyRd)<br>
                     {<br>
-                        uint32_t zorder = cu->getZorderIdxInCU() + absPartIdxC;<br>
+                        uint32_t zorder = cuData->encodeIdx * 4 + absPartIdxC;<br>
                         singlePsyEnergyTmp = m_rdCost.psyCost(log2TrSizeC - 2, fencYuv->getChromaAddr(chromaId, absPartIdxC), fencYuv->getCStride(),<br>
                             cu->m_pic->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder), cu->m_pic->getPicYuvRec()->getCStride());<br>
                     }<br>
@@ -1027,7 +1026,7 @@<br>
         uint32_t absPartIdxSub = absPartIdx;<br>
         for (uint32_t part = 0; part < 4; part++, absPartIdxSub += qPartsDiv)<br>
         {<br>
-            outDist += xRecurIntraChromaCodingQT(cu, trDepth + 1, absPartIdxSub, fencYuv, predYuv, resiYuv);<br>
+            outDist += xRecurIntraChromaCodingQT(cu, trDepth + 1, absPartIdxSub, fencYuv, predYuv, resiYuv, cuData);<br>
             splitPsyEnergy += cu->m_psyEnergy;<br>
             splitCbfU |= cu->getCbf(absPartIdxSub, TEXT_CHROMA_U, trDepth + 1);<br>
             splitCbfV |= cu->getCbf(absPartIdxSub, TEXT_CHROMA_V, trDepth + 1);<br>
@@ -1091,7 +1090,7 @@<br>
 }<br>
<br>
 void Search::residualQTIntraChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx,<br>
-                                   TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv)<br>
+                                   TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, CU* cuData)<br>
 {<br>
     uint32_t fullDepth = cu->getDepth(0) + trDepth;<br>
     uint32_t trMode    = cu->getTransformIdx(absPartIdx);<br>
@@ -1135,7 +1134,7 @@<br>
                 pixel*   recon          = reconYuv->getChromaAddr(chromaId, absPartIdxC);<br>
                 uint32_t coeffOffsetC   = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));<br>
                 coeff_t* coeff          = cu->getCoeff(ttype) + coeffOffsetC;<br>
-                uint32_t zorder         = cu->getZorderIdxInCU() + absPartIdxC;<br>
+                uint32_t zorder         = cuData->encodeIdx * 4 + absPartIdxC;<br>
                 pixel*   reconIPred     = cu->m_pic->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder);<br>
                 uint32_t reconIPredStride = cu->m_pic->getPicYuvRec()->getCStride();<br>
<br>
@@ -1149,7 +1148,7 @@<br>
                     chromaPredMode = cu->getLumaIntraDir((m_csp == X265_CSP_I444) ? absPartIdxC : 0);<br>
                 chromaPredMode = (m_csp == X265_CSP_I422) ? g_chroma422IntraAngleMappingTable[chromaPredMode] : chromaPredMode;<br>
                 // init availability pattern<br>
-                TComPattern::initAdiPatternChroma(cu, absPartIdxC, trDepthC, m_predBuf, chromaId);<br>
+                TComPattern::initAdiPatternChroma(cu, absPartIdxC, trDepthC, m_predBuf, chromaId, cuData);<br>
                 pixel* chromaPred = TComPattern::getAdiChromaBuf(chromaId, tuSize, m_predBuf);<br>
<br>
                 // get prediction signal<br>
@@ -1197,7 +1196,7 @@<br>
         uint32_t absPartIdxSub = absPartIdx;<br>
         for (uint32_t part = 0; part < 4; part++, absPartIdxSub += qPartsDiv)<br>
         {<br>
-            residualQTIntraChroma(cu, trDepth + 1, absPartIdxSub, fencYuv, predYuv, resiYuv, reconYuv);<br>
+            residualQTIntraChroma(cu, trDepth + 1, absPartIdxSub, fencYuv, predYuv, resiYuv, reconYuv, cuData);<br>
             splitCbfU |= cu->getCbf(absPartIdxSub, TEXT_CHROMA_U, trDepth + 1);<br>
             splitCbfV |= cu->getCbf(absPartIdxSub, TEXT_CHROMA_V, trDepth + 1);<br>
         }<br>
@@ -1210,7 +1209,7 @@<br>
     }<br>
 }<br>
<br>
-void Search::estIntraPredQT(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, uint32_t depthRange[2])<br>
+void Search::estIntraPredQT(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, uint32_t depthRange[2], CU* cuData)<br>
 {<br>
     uint32_t depth        = cu->getDepth(0);<br>
     uint32_t initTrDepth  = cu->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1;<br>
@@ -1227,7 +1226,7 @@<br>
     for (uint32_t pu = 0; pu < numPU; pu++, partOffset += qNumParts)<br>
     {<br>
         // Reference sample smoothing<br>
-        TComPattern::initAdiPattern(cu, partOffset, initTrDepth, m_predBuf, m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt, ALL_IDX);<br>
+        TComPattern::initAdiPattern(cu, partOffset, initTrDepth, m_predBuf, m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt, ALL_IDX, cuData);<br>
<br>
         // determine set of modes to be tested (using prediction signal only)<br>
         pixel*   fenc   = fencYuv->getLumaAddr(partOffset);<br>
@@ -1346,7 +1345,7 @@<br>
             m_entropyCoder->load(m_rdEntropyCoders[depth][CI_CURR_BEST]);<br>
             cu->setLumaIntraDirSubParts(rdModeList[i], partOffset, depth + initTrDepth);<br>
             cost = bits = 0;<br>
-            xRecurIntraCodingQT(cu, initTrDepth, partOffset, fencYuv, predYuv, resiYuv, false, cost, bits, depthRange);<br>
+            xRecurIntraCodingQT(cu, initTrDepth, partOffset, fencYuv, predYuv, resiYuv, false, cost, bits, depthRange, cuData);<br>
             COPY2_IF_LT(bcost, cost, bmode, rdModeList[i]);<br>
         }<br>
<br>
@@ -1355,14 +1354,14 @@<br>
         m_entropyCoder->load(m_rdEntropyCoders[depth][CI_CURR_BEST]);<br>
<br>
         // update distortion (rate and r-d costs are determined later)<br>
-        cu->m_totalDistortion += xRecurIntraCodingQT(cu, initTrDepth, partOffset, fencYuv, predYuv, resiYuv, true, cost, bits, depthRange);<br>
+        cu->m_totalDistortion += xRecurIntraCodingQT(cu, initTrDepth, partOffset, fencYuv, predYuv, resiYuv, true, cost, bits, depthRange, cuData);<br>
<br>
         xSetIntraResultQT(cu, initTrDepth, partOffset, reconYuv);<br>
<br>
         // set reconstruction for next intra prediction blocks<br>
         if (pu != numPU - 1)<br>
         {<br>
-            uint32_t zorder      = cu->getZorderIdxInCU() + partOffset;<br>
+            uint32_t zorder      = cuData->encodeIdx * 4 + partOffset;<br>
             pixel*   dst         = cu->m_pic->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);<br>
             pixel*   src         = reconYuv->getLumaAddr(partOffset);<br>
             primitives.square_copy_pp[log2TrSize - 2](dst, dststride, src, srcstride);<br>
@@ -1386,7 +1385,7 @@<br>
     x265_emms();<br>
 }<br>
<br>
-void Search::sharedEstIntraPredQT(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, uint32_t depthRange[2], uint8_t* sharedModes)<br>
+void Search::sharedEstIntraPredQT(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, uint32_t depthRange[2], uint8_t* sharedModes, CU* cuData)<br>
 {<br>
     uint32_t depth       = cu->getDepth(0);<br>
     uint32_t initTrDepth = cu->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1;<br>
@@ -1409,12 +1408,12 @@<br>
         m_entropyCoder->load(m_rdEntropyCoders[depth][CI_CURR_BEST]);<br>
<br>
         // update overall distortion (rate and r-d costs are determined later)<br>
-        cu->m_totalDistortion += xRecurIntraCodingQT(cu, initTrDepth, partOffset, fencYuv, predYuv, resiYuv, true, puCost, bits, depthRange);<br>
+        cu->m_totalDistortion += xRecurIntraCodingQT(cu, initTrDepth, partOffset, fencYuv, predYuv, resiYuv, true, puCost, bits, depthRange, cuData);<br>
         xSetIntraResultQT(cu, initTrDepth, partOffset, reconYuv);<br>
<br>
         if (pu != numPU - 1)<br>
         {<br>
-            uint32_t zorder      = cu->getZorderIdxInCU() + partOffset;<br>
+            uint32_t zorder      = cuData->encodeIdx * 4 + partOffset;<br>
             pixel*   dst         = cu->m_pic->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);<br>
             pixel*   src         = reconYuv->getLumaAddr(partOffset);<br>
             primitives.luma_copy_pp[log2TrSize - 2](dst, dststride, src, srcstride);<br>
@@ -1441,7 +1440,7 @@<br>
     m_entropyCoder->load(m_rdEntropyCoders[depth][CI_CURR_BEST]);<br>
 }<br>
<br>
-void Search::getBestIntraModeChroma(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv)<br>
+void Search::getBestIntraModeChroma(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, CU* cuData)<br>
 {<br>
     uint32_t bestMode  = 0;<br>
     uint64_t bestCost  = MAX_INT64;<br>
@@ -1461,8 +1460,8 @@<br>
     int32_t sizeIdx = log2TrSizeC - 2;<br>
     pixelcmp_t sa8d = primitives.sa8d[sizeIdx];<br>
<br>
-    TComPattern::initAdiPatternChroma(cu, 0, 0, m_predBuf, 1);<br>
-    TComPattern::initAdiPatternChroma(cu, 0, 0, m_predBuf, 2);<br>
+    TComPattern::initAdiPatternChroma(cu, 0, 0, m_predBuf, 1, cuData);<br>
+    TComPattern::initAdiPatternChroma(cu, 0, 0, m_predBuf, 2, cuData);<br>
     cu->getAllowedChromaDir(0, modeList);<br>
<br>
     // check chroma modes<br>
@@ -1494,7 +1493,7 @@<br>
     cu->setChromIntraDirSubParts(bestMode, 0, cu->getDepth(0));<br>
 }<br>
<br>
-void Search::estIntraPredChromaQT(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv)<br>
+void Search::estIntraPredChromaQT(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, CU* cuData)<br>
 {<br>
     uint32_t depth       = cu->getDepth(0);<br>
     uint32_t initTrDepth = (cu->getPartitionSize(0) != SIZE_2Nx2N) && (cu->getChromaFormat() == X265_CSP_I444 ? 1 : 0);<br>
@@ -1529,7 +1528,7 @@<br>
             // chroma coding<br>
             cu->setChromIntraDirSubParts(modeList[mode], absPartIdxC, depth + initTrDepth);<br>
<br>
-            uint32_t dist = xRecurIntraChromaCodingQT(cu, initTrDepth, absPartIdxC, fencYuv, predYuv, resiYuv);<br>
+            uint32_t dist = xRecurIntraChromaCodingQT(cu, initTrDepth, absPartIdxC, fencYuv, predYuv, resiYuv, cuData);<br>
<br>
             if (cu->m_slice->m_pps->bTransformSkipEnabled)<br>
                 m_entropyCoder->load(m_rdEntropyCoders[depth][CI_CURR_BEST]);<br>
@@ -1556,7 +1555,7 @@<br>
<br>
         if (!tuIterator.isLastSection())<br>
         {<br>
-            uint32_t zorder      = cu->getZorderIdxInCU() + absPartIdxC;<br>
+            uint32_t zorder      = cuData->encodeIdx * 4 + absPartIdxC;<br>
             uint32_t dststride   = cu->m_pic->getPicYuvRec()->getCStride();<br>
             uint32_t srcstride   = reconYuv->getCStride();<br>
             pixel *src, *dst;<br>
@@ -1602,7 +1601,7 @@<br>
 }<br>
<br>
 /* estimation of best merge coding */<br>
-uint32_t Search::mergeEstimation(TComDataCU* cu, int puIdx, MergeData& m)<br>
+uint32_t Search::mergeEstimation(TComDataCU* cu, int puIdx, MergeData& m, CU* cuData)<br>
 {<br>
     X265_CHECK(cu->getPartitionSize(0) != SIZE_2Nx2N, "merge tested on non-2Nx2N partition\n");<br>
<br>
@@ -1636,7 +1635,7 @@<br>
         cu->getCUMvField(REF_PIC_LIST_1)->m_mv[m.absPartIdx] = m.mvFieldNeighbours[mergeCand][1].mv;<br>
         cu->getCUMvField(REF_PIC_LIST_1)->m_refIdx[m.absPartIdx] = (char)m.mvFieldNeighbours[mergeCand][1].refIdx;<br>
<br>
-        prepMotionCompensation(cu, puIdx);<br>
+        prepMotionCompensation(cu, puIdx, cuData);<br>
         motionCompensation(cu, &m_predTempYuv, REF_PIC_LIST_X, true, false);<br>
         uint32_t costCand = m_me.bufSATD(m_predTempYuv.getLumaAddr(m.absPartIdx), m_predTempYuv.getStride());<br>
         uint32_t bitsCand = getTUBits(mergeCand, m.maxNumMergeCand);<br>
@@ -1658,7 +1657,7 @@<br>
<br>
 /* search of the best candidate for inter prediction<br>
  * returns true if predYuv was filled with a motion compensated prediction */<br>
-bool Search::predInterSearch(TComDataCU* cu, TComYuv* predYuv, bool bMergeOnly, bool bChroma)<br>
+bool Search::predInterSearch(TComDataCU* cu, TComYuv* predYuv, bool bMergeOnly, bool bChroma, CU* cuData)<br>
 {<br>
     MV amvpCand[2][MAX_NUM_REF][AMVP_NUM_CANDS];<br>
     MV mvc[(MD_ABOVE_LEFT + 1) * 2 + 1];<br>
@@ -1687,7 +1686,7 @@<br>
         int      roiWidth, roiHeight;<br>
         cu->getPartIndexAndSize(partIdx, partAddr, roiWidth, roiHeight);<br>
<br>
-        pixel* pu = fenc->getLumaAddr(cu->getAddr(), cu->getZorderIdxInCU() + partAddr);<br>
+        pixel* pu = fenc->getLumaAddr(cu->getAddr(), cuData->encodeIdx * 4 + partAddr);<br>
         m_me.setSourcePU(pu - fenc->getLumaAddr(), roiWidth, roiHeight);<br>
<br>
         uint32_t mrgCost = MAX_UINT;<br>
@@ -1698,7 +1697,7 @@<br>
             merge.absPartIdx = partAddr;<br>
             merge.width = roiWidth;<br>
             merge.height = roiHeight;<br>
-            mrgCost = mergeEstimation(cu, partIdx, merge);<br>
+            mrgCost = mergeEstimation(cu, partIdx, merge, cuData);<br>
<br>
             if (bMergeOnly && cu->getLog2CUSize(0) > 3)<br>
             {<br>
@@ -1716,7 +1715,7 @@<br>
                 cu->getCUMvField(REF_PIC_LIST_1)->setAllMvField(merge.mvField[1], partSize, partAddr, 0, partIdx);<br>
                 totalmebits += merge.bits;<br>
<br>
-                prepMotionCompensation(cu, partIdx);<br>
+                prepMotionCompensation(cu, partIdx, cuData);<br>
                 motionCompensation(cu, predYuv, REF_PIC_LIST_X, true, bChroma);<br>
                 continue;<br>
             }<br>
@@ -1758,7 +1757,7 @@<br>
<br>
                     cu->clipMv(mvCand);<br>
<br>
-                    prepMotionCompensation(cu, partIdx);<br>
+                    prepMotionCompensation(cu, partIdx, cuData);<br>
                     predInterLumaBlk(slice->m_refPicList[l][ref]->getPicYuvRec(), &m_predTempYuv, &mvCand);<br>
                     uint32_t cost = m_me.bufSAD(m_predTempYuv.getLumaAddr(partAddr), m_predTempYuv.getStride());<br>
                     cost = (uint32_t)m_rdCost.calcRdSADCost(cost, MVP_IDX_BITS);<br>
@@ -1806,7 +1805,7 @@<br>
             TComPicYuv *refPic0 = slice->m_refPicList[0][list[0].ref]->getPicYuvRec();<br>
             TComPicYuv *refPic1 = slice->m_refPicList[1][list[1].ref]->getPicYuvRec();<br>
<br>
-            prepMotionCompensation(cu, partIdx);<br>
+            prepMotionCompensation(cu, partIdx, cuData);<br>
             predInterLumaBlk(refPic0, &m_predYuv[0], &list[0].mv);<br>
             predInterLumaBlk(refPic1, &m_predYuv[1], &list[1].mv);<br>
<br>
@@ -1932,7 +1931,7 @@<br>
<br>
             totalmebits += list[1].bits;<br>
         }<br>
-        prepMotionCompensation(cu, partIdx);<br>
+        prepMotionCompensation(cu, partIdx, cuData);<br>
         motionCompensation(cu, predYuv, REF_PIC_LIST_X, true, bChroma);<br>
     }<br>
<br>
@@ -2092,7 +2091,7 @@<br>
<br>
 /** encode residual and calculate rate-distortion for a CU block */<br>
 void Search::encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* outResiYuv,<br>
-                                       ShortYuv* outBestResiYuv, TComYuv* outReconYuv)<br>
+                                       ShortYuv* outBestResiYuv, TComYuv* outReconYuv, CU* cuData)<br>
 {<br>
     X265_CHECK(!cu->isIntra(0), "intra CU not expected\n");<br>
<br>
@@ -2135,7 +2134,7 @@<br>
         uint64_t cost = 0;<br>
         uint32_t zeroDistortion = 0;<br>
         uint32_t bits = 0;<br>
-        uint32_t distortion = xEstimateResidualQT(cu, 0, fencYuv, predYuv, outResiYuv, depth, cost, bits, &zeroDistortion, tuDepthRange);<br>
+        uint32_t distortion = xEstimateResidualQT(cu, 0, fencYuv, predYuv, outResiYuv, depth, cost, bits, &zeroDistortion, tuDepthRange, cuData);<br>
<br>
         m_entropyCoder->resetBits();<br>
         m_entropyCoder->codeQtRootCbfZero();<br>
@@ -2205,7 +2204,7 @@<br>
         m_entropyCoder->load(m_rdEntropyCoders[depth][CI_CURR_BEST]);<br>
         uint64_t cost = 0;<br>
         uint32_t bits = 0;<br>
-        xEstimateResidualQT(cu, 0, fencYuv, predYuv, outResiYuv, depth, cost, bits, NULL, tuDepthRange);<br>
+        xEstimateResidualQT(cu, 0, fencYuv, predYuv, outResiYuv, depth, cost, bits, NULL, tuDepthRange, cuData);<br>
         xSetResidualQTData(cu, 0, NULL, depth, false);<br>
         m_entropyCoder->store(m_rdEntropyCoders[depth][CI_TEMP_BEST]);<br>
     }<br>
@@ -2240,7 +2239,7 @@<br>
         cu->clearCbf(0, depth);<br>
 }<br>
<br>
-void Search::generateCoeffRecon(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv)<br>
+void Search::generateCoeffRecon(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, CU* cuData)<br>
 {<br>
     m_quant.setQPforQuant(cu);<br>
<br>
@@ -2249,7 +2248,7 @@<br>
<br>
     if (cu->getPredictionMode(0) == MODE_INTER)<br>
     {<br>
-        residualTransformQuantInter(cu, 0, fencYuv, resiYuv, cu->getDepth(0), tuDepthRange);<br>
+        residualTransformQuantInter(cu, 0, fencYuv, resiYuv, cu->getDepth(0), tuDepthRange, cuData);<br>
         if (cu->getQtRootCbf(0))<br>
             reconYuv->addClip(predYuv, resiYuv, cu->getLog2CUSize(0));<br>
         else<br>
@@ -2262,14 +2261,14 @@<br>
     else if (cu->getPredictionMode(0) == MODE_INTRA)<br>
     {<br>
         uint32_t initTrDepth = cu->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1;<br>
-        residualTransformQuantIntra(cu, initTrDepth, 0, fencYuv, predYuv, resiYuv, reconYuv, tuDepthRange);<br>
-        getBestIntraModeChroma(cu, fencYuv, predYuv);<br>
-        residualQTIntraChroma(cu, 0, 0, fencYuv, predYuv, resiYuv, reconYuv);<br>
+        residualTransformQuantIntra(cu, initTrDepth, 0, fencYuv, predYuv, resiYuv, reconYuv, tuDepthRange, cuData);<br>
+        getBestIntraModeChroma(cu, fencYuv, predYuv, cuData);<br>
+        residualQTIntraChroma(cu, 0, 0, fencYuv, predYuv, resiYuv, reconYuv, cuData);<br>
     }<br>
 }<br>
<br>
 void Search::residualTransformQuantInter(TComDataCU* cu, uint32_t absPartIdx, TComYuv* fencYuv, ShortYuv* resiYuv,<br>
-                                         const uint32_t depth, uint32_t depthRange[2])<br>
+                                         const uint32_t depth, uint32_t depthRange[2], CU* cuData)<br>
 {<br>
     X265_CHECK(cu->getDepth(0) == cu->getDepth(absPartIdx), "invalid depth\n");<br>
     const uint32_t trMode = depth - cu->getDepth(0);<br>
@@ -2379,7 +2378,9 @@<br>
     {<br>
         const uint32_t qPartNumSubdiv = cu->m_pic->getNumPartInCU() >> ((depth + 1) << 1);<br>
         for (uint32_t i = 0; i < 4; ++i)<br>
-            residualTransformQuantInter(cu, absPartIdx + i * qPartNumSubdiv, fencYuv, resiYuv, depth + 1, depthRange);<br>
+        {<br>
+            residualTransformQuantInter(cu, absPartIdx + i * qPartNumSubdiv, fencYuv, resiYuv, depth + 1, depthRange, cuData);<br>
+        }<br>
<br>
         uint32_t ycbf = 0;<br>
         uint32_t ucbf = 0;<br>
@@ -2401,7 +2402,7 @@<br>
 }<br>
<br>
 uint32_t Search::xEstimateResidualQT(TComDataCU* cu, uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv,<br>
-                                     uint32_t depth, uint64_t& rdCost, uint32_t& outBits, uint32_t* outZeroDist, uint32_t depthRange[2])<br>
+                                     uint32_t depth, uint64_t& rdCost, uint32_t& outBits, uint32_t* outZeroDist, uint32_t depthRange[2], CU* cuData)<br>
 {<br>
     X265_CHECK(cu->getDepth(0) == cu->getDepth(absPartIdx), "depth not matching\n");<br>
     const uint32_t trMode = depth - cu->getDepth(0);<br>
@@ -2568,7 +2569,7 @@<br>
             if (m_rdCost.m_psyRd)<br>
             {<br>
                 pixel*   pred = predYuv->getLumaAddr(absPartIdx);<br>
-                uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;<br>
+                uint32_t zorder = cuData->encodeIdx * 4 + absPartIdx;<br>
                 pixel*   reconIPred = cu->m_pic->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);<br>
                 uint32_t reconIPredStride = cu->m_pic->getPicYuvRec()->getStride();<br>
                 uint32_t stride = fencYuv->getStride();<br>
@@ -2667,7 +2668,7 @@<br>
                     if (m_rdCost.m_psyRd)<br>
                     {<br>
                         pixel*   pred = predYuv->getCbAddr(absPartIdxC);<br>
-                        uint32_t zorder = cu->getZorderIdxInCU() + absPartIdxC;<br>
+                        uint32_t zorder = cuData->encodeIdx * 4 + absPartIdxC;<br>
                         pixel*   reconIPred = cu->m_pic->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder);<br>
                         uint32_t reconIPredStride = cu->m_pic->getPicYuvRec()->getCStride();<br>
                         uint32_t stride = fencYuv->getCStride();<br>
@@ -2749,7 +2750,7 @@<br>
                     if (m_rdCost.m_psyRd)<br>
                     {<br>
                         pixel*   pred = predYuv->getCrAddr(absPartIdxC);<br>
-                        uint32_t zorder = cu->getZorderIdxInCU() + absPartIdxC;<br>
+                        uint32_t zorder = cuData->encodeIdx * 4 + absPartIdxC;<br>
                         pixel*   reconIPred = cu->m_pic->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);<br>
                         uint32_t reconIPredStride = cu->m_pic->getPicYuvRec()->getCStride();<br>
                         uint32_t stride = fencYuv->getCStride();<br>
@@ -2855,7 +2856,7 @@<br>
                 if (m_rdCost.m_psyRd)<br>
                 {<br>
                     pixel*   pred = predYuv->getLumaAddr(absPartIdx);<br>
-                    uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;<br>
+                    uint32_t zorder = cuData->encodeIdx * 4 + absPartIdx;<br>
                     pixel*   reconIPred = cu->m_pic->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);<br>
                     uint32_t reconIPredStride = cu->m_pic->getPicYuvRec()->getStride();<br>
                     uint32_t stride = fencYuv->getStride();<br>
@@ -2943,7 +2944,7 @@<br>
                     if (m_rdCost.m_psyRd)<br>
                     {<br>
                         pixel*   pred = predYuv->getCbAddr(absPartIdxC);<br>
-                        uint32_t zorder = cu->getZorderIdxInCU() + absPartIdxC;<br>
+                        uint32_t zorder = cuData->encodeIdx * 4 + absPartIdxC;<br>
                         pixel*   reconIPred = cu->m_pic->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder);<br>
                         uint32_t reconIPredStride = cu->m_pic->getPicYuvRec()->getCStride();<br>
                         uint32_t stride = fencYuv->getCStride();<br>
@@ -2984,7 +2985,7 @@<br>
                     if (m_rdCost.m_psyRd)<br>
                     {<br>
                         pixel*   pred = predYuv->getCrAddr(absPartIdxC);<br>
-                        uint32_t zorder = cu->getZorderIdxInCU() + absPartIdxC;<br>
+                        uint32_t zorder = cuData->encodeIdx * 4 + absPartIdxC;<br>
                         pixel*   reconIPred = cu->m_pic->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);<br>
                         uint32_t reconIPredStride = cu->m_pic->getPicYuvRec()->getCStride();<br>
                         uint32_t stride = fencYuv->getCStride();<br>
@@ -3127,7 +3128,7 @@<br>
         for (uint32_t i = 0; i < 4; ++i)<br>
         {<br>
             cu->m_psyEnergy = 0;<br>
-            subdivDist += xEstimateResidualQT(cu, absPartIdx + i * qPartNumSubdiv, fencYuv, predYuv, resiYuv, depth + 1, subDivCost, subdivBits, bCheckFull ? NULL : outZeroDist, depthRange);<br>
+            subdivDist += xEstimateResidualQT(cu, absPartIdx + i * qPartNumSubdiv, fencYuv, predYuv, resiYuv, depth + 1, subDivCost, subdivBits, bCheckFull ? NULL : outZeroDist, depthRange, cuData);<br>
             subDivPsyEnergy += cu->m_psyEnergy;<br>
         }<br>
<br>
diff -r c8f53398f8ce -r f70fd79cb3e1 source/encoder/search.h<br>
--- a/source/encoder/search.h   Sat Sep 20 15:41:08 2014 +0100<br>
+++ b/source/encoder/search.h   Mon Sep 22 09:05:53 2014 +0530<br>
@@ -79,19 +79,19 @@<br>
<br>
     bool     initSearch(x265_param *param, ScalingList& scalingList);<br>
<br>
-    void     estIntraPredQT(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, uint32_t depthRange[2]);<br>
-    void     sharedEstIntraPredQT(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, uint32_t depthRange[2], uint8_t* sharedModes);<br>
-    void     estIntraPredChromaQT(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv);<br>
+    void     estIntraPredQT(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, uint32_t depthRange[2], CU* cuData);<br>
+    void     sharedEstIntraPredQT(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, uint32_t depthRange[2], uint8_t* sharedModes, CU* cuData);<br>
+    void     estIntraPredChromaQT(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, CU* cuData);<br>
<br>
     // estimation inter prediction (non-skip)<br>
-    bool     predInterSearch(TComDataCU* cu, TComYuv* predYuv, bool bMergeOnly, bool bChroma);<br>
+    bool     predInterSearch(TComDataCU* cu, TComYuv* predYuv, bool bMergeOnly, bool bChroma, CU* cuData);<br>
<br>
     // encode residual and compute rd-cost for inter mode<br>
-    void     encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, ShortYuv* bestResiYuv, TComYuv* reconYuv);<br>
+    void     encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, ShortYuv* bestResiYuv, TComYuv* reconYuv, CU* cuData);<br>
     void     encodeResAndCalcRdSkipCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, TComYuv* reconYuv);<br>
<br>
-    void     generateCoeffRecon(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv);<br>
-    void     residualTransformQuantInter(TComDataCU* cu, uint32_t absPartIdx, TComYuv* fencYuv, ShortYuv* resiYuv, uint32_t depth, uint32_t depthRange[2]);<br>
+    void     generateCoeffRecon(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, CU* cuData);<br>
+    void     residualTransformQuantInter(TComDataCU* cu, uint32_t absPartIdx, TComYuv* fencYuv, ShortYuv* resiYuv, uint32_t depth, uint32_t depthRange[2], CU* cuData);<br>
<br>
     uint32_t getIntraModeBits(TComDataCU* cu, uint32_t mode, uint32_t partOffset, uint32_t depth);<br>
     uint32_t getIntraRemModeBits(TComDataCU * cu, uint32_t partOffset, uint32_t depth, uint32_t preds[3], uint64_t& mpms);<br>
@@ -110,30 +110,30 @@<br>
     uint32_t xGetIntraBitsLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t log2TrSize, coeff_t* coeff, uint32_t depthRange[2]);<br>
     uint32_t xGetIntraBitsChroma(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSizeC, uint32_t chromaId, coeff_t* coeff);<br>
     uint32_t xIntraCodingLumaBlk(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv,<br>
-                                 int16_t* reconQt, uint32_t reconQtStride, coeff_t* coeff, uint32_t& cbf);<br>
+                                 int16_t* reconQt, uint32_t reconQtStride, coeff_t* coeff, uint32_t& cbf, CU* cuData);<br>
<br>
     uint32_t xEstimateResidualQT(TComDataCU* cu, uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, uint32_t depth,<br>
-                                 uint64_t &rdCost, uint32_t &outBits, uint32_t *zeroDist, uint32_t tuDepthRange[2]);<br>
+                                 uint64_t &rdCost, uint32_t &outBits, uint32_t *zeroDist, uint32_t tuDepthRange[2], CU* cuData);<br>
<br>
     uint32_t xRecurIntraCodingQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv,<br>
-                                 ShortYuv* resiYuv, bool bAllowRQTSplit, uint64_t& dRDCost, uint32_t& puBits, uint32_t depthRange[2]);<br>
+                                 ShortYuv* resiYuv, bool bAllowRQTSplit, uint64_t& dRDCost, uint32_t& puBits, uint32_t depthRange[2], CU* cuData);<br>
<br>
-    uint32_t xRecurIntraChromaCodingQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv);<br>
+    uint32_t xRecurIntraChromaCodingQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, CU* cuData);<br>
<br>
     uint32_t xIntraCodingChromaBlk(TComDataCU* cu, uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv,<br>
-                                   int16_t* reconQt, uint32_t reconQtStride, coeff_t* coeff, uint32_t& cbf, uint32_t chromaId, uint32_t log2TrSizeC);<br>
+                                   int16_t* reconQt, uint32_t reconQtStride, coeff_t* coeff, uint32_t& cbf, uint32_t chromaId, uint32_t log2TrSizeC, CU* cuData);<br>
<br>
     void     residualTransformQuantIntra(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* fencYuv,<br>
-                                         TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, uint32_t depthRange[2]);<br>
+                                         TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, uint32_t depthRange[2], CU* cuData);<br>
<br>
     void     residualQTIntraChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* fencYuv,<br>
-                                   TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv);<br>
+                                   TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, CU* cuData);<br>
<br>
     void     xEncodeResidualQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool bSubdivAndCbf, TextType ttype, uint32_t depthRange[2]);<br>
     void     xSetIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* reconYuv);<br>
<br>
-    void     xLoadIntraResultQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize, int16_t* reconQt, uint32_t reconQtStride);<br>
-    void     xLoadIntraResultChromaQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSizeC, uint32_t chromaId, int16_t* reconQt, uint32_t reconQtStride);<br>
+    void     xLoadIntraResultQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize, int16_t* reconQt, uint32_t reconQtStride, CU* cuData);<br>
+    void     xLoadIntraResultChromaQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSizeC, uint32_t chromaId, int16_t* reconQt, uint32_t reconQtStride, CU* cuData);<br>
<br>
     void     offsetSubTUCBFs(TComDataCU* cu, TextType ttype, uint32_t trDepth, uint32_t absPartIdx);<br>
<br>
@@ -170,13 +170,13 @@<br>
     void     checkBestMVP(MV* amvpCand, MV cMv, MV& mvPred, int& mvpIdx, uint32_t& outBits, uint32_t& outCost);<br>
     void     getBlkBits(PartSize cuMode, bool bPSlice, int partIdx, uint32_t lastMode, uint32_t blockBit[3]);<br>
     uint32_t getInterSymbolBits(TComDataCU* cu, uint32_t depthRange[2]);<br>
-    uint32_t mergeEstimation(TComDataCU* cu, int partIdx, MergeData& m);<br>
+    uint32_t mergeEstimation(TComDataCU* cu, int partIdx, MergeData& m, CU* cuData);<br>
     void     setSearchRange(TComDataCU* cu, MV mvp, int merange, MV& mvmin, MV& mvmax);<br>
<br>
     /* intra helper functions */<br>
     enum { MAX_RD_INTRA_MODES = 16 };<br>
     void     updateCandList(uint32_t mode, uint64_t cost, int maxCandCount, uint32_t* candModeList, uint64_t* candCostList);<br>
-    void     getBestIntraModeChroma(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv);<br>
+    void     getBestIntraModeChroma(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, CU* cuData);<br>
 };<br>
 }<br>
<br>
</blockquote></div><br></div>