[x265] inline simple functions

Wed Sep 17 11:51:37 CEST 2014

# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1410947343 -32400
#      Wed Sep 17 18:49:03 2014 +0900
# Node ID b00d1f46a7632572df3be47decee9be9881c511c
# Parent  199e8f2e0d54abd16657ccd0952bdc25cadf8420
inline simple functions

diff -r 199e8f2e0d54 -r b00d1f46a763 source/Lib/TLibCommon/TComDataCU.cpp
--- a/source/Lib/TLibCommon/TComDataCU.cpp	Tue Sep 16 17:50:06 2014 +0530
+++ b/source/Lib/TLibCommon/TComDataCU.cpp	Wed Sep 17 18:49:03 2014 +0900
@@ -88,9 +88,6 @@
     m_DataCUMemPool.m_tqBypassYuvMemBlock  = NULL;
 }
 
-TComDataCU::~TComDataCU()
-{}
-
 
 bool TComDataCU::initialize(uint32_t numPartition, uint32_t sizeL, uint32_t sizeC, uint32_t numBlocks, bool isLossless)
 {
@@ -1086,15 +1083,6 @@
     }
 }
 
-/** Check whether the CU is coded in lossless coding mode
- * \param   absPartIdx
- * \returns true if the CU is coded in lossless coding mode; false if otherwise
- */
-bool TComDataCU::isLosslessCoded(uint32_t absPartIdx)
-{
-    return m_slice->m_pps->bTransquantBypassEnabled && getCUTransquantBypass(absPartIdx);
-}
-
 /** Get allowed chroma intra modes
 *\param   absPartIdx
 *\param   uiModeList  pointer to chroma intra modes array
@@ -1224,11 +1212,6 @@
     return ctx;
 }
 
-uint32_t TComDataCU::getCtxInterDir(uint32_t absPartIdx)
-{
-    return getDepth(absPartIdx);
-}
-
 void TComDataCU::clearCbf(uint32_t absPartIdx, uint32_t depth)
 {
     uint32_t curPartNum = m_pic->getNumPartInCU() >> (depth << 1);
@@ -2111,11 +2094,6 @@
     return numMvc;
 }
 
-bool TComDataCU::isBipredRestriction()
-{
-    return getLog2CUSize(0) == 3 && getPartitionSize(0) != SIZE_2Nx2N;
-}
-
 void TComDataCU::clipMv(MV& outMV)
 {
     int mvshift = 2;
@@ -2130,15 +2108,6 @@
     outMV.y = X265_MIN(ymax, X265_MAX(ymin, (int)outMV.y));
 }
 
-/** Test whether the current block is skipped
- * \param partIdx Block index
- * \returns Flag indicating whether the block is skipped
- */
-bool TComDataCU::isSkipped(uint32_t partIdx)
-{
-    return getSkipFlag(partIdx);
-}
-
 // ====================================================================================================================
 // Protected member functions
 // ====================================================================================================================
@@ -2438,9 +2407,4 @@
         result.firstSignificanceMapContext = bIsLuma ? 21 : 12;
 }
 
-uint32_t TComDataCU::getSCUAddr()
-{
-    return (m_cuAddr << g_maxFullDepth * 2) + m_absIdxInLCU;
-}
-
 //! \}
diff -r 199e8f2e0d54 -r b00d1f46a763 source/Lib/TLibCommon/TComDataCU.h
--- a/source/Lib/TLibCommon/TComDataCU.h	Tue Sep 16 17:50:06 2014 +0530
+++ b/source/Lib/TLibCommon/TComDataCU.h	Wed Sep 17 18:49:03 2014 +0900
@@ -248,7 +248,7 @@
 public:
 
     TComDataCU();
-    virtual ~TComDataCU();
+    ~TComDataCU() {}
 
     uint32_t      m_psyEnergy;
     uint64_t      m_totalPsyCost;
@@ -290,7 +290,8 @@
 
     uint32_t&     getZorderIdxInCU()               { return m_absIdxInLCU; }
 
-    uint32_t      getSCUAddr();
+    uint32_t      getSCUAddr() const               { return (m_cuAddr << g_maxFullDepth * 2) + m_absIdxInLCU; }
+
 
     uint32_t      getCUPelX()                      { return m_cuPelX; }
 
@@ -344,7 +345,7 @@
     char          getLastCodedQP(uint32_t absPartIdx);
     void          setQPSubCUs(int qp, TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool &foundNonZeroCbf);
 
-    bool          isLosslessCoded(uint32_t absPartIdx);
+    bool          isLosslessCoded(uint32_t idx) const { return m_cuTransquantBypass[idx] && m_slice->m_pps->bTransquantBypassEnabled; }
 
     uint8_t*      getTransformIdx()                    { return m_trIdx; }
 
@@ -488,10 +489,9 @@
     // member functions for modes
     // -------------------------------------------------------------------------------------------------------------------
 
-    bool          isIntra(uint32_t partIdx)  { return m_predModes[partIdx] == MODE_INTRA; }
-
-    bool          isSkipped(uint32_t partIdx); ///< SKIP (no residual)
-    bool          isBipredRestriction();
+    bool          isIntra(uint32_t partIdx) const { return m_predModes[partIdx] == MODE_INTRA; }
+    bool          isSkipped(uint32_t idx) const { return m_skipFlag[idx]; }
+    bool          isBipredRestriction() const { return m_log2CUSize[0] == 3 && m_partSizes[0] != SIZE_2Nx2N; }
 
     // -------------------------------------------------------------------------------------------------------------------
     // member functions for symbol prediction (most probable / mode conversion)
@@ -506,7 +506,7 @@
 
     uint32_t      getCtxSplitFlag(uint32_t absPartIdx, uint32_t depth);
     uint32_t      getCtxSkipFlag(uint32_t absPartIdx);
-    uint32_t      getCtxInterDir(uint32_t absPartIdx);
+    uint32_t      getCtxInterDir(uint32_t idx) const { return m_depth[idx]; }
 
     // -------------------------------------------------------------------------------------------------------------------
     // member functions for RD cost storage
diff -r 199e8f2e0d54 -r b00d1f46a763 source/Lib/TLibCommon/TComPicYuv.cpp
--- a/source/Lib/TLibCommon/TComPicYuv.cpp	Tue Sep 16 17:50:06 2014 +0530
+++ b/source/Lib/TLibCommon/TComPicYuv.cpp	Wed Sep 17 18:49:03 2014 +0900
@@ -60,10 +60,6 @@
     m_buOffsetC = NULL;
 }
 
-TComPicYuv::~TComPicYuv()
-{
-}
-
 bool TComPicYuv::create(int picWidth, int picHeight, int picCsp, uint32_t maxCUSize, uint32_t maxFullDepth)
 {
     m_picWidth  = picWidth;
diff -r 199e8f2e0d54 -r b00d1f46a763 source/Lib/TLibCommon/TComPicYuv.h
--- a/source/Lib/TLibCommon/TComPicYuv.h	Tue Sep 16 17:50:06 2014 +0530
+++ b/source/Lib/TLibCommon/TComPicYuv.h	Wed Sep 17 18:49:03 2014 +0900
@@ -94,7 +94,7 @@
     int   m_numCuInHeight;
 
     TComPicYuv();
-    virtual ~TComPicYuv();
+    ~TComPicYuv() {}
 
     // ------------------------------------------------------------------------------------------------
     //  Memory management
diff -r 199e8f2e0d54 -r b00d1f46a763 source/common/deblock.cpp
--- a/source/common/deblock.cpp	Tue Sep 16 17:50:06 2014 +0530
+++ b/source/common/deblock.cpp	Wed Sep 17 18:49:03 2014 +0900
@@ -525,8 +525,8 @@
                 if (cu->m_slice->m_pps->bTransquantBypassEnabled)
                 {
                     // check if each of PUs is lossless coded
-                    partPNoFilter = cuP->isLosslessCoded(partP);
-                    partQNoFilter = cuQ->isLosslessCoded(partQ);
+                    partPNoFilter = cuP->getCUTransquantBypass(partP);
+                    partQNoFilter = cuQ->getCUTransquantBypass(partQ);
                 }
 
                 if (d < beta)
@@ -623,8 +623,8 @@
             if (cu->m_slice->m_pps->bTransquantBypassEnabled)
             {
                 // check if each of PUs is lossless coded
-                partPNoFilter = cuP->isLosslessCoded(partP);
-                partQNoFilter = cuQ->isLosslessCoded(partQ);
+                partPNoFilter = cuP->getCUTransquantBypass(partP);
+                partQNoFilter = cuQ->getCUTransquantBypass(partQ);
             }
 
             for (uint32_t chromaIdx = 0; chromaIdx < 2; chromaIdx++)
diff -r 199e8f2e0d54 -r b00d1f46a763 source/common/frame.cpp
--- a/source/common/frame.cpp	Tue Sep 16 17:50:06 2014 +0530
+++ b/source/common/frame.cpp	Wed Sep 17 18:49:03 2014 +0900
@@ -55,9 +55,6 @@
     m_interData = NULL;
 }
 
-Frame::~Frame()
-{}
-
 bool Frame::create(x265_param *param, Window& display, Window& conformance)
 {
     m_conformanceWindow = conformance;
diff -r 199e8f2e0d54 -r b00d1f46a763 source/common/frame.h
--- a/source/common/frame.h	Tue Sep 16 17:50:06 2014 +0530
+++ b/source/common/frame.h	Wed Sep 17 18:49:03 2014 +0900
@@ -87,7 +87,7 @@
     x265_inter_data*  m_interData;  // inter analysis information
 
     Frame();
-    virtual ~Frame();
+    ~Frame() {}
 
     bool        create(x265_param *param, Window& display, Window& conformance);
     bool        allocPicSym(x265_param *param);
diff -r 199e8f2e0d54 -r b00d1f46a763 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Tue Sep 16 17:50:06 2014 +0530
+++ b/source/encoder/analysis.cpp	Wed Sep 17 18:49:03 2014 +0900
@@ -299,35 +299,38 @@
 
 void Analysis::compressCU(TComDataCU* cu)
 {
+    Frame* pic = cu->m_pic;
+    uint32_t cuAddr = cu->getAddr();
+
     if (cu->m_slice->m_pps->bUseDQP)
         m_bEncodeDQP = true;
 
     // initialize CU data
-    m_bestCU[0]->initCU(cu->m_pic, cu->getAddr());
-    m_tempCU[0]->initCU(cu->m_pic, cu->getAddr());
+    m_bestCU[0]->initCU(pic, cuAddr);
+    m_tempCU[0]->initCU(pic, cuAddr);
 
     // analysis of CU
     uint32_t numPartition = cu->getTotalNumPart();
     if (m_bestCU[0]->m_slice->m_sliceType == I_SLICE)
     {
-        if (m_param->analysisMode == X265_ANALYSIS_LOAD && m_bestCU[0]->m_pic->m_intraData)
+        if (m_param->analysisMode == X265_ANALYSIS_LOAD && pic->m_intraData)
         {
             uint32_t zOrder = 0;
             compressSharedIntraCTU(m_bestCU[0], m_tempCU[0], false, cu, cu->m_CULocalData, 
-                &m_bestCU[0]->m_pic->m_intraData->depth[cu->getAddr() * cu->m_numPartitions],
-                &m_bestCU[0]->m_pic->m_intraData->partSizes[cu->getAddr() * cu->m_numPartitions],
-                &m_bestCU[0]->m_pic->m_intraData->modes[cu->getAddr() * cu->m_numPartitions], zOrder);
+                &pic->m_intraData->depth[cuAddr * cu->m_numPartitions],
+                &pic->m_intraData->partSizes[cuAddr * cu->m_numPartitions],
+                &pic->m_intraData->modes[cuAddr * cu->m_numPartitions], zOrder);
         }
         else
         {
             compressIntraCU(m_bestCU[0], m_tempCU[0], false, cu, cu->m_CULocalData);
-            if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_bestCU[0]->m_pic->m_intraData)
+            if (m_param->analysisMode == X265_ANALYSIS_SAVE && pic->m_intraData)
             {
-                memcpy(&m_bestCU[0]->m_pic->m_intraData->depth[cu->getAddr() * cu->m_numPartitions], m_bestCU[0]->getDepth(), sizeof(uint8_t) * cu->getTotalNumPart());
-                memcpy(&m_bestCU[0]->m_pic->m_intraData->modes[cu->getAddr() * cu->m_numPartitions], m_bestCU[0]->getLumaIntraDir(), sizeof(uint8_t) * cu->getTotalNumPart());
-                memcpy(&m_bestCU[0]->m_pic->m_intraData->partSizes[cu->getAddr() * cu->m_numPartitions], m_bestCU[0]->getPartitionSize(), sizeof(char) * cu->getTotalNumPart());
-                m_bestCU[0]->m_pic->m_intraData->cuAddr[cu->getAddr()] = cu->getAddr();
-                m_bestCU[0]->m_pic->m_intraData->poc[cu->getAddr()]    = cu->m_pic->m_POC;
+                memcpy(&pic->m_intraData->depth[cuAddr * cu->m_numPartitions], m_bestCU[0]->getDepth(), sizeof(uint8_t) * cu->getTotalNumPart());
+                memcpy(&pic->m_intraData->modes[cuAddr * cu->m_numPartitions], m_bestCU[0]->getLumaIntraDir(), sizeof(uint8_t) * cu->getTotalNumPart());
+                memcpy(&pic->m_intraData->partSizes[cuAddr * cu->m_numPartitions], m_bestCU[0]->getPartitionSize(), sizeof(char) * cu->getTotalNumPart());
+                pic->m_intraData->cuAddr[cuAddr] = cuAddr;
+                pic->m_intraData->poc[cuAddr]    = cu->m_pic->m_POC;
             }
         }
         if (m_param->bLogCuStats || m_param->rc.bStatWrite)
@@ -415,17 +418,20 @@
         }
     }
 }
+
 void Analysis::compressIntraCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth, TComDataCU* cuPicsym, CU *cu)
 {
     //PPAScopeEvent(CompressIntraCU + depth);
     Frame* pic = outBestCU->m_pic;
+    uint32_t cuAddr = outBestCU->getAddr();
+    uint32_t absPartIdx = outBestCU->getZorderIdxInCU();
 
     if (depth == 0)
         // get original YUV data from picture
-        m_origYuv[depth]->copyFromPicYuv(pic->getPicYuvOrg(), outBestCU->getAddr(), outBestCU->getZorderIdxInCU());
+        m_origYuv[depth]->copyFromPicYuv(pic->getPicYuvOrg(), cuAddr, absPartIdx);
     else
         // copy partition YUV from depth 0 CTU cache
-        m_origYuv[0]->copyPartToYuv(m_origYuv[depth], outBestCU->getZorderIdxInCU());
+        m_origYuv[0]->copyPartToYuv(m_origYuv[depth], absPartIdx);
     Slice* slice = outTempCU->m_slice;
     // We need to split, so don't try these modes.
     int cu_split_flag = !(cu->flags & CU::LEAF);
@@ -447,12 +453,12 @@
             outBestCU->m_totalPsyCost = m_rdCost.calcPsyRdCost(outBestCU->m_totalDistortion, outBestCU->m_totalBits, outBestCU->m_psyEnergy);
         else
             outBestCU->m_totalRDCost  = m_rdCost.calcRdCost(outBestCU->m_totalDistortion, outBestCU->m_totalBits);
+
+        // copy original YUV samples in lossless mode
+        if (outBestCU->isLosslessCoded(0))
+            fillOrigYUVBuffer(outBestCU, m_origYuv[depth]);
     }
 
-    // copy original YUV samples in lossless mode
-    if (outBestCU->isLosslessCoded(0))
-        fillOrigYUVBuffer(outBestCU, m_origYuv[depth]);
-
     // further split
     if (cu_split_flag)
     {
@@ -475,7 +481,7 @@
 
                 compressIntraCU(subBestPartCU, subTempPartCU, nextDepth, cuPicsym, child_cu);
                 outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth); // Keep best part data to current temporary data.
-                copyYuv2Tmp(subBestPartCU->getTotalNumPart() * partUnitIdx, nextDepth);
+                m_bestRecoYuv[nextDepth]->copyToPartYuv(m_tmpRecoYuv[depth], subBestPartCU->getTotalNumPart() * partUnitIdx);
             }
             else
             {
@@ -527,7 +533,7 @@
     outBestCU->copyToPic(depth); // Copy Best data to Picture for next partition prediction.
 
     // Copy Yuv data to picture Yuv
-    copyYuv2Pic(pic, outBestCU->getAddr(), outBestCU->getZorderIdxInCU(), depth);
+    m_bestRecoYuv[depth]->copyToPicYuv(pic->getPicYuvRec(), cuAddr, absPartIdx);
 
 #if CHECKED_BUILD || _DEBUG
     X265_CHECK(outBestCU->getPartitionSize(0) != SIZE_NONE, "no best partition size\n");
@@ -616,7 +622,7 @@
                 if (!subBestPartCU->m_totalRDCost) // if cost is 0, CU is best CU
                     outTempCU->m_totalRDCost = 0;  // set outTempCU cost to 0, so later check will use this CU as best CU
 
-                copyYuv2Tmp(subBestPartCU->getTotalNumPart() * partUnitIdx, nextDepth);
+                m_bestRecoYuv[nextDepth]->copyToPartYuv(m_tmpRecoYuv[depth], subBestPartCU->getTotalNumPart() * partUnitIdx);
             }
             else
             {
@@ -661,7 +667,7 @@
         checkBestMode(outBestCU, outTempCU, depth);
     }
     outBestCU->copyToPic(depth);
-    copyYuv2Pic(pic, outBestCU->getAddr(), outBestCU->getZorderIdxInCU(), depth);
+    m_bestRecoYuv[depth]->copyToPicYuv(pic->getPicYuvRec(), outBestCU->getAddr(), outBestCU->getZorderIdxInCU());
 
 #if CHECKED_BUILD || _DEBUG
     X265_CHECK(outBestCU->getPartitionSize(0) != SIZE_NONE, "no best partition size\n");
@@ -727,11 +733,12 @@
 void Analysis::compressInterCU_rd0_4(TComDataCU*& outBestCU, TComDataCU*& outTempCU, TComDataCU* cu, uint32_t depth, TComDataCU* cuPicsym, CU *cu_t, int bInsidePicture, uint32_t PartitionIndex, uint32_t minDepth)
 {
     Frame* pic = outTempCU->m_pic;
+    uint32_t cuAddr = outTempCU->getAddr();
     uint32_t absPartIdx = outTempCU->getZorderIdxInCU();
 
     if (depth == 0)
         // get original YUV data from picture
-        m_origYuv[depth]->copyFromPicYuv(pic->getPicYuvOrg(), outTempCU->getAddr(), absPartIdx);
+        m_origYuv[depth]->copyFromPicYuv(pic->getPicYuvOrg(), cuAddr, absPartIdx);
     else
         // copy partition YUV from depth 0 CTU cache
         m_origYuv[0]->copyPartToYuv(m_origYuv[depth], absPartIdx);
@@ -750,14 +757,14 @@
 
     if (depth == 0 && m_param->rdLevel == 0)
     {
-        m_origYuv[depth]->copyToPicYuv(pic->getPicYuvRec(), cu->getAddr(), 0);
+        m_origYuv[depth]->copyToPicYuv(pic->getPicYuvRec(), cuAddr, 0);
     }
     // We need to split, so don't try these modes.
 #if TOPSKIP
     if (cu_unsplit_flag && !bInsidePictureParent)
     {
-        TComDataCU* colocated0 = slice->m_numRefIdx[0] > 0 ? slice->m_refPicList[0][0]->getCU(outTempCU->getAddr()) : NULL;
-        TComDataCU* colocated1 = slice->m_numRefIdx[1] > 0 ? slice->m_refPicList[1][0]->getCU(outTempCU->getAddr()) : NULL;
+        TComDataCU* colocated0 = slice->m_numRefIdx[0] > 0 ? slice->m_refPicList[0][0]->getCU(cuAddr) : NULL;
+        TComDataCU* colocated1 = slice->m_numRefIdx[1] > 0 ? slice->m_refPicList[1][0]->getCU(cuAddr) : NULL;
         char currentQP = outTempCU->getQP(0);
         char previousQP = colocated0->getQP(0);
         uint32_t delta = 0, minDepth0 = 4, minDepth1 = 4;
@@ -793,12 +800,12 @@
             /* Initialise all Mode-CUs based on parentCU */
             if (depth == 0)
             {
-                m_interCU_2Nx2N[depth]->initCU(pic, cu->getAddr());
-                m_interCU_Nx2N[depth]->initCU(pic, cu->getAddr());
-                m_interCU_2NxN[depth]->initCU(pic, cu->getAddr());
-                m_intraInInterCU[depth]->initCU(pic, cu->getAddr());
-                m_mergeCU[depth]->initCU(pic, cu->getAddr());
-                m_bestMergeCU[depth]->initCU(pic, cu->getAddr());
+                m_interCU_2Nx2N[depth]->initCU(pic, cuAddr);
+                m_interCU_Nx2N[depth]->initCU(pic, cuAddr);
+                m_interCU_2NxN[depth]->initCU(pic, cuAddr);
+                m_intraInInterCU[depth]->initCU(pic, cuAddr);
+                m_mergeCU[depth]->initCU(pic, cuAddr);
+                m_bestMergeCU[depth]->initCU(pic, cuAddr);
             }
             else
             {
@@ -1021,7 +1028,7 @@
             TComDataCU* aboveLeft = outTempCU->getCUAboveLeft();
             TComDataCU* aboveRight = outTempCU->getCUAboveRight();
             TComDataCU* left = outTempCU->getCULeft();
-            TComDataCU* rootCU = pic->getPicSym()->getCU(outTempCU->getAddr());
+            TComDataCU* rootCU = pic->getPicSym()->getCU(cuAddr);
 
             totalCostCU += rootCU->m_avgCost[depth] * rootCU->m_count[depth];
             totalCountCU += rootCU->m_count[depth];
@@ -1063,7 +1070,7 @@
 
                 /* Copy Yuv data to picture Yuv */
                 if (m_param->rdLevel != 0)
-                    copyYuv2Pic(pic, outBestCU->getAddr(), absPartIdx, depth);
+                    m_bestRecoYuv[depth]->copyToPicYuv(pic->getPicYuvRec(), cuAddr, absPartIdx);
                 return;
             }
         }
@@ -1094,7 +1101,7 @@
                         tempavgCost = m_rdCost.m_psyRd ? subBestPartCU->m_totalPsyCost : subBestPartCU->m_totalRDCost;
                     else
                         tempavgCost = subBestPartCU->m_totalRDCost;
-                    TComDataCU* rootCU = pic->getPicSym()->getCU(outTempCU->getAddr());
+                    TComDataCU* rootCU = pic->getPicSym()->getCU(cuAddr);
                     uint64_t temp = rootCU->m_avgCost[nextDepth] * rootCU->m_count[nextDepth];
                     rootCU->m_count[nextDepth] += 1;
                     rootCU->m_avgCost[nextDepth] = (temp + tempavgCost) / rootCU->m_count[nextDepth];
@@ -1167,7 +1174,7 @@
             if (depth == 0)
             {
                 uint64_t tempavgCost = m_rdCost.m_psyRd ? outBestCU->m_totalPsyCost : outBestCU->m_totalRDCost;
-                TComDataCU* rootCU = pic->getPicSym()->getCU(outTempCU->getAddr());
+                TComDataCU* rootCU = pic->getPicSym()->getCU(cuAddr);
                 uint64_t temp = rootCU->m_avgCost[depth] * rootCU->m_count[depth];
                 rootCU->m_count[depth] += 1;
                 rootCU->m_avgCost[depth] = (temp + tempavgCost) / rootCU->m_count[depth];
@@ -1199,7 +1206,7 @@
     {
         /* Copy Yuv data to picture Yuv */
         if (cu_unsplit_flag)
-            copyYuv2Pic(pic, outBestCU->getAddr(), absPartIdx, depth);
+            m_bestRecoYuv[depth]->copyToPicYuv(pic->getPicYuvRec(), cuAddr, absPartIdx);
     }
 
 #if CHECKED_BUILD || _DEBUG
@@ -1235,13 +1242,15 @@
     //PPAScopeEvent(CompressCU + depth);
 
     Frame* pic = outBestCU->m_pic;
+    uint32_t cuAddr = outBestCU->getAddr();
+    uint32_t absPartIdx = outBestCU->getZorderIdxInCU();
 
     if (depth == 0)
         // get original YUV data from picture
-        m_origYuv[depth]->copyFromPicYuv(pic->getPicYuvOrg(), outBestCU->getAddr(), outBestCU->getZorderIdxInCU());
+        m_origYuv[depth]->copyFromPicYuv(pic->getPicYuvOrg(), cuAddr, absPartIdx);
     else
         // copy partition YUV from depth 0 CTU cache
-        m_origYuv[0]->copyPartToYuv(m_origYuv[depth], outBestCU->getZorderIdxInCU());
+        m_origYuv[0]->copyPartToYuv(m_origYuv[depth], absPartIdx);
 
     // variable for Cbf fast mode PU decision
     bool doNotBlockPu = true;
@@ -1418,12 +1427,12 @@
             outBestCU->m_totalPsyCost = m_rdCost.calcPsyRdCost(outBestCU->m_totalDistortion, outBestCU->m_totalBits, outBestCU->m_psyEnergy);
         else
             outBestCU->m_totalRDCost = m_rdCost.calcRdCost(outBestCU->m_totalDistortion, outBestCU->m_totalBits);
+
+        // copy original YUV samples in lossless mode
+        if (outBestCU->isLosslessCoded(0))
+            fillOrigYUVBuffer(outBestCU, m_origYuv[depth]);
     }
 
-    // copy original YUV samples in lossless mode
-    if (outBestCU->isLosslessCoded(0))
-        fillOrigYUVBuffer(outBestCU, m_origYuv[depth]);
-
     // further split
     if (cu_split_flag && !outBestCU->isSkipped(0))
     {
@@ -1448,7 +1457,7 @@
 
                 compressInterCU_rd5_6(subBestPartCU, subTempPartCU, nextDepth, cuPicsym, child_cu);
                 outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth); // Keep best part data to current temporary data.
-                copyYuv2Tmp(subBestPartCU->getTotalNumPart() * partUnitIdx, nextDepth);
+                m_bestRecoYuv[nextDepth]->copyToPartYuv(m_tmpRecoYuv[depth], subBestPartCU->getTotalNumPart() * partUnitIdx);
             }
             else
             {
@@ -1499,7 +1508,7 @@
     outBestCU->copyToPic(depth); // Copy Best data to Picture for next partition prediction.
 
     // Copy Yuv data to picture Yuv
-    copyYuv2Pic(pic, outBestCU->getAddr(), outBestCU->getZorderIdxInCU(), depth);
+    m_bestRecoYuv[depth]->copyToPicYuv(pic->getPicYuvRec(), cuAddr, absPartIdx);
 
 #if CHECKED_BUILD || _DEBUG
     X265_CHECK(outBestCU->getPartitionSize(0) != SIZE_NONE, "no best partition size\n");
@@ -2051,8 +2060,7 @@
         TComDataCU* subTempPartCU = m_tempCU[nextDepth];
         uint32_t qNumParts = (pic->getNumPartInCU() >> (depth << 1)) >> 2;
         uint32_t xmax = slice->m_sps->picWidthInLumaSamples  - lcu->getCUPelX();
-        uint32_t ymax = slice->m_sps->picHeightInLumaSamples - lcu->getCUPelY();
-        for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++, absPartIdx += qNumParts)
+        uint32_t ymax = slice->m_sps->picHeightInLumaSamples - lcu->getCUPelY();        for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++, absPartIdx += qNumParts)
         {
             if (g_zscanToPelX[absPartIdx] < xmax && g_zscanToPelY[absPartIdx] < ymax)
             {
@@ -2064,6 +2072,8 @@
         return;
     }
 
+    uint32_t cuAddr = cu->getAddr();
+
     m_quant.setQPforQuant(cu);
 
     if (lcu->getPredictionMode(absPartIdx) == MODE_INTER)
@@ -2092,7 +2102,6 @@
             src2 = m_bestPredYuv[0]->getCrAddr(absPartIdx);
             src1 = m_origYuv[0]->getCrAddr(absPartIdx);
             dst = m_tmpResiYuv[depth]->getCrAddr();
-            dststride = m_tmpResiYuv[depth]->m_cwidth;
             primitives.chroma[m_param->internalCsp].sub_ps[sizeIdx](dst, dststride, src1, src2, src1stride, src2stride);
 
             uint32_t tuDepthRange[2];
@@ -2130,9 +2139,8 @@
                 pred = m_bestPredYuv[0]->getCrAddr(absPartIdx);
                 res = m_tmpResiYuv[depth]->getCrAddr();
                 reco = m_bestRecoYuv[depth]->getCrAddr();
-                reco = m_bestRecoYuv[depth]->getCrAddr();
                 primitives.chroma[m_param->internalCsp].add_ps[sizeIdx](reco, dststride, pred, res, src1stride, src2stride);
-                m_bestRecoYuv[depth]->copyToPicYuv(pic->getPicYuvRec(), lcu->getAddr(), absPartIdx);
+                m_bestRecoYuv[depth]->copyToPicYuv(pic->getPicYuvRec(), cuAddr, absPartIdx);
                 return;
             }
         }
@@ -2141,19 +2149,19 @@
         int part = partitionFromLog2Size(log2CUSize);
         TComPicYuv* rec = pic->getPicYuvRec();
         pixel* src = m_bestPredYuv[0]->getLumaAddr(absPartIdx);
-        pixel* dst = rec->getLumaAddr(cu->getAddr(), absPartIdx);
+        pixel* dst = rec->getLumaAddr(cuAddr, absPartIdx);
         uint32_t srcstride = m_bestPredYuv[0]->getStride();
         uint32_t dststride = rec->getStride();
         primitives.luma_copy_pp[part](dst, dststride, src, srcstride);
 
         src = m_bestPredYuv[0]->getCbAddr(absPartIdx);
-        dst = rec->getCbAddr(cu->getAddr(), absPartIdx);
+        dst = rec->getCbAddr(cuAddr, absPartIdx);
         srcstride = m_bestPredYuv[0]->getCStride();
         dststride = rec->getCStride();
         primitives.chroma[m_param->internalCsp].copy_pp[part](dst, dststride, src, srcstride);
 
         src = m_bestPredYuv[0]->getCrAddr(absPartIdx);
-        dst = rec->getCrAddr(cu->getAddr(), absPartIdx);
+        dst = rec->getCrAddr(cuAddr, absPartIdx);
         primitives.chroma[m_param->internalCsp].copy_pp[part](dst, dststride, src, srcstride);
     }
     else
@@ -2161,7 +2169,7 @@
         m_origYuv[0]->copyPartToYuv(m_origYuv[depth], absPartIdx);
         generateCoeffRecon(cu, m_origYuv[depth], m_modePredYuv[5][depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth]);
         checkDQP(cu);
-        m_tmpRecoYuv[depth]->copyToPicYuv(pic->getPicYuvRec(), lcu->getAddr(), absPartIdx);
+        m_tmpRecoYuv[depth]->copyToPicYuv(pic->getPicYuvRec(), cuAddr, absPartIdx);
         cu->copyCodedToPic(depth);
     }
 }
@@ -2240,16 +2248,6 @@
     }
 }
 
-void Analysis::copyYuv2Pic(Frame* outPic, uint32_t cuAddr, uint32_t absPartIdx, uint32_t depth)
-{
-    m_bestRecoYuv[depth]->copyToPicYuv(outPic->getPicYuvRec(), cuAddr, absPartIdx);
-}
-
-void Analysis::copyYuv2Tmp(uint32_t partUnitIdx, uint32_t nextDepth)
-{
-    m_bestRecoYuv[nextDepth]->copyToPartYuv(m_tmpRecoYuv[nextDepth - 1], partUnitIdx);
-}
-
 /* Function for filling original YUV samples of a CU in lossless mode */
 void Analysis::fillOrigYUVBuffer(TComDataCU* cu, TComYuv* fencYuv)
 {
diff -r 199e8f2e0d54 -r b00d1f46a763 source/encoder/analysis.h
--- a/source/encoder/analysis.h	Tue Sep 16 17:50:06 2014 +0530
+++ b/source/encoder/analysis.h	Wed Sep 17 18:49:03 2014 +0900
@@ -129,8 +129,6 @@
     void encodeIntraInInter(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* outResiYuv, TComYuv* outReconYuv);
     void encodeResidue(TComDataCU* lcu, TComDataCU* cu, uint32_t absPartIdx, uint32_t depth);
     void checkDQP(TComDataCU* cu);
-    void copyYuv2Pic(Frame* outPic, uint32_t cuAddr, uint32_t absPartIdx, uint32_t depth);
-    void copyYuv2Tmp(uint32_t partUnitIdx, uint32_t depth);
     void deriveTestModeAMP(TComDataCU* bestCU, PartSize parentSize, bool &bTestAMP_Hor, bool &bTestAMP_Ver,
                            bool &bTestMergeAMP_Hor, bool &bTestMergeAMP_Ver);
     void fillOrigYUVBuffer(TComDataCU* outCU, TComYuv* origYuv);
diff -r 199e8f2e0d54 -r b00d1f46a763 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Tue Sep 16 17:50:06 2014 +0530
+++ b/source/encoder/encoder.cpp	Wed Sep 17 18:49:03 2014 +0900
@@ -75,10 +75,6 @@
     m_param = NULL;
 }
 
-Encoder::~Encoder()
-{
-}
-
 void Encoder::create()
 {
     if (!primitives.sad[0])
diff -r 199e8f2e0d54 -r b00d1f46a763 source/encoder/encoder.h
--- a/source/encoder/encoder.h	Tue Sep 16 17:50:06 2014 +0530
+++ b/source/encoder/encoder.h	Wed Sep 17 18:49:03 2014 +0900
@@ -130,7 +130,7 @@
 
     Encoder();
 
-    virtual ~Encoder();
+    ~Encoder() {}
 
     void create();
     void destroy();
diff -r 199e8f2e0d54 -r b00d1f46a763 source/encoder/entropy.cpp
--- a/source/encoder/entropy.cpp	Tue Sep 16 17:50:06 2014 +0530
+++ b/source/encoder/entropy.cpp	Wed Sep 17 18:49:03 2014 +0900
@@ -1143,11 +1143,6 @@
 }
 
 // SBAC RD
-void  Entropy::load(Entropy& src)
-{
-    this->copyFrom(src);
-}
-
 void  Entropy::loadIntraDirModeLuma(Entropy& src)
 {
     copyState(src);
@@ -1155,11 +1150,6 @@
     ::memcpy(&m_contextState[OFF_ADI_CTX], &src.m_contextState[OFF_ADI_CTX], sizeof(uint8_t) * NUM_ADI_CTX);
 }
 
-void  Entropy::store(Entropy& dest)
-{
-    dest.copyFrom(*this);
-}
-
 void Entropy::copyFrom(Entropy& src)
 {
     copyState(src);
diff -r 199e8f2e0d54 -r b00d1f46a763 source/encoder/entropy.h
--- a/source/encoder/entropy.h	Tue Sep 16 17:50:06 2014 +0530
+++ b/source/encoder/entropy.h	Wed Sep 17 18:49:03 2014 +0900
@@ -117,7 +117,6 @@
     Entropy();
 
     void setBitstream(Bitstream* p)    { m_bitIf = p; }
-    bool isBitCounter() const          { return !m_bitIf; }
 
     uint32_t getNumberOfWrittenBits()
     {
@@ -130,9 +129,10 @@
     void resetEntropy(Slice *slice);
 
     // SBAC RD
-    void load(Entropy& src);
+    void load(Entropy& src)            { copyFrom(src); }
+
     void loadIntraDirModeLuma(Entropy& src);
-    void store(Entropy& dest);
+    void store(Entropy& dest)          { dest.copyFrom(*this); }
     void loadContexts(Entropy& src)    { copyContextsFrom(src); }
     void copyState(Entropy& other);
 
diff -r 199e8f2e0d54 -r b00d1f46a763 source/encoder/motion.h
--- a/source/encoder/motion.h	Tue Sep 16 17:50:06 2014 +0530
+++ b/source/encoder/motion.h	Wed Sep 17 18:49:03 2014 +0900
@@ -65,7 +65,7 @@
 
     MotionEstimate();
 
-    virtual ~MotionEstimate();
+    ~MotionEstimate();
 
     void setSearchMethod(int i) { searchMethod = i; }
 
diff -r 199e8f2e0d54 -r b00d1f46a763 source/encoder/predict.cpp
--- a/source/encoder/predict.cpp	Tue Sep 16 17:50:06 2014 +0530
+++ b/source/encoder/predict.cpp	Wed Sep 17 18:49:03 2014 +0900
@@ -363,7 +363,6 @@
 {
     int refStride = refPic->getCStride();
     int dstStride = dstPic->getCStride();
-
     int hChromaShift = CHROMA_H_SHIFT(m_csp);
     int vChromaShift = CHROMA_V_SHIFT(m_csp);
 
diff -r 199e8f2e0d54 -r b00d1f46a763 source/encoder/predict.h
--- a/source/encoder/predict.h	Tue Sep 16 17:50:06 2014 +0530
+++ b/source/encoder/predict.h	Wed Sep 17 18:49:03 2014 +0900
@@ -83,7 +83,7 @@
     pixel*    m_refLeftFlt;
 
     Predict();
-    virtual ~Predict();
+    ~Predict();
 
     void initTempBuff(int csp);
 
diff -r 199e8f2e0d54 -r b00d1f46a763 source/encoder/search.cpp
--- a/source/encoder/search.cpp	Tue Sep 16 17:50:06 2014 +0530
+++ b/source/encoder/search.cpp	Wed Sep 17 18:49:03 2014 +0900
@@ -717,7 +717,7 @@
 
         if (numSig)
         {
-            m_quant.invtransformNxN(cu->getCUTransquantBypass(absPartIdx), residual, stride, coeff, log2TrSize, TEXT_LUMA, true, useTransformSkip, numSig);
+            m_quant.invtransformNxN(cu->getCUTransquantBypass(0), residual, stride, coeff, log2TrSize, TEXT_LUMA, true, useTransformSkip, numSig);
 
             // Generate Recon
             primitives.luma_add_ps[sizeIdx](recon, stride, pred, residual, stride, stride);
@@ -1168,7 +1168,7 @@
                 if (numSig)
                 {
                     // inverse transform
-                    m_quant.invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), residual, stride, coeff, log2TrSizeC, ttype, true, useTransformSkipC, numSig);
+                    m_quant.invtransformNxN(cu->getCUTransquantBypass(0), residual, stride, coeff, log2TrSizeC, ttype, true, useTransformSkipC, numSig);
 
                     // reconstruction
                     primitives.chroma[X265_CSP_I444].add_ps[sizeIdxC](recon, stride, pred, residual, stride, stride);
@@ -2166,7 +2166,7 @@
         else
             zeroCost = m_rdCost.calcRdCost(zeroDistortion, zeroResiBits);
 
-        if (cu->isLosslessCoded(0))
+        if (bIsLosslessMode)
             zeroCost = cost + 1;
 
         if (zeroCost < cost)
@@ -2340,7 +2340,7 @@
         cu->setCbfSubParts(numSigY ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
 
         if (numSigY)
-            m_quant.invtransformNxN(cu->getCUTransquantBypass(absPartIdx), curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSigY);
+            m_quant.invtransformNxN(cu->getCUTransquantBypass(0), curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSigY);
         else
             primitives.blockfill_s[sizeIdx](curResiY, strideResiY, 0);
 
@@ -2368,12 +2368,12 @@
                 cu->setCbfPartRange(numSigV ? setCbf : 0, TEXT_CHROMA_V, absPartIdxC, tuIterator.absPartIdxStep);
 
                 if (numSigU)
-                    m_quant.invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), curResiU, strideResiC, coeffCurU + subTUOffset, log2TrSizeC, TEXT_CHROMA_U, false, false, numSigU);
+                    m_quant.invtransformNxN(cu->getCUTransquantBypass(0), curResiU, strideResiC, coeffCurU + subTUOffset, log2TrSizeC, TEXT_CHROMA_U, false, false, numSigU);
                 else
                     primitives.blockfill_s[sizeIdxC](curResiU, strideResiC, 0);
 
                 if (numSigV)
-                    m_quant.invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), curResiV, strideResiC, coeffCurV + subTUOffset, log2TrSizeC, TEXT_CHROMA_V, false, false, numSigV);
+                    m_quant.invtransformNxN(cu->getCUTransquantBypass(0), curResiV, strideResiC, coeffCurV + subTUOffset, log2TrSizeC, TEXT_CHROMA_V, false, false, numSigV);
                 else
                     primitives.blockfill_s[sizeIdxC](curResiV, strideResiC, 0);
             }
@@ -2575,7 +2575,7 @@
 
         if (numSigY)
         {
-            m_quant.invtransformNxN(cu->getCUTransquantBypass(absPartIdx), curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSigY); //this is for inter mode only
+            m_quant.invtransformNxN(cu->getCUTransquantBypass(0), curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSigY); //this is for inter mode only
 
             const uint32_t nonZeroDistY = primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, curResiY, strideResiY);
             uint32_t nonZeroPsyEnergyY = 0;
@@ -2592,7 +2592,7 @@
                 nonZeroPsyEnergyY = m_rdCost.psyCost(size, fencYuv->getLumaAddr(absPartIdx), fencYuv->getStride(),
                     cu->m_pic->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder), cu->m_pic->getPicYuvRec()->getStride());
             }
-            if (cu->isLosslessCoded(0))
+            if (cu->getCUTransquantBypass(0))
             {
                 distY = nonZeroDistY;
                 psyEnergyY = nonZeroPsyEnergyY;
@@ -2671,7 +2671,7 @@
 
                 if (numSigU[tuIterator.section])
                 {
-                    m_quant.invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), curResiU, strideResiC, coeffCurU + subTUOffset,
+                    m_quant.invtransformNxN(cu->getCUTransquantBypass(0), curResiU, strideResiC, coeffCurU + subTUOffset,
                                             log2TrSizeC, TEXT_CHROMA_U, false, false, numSigU[tuIterator.section]);
                     uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth,
                                                                  curResiU, strideResiC);
@@ -2692,7 +2692,7 @@
                                                              cu->m_pic->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder),
                                                              cu->m_pic->getPicYuvRec()->getCStride());
                     }
-                    if (cu->isLosslessCoded(0))
+                    if (cu->getCUTransquantBypass(0))
                     {
                         distU = nonZeroDistU;
                         psyEnergyU = nonZeroPsyEnergyU;
@@ -2753,7 +2753,7 @@
 
                 if (numSigV[tuIterator.section])
                 {
-                    m_quant.invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), curResiV, strideResiC, coeffCurV + subTUOffset,
+                    m_quant.invtransformNxN(cu->getCUTransquantBypass(0), curResiV, strideResiC, coeffCurV + subTUOffset,
                                             log2TrSizeC, TEXT_CHROMA_V, false, false, numSigV[tuIterator.section]);
                     uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth,
                                                                  curResiV, strideResiC);
@@ -2774,7 +2774,7 @@
                                                              cu->m_pic->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder),
                                                              cu->m_pic->getPicYuvRec()->getCStride());
                     }
-                    if (cu->isLosslessCoded(0))
+                    if (cu->getCUTransquantBypass(0))
                     {
                         distV = nonZeroDistV;
                         psyEnergyV = nonZeroPsyEnergyV;
@@ -2862,7 +2862,7 @@
                 m_entropyCoder->codeCoeffNxN(cu, tsCoeffY, absPartIdx, log2TrSize, TEXT_LUMA);
                 const uint32_t skipSingleBitsY = m_entropyCoder->getNumberOfWrittenBits();
 
-                m_quant.invtransformNxN(cu->getCUTransquantBypass(absPartIdx), tsResiY, trSize, tsCoeffY, log2TrSize, TEXT_LUMA, false, true, numSigTSkipY);
+                m_quant.invtransformNxN(cu->getCUTransquantBypass(0), tsResiY, trSize, tsCoeffY, log2TrSize, TEXT_LUMA, false, true, numSigTSkipY);
 
                 nonZeroDistY = primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, tsResiY, trSize);
 
@@ -2949,7 +2949,7 @@
                     m_entropyCoder->codeCoeffNxN(cu, tsCoeffU, absPartIdxC, log2TrSizeC, TEXT_CHROMA_U);
                     singleBitsComp[TEXT_CHROMA_U][tuIterator.section] = m_entropyCoder->getNumberOfWrittenBits();
 
-                    m_quant.invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), tsResiU, trSizeC, tsCoeffU,
+                    m_quant.invtransformNxN(cu->getCUTransquantBypass(0), tsResiU, trSizeC, tsCoeffU,
                                             log2TrSizeC, TEXT_CHROMA_U, false, true, numSigTSkipU);
                     uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth,
                                                                  tsResiU, trSizeC);
@@ -2990,7 +2990,7 @@
                     m_entropyCoder->codeCoeffNxN(cu, tsCoeffV, absPartIdxC, log2TrSizeC, TEXT_CHROMA_V);
                     singleBitsComp[TEXT_CHROMA_V][tuIterator.section] = m_entropyCoder->getNumberOfWrittenBits() - singleBitsComp[TEXT_CHROMA_U][tuIterator.section];
 
-                    m_quant.invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), tsResiV, trSizeC, tsCoeffV,
+                    m_quant.invtransformNxN(cu->getCUTransquantBypass(0), tsResiV, trSizeC, tsCoeffV,
                                             log2TrSizeC, TEXT_CHROMA_V, false, true, numSigTSkipV);
                     uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth,
                                                                  tsResiV, trSizeC);