[x265-commits] [x265] asm: adjust x264_ prefixes

Mon Apr 28 19:30:04 CEST 2014

details:   http://hg.videolan.org/x265/rev/e519b32b70d0
branches:  
changeset: 6781:e519b32b70d0
user:      Steve Borho <steve at borho.org>
date:      Sun Apr 27 12:50:01 2014 -0500
description:
asm: adjust x264_ prefixes
Subject: [x265] slicetype: fix incorrect initialization of fenc->rowSatds when wpp is enabled.

details:   http://hg.videolan.org/x265/rev/c8bff937eee0
branches:  
changeset: 6782:c8bff937eee0
user:      Aarthi Thirumalai
date:      Sun Apr 27 15:30:32 2014 +0530
description:
slicetype: fix incorrect initialization of fenc->rowSatds when wpp is enabled.
Subject: [x265] slicetype: rename rowsCompleted to bFrameCompleted

details:   http://hg.videolan.org/x265/rev/563273f5772f
branches:  
changeset: 6783:563273f5772f
user:      Steve Borho <steve at borho.org>
date:      Sun Apr 27 12:55:20 2014 -0500
description:
slicetype: rename rowsCompleted to bFrameCompleted

One would expect rowsCompleted to be a counter, when in fact it is a bool value
signaling completion of the whole frame.
Subject: [x265] slicetype: remove unused auto var

details:   http://hg.videolan.org/x265/rev/36e53135da57
branches:  
changeset: 6784:36e53135da57
user:      Steve Borho <steve at borho.org>
date:      Sun Apr 27 13:21:40 2014 -0500
description:
slicetype: remove unused auto var
Subject: [x265] fix hash mismatch for 422 format with HM 14.0_RExt decoder

details:   http://hg.videolan.org/x265/rev/56b1d4a44798
branches:  
changeset: 6785:56b1d4a44798
user:      Ashok Kumar Mishra<ashok at multicorewareinc.com>
date:      Mon Apr 28 17:48:59 2014 +0530
description:
fix hash mismatch for 422 format with HM 14.0_RExt decoder
Subject: [x265] fix: 4:2:2 rdLevel <= 1

details:   http://hg.videolan.org/x265/rev/f799f8079b87
branches:  
changeset: 6786:f799f8079b87
user:      Satoshi Nakagawa <nakagawa424 at oki.com>
date:      Fri Apr 25 15:57:44 2014 +0900
description:
fix: 4:2:2 rdLevel <= 1
Subject: [x265] fix g_chromaScale to be full length

details:   http://hg.videolan.org/x265/rev/6e233b6777c0
branches:  
changeset: 6787:6e233b6777c0
user:      Aarthi Thirumalai
date:      Mon Apr 28 11:44:06 2014 -0500
description:
fix g_chromaScale to be full length
Subject: [x265] remove list data from lowres costs prior to use

details:   http://hg.videolan.org/x265/rev/84d31cb2aeab
branches:  
changeset: 6788:84d31cb2aeab
user:      Aarthi Thirumalai
date:      Mon Apr 28 18:54:09 2014 +0530
description:
remove list data from lowres costs prior to use

diffstat:

 source/Lib/TLibCommon/TComLoopFilter.cpp |    3 +-
 source/Lib/TLibCommon/TComRom.cpp        |    8 +-
 source/Lib/TLibCommon/TComRom.h          |    2 +-
 source/Lib/TLibEncoder/TEncSearch.cpp    |  150 +++++++++++++++---------------
 source/common/x86/mc-a.asm               |    4 +-
 source/encoder/compress.cpp              |    6 +-
 source/encoder/frameencoder.cpp          |    2 +-
 source/encoder/slicetype.cpp             |   25 ++--
 source/encoder/slicetype.h               |    5 +-
 source/test/checkasm-a.asm               |    6 +-
 source/test/testharness.h                |    2 +-
 11 files changed, 111 insertions(+), 102 deletions(-)

diffs (truncated from 489 to 300 lines):

diff -r 7baf8b8ecfdc -r 84d31cb2aeab source/Lib/TLibCommon/TComLoopFilter.cpp

--- a/source/Lib/TLibCommon/TComLoopFilter.cpp	Fri Apr 25 11:01:12 2014 +0800
+++ b/source/Lib/TLibCommon/TComLoopFilter.cpp	Mon Apr 28 18:54:09 2014 +0530
@@ -224,11 +224,12 @@ void TComLoopFilter::xDeblockCU(TComData
     uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK / pelsInPart ? DEBLOCK_SMALLEST_BLOCK / pelsInPart : 1;
 
     uint32_t sizeInPU = pic->getNumPartInCUSize() >> (depth);
+    uint32_t shiftFactor = (edge == EDGE_VER) ? cu->getHorzChromaShift() : cu->getVertChromaShift();
     const bool bAlwaysDoChroma = (cu->getChromaFormat() == CHROMA_444);
     for (uint32_t e = 0; e < sizeInPU; e += partIdxIncr)
     {
         xEdgeFilterLuma(cu, absZOrderIdx, depth, dir, e);
-        if (bAlwaysDoChroma || (pelsInPart > DEBLOCK_SMALLEST_BLOCK) || (e % ((DEBLOCK_SMALLEST_BLOCK << 1) / pelsInPart)) == 0)
+        if (bAlwaysDoChroma || (pelsInPart > DEBLOCK_SMALLEST_BLOCK) || (e % ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) / pelsInPart)) == 0)
         {
             xEdgeFilterChroma(cu, absZOrderIdx, depth, dir, e);
         }
diff -r 7baf8b8ecfdc -r 84d31cb2aeab source/Lib/TLibCommon/TComRom.cpp
--- a/source/Lib/TLibCommon/TComRom.cpp	Fri Apr 25 11:01:12 2014 +0800
+++ b/source/Lib/TLibCommon/TComRom.cpp	Mon Apr 28 18:54:09 2014 +0530
@@ -420,10 +420,10 @@ const int16_t g_t32[32][32] =
 };
 const uint8_t g_chromaScale[NUM_CHROMA_FORMAT][chromaQPMappingTableSize] =
 {
-    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
-    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51 },
-    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 51, 51, 51, 51, 51, 51 },
-    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 51, 51, 51, 51, 51, 51 }
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ,0,0,0,0,0,0,0,0,0,0,0,0},
+    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51},
+    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51 },
+    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51 }
 };
 
 const uint8_t g_chroma422IntraAngleMappingTable[36] =
diff -r 7baf8b8ecfdc -r 84d31cb2aeab source/Lib/TLibCommon/TComRom.h
--- a/source/Lib/TLibCommon/TComRom.h	Fri Apr 25 11:01:12 2014 +0800
+++ b/source/Lib/TLibCommon/TComRom.h	Mon Apr 28 18:54:09 2014 +0530
@@ -67,7 +67,7 @@ void initROM();
 void destroyROM();
 
 // ====================================================================================================================
-static const int chromaQPMappingTableSize = 58;
+static const int chromaQPMappingTableSize = 70;
 
 extern const uint8_t g_chromaScale[NUM_CHROMA_FORMAT][chromaQPMappingTableSize];
 extern const uint8_t g_chroma422IntraAngleMappingTable[36];
diff -r 7baf8b8ecfdc -r 84d31cb2aeab source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp	Fri Apr 25 11:01:12 2014 +0800
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp	Mon Apr 28 18:54:09 2014 +0530
@@ -2812,6 +2812,8 @@ void TEncSearch::residualTransformQuantI
     assert(cu->getDepth(0) == cu->getDepth(absPartIdx));
     const uint32_t trMode = depth - cu->getDepth(0);
     const uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize() >> depth] + 2;
+    uint32_t  trSizeCLog2     = trSizeLog2 - m_hChromaShift;
+    const uint32_t setCbf     = 1 << trMode;
     int chFmt                 = cu->getChromaFormat();
 
     bool bSplitFlag = ((cu->getSlice()->getSPS()->getQuadtreeTUMaxDepthInter() == 1) && cu->getPredictionMode(absPartIdx) == MODE_INTER && (cu->getPartitionSize(absPartIdx) != SIZE_2Nx2N));
@@ -2825,16 +2827,17 @@ void TEncSearch::residualTransformQuantI
 
     bool bCodeChroma = true;
     uint32_t trModeC = trMode;
-    uint32_t trSizeCLog2 = trSizeLog2 - 1;
-    if (trSizeLog2 == 2)
+    if ((trSizeLog2 == 2) && !(chFmt == CHROMA_444))
     {
         trSizeCLog2++;
         trModeC--;
-        uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trModeC) << 1);
+        uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);
         bCodeChroma = ((absPartIdx % qpdiv) == 0);
     }
 
-    const uint32_t setCbf = 1 << trMode;
+    const bool splitIntoSubTUs = (chFmt == CHROMA_422);
+    uint32_t absPartIdxStep = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) +  trModeC) << 1);
+
     // code full block
     uint32_t absSumY = 0, absSumU = 0, absSumV = 0;
     int lastPosY = -1, lastPosU = -1, lastPosV = -1;
@@ -2847,18 +2850,12 @@ void TEncSearch::residualTransformQuantI
         coeff_t *coeffCurV = cu->getCoeffCr() + (numCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift + m_vChromaShift));
 
         int trWidth = 0, trHeight = 0, trWidthC = 0, trHeightC = 0;
-        uint32_t absTUPartIdxC = absPartIdx;
 
         trWidth  = trHeight  = 1 << trSizeLog2;
         trWidthC = trHeightC = 1 << trSizeCLog2;
         cu->setTrIdxSubParts(depth - cu->getDepth(0), absPartIdx, depth);
 
         cu->setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, depth);
-        if (bCodeChroma)
-        {
-            cu->setTransformSkipSubParts(0, TEXT_CHROMA_U, absPartIdx, cu->getDepth(0) + trModeC);
-            cu->setTransformSkipSubParts(0, TEXT_CHROMA_V, absPartIdx, cu->getDepth(0) + trModeC);
-        }
 
         m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
         m_trQuant->selectLambda(TEXT_LUMA);
@@ -2868,25 +2865,6 @@ void TEncSearch::residualTransformQuantI
 
         cu->setCbfSubParts(absSumY ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
 
-        if (bCodeChroma)
-        {
-            int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
-            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
-
-            m_trQuant->selectLambda(TEXT_CHROMA);
-
-            absSumU = m_trQuant->transformNxN(cu, resiYuv->getCbAddr(absTUPartIdxC), resiYuv->m_cwidth, coeffCurU,
-                                              trWidthC, TEXT_CHROMA_U, absPartIdx, &lastPosU, false, curuseRDOQ);
-
-            curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
-            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
-            absSumV = m_trQuant->transformNxN(cu, resiYuv->getCrAddr(absTUPartIdxC), resiYuv->m_cwidth, coeffCurV,
-                                              trWidthC, TEXT_CHROMA_V, absPartIdx, &lastPosV, false, curuseRDOQ);
-
-            cu->setCbfSubParts(absSumU ? setCbf : 0, TEXT_CHROMA_U, absPartIdx, cu->getDepth(0) + trModeC);
-            cu->setCbfSubParts(absSumV ? setCbf : 0, TEXT_CHROMA_V, absPartIdx, cu->getDepth(0) + trModeC);
-        }
-
         if (absSumY)
         {
             int16_t *curResiY = resiYuv->getLumaAddr(absTUPartIdx);
@@ -2903,49 +2881,84 @@ void TEncSearch::residualTransformQuantI
             assert(trWidth == trHeight);
             primitives.blockfill_s[(int)g_convertToBit[trWidth]](ptr, resiYuv->m_width, 0);
         }
+        cu->setCbfSubParts(absSumY ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
 
         if (bCodeChroma)
         {
-            if (absSumU)
+            TComTURecurse tuIterator;
+            initSection(&tuIterator, splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, absPartIdxStep, absPartIdx);
+
+            uint32_t widthC  = trWidthC;
+            uint32_t heightC = trWidthC;
+
+            do
             {
-                int16_t *pcResiCurrU = resiYuv->getCbAddr(absTUPartIdxC);
+                uint32_t absTUPartIdxC = tuIterator.m_absPartIdxTURelCU;
+                uint32_t subTUBufferOffset    = widthC * heightC * tuIterator.m_section;
+
+                cu->setTransformSkipPartRange(0, TEXT_CHROMA_U, absTUPartIdxC, tuIterator.m_absPartIdxStep);
+                cu->setTransformSkipPartRange(0, TEXT_CHROMA_V, absTUPartIdxC, tuIterator.m_absPartIdxStep);
 
                 int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
                 m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
 
-                int scalingListType = 3 + TEXT_CHROMA_U;
-                assert(scalingListType < 6);
-                m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, pcResiCurrU, resiYuv->m_cwidth, coeffCurU, trWidthC, scalingListType, false, lastPosU);
-            }
-            else
+                m_trQuant->selectLambda(TEXT_CHROMA);
+
+                absSumU = m_trQuant->transformNxN(cu, resiYuv->getCbAddr(absTUPartIdxC), resiYuv->m_cwidth, coeffCurU + subTUBufferOffset,
+                                                  trWidthC, TEXT_CHROMA_U, absTUPartIdxC, &lastPosU, false, curuseRDOQ);
+
+                curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
+                m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
+                absSumV = m_trQuant->transformNxN(cu, resiYuv->getCrAddr(absTUPartIdxC), resiYuv->m_cwidth, coeffCurV + subTUBufferOffset,
+                                                  trWidthC, TEXT_CHROMA_V, absTUPartIdxC, &lastPosV, false, curuseRDOQ);
+
+                cu->setCbfPartRange(absSumU ? setCbf : 0, TEXT_CHROMA_U, absTUPartIdxC, tuIterator.m_absPartIdxStep);
+                cu->setCbfPartRange(absSumV ? setCbf : 0, TEXT_CHROMA_V, absTUPartIdxC, tuIterator.m_absPartIdxStep);
+
+                if (absSumU)
+                {
+                    int16_t *pcResiCurrU = resiYuv->getCbAddr(absTUPartIdxC);
+
+                    curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
+                    m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
+
+                    int scalingListType = 3 + TEXT_CHROMA_U;
+                    assert(scalingListType < 6);
+                    m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absTUPartIdxC), REG_DCT, pcResiCurrU, resiYuv->m_cwidth, coeffCurU + subTUBufferOffset, trWidthC, scalingListType, false, lastPosU);
+                }
+                else
+                {
+                    int16_t *ptr = resiYuv->getCbAddr(absTUPartIdxC);
+                    assert(trWidthC == trHeightC);
+                    primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr, resiYuv->m_cwidth, 0);
+                }
+                if (absSumV)
+                {
+                    int16_t *curResiV = resiYuv->getCrAddr(absTUPartIdxC);
+                    curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
+                    m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
+
+                    int scalingListType = 3 + TEXT_CHROMA_V;
+                    assert(scalingListType < 6);
+                    m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absTUPartIdxC), REG_DCT, curResiV, resiYuv->m_cwidth, coeffCurV + subTUBufferOffset, trWidthC, scalingListType, false, lastPosV);
+                }
+                else
+                {
+                    int16_t *ptr =  resiYuv->getCrAddr(absTUPartIdxC);
+                    assert(trWidthC == trHeightC);
+                    primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr, resiYuv->m_cwidth, 0);
+                }
+                cu->setCbfPartRange(absSumU ? setCbf : 0, TEXT_CHROMA_U, absTUPartIdxC, tuIterator.m_absPartIdxStep);
+                cu->setCbfPartRange(absSumV ? setCbf : 0, TEXT_CHROMA_V, absTUPartIdxC, tuIterator.m_absPartIdxStep);
+            } while (isNextSection(&tuIterator));
+
+            if (splitIntoSubTUs)
             {
-                int16_t *ptr = resiYuv->getCbAddr(absTUPartIdxC);
-                assert(trWidthC == trHeightC);
-                primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr, resiYuv->m_cwidth, 0);
-            }
-            if (absSumV)
-            {
-                int16_t *curResiV = resiYuv->getCrAddr(absTUPartIdxC);
-                int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
-                m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
-
-                int scalingListType = 3 + TEXT_CHROMA_V;
-                assert(scalingListType < 6);
-                m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiV, resiYuv->m_cwidth, coeffCurV, trWidthC, scalingListType, false, lastPosV);
-            }
-            else
-            {
-                int16_t *ptr =  resiYuv->getCrAddr(absTUPartIdxC);
-                assert(trWidthC == trHeightC);
-                primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr, resiYuv->m_cwidth, 0);
+                offsetSubTUCBFs(cu, TEXT_CHROMA_U, trMode, absPartIdx);
+                offsetSubTUCBFs(cu, TEXT_CHROMA_V, trMode, absPartIdx);
             }
         }
-        cu->setCbfSubParts(absSumY ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
-        if (bCodeChroma)
-        {
-            cu->setCbfSubParts(absSumU ? setCbf : 0, TEXT_CHROMA_U, absPartIdx, cu->getDepth(0) + trModeC);
-            cu->setCbfSubParts(absSumV ? setCbf : 0, TEXT_CHROMA_V, absPartIdx, cu->getDepth(0) + trModeC);
-        }
+        return;
     }
 
     // code sub-blocks
@@ -2976,15 +2989,6 @@ void TEncSearch::residualTransformQuantI
         }
         return;
     }
-
-    cu->setTrIdxSubParts(trMode, absPartIdx, depth);
-    cu->setCbfSubParts(absSumY ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
-
-    if (bCodeChroma)
-    {
-        cu->setCbfSubParts(absSumU ? setCbf : 0, TEXT_CHROMA_U, absPartIdx, cu->getDepth(0) + trModeC);
-        cu->setCbfSubParts(absSumV ? setCbf : 0, TEXT_CHROMA_V, absPartIdx, cu->getDepth(0) + trModeC);
-    }
 }
 
 void TEncSearch::xEstimateResidualQT(TComDataCU*    cu,
@@ -3001,7 +3005,7 @@ void TEncSearch::xEstimateResidualQT(TCo
     assert(cu->getDepth(0) == cu->getDepth(absPartIdx));
     const uint32_t trMode = depth - cu->getDepth(0);
     const uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize() >> depth] + 2;
-    uint32_t  trSizeCLog2     = g_convertToBit[(cu->getSlice()->getSPS()->getMaxCUSize() >> m_hChromaShift) >> depth] + 2;
+    uint32_t  trSizeCLog2     = trSizeLog2 - m_hChromaShift;
     const uint32_t subTUDepth = trMode + 1;
     const uint32_t setCbf     = 1 << trMode;
     int chFmt                 = cu->getChromaFormat();
@@ -3777,7 +3781,7 @@ void TEncSearch::xEncodeResidualQT(TComD
     const uint32_t trMode      = cu->getTransformIdx(absPartIdx);
     const bool     bSubdiv     = curTrMode != trMode;
     const uint32_t trSizeLog2  = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize() >> depth] + 2;
-    uint32_t       trSizeCLog2 = g_convertToBit[(cu->getSlice()->getSPS()->getMaxCUSize() >> m_hChromaShift) >> depth] + 2;
+    uint32_t       trSizeCLog2 = trSizeLog2 - m_hChromaShift;
     int            chFmt       = cu->getChromaFormat();
 
     if (bSubdivAndCbf && trSizeLog2 <= cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() && trSizeLog2 > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx))
@@ -3887,7 +3891,7 @@ void TEncSearch::xSetResidualQTData(TCom
     if (curTrMode == trMode)
     {
         const uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize() >> depth] + 2;
-        uint32_t  trSizeCLog2     = g_convertToBit[(cu->getSlice()->getSPS()->getMaxCUSize() >> cu->getHorzChromaShift()) >> depth] + 2;
+        uint32_t  trSizeCLog2     = trSizeLog2 - m_hChromaShift;
         const uint32_t qtlayer    = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
 
         bool bCodeChroma = true;
diff -r 7baf8b8ecfdc -r 84d31cb2aeab source/common/x86/mc-a.asm
--- a/source/common/x86/mc-a.asm	Fri Apr 25 11:01:12 2014 +0800
+++ b/source/common/x86/mc-a.asm	Mon Apr 28 18:54:09 2014 +0530
@@ -3320,10 +3320,10 @@ cglobal pixel_avg2_w16_cache64_ssse3
     mov   eax, r2m
     and   eax, 0x3f
     cmp   eax, 0x30
-    jb x264_pixel_avg2_w16_sse2
+    jb x265_pixel_avg2_w16_sse2
     or    eax, r4m
     and   eax, 7
-    jz x264_pixel_avg2_w16_sse2
+    jz x265_pixel_avg2_w16_sse2
 %endif