<div dir="ltr">Thanks - code maintainability is still an issue though. For better performance, we can store the pps->bConstrainedIntraPred as a member variable Predict::bConstrainedIntra, so it stays in the cache.<br></div><div class="gmail_extra"><br><div class="gmail_quote">On Tue, Dec 23, 2014 at 1:23 PM, Satoshi Nakagawa <span dir="ltr"><<a href="mailto:nakagawa424@oki.com" target="_blank">nakagawa424@oki.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div link="blue" vlink="purple" lang="JA"><div><p class="MsoNormal"><span style="font-size:10.0pt;font-family:"Arial","sans-serif";color:#1f497d" lang="EN-US">CIP is a picture-level flag, and is typically OFF.<u></u><u></u></span></p><p class="MsoNormal"><span style="font-size:10.0pt;font-family:"Arial","sans-serif";color:#1f497d" lang="EN-US">The separate functions are there to simplify the non-CIP path.<u></u><u></u></span></p><p class="MsoNormal"><span style="font-size:10.0pt;font-family:"Arial","sans-serif";color:#1f497d" lang="EN-US">The code size is small, and the *CIP() functions will not be loaded into the cache.<u></u><u></u></span></p><p class="MsoNormal"><span style="font-size:10.0pt;font-family:"Arial","sans-serif";color:#1f497d" lang="EN-US"><u></u> <u></u></span></p><p class="MsoNormal"><span style="font-size:10.0pt;font-family:"Arial","sans-serif";color:#1f497d" lang="EN-US"><u></u> <u></u></span></p><div style="border:none;border-left:solid blue 1.5pt;padding:0mm 0mm 0mm 4.0pt"><div><div style="border:none;border-top:solid #b5c4df 1.0pt;padding:3.0pt 0mm 0mm 0mm"><p class="MsoNormal"><b><span style="font-size:10.0pt;font-family:"Tahoma","sans-serif"" lang="EN-US">From:</span></b><span style="font-size:10.0pt;font-family:"Tahoma","sans-serif"" lang="EN-US"> x265-devel [mailto:<a href="mailto:x265-devel-bounces@videolan.org" target="_blank">x265-devel-bounces@videolan.org</a>] <b>On Behalf Of </b>Ashok Kumar Mishra<br><b>Sent:</b> Tuesday, December 23, 
2014 4:23 PM<br><b>To:</b> Development for x265<br><b>Subject:</b> Re: [x265] refine intra neighbors<u></u><u></u></span></p></div></div><div><div class="h5"><p class="MsoNormal"><span lang="EN-US"><u></u> <u></u></span></p><div><p class="MsoNormal"><span lang="EN-US">Hi,<u></u><u></u></span></p><div><p class="MsoNormal"><span lang="EN-US"><u></u> <u></u></span></p><div><p class="MsoNormal"><span lang="EN-US">We removed the separate functions for constrained intra prediction (CIP) some time back, because they were increasing the code size only to save a few conditional checks.<u></u><u></u></span></p></div><div><p class="MsoNormal"><span lang="EN-US">Can you please send a separate patch for the other changes not related to CIP?<u></u><u></u></span></p></div><div><p class="MsoNormal"><span lang="EN-US"><u></u> <u></u></span></p></div><div><p class="MsoNormal"><span lang="EN-US">Thanks<u></u><u></u></span></p></div><div><p class="MsoNormal"><span lang="EN-US">Ashok.<u></u><u></u></span></p></div></div></div><div><p class="MsoNormal"><span lang="EN-US"><u></u> <u></u></span></p><div><p class="MsoNormal"><span lang="EN-US">On Tue, Dec 23, 2014 at 11:23 AM, Satoshi Nakagawa <<a href="mailto:nakagawa424@oki.com" target="_blank">nakagawa424@oki.com</a>> wrote:<u></u><u></u></span></p><p class="MsoNormal"><span lang="EN-US"># HG changeset patch<br># User Satoshi Nakagawa <<a href="mailto:nakagawa424@oki.com" target="_blank">nakagawa424@oki.com</a>><br># Date 1419313799 -32400<br>#      Tue Dec 23 14:49:59 2014 +0900<br># Node ID 6b59452a17d75c42c1750d47e2318c8da80c39fb<br># Parent  8d2f418829c894c25da79daa861f16c61e5060d7<br>refine intra neighbors<br><br>diff -r 8d2f418829c8 -r 6b59452a17d7 source/common/common.h<br>--- a/source/common/common.h    Sat Dec 20 21:27:14 2014 +0900<br>+++ b/source/common/common.h    Tue Dec 23 14:49:59 2014 +0900<br>@@ -163,6 +163,9 @@<br> template<typename T><br> inline T x265_max(T a, T b) { return a > b ? 
a : b; }<br><br>+template<typename T><br>+inline T x265_clip3(T minVal, T maxVal, T a) { return x265_min(x265_max(minVal, a), maxVal); }<br>+<br> typedef int16_t  coeff_t;      // transform coefficient<br><br> #define X265_MIN(a, b) ((a) < (b) ? (a) : (b))<br>diff -r 8d2f418829c8 -r 6b59452a17d7 source/common/cudata.cpp<br>--- a/source/common/cudata.cpp  Sat Dec 20 21:27:14 2014 +0900<br>+++ b/source/common/cudata.cpp  Tue Dec 23 14:49:59 2014 +0900<br>@@ -608,7 +608,7 @@<br>         {<br>             if (curPartUnitIdx > g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + 1])<br>             {<br>-                uint32_t absZorderCUIdx  = g_zscanToRaster[m_absIdxInCTU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1;<br>+                uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1;<br>                 arPartUnitIdx = g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + 1];<br>                 if (isEqualRowOrCol(absPartIdxRT, absZorderCUIdx, s_numPartInCUSize))<br>                     return m_encData->getPicCTU(m_cuAddr);<br>@@ -689,8 +689,6 @@<br>             return NULL;<br>         }<br>         blPartUnitIdx = g_rasterToZscan[absPartIdxLB + (1 + partUnitOffset) * s_numPartInCUSize - 1];<br>-        if (!m_cuLeft || !m_cuLeft->m_slice)<br>-            return NULL;<br>         return m_cuLeft;<br>     }<br><br>@@ -723,8 +721,6 @@<br>             return NULL;<br>         }<br>         arPartUnitIdx = g_rasterToZscan[absPartIdxRT + NUM_CU_PARTITIONS - s_numPartInCUSize + partUnitOffset];<br>-        if (!m_cuAbove || !m_cuAbove->m_slice)<br>-            return NULL;<br>         return m_cuAbove;<br>     }<br><br>@@ -732,8 +728,6 @@<br>         return NULL;<br><br>     arPartUnitIdx = g_rasterToZscan[NUM_CU_PARTITIONS - s_numPartInCUSize + partUnitOffset - 1];<br>-    if ((m_cuAboveRight == NULL || m_cuAboveRight->m_slice == NULL || (m_cuAboveRight->m_cuAddr) > m_cuAddr))<br>-        return NULL;<br>     
return m_cuAboveRight;<br> }<br><br>@@ -904,7 +898,7 @@<br>     tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize;<br>     tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize;<br><br>-    tuDepthRange[0] = X265_MAX(tuDepthRange[0], X265_MIN(log2CUSize - (m_slice->m_sps->quadtreeTUMaxDepthIntra - 1 + splitFlag), tuDepthRange[1]));<br>+    tuDepthRange[0] = x265_clip3(tuDepthRange[0], tuDepthRange[1], log2CUSize - (m_slice->m_sps->quadtreeTUMaxDepthIntra - 1 + splitFlag));<br> }<br><br> void CUData::getInterTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const<br>@@ -916,7 +910,7 @@<br>     tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize;<br>     tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize;<br><br>-    tuDepthRange[0] = X265_MAX(tuDepthRange[0], X265_MIN(log2CUSize - (quadtreeTUMaxDepth - 1 + splitFlag), tuDepthRange[1]));<br>+    tuDepthRange[0] = x265_clip3(tuDepthRange[0], tuDepthRange[1], log2CUSize - (quadtreeTUMaxDepth - 1 + splitFlag));<br> }<br><br> uint32_t CUData::getCtxSkipFlag(uint32_t absPartIdx) const<br>@@ -1363,14 +1357,6 @@<br>     return outPartIdxRB;<br> }<br><br>-void CUData::deriveLeftRightTopIdxAdi(uint32_t& outPartIdxLT, uint32_t& outPartIdxRT, uint32_t partOffset, uint32_t partDepth) const<br>-{<br>-    uint32_t numPartInWidth = 1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - partDepth);<br>-<br>-    outPartIdxLT = m_absIdxInCTU + partOffset;<br>-    outPartIdxRT = g_rasterToZscan[g_zscanToRaster[outPartIdxLT] + numPartInWidth - 1];<br>-}<br>-<br> bool CUData::hasEqualMotion(uint32_t absPartIdx, const CUData& candCU, uint32_t candAbsPartIdx) const<br> {<br>     if (m_interDir[absPartIdx] != candCU.m_interDir[candAbsPartIdx])<br>diff -r 8d2f418829c8 -r 6b59452a17d7 source/common/cudata.h<br>--- a/source/common/cudata.h    Sat Dec 20 21:27:14 2014 +0900<br>+++ b/source/common/cudata.h    Tue Dec 23 14:49:59 2014 +0900<br>@@ -212,7 +212,6 @@<br><br>     void     getAllowedChromaDir(uint32_t absPartIdx, 
uint32_t* modeList) const;<br>     int      getIntraDirLumaPredictor(uint32_t absPartIdx, uint32_t* intraDirPred) const;<br>-    void     deriveLeftRightTopIdxAdi(uint32_t& partIdxLT, uint32_t& partIdxRT, uint32_t partOffset, uint32_t partDepth) const;<br><br>     uint32_t getSCUAddr() const                  { return (m_cuAddr << g_maxFullDepth * 2) + m_absIdxInCTU; }<br>     uint32_t getCtxSplitFlag(uint32_t absPartIdx, uint32_t depth) const;<br>diff -r 8d2f418829c8 -r 6b59452a17d7 source/common/predict.cpp<br>--- a/source/common/predict.cpp Sat Dec 20 21:27:14 2014 +0900<br>+++ b/source/common/predict.cpp Tue Dec 23 14:49:59 2014 +0900<br>@@ -654,11 +654,8 @@<br>     }<br> }<br><br>-void Predict::initAdiPattern(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t partDepth, int dirMode)<br>+void Predict::initAdiPattern(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, const IntraNeighbors& intraNeighbors, int dirMode)<br> {<br>-    IntraNeighbors intraNeighbors;<br>-    initIntraNeighbors(cu, absPartIdx, partDepth, true, &intraNeighbors);<br>-<br>     pixel* adiBuf      = m_predBuf;<br>     pixel* refAbove    = m_refAbove;<br>     pixel* refLeft     = m_refLeft;<br>@@ -700,12 +697,12 @@<br>             int refTL = refAbove[0];<br>             int refTR = refAbove[trSize2];<br>             bStrongSmoothing = (abs(refBL + refTL - 2 * refLeft[trSize]) < threshold &&<br>-                abs(refTL + refTR - 2 * refAbove[trSize]) < threshold);<br>+                                abs(refTL + refTR - 2 * refAbove[trSize]) < threshold);<br><br>             if (bStrongSmoothing)<br>             {<br>                 // bilinear interpolation<br>-                const int shift = 5 + 1; // intraNeighbors.log2TrSize + 1;<br>+                const int shift = 5 + 1; // log2TrSize + 1;<br>                 int init = (refTL << shift) + tuSize;<br>                 int delta;<br><br>@@ -738,10 +735,8 @@<br>     }<br> }<br><br>-void 
Predict::initAdiPatternChroma(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t partDepth, uint32_t chromaId)<br>+void Predict::initAdiPatternChroma(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, const IntraNeighbors& intraNeighbors, uint32_t chromaId)<br> {<br>-    IntraNeighbors intraNeighbors;<br>-    initIntraNeighbors(cu, absPartIdx, partDepth, false, &intraNeighbors);<br>     uint32_t tuSize = intraNeighbors.tuSize;<br><br>     const pixel* adiOrigin = cu.m_encData->m_reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.encodeIdx + absPartIdx);<br>@@ -751,9 +746,9 @@<br>     fillReferenceSamples(adiOrigin, picStride, adiRef, intraNeighbors);<br> }<br><br>-void Predict::initIntraNeighbors(const CUData& cu, uint32_t absPartIdx, uint32_t partDepth, bool isLuma, IntraNeighbors *intraNeighbors)<br>+void Predict::initIntraNeighbors(const CUData& cu, uint32_t absPartIdx, uint32_t tuDepth, bool isLuma, IntraNeighbors *intraNeighbors)<br> {<br>-    uint32_t log2TrSize = cu.m_log2CUSize[0] - partDepth;<br>+    uint32_t log2TrSize = cu.m_log2CUSize[0] - tuDepth;<br>     int log2UnitWidth = LOG2_UNIT_SIZE;<br>     int log2UnitHeight = LOG2_UNIT_SIZE;<br><br>@@ -764,12 +759,12 @@<br>         log2UnitHeight -= cu.m_vChromaShift;<br>     }<br><br>-    int   numIntraNeighbor = 0;<br>+    int numIntraNeighbor;<br>     bool* bNeighborFlags = intraNeighbors->bNeighborFlags;<br><br>-    uint32_t partIdxLT, partIdxRT, partIdxLB;<br>-<br>-    cu.deriveLeftRightTopIdxAdi(partIdxLT, partIdxRT, absPartIdx, partDepth);<br>+    uint32_t numPartInWidth = 1 << (cu.m_log2CUSize[0] - LOG2_UNIT_SIZE - tuDepth);<br>+    uint32_t partIdxLT = cu.m_absIdxInCTU + absPartIdx;<br>+    uint32_t partIdxRT = g_rasterToZscan[g_zscanToRaster[partIdxLT] + numPartInWidth - 1];<br><br>     uint32_t tuSize = 1 << log2TrSize;<br>     int  tuWidthInUnits = tuSize >> log2UnitWidth;<br>@@ -777,14 +772,26 @@<br>     int  aboveUnits = tuWidthInUnits << 1;<br>     int  leftUnits 
= tuHeightInUnits << 1;<br>     int  partIdxStride = cu.m_slice->m_sps->numPartInCUSize;<br>-    partIdxLB = g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((tuHeightInUnits - 1) * partIdxStride)];<br>+    uint32_t partIdxLB = g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((tuHeightInUnits - 1) * partIdxStride)];<br><br>-    bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu, partIdxLT);<br>-    numIntraNeighbor += (int)(bNeighborFlags[leftUnits]);<br>-    numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1));<br>-    numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + tuWidthInUnits));<br>-    numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1));<br>-    numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1 - tuHeightInUnits));<br>+    if (cu.m_slice->isIntra() || !cu.m_slice->m_pps->bConstrainedIntraPred)<br>+    {<br>+        bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu, partIdxLT);<br>+        numIntraNeighbor  = (int)(bNeighborFlags[leftUnits]);<br>+        numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT, bNeighborFlags + leftUnits + 1);<br>+        numIntraNeighbor += isAboveRightAvailable(cu, partIdxRT, bNeighborFlags + leftUnits + 1 + tuWidthInUnits, tuWidthInUnits);<br>+        numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB, bNeighborFlags + leftUnits - 1);<br>+        numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLB, bNeighborFlags + tuHeightInUnits - 1, tuHeightInUnits);<br>+    }<br>+    else<br>+    {<br>+        bNeighborFlags[leftUnits] = isAboveLeftAvailableCIP(cu, partIdxLT);<br>+        numIntraNeighbor  = (int)(bNeighborFlags[leftUnits]);<br>+        numIntraNeighbor += isAboveAvailableCIP(cu, partIdxLT, partIdxRT, bNeighborFlags + leftUnits + 1);<br>+        numIntraNeighbor += isAboveRightAvailableCIP(cu, partIdxRT, 
bNeighborFlags + leftUnits + 1 + tuWidthInUnits, tuWidthInUnits);<br>+        numIntraNeighbor += isLeftAvailableCIP(cu, partIdxLT, partIdxLB, bNeighborFlags + leftUnits - 1);<br>+        numIntraNeighbor += isBelowLeftAvailableCIP(cu, partIdxLB, bNeighborFlags + tuHeightInUnits - 1, tuHeightInUnits);<br>+    }<br><br>     intraNeighbors->numIntraNeighbor = numIntraNeighbor;<br>     intraNeighbors->totalUnits = aboveUnits + leftUnits + 1;<br>@@ -793,7 +800,6 @@<br>     intraNeighbors->unitWidth = 1 << log2UnitWidth;<br>     intraNeighbors->unitHeight = 1 << log2UnitHeight;<br>     intraNeighbors->tuSize = tuSize;<br>-    intraNeighbors->log2TrSize = log2TrSize;<br> }<br><br> void Predict::fillReferenceSamples(const pixel* adiOrigin, intptr_t picStride, pixel* adiRef, const IntraNeighbors& intraNeighbors)<br>@@ -953,33 +959,27 @@<br>     uint32_t partAboveLeft;<br>     const CUData* cuAboveLeft = cu.getPUAboveLeft(partAboveLeft, partIdxLT);<br><br>-    if (!cu.m_slice->m_pps->bConstrainedIntraPred)<br>-        return cuAboveLeft ? 
true : false;<br>-    else<br>-        return cuAboveLeft && cuAboveLeft->isIntra(partAboveLeft);<br>+    return !!cuAboveLeft;<br> }<br><br> int Predict::isAboveAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxRT, bool* bValidFlags)<br> {<br>     const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT];<br>-    const uint32_t rasterPartEnd = g_zscanToRaster[partIdxRT] + 1;<br>+    const uint32_t rasterPartEnd = g_zscanToRaster[partIdxRT];<br>     const uint32_t idxStep = 1;<br>-    bool* validFlagPtr = bValidFlags;<br>     int numIntra = 0;<br><br>-    for (uint32_t rasterPart = rasterPartBegin; rasterPart < rasterPartEnd; rasterPart += idxStep)<br>+    for (uint32_t rasterPart = rasterPartBegin; rasterPart <= rasterPartEnd; rasterPart += idxStep, bValidFlags++)<br>     {<br>         uint32_t partAbove;<br>         const CUData* cuAbove = cu.getPUAbove(partAbove, g_rasterToZscan[rasterPart]);<br>-        if (cuAbove && (!cu.m_slice->m_pps->bConstrainedIntraPred || cuAbove->isIntra(partAbove)))<br>+        if (cuAbove)<br>         {<br>             numIntra++;<br>-            *validFlagPtr = true;<br>+            *bValidFlags = true;<br>         }<br>         else<br>-            *validFlagPtr = false;<br>-<br>-        validFlagPtr++;<br>+            *bValidFlags = false;<br>     }<br><br>     return numIntra;<br>@@ -988,73 +988,156 @@<br> int Predict::isLeftAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxLB, bool* bValidFlags)<br> {<br>     const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT];<br>-    const uint32_t rasterPartEnd = g_zscanToRaster[partIdxLB] + 1;<br>+    const uint32_t rasterPartEnd = g_zscanToRaster[partIdxLB];<br>     const uint32_t idxStep = cu.m_slice->m_sps->numPartInCUSize;<br>-    bool* validFlagPtr = bValidFlags;<br>     int numIntra = 0;<br><br>-    for (uint32_t rasterPart = rasterPartBegin; rasterPart < rasterPartEnd; rasterPart += idxStep)<br>+    for (uint32_t rasterPart = rasterPartBegin; 
rasterPart <= rasterPartEnd; rasterPart += idxStep, bValidFlags--) // opposite direction<br>     {<br>         uint32_t partLeft;<br>         const CUData* cuLeft = cu.getPULeft(partLeft, g_rasterToZscan[rasterPart]);<br>-        if (cuLeft && (!cu.m_slice->m_pps->bConstrainedIntraPred || cuLeft->isIntra(partLeft)))<br>+        if (cuLeft)<br>         {<br>             numIntra++;<br>-            *validFlagPtr = true;<br>+            *bValidFlags = true;<br>         }<br>         else<br>-            *validFlagPtr = false;<br>-<br>-        validFlagPtr--; // opposite direction<br>+            *bValidFlags = false;<br>     }<br><br>     return numIntra;<br> }<br><br>-int Predict::isAboveRightAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxRT, bool* bValidFlags)<br>+int Predict::isAboveRightAvailable(const CUData& cu, uint32_t partIdxRT, bool* bValidFlags, uint32_t numUnits)<br> {<br>-    const uint32_t numUnitsInPU = g_zscanToRaster[partIdxRT] - g_zscanToRaster[partIdxLT] + 1;<br>-    bool* validFlagPtr = bValidFlags;<br>     int numIntra = 0;<br><br>-    for (uint32_t offset = 1; offset <= numUnitsInPU; offset++)<br>+    for (uint32_t offset = 1; offset <= numUnits; offset++, bValidFlags++)<br>     {<br>         uint32_t partAboveRight;<br>         const CUData* cuAboveRight = cu.getPUAboveRightAdi(partAboveRight, partIdxRT, offset);<br>-        if (cuAboveRight && (!cu.m_slice->m_pps->bConstrainedIntraPred || cuAboveRight->isIntra(partAboveRight)))<br>+        if (cuAboveRight)<br>         {<br>             numIntra++;<br>-            *validFlagPtr = true;<br>+            *bValidFlags = true;<br>         }<br>         else<br>-            *validFlagPtr = false;<br>-<br>-        validFlagPtr++;<br>+            *bValidFlags = false;<br>     }<br><br>     return numIntra;<br> }<br><br>-int Predict::isBelowLeftAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxLB, bool* bValidFlags)<br>+int Predict::isBelowLeftAvailable(const CUData& cu, 
uint32_t partIdxLB, bool* bValidFlags, uint32_t numUnits)<br> {<br>-    const uint32_t numUnitsInPU = (g_zscanToRaster[partIdxLB] - g_zscanToRaster[partIdxLT]) / cu.m_slice->m_sps->numPartInCUSize + 1;<br>-    bool* validFlagPtr = bValidFlags;<br>     int numIntra = 0;<br><br>-    for (uint32_t offset = 1; offset <= numUnitsInPU; offset++)<br>+    for (uint32_t offset = 1; offset <= numUnits; offset++, bValidFlags--) // opposite direction<br>     {<br>         uint32_t partBelowLeft;<br>         const CUData* cuBelowLeft = cu.getPUBelowLeftAdi(partBelowLeft, partIdxLB, offset);<br>-        if (cuBelowLeft && (!cu.m_slice->m_pps->bConstrainedIntraPred || cuBelowLeft->isIntra(partBelowLeft)))<br>+        if (cuBelowLeft)<br>         {<br>             numIntra++;<br>-            *validFlagPtr = true;<br>+            *bValidFlags = true;<br>         }<br>         else<br>-            *validFlagPtr = false;<br>-<br>-        validFlagPtr--; // opposite direction<br>+            *bValidFlags = false;<br>     }<br><br>     return numIntra;<br> }<br>+<br>+bool Predict::isAboveLeftAvailableCIP(const CUData& cu, uint32_t partIdxLT)<br>+{<br>+    uint32_t partAboveLeft;<br>+    const CUData* cuAboveLeft = cu.getPUAboveLeft(partAboveLeft, partIdxLT);<br>+<br>+    return cuAboveLeft && cuAboveLeft->isIntra(partAboveLeft);<br>+}<br>+<br>+int Predict::isAboveAvailableCIP(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxRT, bool* bValidFlags)<br>+{<br>+    const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT];<br>+    const uint32_t rasterPartEnd = g_zscanToRaster[partIdxRT];<br>+    const uint32_t idxStep = 1;<br>+    int numIntra = 0;<br>+<br>+    for (uint32_t rasterPart = rasterPartBegin; rasterPart <= rasterPartEnd; rasterPart += idxStep, bValidFlags++)<br>+    {<br>+        uint32_t partAbove;<br>+        const CUData* cuAbove = cu.getPUAbove(partAbove, g_rasterToZscan[rasterPart]);<br>+        if (cuAbove && cuAbove->isIntra(partAbove))<br>+        {<br>+          
  numIntra++;<br>+            *bValidFlags = true;<br>+        }<br>+        else<br>+            *bValidFlags = false;<br>+    }<br>+<br>+    return numIntra;<br>+}<br>+<br>+int Predict::isLeftAvailableCIP(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxLB, bool* bValidFlags)<br>+{<br>+    const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT];<br>+    const uint32_t rasterPartEnd = g_zscanToRaster[partIdxLB];<br>+    const uint32_t idxStep = cu.m_slice->m_sps->numPartInCUSize;<br>+    int numIntra = 0;<br>+<br>+    for (uint32_t rasterPart = rasterPartBegin; rasterPart <= rasterPartEnd; rasterPart += idxStep, bValidFlags--) // opposite direction<br>+    {<br>+        uint32_t partLeft;<br>+        const CUData* cuLeft = cu.getPULeft(partLeft, g_rasterToZscan[rasterPart]);<br>+        if (cuLeft && cuLeft->isIntra(partLeft))<br>+        {<br>+            numIntra++;<br>+            *bValidFlags = true;<br>+        }<br>+        else<br>+            *bValidFlags = false;<br>+    }<br>+<br>+    return numIntra;<br>+}<br>+<br>+int Predict::isAboveRightAvailableCIP(const CUData& cu, uint32_t partIdxRT, bool* bValidFlags, uint32_t numUnits)<br>+{<br>+    int numIntra = 0;<br>+<br>+    for (uint32_t offset = 1; offset <= numUnits; offset++, bValidFlags++)<br>+    {<br>+        uint32_t partAboveRight;<br>+        const CUData* cuAboveRight = cu.getPUAboveRightAdi(partAboveRight, partIdxRT, offset);<br>+        if (cuAboveRight && cuAboveRight->isIntra(partAboveRight))<br>+        {<br>+            numIntra++;<br>+            *bValidFlags = true;<br>+        }<br>+        else<br>+            *bValidFlags = false;<br>+    }<br>+<br>+    return numIntra;<br>+}<br>+<br>+int Predict::isBelowLeftAvailableCIP(const CUData& cu, uint32_t partIdxLB, bool* bValidFlags, uint32_t numUnits)<br>+{<br>+    int numIntra = 0;<br>+<br>+    for (uint32_t offset = 1; offset <= numUnits; offset++, bValidFlags--) // opposite direction<br>+    {<br>+        uint32_t 
partBelowLeft;<br>+        const CUData* cuBelowLeft = cu.getPUBelowLeftAdi(partBelowLeft, partIdxLB, offset);<br>+        if (cuBelowLeft && cuBelowLeft->isIntra(partBelowLeft))<br>+        {<br>+            numIntra++;<br>+            *bValidFlags = true;<br>+        }<br>+        else<br>+            *bValidFlags = false;<br>+    }<br>+<br>+    return numIntra;<br>+}<br>diff -r 8d2f418829c8 -r 6b59452a17d7 source/common/predict.h<br>--- a/source/common/predict.h   Sat Dec 20 21:27:14 2014 +0900<br>+++ b/source/common/predict.h   Tue Dec 23 14:49:59 2014 +0900<br>@@ -57,7 +57,6 @@<br>         int      unitWidth;<br>         int      unitHeight;<br>         int      tuSize;<br>-        uint32_t log2TrSize;<br>         bool     bNeighborFlags[4 * MAX_NUM_SPU_W + 1];<br>     };<br><br>@@ -105,14 +104,20 @@<br>     void addWeightUni(Yuv& predYuv, const ShortYuv& srcYuv, const WeightValues wp[3], bool bLuma, bool bChroma) const;<br><br>     /* Intra prediction helper functions */<br>-    static void initIntraNeighbors(const CUData& cu, uint32_t zOrderIdxInPart, uint32_t partDepth, bool isLuma, IntraNeighbors *IntraNeighbors);<br>+    static void initIntraNeighbors(const CUData& cu, uint32_t absPartIdx, uint32_t tuDepth, bool isLuma, IntraNeighbors *IntraNeighbors);<br>     static void fillReferenceSamples(const pixel* adiOrigin, intptr_t picStride, pixel* adiRef, const IntraNeighbors& intraNeighbors);<br><br>     static bool isAboveLeftAvailable(const CUData& cu, uint32_t partIdxLT);<br>     static int  isAboveAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxRT, bool* bValidFlags);<br>     static int  isLeftAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxLB, bool* bValidFlags);<br>-    static int  isAboveRightAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxRT, bool* bValidFlags);<br>-    static int  isBelowLeftAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxLB, bool* bValidFlags);<br>+    static int  
isAboveRightAvailable(const CUData& cu, uint32_t partIdxRT, bool* bValidFlags, uint32_t numUnits);<br>+    static int  isBelowLeftAvailable(const CUData& cu, uint32_t partIdxLB, bool* bValidFlags, uint32_t numUnits);<br>+<br>+    static bool isAboveLeftAvailableCIP(const CUData& cu, uint32_t partIdxLT);<br>+    static int  isAboveAvailableCIP(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxRT, bool* bValidFlags);<br>+    static int  isLeftAvailableCIP(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxLB, bool* bValidFlags);<br>+    static int  isAboveRightAvailableCIP(const CUData& cu, uint32_t partIdxRT, bool* bValidFlags, uint32_t numUnits);<br>+    static int  isBelowLeftAvailableCIP(const CUData& cu, uint32_t partIdxLB, bool* bValidFlags, uint32_t numUnits);<br><br> public:<br><br>@@ -125,8 +130,8 @@<br>     void predIntraLumaAng(uint32_t dirMode, pixel* pred, intptr_t stride, uint32_t log2TrSize);<br>     void predIntraChromaAng(pixel* src, uint32_t dirMode, pixel* pred, intptr_t stride, uint32_t log2TrSizeC, int chFmt);<br><br>-    void initAdiPattern(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t partDepth, int dirMode);<br>-    void initAdiPatternChroma(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t partDepth, uint32_t chromaId);<br>+    void initAdiPattern(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, const IntraNeighbors& intraNeighbors, int dirMode);<br>+    void initAdiPatternChroma(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, const IntraNeighbors& intraNeighbors, uint32_t chromaId);<br>     pixel* getAdiChromaBuf(uint32_t chromaId, int tuSize)<br>     {<br>         return m_predBuf + (chromaId == 1 ? 
0 : 2 * ADI_BUF_STRIDE * (tuSize * 2 + 1));<br>diff -r 8d2f418829c8 -r 6b59452a17d7 source/encoder/analysis.cpp<br>--- a/source/encoder/analysis.cpp       Sat Dec 20 21:27:14 2014 +0900<br>+++ b/source/encoder/analysis.cpp       Tue Dec 23 14:49:59 2014 +0900<br>@@ -914,7 +914,7 @@<br>                         cu.getInterTUQtDepthRange(tuDepthRange, 0);<br><br>                         m_rqt[cuGeom.depth].tmpResiYuv.subtract(*md.bestMode->fencYuv, md.bestMode->predYuv, cuGeom.log2CUSize);<br>-                        residualTransformQuantInter(*md.bestMode, cuGeom, 0, cuGeom.depth, tuDepthRange);<br>+                        residualTransformQuantInter(*md.bestMode, cuGeom, 0, 0, tuDepthRange);<br>                         if (cu.getQtRootCbf(0))<br>                             md.bestMode->reconYuv.addClip(md.bestMode->predYuv, m_rqt[cuGeom.depth].tmpResiYuv, cu.m_log2CUSize[0]);<br>                         else<br>@@ -938,8 +938,7 @@<br>                         uint32_t tuDepthRange[2];<br>                         cu.getIntraTUQtDepthRange(tuDepthRange, 0);<br><br>-                        uint32_t initTuDepth = cu.m_partSize[0] != SIZE_2Nx2N;<br>-                        residualTransformQuantIntra(*md.bestMode, cuGeom, initTuDepth, 0, tuDepthRange);<br>+                        residualTransformQuantIntra(*md.bestMode, cuGeom, 0, 0, tuDepthRange);<br>                         getBestIntraModeChroma(*md.bestMode, cuGeom);<br>                         residualQTIntraChroma(*md.bestMode, cuGeom, 0, 0);<br>                         md.bestMode->reconYuv.copyFromPicYuv(*m_frame->m_reconPic, cu.m_cuAddr, cuGeom.encodeIdx); // TODO:<br>@@ -1702,8 +1701,7 @@<br>         uint32_t tuDepthRange[2];<br>         cu.getIntraTUQtDepthRange(tuDepthRange, 0);<br><br>-        uint32_t initTuDepth = cu.m_partSize[0] != SIZE_2Nx2N;<br>-        residualTransformQuantIntra(*bestMode, cuGeom, initTuDepth, 0, tuDepthRange);<br>+        residualTransformQuantIntra(*bestMode, cuGeom, 0, 0, 
tuDepthRange);<br>         getBestIntraModeChroma(*bestMode, cuGeom);<br>         residualQTIntraChroma(*bestMode, cuGeom, 0, 0);<br>     }<br>@@ -1736,7 +1734,7 @@<br>         uint32_t tuDepthRange[2];<br>         cu.getInterTUQtDepthRange(tuDepthRange, 0);<br><br>-        residualTransformQuantInter(*bestMode, cuGeom, 0, cuGeom.depth, tuDepthRange);<br>+        residualTransformQuantInter(*bestMode, cuGeom, 0, 0, tuDepthRange);<br><br>         if (cu.m_mergeFlag[0] && cu.m_partSize[0] == SIZE_2Nx2N && !cu.getQtRootCbf(0))<br>             cu.setPredModeSubParts(MODE_SKIP);<br>diff -r 8d2f418829c8 -r 6b59452a17d7 source/encoder/search.cpp<br>--- a/source/encoder/search.cpp Sat Dec 20 21:27:14 2014 +0900<br>+++ b/source/encoder/search.cpp Tue Dec 23 14:49:59 2014 +0900<br>@@ -239,7 +239,8 @@<br><br> void Search::codeIntraLumaQT(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, bool bAllowSplit, Cost& outCost, const uint32_t depthRange[2])<br> {<br>-    uint32_t fullDepth  = mode.cu.m_cuDepth[0] + tuDepth;<br>+    CUData& cu = <a href="http://mode.cu" target="_blank">mode.cu</a>;<br>+    uint32_t fullDepth  = cu.m_cuDepth[0] + tuDepth;<br>     uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;<br>     uint32_t qtLayer    = log2TrSize - 2;<br>     uint32_t sizeIdx    = log2TrSize - 2;<br>@@ -253,8 +254,6 @@<br>         mightSplit = true;<br>     }<br><br>-    CUData& cu = <a href="http://mode.cu" target="_blank">mode.cu</a>;<br>-<br>     Cost fullCost;<br>     uint32_t bCBF = 0;<br><br>@@ -273,7 +272,9 @@<br><br>         // init availability pattern<br>         uint32_t lumaPredMode = cu.m_lumaIntraDir[absPartIdx];<br>-        initAdiPattern(cu, cuGeom, absPartIdx, tuDepth, lumaPredMode);<br>+        IntraNeighbors intraNeighbors;<br>+        initIntraNeighbors(cu, absPartIdx, tuDepth, true, &intraNeighbors);<br>+        initAdiPattern(cu, cuGeom, absPartIdx, intraNeighbors, lumaPredMode);<br><br>         // get prediction signal<br>         
predIntraLumaAng(lumaPredMode, pred, stride, log2TrSize);<br>@@ -365,7 +366,7 @@<br>             m_entropyCoder.load(m_rqt[fullDepth].rqtRoot);   // prep state of split encode<br>         }<br><br>-        // code split block<br>+        /* code split block */<br>         uint32_t qNumParts = 1 << (log2TrSize - 1 - LOG2_UNIT_SIZE) * 2;<br><br>         int checkTransformSkip = m_slice->m_pps->bTransformSkipEnabled && (log2TrSize - 1) <= MAX_LOG2_TS_SIZE && !cu.m_tqBypass[0];<br>@@ -451,11 +452,13 @@<br>     pixel*   pred = predYuv->getLumaAddr(absPartIdx);<br>     int16_t* residual = m_rqt[cuGeom.depth].tmpResiYuv.getLumaAddr(absPartIdx);<br>     uint32_t stride = fencYuv->m_size;<br>-    int      sizeIdx = log2TrSize - 2;<br>+    uint32_t sizeIdx = log2TrSize - 2;<br><br>     // init availability pattern<br>     uint32_t lumaPredMode = cu.m_lumaIntraDir[absPartIdx];<br>-    initAdiPattern(cu, cuGeom, absPartIdx, tuDepth, lumaPredMode);<br>+    IntraNeighbors intraNeighbors;<br>+    initIntraNeighbors(cu, absPartIdx, tuDepth, true, &intraNeighbors);<br>+    initAdiPattern(cu, cuGeom, absPartIdx, intraNeighbors, lumaPredMode);<br><br>     // get prediction signal<br>     predIntraLumaAng(lumaPredMode, pred, stride, log2TrSize);<br>@@ -597,13 +600,12 @@<br> }<br><br> /* fast luma intra residual generation. 
Only perform the minimum number of TU splits required by the CU size */<br>-void Search::residualTransformQuantIntra(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, const uint32_t depthRange[2])<br>+void Search::residualTransformQuantIntra(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth, const uint32_t depthRange[2])<br> {<br>     CUData& cu = <a href="http://mode.cu" target="_blank">mode.cu</a>;<br>-<br>-    uint32_t fullDepth   = cu.m_cuDepth[0] + tuDepth;<br>-    uint32_t log2TrSize  = g_maxLog2CUSize - fullDepth;<br>-    bool     bCheckFull  = log2TrSize <= depthRange[1];<br>+    uint32_t fullDepth  = cu.m_cuDepth[0] + tuDepth;<br>+    uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;<br>+    bool     bCheckFull = log2TrSize <= depthRange[1];<br><br>     X265_CHECK(m_slice->m_sliceType != I_SLICE, "residualTransformQuantIntra not intended for I slices\n");<br><br>@@ -614,28 +616,36 @@<br><br>     if (bCheckFull)<br>     {<br>-        const pixel* fenc  = mode.fencYuv->getLumaAddr(absPartIdx);<br>-        pixel*   pred      = mode.predYuv.getLumaAddr(absPartIdx);<br>-        int16_t* residual  = m_rqt[cuGeom.depth].tmpResiYuv.getLumaAddr(absPartIdx);<br>+        const pixel* fenc = mode.fencYuv->getLumaAddr(absPartIdx);<br>+        pixel*   pred     = mode.predYuv.getLumaAddr(absPartIdx);<br>+        int16_t* residual = m_rqt[cuGeom.depth].tmpResiYuv.getLumaAddr(absPartIdx);<br>+        uint32_t stride   = mode.fencYuv->m_size;<br>+<br>+        // init availability pattern<br>+        uint32_t lumaPredMode = cu.m_lumaIntraDir[absPartIdx];<br>+        IntraNeighbors intraNeighbors;<br>+        initIntraNeighbors(cu, absPartIdx, tuDepth, true, &intraNeighbors);<br>+        initAdiPattern(cu, cuGeom, absPartIdx, intraNeighbors, lumaPredMode);<br>+<br>+        // get prediction signal<br>+        predIntraLumaAng(lumaPredMode, pred, stride, log2TrSize);<br>+<br>+        
X265_CHECK(!cu.m_transformSkip[TEXT_LUMA][absPartIdx], "unexpected tskip flag in residualTransformQuantIntra\n");<br>+        cu.setTUDepthSubParts(tuDepth, absPartIdx, fullDepth);<br>+<br>+        uint32_t coeffOffsetY = absPartIdx << (LOG2_UNIT_SIZE * 2);<br>+        coeff_t* coeffY       = cu.m_trCoeff[0] + coeffOffsetY;<br>+<br>+        uint32_t sizeIdx   = log2TrSize - 2;<br>+        primitives.calcresidual[sizeIdx](fenc, pred, residual, stride);<br>+<br>         pixel*   picReconY = m_frame->m_reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx + absPartIdx);<br>         intptr_t picStride = m_frame->m_reconPic->m_stride;<br>-        uint32_t stride    = mode.fencYuv->m_size;<br>-        uint32_t sizeIdx   = log2TrSize - 2;<br>-        uint32_t lumaPredMode = cu.m_lumaIntraDir[absPartIdx];<br>-        uint32_t coeffOffsetY = absPartIdx << (LOG2_UNIT_SIZE * 2);<br>-        coeff_t* coeff        = cu.m_trCoeff[TEXT_LUMA] + coeffOffsetY;<br>-<br>-        initAdiPattern(cu, cuGeom, absPartIdx, tuDepth, lumaPredMode);<br>-        predIntraLumaAng(lumaPredMode, pred, stride, log2TrSize);<br>-<br>-        X265_CHECK(!cu.m_transformSkip[TEXT_LUMA][absPartIdx], "unexpected tskip flag in residualTransformQuantIntra\n");<br>-        cu.setTUDepthSubParts(tuDepth, absPartIdx, fullDepth);<br>-<br>-        primitives.calcresidual[sizeIdx](fenc, pred, residual, stride);<br>-        uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeff, log2TrSize, TEXT_LUMA, absPartIdx, false);<br>+<br>+        uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffY, log2TrSize, TEXT_LUMA, absPartIdx, false);<br>         if (numSig)<br>         {<br>-            m_quant.invtransformNxN(cu.m_tqBypass[absPartIdx], residual, stride, coeff, log2TrSize, TEXT_LUMA, true, false, numSig);<br>+            m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);<br>             
primitives.luma_add_ps[sizeIdx](picReconY, picStride, pred, residual, stride, stride);<br>             cu.setCbfSubParts(1 << tuDepth, TEXT_LUMA, absPartIdx, fullDepth);<br>         }<br>@@ -654,11 +664,11 @@<br>         uint32_t cbf = 0;<br>         for (uint32_t qIdx = 0, qPartIdx = absPartIdx; qIdx < 4; ++qIdx, qPartIdx += qNumParts)<br>         {<br>-            residualTransformQuantIntra(mode, cuGeom, tuDepth + 1, qPartIdx, depthRange);<br>+            residualTransformQuantIntra(mode, cuGeom, qPartIdx, tuDepth + 1, depthRange);<br>             cbf |= cu.getCbf(qPartIdx, TEXT_LUMA, tuDepth + 1);<br>         }<br>         for (uint32_t offs = 0; offs < 4 * qNumParts; offs++)<br>-            cu.m_cbf[TEXT_LUMA][absPartIdx + offs] |= (cbf << tuDepth);<br>+            cu.m_cbf[0][absPartIdx + offs] |= (cbf << tuDepth);<br>     }<br> }<br><br>@@ -739,15 +749,14 @@<br>         }<br>         for (uint32_t offs = 0; offs < 4 * qNumParts; offs++)<br>         {<br>-            cu.m_cbf[TEXT_CHROMA_U][absPartIdx + offs] |= (splitCbfU << tuDepth);<br>-            cu.m_cbf[TEXT_CHROMA_V][absPartIdx + offs] |= (splitCbfV << tuDepth);<br>+            cu.m_cbf[1][absPartIdx + offs] |= (splitCbfU << tuDepth);<br>+            cu.m_cbf[2][absPartIdx + offs] |= (splitCbfV << tuDepth);<br>         }<br><br>         return outDist;<br>     }<br><br>     uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;<br>-<br>     uint32_t tuDepthC = tuDepth;<br>     if (log2TrSizeC < 2)<br>     {<br>@@ -766,46 +775,48 @@<br>     if (checkTransformSkip)<br>         return codeIntraChromaTSkip(mode, cuGeom, tuDepth, tuDepthC, absPartIdx, psyEnergy);<br><br>+    ShortYuv& resiYuv = m_rqt[cuGeom.depth].tmpResiYuv;<br>     uint32_t qtLayer = log2TrSize - 2;<br>     uint32_t tuSize = 1 << log2TrSizeC;<br>+    uint32_t stride = mode.fencYuv->m_csize;<br>+    const uint32_t sizeIdxC = log2TrSizeC - 2;<br>     uint32_t outDist = 0;<br><br>     uint32_t curPartNum = NUM_CU_PARTITIONS >> 
((cu.m_cuDepth[0] + tuDepthC) << 1);<br>     const SplitType splitType = (m_csp == X265_CSP_I422) ? VERTICAL_SPLIT : DONT_SPLIT;<br><br>-    for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)<br>+    TURecurse tuIterator(splitType, curPartNum, absPartIdx);<br>+    do<br>     {<br>-        TextType ttype = (TextType)chromaId;<br>-<br>-        TURecurse tuIterator(splitType, curPartNum, absPartIdx);<br>-        do<br>+        uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU;<br>+<br>+        IntraNeighbors intraNeighbors;<br>+        initIntraNeighbors(cu, absPartIdxC, tuDepthC, false, &intraNeighbors);<br>+<br>+        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)<br>         {<br>-            uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU;<br>+            TextType ttype = (TextType)chromaId;<br><br>             const pixel* fenc = mode.fencYuv->getChromaAddr(chromaId, absPartIdxC);<br>             pixel*   pred     = mode.predYuv.getChromaAddr(chromaId, absPartIdxC);<br>-            int16_t* residual = m_rqt[cuGeom.depth].tmpResiYuv.getChromaAddr(chromaId, absPartIdxC);<br>-            uint32_t stride   = mode.fencYuv->m_csize;<br>-            uint32_t sizeIdxC = log2TrSizeC - 2;<br>-<br>+            int16_t* residual = resiYuv.getChromaAddr(chromaId, absPartIdxC);<br>             uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (m_hChromaShift + m_vChromaShift));<br>             coeff_t* coeffC        = m_rqt[qtLayer].coeffRQT[chromaId] + coeffOffsetC;<br>             pixel*   reconQt       = m_rqt[qtLayer].reconQtYuv.getChromaAddr(chromaId, absPartIdxC);<br>             uint32_t reconQtStride = m_rqt[qtLayer].reconQtYuv.m_csize;<br>-<br>             pixel*   picReconC = m_frame->m_reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.encodeIdx + absPartIdxC);<br>             intptr_t picStride = m_frame->m_reconPic->m_strideC;<br><br>-            // init availability pattern<br>-    
        initAdiPatternChroma(cu, cuGeom, absPartIdxC, tuDepthC, chromaId);<br>-            pixel* chromaPred = getAdiChromaBuf(chromaId, tuSize);<br>-<br>             uint32_t chromaPredMode = cu.m_chromaIntraDir[absPartIdxC];<br>             if (chromaPredMode == DM_CHROMA_IDX)<br>                 chromaPredMode = cu.m_lumaIntraDir[(m_csp == X265_CSP_I444) ? absPartIdxC : 0];<br>             if (m_csp == X265_CSP_I422)<br>                 chromaPredMode = g_chroma422IntraAngleMappingTable[chromaPredMode];<br><br>+            // init availability pattern<br>+            initAdiPatternChroma(cu, cuGeom, absPartIdxC, intraNeighbors, chromaId);<br>+            pixel* chromaPred = getAdiChromaBuf(chromaId, tuSize);<br>+<br>             // get prediction signal<br>             predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, log2TrSizeC, m_csp);<br><br>@@ -813,7 +824,6 @@<br><br>             primitives.calcresidual[sizeIdxC](fenc, pred, residual, stride);<br>             uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffC, log2TrSizeC, ttype, absPartIdxC, false);<br>-            uint32_t tmpDist;<br>             if (numSig)<br>             {<br>                 m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);<br>@@ -827,7 +837,7 @@<br>                 cu.setCbfPartRange(0, ttype, absPartIdxC, tuIterator.absPartIdxStep);<br>             }<br><br>-            tmpDist = primitives.sse_pp[sizeIdxC](reconQt, reconQtStride, fenc, stride);<br>+            uint32_t tmpDist = primitives.sse_pp[sizeIdxC](reconQt, reconQtStride, fenc, stride);<br>             outDist += (ttype == TEXT_CHROMA_U) ? 
m_rdCost.scaleChromaDistCb(tmpDist) : m_rdCost.scaleChromaDistCr(tmpDist);<br><br>             if (m_rdCost.m_psyRd)<br>@@ -835,10 +845,13 @@<br><br>             primitives.luma_copy_pp[sizeIdxC](picReconC, picStride, reconQt, reconQtStride);<br>         }<br>-        while (tuIterator.isNextSection());<br>-<br>-        if (splitType == VERTICAL_SPLIT)<br>-            offsetSubTUCBFs(cu, ttype, tuDepth, absPartIdx);<br>+    }<br>+    while (tuIterator.isNextSection());<br>+<br>+    if (splitType == VERTICAL_SPLIT)<br>+    {<br>+        offsetSubTUCBFs(cu, TEXT_CHROMA_U, tuDepth, absPartIdx);<br>+        offsetSubTUCBFs(cu, TEXT_CHROMA_V, tuDepth, absPartIdx);<br>     }<br><br>     return outDist;<br>@@ -866,14 +879,17 @@<br>     uint32_t curPartNum = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + tuDepthC) << 1);<br>     const SplitType splitType = (m_csp == X265_CSP_I422) ? VERTICAL_SPLIT : DONT_SPLIT;<br><br>-    for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)<br>+    TURecurse tuIterator(splitType, curPartNum, absPartIdx);<br>+    do<br>     {<br>-        TextType ttype = (TextType)chromaId;<br>-<br>-        TURecurse tuIterator(splitType, curPartNum, absPartIdx);<br>-        do<br>+        uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU;<br>+<br>+        IntraNeighbors intraNeighbors;<br>+        initIntraNeighbors(cu, absPartIdxC, tuDepthC, false, &intraNeighbors);<br>+<br>+        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)<br>         {<br>-            uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU;<br>+            TextType ttype = (TextType)chromaId;<br><br>             const pixel* fenc = mode.fencYuv->getChromaAddr(chromaId, absPartIdxC);<br>             pixel*   pred = mode.predYuv.getChromaAddr(chromaId, absPartIdxC);<br>@@ -887,7 +903,7 @@<br>             uint32_t reconQtStride = m_rqt[qtLayer].reconQtYuv.m_csize;<br><br>             // init availability pattern<br>-            
initAdiPatternChroma(cu, cuGeom, absPartIdxC, tuDepthC, chromaId);<br>+            initAdiPatternChroma(cu, cuGeom, absPartIdxC, intraNeighbors, chromaId);<br>             pixel* chromaPred = getAdiChromaBuf(chromaId, tuSize);<br><br>             uint32_t chromaPredMode = cu.m_chromaIntraDir[absPartIdxC];<br>@@ -980,10 +996,13 @@<br>             outDist += bDist;<br>             psyEnergy += bEnergy;<br>         }<br>-        while (tuIterator.isNextSection());<br>-<br>-        if (splitType == VERTICAL_SPLIT)<br>-            offsetSubTUCBFs(cu, ttype, tuDepth, absPartIdx);<br>+    }<br>+    while (tuIterator.isNextSection());<br>+<br>+    if (splitType == VERTICAL_SPLIT)<br>+    {<br>+        offsetSubTUCBFs(cu, TEXT_CHROMA_U, tuDepth, absPartIdx);<br>+        offsetSubTUCBFs(cu, TEXT_CHROMA_V, tuDepth, absPartIdx);<br>     }<br><br>     m_entropyCoder.load(m_rqt[fullDepth].rqtRoot);<br>@@ -1022,91 +1041,18 @@<br>     }<br> }<br><br>-void Search::residualQTIntraChroma(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx)<br>+void Search::residualQTIntraChroma(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth)<br> {<br>     CUData& cu = <a href="http://mode.cu" target="_blank">mode.cu</a>;<br>-    uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth;<br>-    uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;<br>-<br>-    if (tuDepth == cu.m_tuDepth[absPartIdx])<br>-    {<br>-        uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;<br>-        uint32_t tuDepthC = tuDepth;<br>-        if (log2TrSizeC < 2)<br>-        {<br>-            X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && tuDepth, "invalid tuDepth\n");<br>-            if (absPartIdx & 3)<br>-                return;<br>-            log2TrSizeC = 2;<br>-            tuDepthC--;<br>-        }<br>-<br>-        ShortYuv& resiYuv = m_rqt[cuGeom.depth].tmpResiYuv;<br>-        uint32_t tuSize = 1 << log2TrSizeC;<br>-        uint32_t stride = mode.fencYuv->m_csize;<br>-   
     const int sizeIdxC = log2TrSizeC - 2;<br>-<br>-        uint32_t curPartNum = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + tuDepthC) << 1);<br>-        const SplitType splitType = (m_csp == X265_CSP_I422) ? VERTICAL_SPLIT : DONT_SPLIT;<br>-<br>-        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)<br>-        {<br>-            TextType ttype = (TextType)chromaId;<br>-<br>-            TURecurse tuIterator(splitType, curPartNum, absPartIdx);<br>-            do<br>-            {<br>-                uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU;<br>-<br>-                const pixel*   fenc   = mode.fencYuv->getChromaAddr(chromaId, absPartIdxC);<br>-                pixel*   pred         = mode.predYuv.getChromaAddr(chromaId, absPartIdxC);<br>-                int16_t* residual     = resiYuv.getChromaAddr(chromaId, absPartIdxC);<br>-                pixel*   recon        = mode.reconYuv.getChromaAddr(chromaId, absPartIdxC); // TODO: needed?<br>-                uint32_t coeffOffsetC = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (m_hChromaShift + m_vChromaShift));<br>-                coeff_t* coeff        = cu.m_trCoeff[ttype] + coeffOffsetC;<br>-                pixel*   picReconC    = m_frame->m_reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.encodeIdx + absPartIdxC);<br>-                uint32_t picStride    = m_frame->m_reconPic->m_strideC;<br>-<br>-                uint32_t chromaPredMode = cu.m_chromaIntraDir[absPartIdxC];<br>-                if (chromaPredMode == DM_CHROMA_IDX)<br>-                    chromaPredMode = cu.m_lumaIntraDir[(m_csp == X265_CSP_I444) ? absPartIdxC : 0];<br>-                chromaPredMode = (m_csp == X265_CSP_I422) ? 
g_chroma422IntraAngleMappingTable[chromaPredMode] : chromaPredMode;<br>-                initAdiPatternChroma(cu, cuGeom, absPartIdxC, tuDepthC, chromaId);<br>-                pixel* chromaPred = getAdiChromaBuf(chromaId, tuSize);<br>-<br>-                predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, log2TrSizeC, m_csp);<br>-<br>-                X265_CHECK(!cu.m_transformSkip[ttype][0], "transform skip not supported at low RD levels\n");<br>-<br>-                primitives.calcresidual[sizeIdxC](fenc, pred, residual, stride);<br>-                uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeff, log2TrSizeC, ttype, absPartIdxC, false);<br>-                if (numSig)<br>-                {<br>-                    m_quant.invtransformNxN(cu.m_tqBypass[absPartIdxC], residual, stride, coeff, log2TrSizeC, ttype, true, false, numSig);<br>-                    primitives.luma_add_ps[sizeIdxC](recon, stride, pred, residual, stride, stride);<br>-                    primitives.luma_copy_pp[sizeIdxC](picReconC, picStride, recon, stride);<br>-                    cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);<br>-                }<br>-                else<br>-                {<br>-                    primitives.luma_copy_pp[sizeIdxC](recon, stride, pred, stride);<br>-                    primitives.luma_copy_pp[sizeIdxC](picReconC, picStride, pred, stride);<br>-                    cu.setCbfPartRange(0, ttype, absPartIdxC, tuIterator.absPartIdxStep);<br>-                }<br>-            }<br>-            while (tuIterator.isNextSection());<br>-<br>-            if (splitType == VERTICAL_SPLIT)<br>-                offsetSubTUCBFs(cu, (TextType)chromaId, tuDepth, absPartIdx);<br>-        }<br>-    }<br>-    else<br>+    uint32_t log2TrSize = cu.m_log2CUSize[absPartIdx] - tuDepth;<br>+<br>+    if (tuDepth < cu.m_tuDepth[absPartIdx])<br>     {<br>         uint32_t qNumParts = 1 << (log2TrSize - 1 - 
LOG2_UNIT_SIZE) * 2;<br>         uint32_t splitCbfU = 0, splitCbfV = 0;<br>         for (uint32_t qIdx = 0, qPartIdx = absPartIdx; qIdx < 4; ++qIdx, qPartIdx += qNumParts)<br>         {<br>-            residualQTIntraChroma(mode, cuGeom, tuDepth + 1, qPartIdx);<br>+            residualQTIntraChroma(mode, cuGeom, qPartIdx, tuDepth + 1);<br>             splitCbfU |= cu.getCbf(qPartIdx, TEXT_CHROMA_U, tuDepth + 1);<br>             splitCbfV |= cu.getCbf(qPartIdx, TEXT_CHROMA_V, tuDepth + 1);<br>         }<br>@@ -1115,12 +1061,91 @@<br>             cu.m_cbf[1][absPartIdx + offs] |= (splitCbfU << tuDepth);<br>             cu.m_cbf[2][absPartIdx + offs] |= (splitCbfV << tuDepth);<br>         }<br>+<br>+        return;<br>+    }<br>+<br>+    uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;<br>+    uint32_t tuDepthC = tuDepth;<br>+    if (log2TrSizeC < 2)<br>+    {<br>+        X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && tuDepth, "invalid tuDepth\n");<br>+        if (absPartIdx & 3)<br>+            return;<br>+        log2TrSizeC = 2;<br>+        tuDepthC--;<br>+    }<br>+<br>+    ShortYuv& resiYuv = m_rqt[cuGeom.depth].tmpResiYuv;<br>+    uint32_t tuSize = 1 << log2TrSizeC;<br>+    uint32_t stride = mode.fencYuv->m_csize;<br>+    const uint32_t sizeIdxC = log2TrSizeC - 2;<br>+<br>+    uint32_t curPartNum = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + tuDepthC) << 1);<br>+    const SplitType splitType = (m_csp == X265_CSP_I422) ? 
VERTICAL_SPLIT : DONT_SPLIT;<br>+<br>+    TURecurse tuIterator(splitType, curPartNum, absPartIdx);<br>+    do<br>+    {<br>+        uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU;<br>+<br>+        IntraNeighbors intraNeighbors;<br>+        initIntraNeighbors(cu, absPartIdxC, tuDepthC, false, &intraNeighbors);<br>+<br>+        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)<br>+        {<br>+            TextType ttype = (TextType)chromaId;<br>+<br>+            const pixel* fenc = mode.fencYuv->getChromaAddr(chromaId, absPartIdxC);<br>+            pixel*   pred     = mode.predYuv.getChromaAddr(chromaId, absPartIdxC);<br>+            int16_t* residual = resiYuv.getChromaAddr(chromaId, absPartIdxC);<br>+            uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (m_hChromaShift + m_vChromaShift));<br>+            coeff_t* coeffC        = cu.m_trCoeff[ttype] + coeffOffsetC;<br>+            pixel*   picReconC = m_frame->m_reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.encodeIdx + absPartIdxC);<br>+            intptr_t picStride = m_frame->m_reconPic->m_strideC;<br>+<br>+            uint32_t chromaPredMode = cu.m_chromaIntraDir[absPartIdxC];<br>+            if (chromaPredMode == DM_CHROMA_IDX)<br>+                chromaPredMode = cu.m_lumaIntraDir[(m_csp == X265_CSP_I444) ? 
absPartIdxC : 0];<br>+            if (m_csp == X265_CSP_I422)<br>+                chromaPredMode = g_chroma422IntraAngleMappingTable[chromaPredMode];<br>+<br>+            // init availability pattern<br>+            initAdiPatternChroma(cu, cuGeom, absPartIdxC, intraNeighbors, chromaId);<br>+            pixel* chromaPred = getAdiChromaBuf(chromaId, tuSize);<br>+<br>+            // get prediction signal<br>+            predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, log2TrSizeC, m_csp);<br>+<br>+            X265_CHECK(!cu.m_transformSkip[ttype][0], "transform skip not supported at low RD levels\n");<br>+<br>+            primitives.calcresidual[sizeIdxC](fenc, pred, residual, stride);<br>+            uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffC, log2TrSizeC, ttype, absPartIdxC, false);<br>+            if (numSig)<br>+            {<br>+                m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);<br>+                primitives.luma_add_ps[sizeIdxC](picReconC, picStride, pred, residual, stride, stride);<br>+                cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);<br>+            }<br>+            else<br>+            {<br>+                // no coded residual, recon = pred<br>+                primitives.luma_copy_pp[sizeIdxC](picReconC, picStride, pred, stride);<br>+                cu.setCbfPartRange(0, ttype, absPartIdxC, tuIterator.absPartIdxStep);<br>+            }<br>+        }<br>+    }<br>+    while (tuIterator.isNextSection());<br>+<br>+    if (splitType == VERTICAL_SPLIT)<br>+    {<br>+        offsetSubTUCBFs(cu, TEXT_CHROMA_U, tuDepth, absPartIdx);<br>+        offsetSubTUCBFs(cu, TEXT_CHROMA_V, tuDepth, absPartIdx);<br>     }<br> }<br><br> void Search::checkIntra(Mode& intraMode, const CUGeom& cuGeom, PartSize partSize, uint8_t* sharedModes)<br> {<br>-    uint32_t depth = cuGeom.depth;<br>     CUData& cu = 
intraMode.cu;<br><br>     cu.setPartSizeSubParts(partSize);<br>@@ -1143,7 +1168,7 @@<br>         m_entropyCoder.codePredMode(cu.m_predMode[0]);<br>     }<br><br>-    m_entropyCoder.codePartSize(cu, 0, depth);<br>+    m_entropyCoder.codePartSize(cu, 0, cuGeom.depth);<br>     m_entropyCoder.codePredInfo(cu, 0);<br>     intraMode.mvBits = m_entropyCoder.getNumberOfWrittenBits();<br><br>@@ -1153,7 +1178,10 @@<br>     intraMode.totalBits = m_entropyCoder.getNumberOfWrittenBits();<br>     intraMode.coeffBits = intraMode.totalBits - intraMode.mvBits;<br>     if (m_rdCost.m_psyRd)<br>-        intraMode.psyEnergy = m_rdCost.psyCost(cuGeom.log2CUSize - 2, intraMode.fencYuv->m_buf[0], intraMode.fencYuv->m_size, intraMode.reconYuv.m_buf[0], intraMode.reconYuv.m_size);<br>+    {<br>+        const Yuv* fencYuv = intraMode.fencYuv;<br>+        intraMode.psyEnergy = m_rdCost.psyCost(cuGeom.log2CUSize - 2, fencYuv->m_buf[0], fencYuv->m_size, intraMode.reconYuv.m_buf[0], intraMode.reconYuv.m_size);<br>+    }<br><br>     updateModeCost(intraMode);<br> }<br>@@ -1174,7 +1202,9 @@<br>     const uint32_t absPartIdx = 0;<br><br>     // Reference sample smoothing<br>-    initAdiPattern(cu, cuGeom, absPartIdx, initTuDepth, ALL_IDX);<br>+    IntraNeighbors intraNeighbors;<br>+    initIntraNeighbors(cu, absPartIdx, initTuDepth, true, &intraNeighbors);<br>+    initAdiPattern(cu, cuGeom, absPartIdx, intraNeighbors, ALL_IDX);<br><br>     const pixel* fenc = intraMode.fencYuv->m_buf[0];<br>     uint32_t stride = intraMode.fencYuv->m_size;<br>@@ -1335,7 +1365,6 @@<br> {<br>     CUData& cu = intraMode.cu;<br>     Yuv* reconYuv = &intraMode.reconYuv;<br>-    const Yuv* fencYuv = intraMode.fencYuv;<br><br>     X265_CHECK(cu.m_partSize[0] == SIZE_2Nx2N, "encodeIntraInInter does not expect NxN intra\n");<br>     X265_CHECK(!m_slice->isIntra(), "encodeIntraInInter does not expect to be used in I slices\n");<br>@@ -1369,7 +1398,10 @@<br>     intraMode.totalBits = 
m_entropyCoder.getNumberOfWrittenBits();<br>     intraMode.coeffBits = intraMode.totalBits - intraMode.mvBits;<br>     if (m_rdCost.m_psyRd)<br>+    {<br>+        const Yuv* fencYuv = intraMode.fencYuv;<br>         intraMode.psyEnergy = m_rdCost.psyCost(cuGeom.log2CUSize - 2, fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);<br>+    }<br><br>     m_entropyCoder.store(intraMode.contexts);<br>     updateModeCost(intraMode);<br>@@ -1404,7 +1436,9 @@<br>         else<br>         {<br>             // Reference sample smoothing<br>-            initAdiPattern(cu, cuGeom, absPartIdx, initTuDepth, ALL_IDX);<br>+            IntraNeighbors intraNeighbors;<br>+            initIntraNeighbors(cu, absPartIdx, initTuDepth, true, &intraNeighbors);<br>+            initAdiPattern(cu, cuGeom, absPartIdx, intraNeighbors, ALL_IDX);<br><br>             // determine set of modes to be tested (using prediction signal only)<br>             const pixel* fenc = fencYuv->getLumaAddr(absPartIdx);<br>@@ -1602,8 +1636,10 @@<br>         log2TrSizeC = 5;<br>     }<br><br>-    Predict::initAdiPatternChroma(cu, cuGeom, 0, tuDepth, 1);<br>-    Predict::initAdiPatternChroma(cu, cuGeom, 0, tuDepth, 2);<br>+    IntraNeighbors intraNeighbors;<br>+    initIntraNeighbors(cu, 0, tuDepth, false, &intraNeighbors);<br>+    Predict::initAdiPatternChroma(cu, cuGeom, 0, intraNeighbors, 1); // U<br>+    Predict::initAdiPatternChroma(cu, cuGeom, 0, intraNeighbors, 2); // V<br>     cu.getAllowedChromaDir(0, modeList);<br><br>     // check chroma modes<br>@@ -2581,16 +2617,16 @@<br>     updateModeCost(interMode);<br> }<br><br>-void Search::residualTransformQuantInter(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, const uint32_t depthRange[2])<br>+void Search::residualTransformQuantInter(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth, const uint32_t depthRange[2])<br> {<br>+    uint32_t depth = cuGeom.depth + tuDepth;<br>     CUData& cu = <a 
href="http://mode.cu" target="_blank">mode.cu</a>;<br>     X265_CHECK(cu.m_cuDepth[0] == cu.m_cuDepth[absPartIdx], "invalid depth\n");<br><br>     uint32_t log2TrSize = g_maxLog2CUSize - depth;<br>-    uint32_t tuDepth = depth - cu.m_cuDepth[0];<br><br>     bool bCheckFull = log2TrSize <= depthRange[1];<br>-    if (cu.m_partSize[0] != SIZE_2Nx2N && depth == cu.m_cuDepth[absPartIdx] && log2TrSize > depthRange[0])<br>+    if (cu.m_partSize[0] != SIZE_2Nx2N && !tuDepth && log2TrSize > depthRange[0])<br>         bCheckFull = false;<br><br>     if (bCheckFull)<br>@@ -2611,7 +2647,7 @@<br>         uint32_t setCbf = 1 << tuDepth;<br><br>         uint32_t coeffOffsetY = absPartIdx << (LOG2_UNIT_SIZE * 2);<br>-        coeff_t *coeffCurY = cu.m_trCoeff[0] + coeffOffsetY;<br>+        coeff_t* coeffCurY = cu.m_trCoeff[0] + coeffOffsetY;<br><br>         uint32_t sizeIdx  = log2TrSize  - 2;<br><br>@@ -2644,8 +2680,8 @@<br>             uint32_t strideResiC = resiYuv.m_csize;<br><br>             uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);<br>-            coeff_t *coeffCurU = cu.m_trCoeff[1] + coeffOffsetC;<br>-            coeff_t *coeffCurV = cu.m_trCoeff[2] + coeffOffsetC;<br>+            coeff_t* coeffCurU = cu.m_trCoeff[1] + coeffOffsetC;<br>+            coeff_t* coeffCurV = cu.m_trCoeff[2] + coeffOffsetC;<br>             bool splitIntoSubTUs = (m_csp == X265_CSP_I422);<br><br>             TURecurse tuIterator(splitIntoSubTUs ? 
VERTICAL_SPLIT : DONT_SPLIT, absPartIdxStep, absPartIdx);<br>@@ -2702,16 +2738,16 @@<br>         uint32_t ycbf = 0, ucbf = 0, vcbf = 0;<br>         for (uint32_t qIdx = 0, qPartIdx = absPartIdx; qIdx < 4; ++qIdx, qPartIdx += qNumParts)<br>         {<br>-            residualTransformQuantInter(mode, cuGeom, qPartIdx, depth + 1, depthRange);<br>-            ycbf |= cu.getCbf(qPartIdx, TEXT_LUMA, tuDepth + 1);<br>+            residualTransformQuantInter(mode, cuGeom, qPartIdx, tuDepth + 1, depthRange);<br>+            ycbf |= cu.getCbf(qPartIdx, TEXT_LUMA,     tuDepth + 1);<br>             ucbf |= cu.getCbf(qPartIdx, TEXT_CHROMA_U, tuDepth + 1);<br>             vcbf |= cu.getCbf(qPartIdx, TEXT_CHROMA_V, tuDepth + 1);<br>         }<br>-        for (uint32_t i = 0; i < 4 * qNumParts; i++)<br>+        for (uint32_t i = 0; i < 4 * qNumParts; ++i)<br>         {<br>-            cu.m_cbf[TEXT_LUMA][absPartIdx + i] |= ycbf << tuDepth;<br>-            cu.m_cbf[TEXT_CHROMA_U][absPartIdx + i] |= ucbf << tuDepth;<br>-            cu.m_cbf[TEXT_CHROMA_V][absPartIdx + i] |= vcbf << tuDepth;<br>+            cu.m_cbf[0][absPartIdx + i] |= ycbf << tuDepth;<br>+            cu.m_cbf[1][absPartIdx + i] |= ucbf << tuDepth;<br>+            cu.m_cbf[2][absPartIdx + i] |= vcbf << tuDepth;<br>         }<br>     }<br> }<br>@@ -2769,7 +2805,7 @@<br><br>     uint32_t trSize = 1 << log2TrSize;<br>     const bool splitIntoSubTUs = (m_csp == X265_CSP_I422);<br>-    uint32_t absPartIdxStep = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] +  tuDepthC) << 1);<br>+    uint32_t absPartIdxStep = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + tuDepthC) << 1);<br>     const Yuv* fencYuv = mode.fencYuv;<br><br>     // code full block<br>@@ -3127,16 +3163,19 @@<br>         //Encode cbf flags<br>         if (bCodeChroma)<br>         {<br>-            for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)<br>+            if (!splitIntoSubTUs)<br>             {<br>-                if 
(!splitIntoSubTUs)<br>-                    m_entropyCoder.codeQtCbfChroma(cbfFlag[chromaId][0], tuDepth);<br>-                else<br>-                {<br>-                    offsetSubTUCBFs(cu, (TextType)chromaId, tuDepth, absPartIdx);<br>-                    m_entropyCoder.codeQtCbfChroma(cbfFlag[chromaId][0], tuDepth);<br>-                    m_entropyCoder.codeQtCbfChroma(cbfFlag[chromaId][1], tuDepth);<br>-                }<br>+                m_entropyCoder.codeQtCbfChroma(cbfFlag[TEXT_CHROMA_U][0], tuDepth);<br>+                m_entropyCoder.codeQtCbfChroma(cbfFlag[TEXT_CHROMA_V][0], tuDepth);<br>+            }<br>+            else<br>+            {<br>+                offsetSubTUCBFs(cu, TEXT_CHROMA_U, tuDepth, absPartIdx);<br>+                offsetSubTUCBFs(cu, TEXT_CHROMA_V, tuDepth, absPartIdx);<br>+                m_entropyCoder.codeQtCbfChroma(cbfFlag[TEXT_CHROMA_U][0], tuDepth);<br>+                m_entropyCoder.codeQtCbfChroma(cbfFlag[TEXT_CHROMA_U][1], tuDepth);<br>+                m_entropyCoder.codeQtCbfChroma(cbfFlag[TEXT_CHROMA_V][0], tuDepth);<br>+                m_entropyCoder.codeQtCbfChroma(cbfFlag[TEXT_CHROMA_V][1], tuDepth);<br>             }<br>         }<br><br>diff -r 8d2f418829c8 -r 6b59452a17d7 source/encoder/search.h<br>--- a/source/encoder/search.h   Sat Dec 20 21:27:14 2014 +0900<br>+++ b/source/encoder/search.h   Tue Dec 23 14:49:59 2014 +0900<br>@@ -178,9 +178,9 @@<br>     void     encodeResAndCalcRdSkipCU(Mode& interMode);<br><br>     // encode residual without rd-cost<br>-    void     residualTransformQuantInter(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, const uint32_t depthRange[2]);<br>-    void     residualTransformQuantIntra(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, const uint32_t depthRange[2]);<br>-    void     residualQTIntraChroma(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx);<br>+    void     residualTransformQuantInter(Mode& 
mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth, const uint32_t depthRange[2]);<br>+    void     residualTransformQuantIntra(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth, const uint32_t depthRange[2]);<br>+    void     residualQTIntraChroma(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth);<br><br>     // pick be chroma mode from available using just sa8d costs<br>     void     getBestIntraModeChroma(Mode& intraMode, const CUGeom& cuGeom);<br>_______________________________________________<br>x265-devel mailing list<br><a href="mailto:x265-devel@videolan.org" target="_blank">x265-devel@videolan.org</a><br><a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><u></u><u></u></span></p></div><p class="MsoNormal"><span lang="EN-US"><u></u> <u></u></span></p></div></div></div></div></div></div><br>_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
<br></blockquote></div><br></div>