[x265] refine intra reference samples

Satoshi Nakagawa nakagawa424 at oki.com
Sat May 10 08:29:33 CEST 2014


# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1399703246 -32400
#      Sat May 10 15:27:26 2014 +0900
# Node ID dc0599b4da9e9b050bdceefb3c91418a6cfece64
# Parent  d0acf82a77f9ce2aaa08255f69dba0bceb9f4598
refine intra reference samples

diff -r d0acf82a77f9 -r dc0599b4da9e source/Lib/TLibCommon/CommonDef.h
--- a/source/Lib/TLibCommon/CommonDef.h	Thu May 08 18:52:17 2014 +0900
+++ b/source/Lib/TLibCommon/CommonDef.h	Sat May 10 15:27:26 2014 +0900
@@ -100,6 +100,7 @@
 
 #define FAST_UDI_MAX_RDMODE_NUM     35 ///< maximum number of RD comparison in fast-UDI estimation loop
 
+#define ALL_IDX                     -1
 #define PLANAR_IDX                  0
 #define VER_IDX                     26 // index for intra VERTICAL   mode
 #define HOR_IDX                     10 // index for intra HORIZONTAL mode
diff -r d0acf82a77f9 -r dc0599b4da9e source/Lib/TLibCommon/TComPattern.cpp
--- a/source/Lib/TLibCommon/TComPattern.cpp	Thu May 08 18:52:17 2014 +0900
+++ b/source/Lib/TLibCommon/TComPattern.cpp	Sat May 10 15:27:26 2014 +0900
@@ -38,6 +38,7 @@
 #include "TComPic.h"
 #include "TComPattern.h"
 #include "TComDataCU.h"
+#include "TComPrediction.h"
 
 using namespace x265;
 
@@ -49,116 +50,84 @@
 // ====================================================================================================================
 
 void TComPattern::initAdiPattern(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, pixel* adiBuf,
-                                 int strideOrig, int heightOrig)
+                                 pixel* refAbove, pixel* refLeft, pixel* refAboveFlt, pixel* refLeftFlt, int dirMode)
 {
     pixel* roiOrigin;
     pixel* adiTemp;
-    uint32_t cuWidth = cu->getCUSize(0) >> partDepth;
-    uint32_t cuHeight = cu->getCUSize(0) >> partDepth;
-    uint32_t cuWidth2 = cuWidth << 1;
-    uint32_t cuHeight2 = cuHeight << 1;
 
-    uint32_t width;
-    uint32_t height;
-    int  picStride = cu->getPic()->getStride();
-    bool bNeighborFlags[4 * MAX_NUM_SPU_W + 1];
-    int  numIntraNeighbor = 0;
+    int picStride = cu->getPic()->getStride();
 
-    uint32_t partIdxLT, partIdxRT, partIdxLB;
+    IntraNeighbors intraNeighbors;
 
-    cu->deriveLeftRightTopIdxAdi(partIdxLT, partIdxRT, zOrderIdxInPart, partDepth);
-
-    int  partIdxStride   = cu->getPic()->getNumPartInCUSize();
-    int  baseUnitSize    = g_maxCUSize >> g_maxCUDepth;
-    int  unitWidth       = baseUnitSize;
-    int  unitHeight      = baseUnitSize;
-    int  cuHeightInUnits = cuHeight / unitHeight;
-    int  cuWidthInUnits  = cuWidth / unitWidth;
-    int  iAboveUnits     = cuWidthInUnits << 1;
-    int  leftUnits       = cuHeightInUnits << 1;
-    partIdxLB            = g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((cuHeightInUnits - 1) * partIdxStride)];
-
-    if (!cu->getSlice()->getPPS()->getConstrainedIntraPred())
-    {
-        bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu, partIdxLT);
-        numIntraNeighbor += (int)(bNeighborFlags[leftUnits]);
-        numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1));
-        numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + cuWidthInUnits));
-        numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1));
-        numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits   - 1 - cuHeightInUnits));
-    }
-    else
-    {
-        bNeighborFlags[leftUnits] = isAboveLeftAvailableCIP(cu, partIdxLT);
-        numIntraNeighbor += (int)(bNeighborFlags[leftUnits]);
-        numIntraNeighbor += isAboveAvailableCIP(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1));
-        numIntraNeighbor += isAboveRightAvailableCIP(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + cuWidthInUnits));
-        numIntraNeighbor += isLeftAvailableCIP(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1));
-        numIntraNeighbor += isBelowLeftAvailableCIP(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits   - 1 - cuHeightInUnits));
-    }
-
-    width = cuWidth2 + 1;
-    height = cuHeight2 + 1;
-
-    if (((width << 2) > strideOrig) || ((height << 2) > heightOrig))
-    {
-        return;
-    }
+    initIntraNeighbors(cu, zOrderIdxInPart, partDepth, TEXT_LUMA, &intraNeighbors);
+    uint32_t tuSize = intraNeighbors.tuSize;
+    uint32_t tuSize2 = tuSize << 1;
 
     roiOrigin = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), cu->getZorderIdxInCU() + zOrderIdxInPart);
     adiTemp   = adiBuf;
 
-    fillReferenceSamples(roiOrigin, adiTemp, bNeighborFlags, numIntraNeighbor, unitWidth, unitHeight, iAboveUnits, leftUnits,
-                         cuWidth, cuHeight, width, height, picStride);
+    fillReferenceSamples(roiOrigin, picStride, adiTemp, intraNeighbors);
 
-    // generate filtered intra prediction samples
-    // left and left above border + above and above right border + top left corner = length of 3. filter buffer
-    int bufSize = cuHeight2 + cuWidth2 + 1;
-    uint32_t wh = ADI_BUF_STRIDE * height;         // number of elements in one buffer
+    bool bUseFilteredPredictions = (dirMode == ALL_IDX || TComPrediction::filteringIntraReferenceSamples(dirMode, tuSize));
 
-    pixel* filteredBuf1 = adiBuf + wh;         // 1. filter buffer
-    pixel* filteredBuf2 = filteredBuf1 + wh; // 2. filter buffer
-    pixel* filterBuf = filteredBuf2 + wh;    // buffer for 2. filtering (sequential)
-    pixel* filterBufN = filterBuf + bufSize; // buffer for 1. filtering (sequential)
+    if (bUseFilteredPredictions && 8 <= tuSize && tuSize <= 32)
+    {
+        // generate filtered intra prediction samples
+        // left and left above border + above and above right border + top left corner = length of 3. filter buffer
+        int bufSize = tuSize2 + tuSize2 + 1;
+        uint32_t wh = ADI_BUF_STRIDE * (tuSize2 + 1);         // number of elements in one buffer
 
-    int l = 0;
-    // left border from bottom to top
-    for (int i = 0; i < cuHeight2; i++)
-    {
-        filterBuf[l++] = adiTemp[ADI_BUF_STRIDE * (cuHeight2 - i)];
-    }
+        pixel* filterBuf  = adiBuf + wh;         // buffer for 2. filtering (sequential)
+        pixel* filterBufN = filterBuf + bufSize; // buffer for 1. filtering (sequential)
 
-    // top left corner
-    filterBuf[l++] = adiTemp[0];
+        int l = 0;
+        // left border from bottom to top
+        for (int i = 0; i < tuSize2; i++)
+        {
+            filterBuf[l++] = adiTemp[ADI_BUF_STRIDE * (tuSize2 - i)];
+        }
 
-    // above border from left to right
-    memcpy(&filterBuf[l], &adiTemp[1], cuWidth2 * sizeof(*filterBuf));
+        // top left corner
+        filterBuf[l++] = adiTemp[0];
 
-    if (cu->getSlice()->getSPS()->getUseStrongIntraSmoothing())
-    {
-        int blkSize = 32;
-        int bottomLeft = filterBuf[0];
-        int topLeft = filterBuf[cuHeight2];
-        int topRight = filterBuf[bufSize - 1];
-        int threshold = 1 << (X265_DEPTH - 5);
-        bool bilinearLeft = abs(bottomLeft + topLeft - 2 * filterBuf[cuHeight]) < threshold;
-        bool bilinearAbove  = abs(topLeft + topRight - 2 * filterBuf[cuHeight2 + cuHeight]) < threshold;
+        // above border from left to right
+        memcpy(&filterBuf[l], &adiTemp[1], tuSize2 * sizeof(*filterBuf));
 
-        if (cuWidth >= blkSize && (bilinearLeft && bilinearAbove))
+        if (tuSize >= 32 && cu->getSlice()->getSPS()->getUseStrongIntraSmoothing())
         {
-            int shift = g_convertToBit[cuWidth] + 3; // log2(uiCuHeight2)
-            filterBufN[0] = filterBuf[0];
-            filterBufN[cuHeight2] = filterBuf[cuHeight2];
-            filterBufN[bufSize - 1] = filterBuf[bufSize - 1];
-            //TODO: Performance Primitive???
-            for (int i = 1; i < cuHeight2; i++)
+            int bottomLeft = filterBuf[0];
+            int topLeft = filterBuf[tuSize2];
+            int topRight = filterBuf[bufSize - 1];
+            int threshold = 1 << (X265_DEPTH - 5);
+            bool bilinearLeft = abs(bottomLeft + topLeft - 2 * filterBuf[tuSize]) < threshold;
+            bool bilinearAbove  = abs(topLeft + topRight - 2 * filterBuf[tuSize2 + tuSize]) < threshold;
+
+            if (bilinearLeft && bilinearAbove)
             {
-                filterBufN[i] = ((cuHeight2 - i) * bottomLeft + i * topLeft + cuHeight) >> shift;
+                int shift = g_convertToBit[tuSize] + 3; // log2(tuSize2)
+                filterBufN[0] = filterBuf[0];
+                filterBufN[tuSize2] = filterBuf[tuSize2];
+                filterBufN[bufSize - 1] = filterBuf[bufSize - 1];
+                //TODO: Performance Primitive???
+                for (int i = 1; i < tuSize2; i++)
+                {
+                    filterBufN[i] = ((tuSize2 - i) * bottomLeft + i * topLeft + tuSize) >> shift;
+                }
+
+                for (int i = 1; i < tuSize2; i++)
+                {
+                    filterBufN[tuSize2 + i] = ((tuSize2 - i) * topLeft + i * topRight + tuSize) >> shift;
+                }
             }
-
-            for (int i = 1; i < cuWidth2; i++)
+            else
             {
-                filterBufN[cuHeight2 + i] = ((cuWidth2 - i) * topLeft + i * topRight + cuWidth) >> shift;
+                // 1. filtering with [1 2 1]
+                filterBufN[0] = filterBuf[0];
+                filterBufN[bufSize - 1] = filterBuf[bufSize - 1];
+                for (int i = 1; i < bufSize - 1; i++)
+                {
+                    filterBufN[i] = (filterBuf[i - 1] + 2 * filterBuf[i] + filterBuf[i + 1] + 2) >> 2;
+                }
             }
         }
         else
@@ -171,165 +140,151 @@
                 filterBufN[i] = (filterBuf[i - 1] + 2 * filterBuf[i] + filterBuf[i + 1] + 2) >> 2;
             }
         }
-    }
-    else
-    {
-        // 1. filtering with [1 2 1]
-        filterBufN[0] = filterBuf[0];
-        filterBufN[bufSize - 1] = filterBuf[bufSize - 1];
-        for (int i = 1; i < bufSize - 1; i++)
+
+        // initialization of ADI buffers
+        refAboveFlt += tuSize - 1;
+        refLeftFlt += tuSize - 1;
+        memcpy(refAboveFlt, filterBufN + tuSize2, (tuSize2 + 1) * sizeof(pixel));
+        for (int k = 0; k < tuSize2 + 1; k++)
         {
-            filterBufN[i] = (filterBuf[i - 1] + 2 * filterBuf[i] + filterBuf[i + 1] + 2) >> 2;
+            refLeftFlt[k] = filterBufN[tuSize2 - k];   // Smoothened
         }
     }
 
-    // fill 1. filter buffer with filtered values
-    l = 0;
-    for (int i = 0; i < cuHeight2; i++)
-    {
-        filteredBuf1[ADI_BUF_STRIDE * (cuHeight2 - i)] = filterBufN[l++];
-    }
+    // initialization of ADI buffers
+    refAbove += tuSize - 1;
+    refLeft += tuSize - 1;
 
-    filteredBuf1[0] = filterBufN[l++];
-    memcpy(&filteredBuf1[1], &filterBufN[l], cuWidth2 * sizeof(*filteredBuf1));
-}
-
-// Overloaded initialization of ADI buffers to support buffered references for xpredIntraAngBufRef
-void TComPattern::initAdiPattern(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, pixel* adiBuf, int strideOrig, int heightOrig,
-                                 pixel* refAbove, pixel* refLeft, pixel* refAboveFlt, pixel* refLeftFlt)
-{
-    initAdiPattern(cu, zOrderIdxInPart, partDepth, adiBuf, strideOrig, heightOrig);
-    uint32_t cuWidth   = cu->getCUSize(0) >> partDepth;
-    uint32_t cuHeight  = cu->getCUSize(0) >> partDepth;
-    uint32_t cuWidth2  = cuWidth << 1;
-    uint32_t cuHeight2 = cuHeight << 1;
-
-    refAbove += cuWidth - 1;
-    refAboveFlt += cuWidth - 1;
-    refLeft += cuWidth - 1;
-    refLeftFlt += cuWidth - 1;
-
-    //  ADI_BUF_STRIDE * (2 * height + 1);
-    memcpy(refAbove, adiBuf, (cuWidth2 + 1) * sizeof(pixel));
-    memcpy(refAboveFlt, adiBuf + ADI_BUF_STRIDE * (2 * cuHeight + 1), (cuWidth2 + 1) * sizeof(pixel));
-
-    for (int k = 0; k < cuHeight2 + 1; k++)
+    //  ADI_BUF_STRIDE * (2 * tuSize + 1);
+    memcpy(refAbove, adiBuf, (tuSize2 + 1) * sizeof(pixel));
+    for (int k = 0; k < tuSize2 + 1; k++)
     {
         refLeft[k] = adiBuf[k * ADI_BUF_STRIDE];
-        refLeftFlt[k] = (adiBuf + ADI_BUF_STRIDE * (cuHeight2 + 1))[k * ADI_BUF_STRIDE];   // Smoothened
     }
 }
 
-void TComPattern::initAdiPatternChroma(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, pixel* adiBuf, int strideOrig, int heightOrig, int chromaId)
+void TComPattern::initAdiPatternChroma(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, pixel* adiBuf, int chromaId)
 {
     pixel*  roiOrigin;
     pixel*  adiTemp;
-    uint32_t  cuWidth  = cu->getCUSize(0) >> (partDepth + cu->getHorzChromaShift());
-    uint32_t  cuHeight = cu->getCUSize(0) >> (partDepth + cu->getVertChromaShift());
 
-    cuHeight = (cuWidth != cuHeight) ? cuHeight >> 1 : cuHeight;
+    int picStride = cu->getPic()->getCStride();
 
-    uint32_t  width;
-    uint32_t  height;
-    int   picStride = cu->getPic()->getCStride();
+    IntraNeighbors intraNeighbors;
 
-    bool  bNeighborFlags[4 * MAX_NUM_SPU_W + 1];
+    initIntraNeighbors(cu, zOrderIdxInPart, partDepth, TEXT_CHROMA, &intraNeighbors);
+    uint32_t tuSize = intraNeighbors.tuSize;
+
+    roiOrigin = (chromaId == 1) ? cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), cu->getZorderIdxInCU() + zOrderIdxInPart) : cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), cu->getZorderIdxInCU() + zOrderIdxInPart);
+    adiTemp   = getAdiChromaBuf(chromaId, tuSize, adiBuf);
+
+    fillReferenceSamples(roiOrigin, picStride, adiTemp, intraNeighbors);
+}
+
+void TComPattern::initIntraNeighbors(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, TextType cType, IntraNeighbors *intraNeighbors)
+{
+    uint32_t tuSize  = cu->getCUSize(0) >> partDepth;
+    int baseUnitSize = g_maxCUSize >> g_maxCUDepth;
+    int unitWidth    = baseUnitSize;
+    int unitHeight   = baseUnitSize;
+
+    if (cType != TEXT_LUMA)
+    {
+        tuSize     >>= cu->getHorzChromaShift();
+        unitWidth  >>= cu->getHorzChromaShift();
+        unitHeight >>= cu->getVertChromaShift();
+    }
+
     int   numIntraNeighbor = 0;
+    bool *bNeighborFlags = intraNeighbors->bNeighborFlags;
 
     uint32_t partIdxLT, partIdxRT, partIdxLB;
 
     cu->deriveLeftRightTopIdxAdi(partIdxLT, partIdxRT, zOrderIdxInPart, partDepth);
 
     int  partIdxStride   = cu->getPic()->getNumPartInCUSize();
-    int  baseUnitSize    = g_maxCUSize >> g_maxCUDepth;
-    int  unitWidth       = baseUnitSize  >> cu->getHorzChromaShift();
-    int  unitHeight      = baseUnitSize  >> cu->getVertChromaShift();
-    int  cuHeightInUnits = cuHeight / unitHeight;
-    int  cuWidthInUnits  = cuWidth  / unitWidth;
-    int  aboveUnits      = cuWidthInUnits << 1;
-    int  leftUnits       = cuHeightInUnits << 1;
-    partIdxLB            = g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((cuHeightInUnits - 1) * partIdxStride)];
+    int  tuHeightInUnits = tuSize / unitHeight;
+    int  tuWidthInUnits  = tuSize / unitWidth;
+    int  aboveUnits      = tuWidthInUnits << 1;
+    int  leftUnits       = tuHeightInUnits << 1;
+    partIdxLB            = g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((tuHeightInUnits - 1) * partIdxStride)];
 
     if (!cu->getSlice()->getPPS()->getConstrainedIntraPred())
     {
         bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu, partIdxLT);
         numIntraNeighbor += (int)(bNeighborFlags[leftUnits]);
         numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1));
-        numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + cuWidthInUnits));
+        numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + tuWidthInUnits));
         numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1));
-        numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits   - 1 - cuHeightInUnits));
+        numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits   - 1 - tuHeightInUnits));
     }
     else
     {
         bNeighborFlags[leftUnits] = isAboveLeftAvailableCIP(cu, partIdxLT);
         numIntraNeighbor += (int)(bNeighborFlags[leftUnits]);
         numIntraNeighbor += isAboveAvailableCIP(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1));
-        numIntraNeighbor += isAboveRightAvailableCIP(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + cuWidthInUnits));
+        numIntraNeighbor += isAboveRightAvailableCIP(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + tuWidthInUnits));
         numIntraNeighbor += isLeftAvailableCIP(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1));
-        numIntraNeighbor += isBelowLeftAvailableCIP(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits   - 1 - cuHeightInUnits));
+        numIntraNeighbor += isBelowLeftAvailableCIP(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits   - 1 - tuHeightInUnits));
     }
-
-    width = cuWidth * 2 + 1;
-    height = cuHeight * 2 + 1;
-
-    if ((4 * width > strideOrig) || (4 * height > heightOrig))
-    {
-        return;
-    }
-    roiOrigin = (chromaId == 1) ? cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), cu->getZorderIdxInCU() + zOrderIdxInPart) : cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), cu->getZorderIdxInCU() + zOrderIdxInPart);
-    adiTemp   = (chromaId == 1) ? adiBuf : (adiBuf + 2 * ADI_BUF_STRIDE * height);
-
-    fillReferenceSamples(roiOrigin, adiTemp, bNeighborFlags, numIntraNeighbor, unitWidth, unitHeight, aboveUnits, leftUnits,
-                         cuWidth, cuHeight, width, height, picStride);
+    intraNeighbors->numIntraNeighbor = numIntraNeighbor;
+    intraNeighbors->totalUnits       = aboveUnits + leftUnits + 1;
+    intraNeighbors->aboveUnits       = aboveUnits;
+    intraNeighbors->leftUnits        = leftUnits;
+    intraNeighbors->tuSize           = tuSize;
+    intraNeighbors->unitWidth        = unitWidth;
+    intraNeighbors->unitHeight       = unitHeight;
 }
 
-void TComPattern::fillReferenceSamples(pixel* roiOrigin, pixel* adiTemp, bool* bNeighborFlags, int numIntraNeighbor, int unitWidth, int unitHeight, int aboveUnits, int leftUnits, uint32_t cuWidth, uint32_t cuHeight, uint32_t width, uint32_t height, int picStride)
+void TComPattern::fillReferenceSamples(pixel* roiOrigin, int picStride, pixel* adiTemp, const IntraNeighbors& intraNeighbors)
 {
+    int numIntraNeighbor = intraNeighbors.numIntraNeighbor;
+    int totalUnits       = intraNeighbors.totalUnits;
+    uint32_t tuSize      = intraNeighbors.tuSize;
+
+    uint32_t refSize = tuSize * 2 + 1;
     pixel* roiTemp;
     int  i, j;
     int  dcValue = 1 << (X265_DEPTH - 1);
-    int  totalUnits = aboveUnits + leftUnits + 1;
 
     if (numIntraNeighbor == 0)
     {
         // Fill border with DC value
-        for (i = 0; i < width; i++)
+        for (i = 0; i < refSize; i++)
         {
             adiTemp[i] = dcValue;
         }
 
-        for (i = 1; i < height; i++)
+        for (i = 1; i < refSize; i++)
         {
             adiTemp[i * ADI_BUF_STRIDE] = dcValue;
         }
     }
     else if (numIntraNeighbor == totalUnits)
     {
-        // Fill top-left border with rec. samples
+        // Fill top border with rec. samples
         roiTemp = roiOrigin - picStride - 1;
-        adiTemp[0] = roiTemp[0];
+        memcpy(adiTemp, roiTemp, refSize * sizeof(*adiTemp));
 
         // Fill left border with rec. samples
-        // Fill below left border with rec. samples
         roiTemp = roiOrigin - 1;
-
-        for (i = 0; i < 2 * cuHeight; i++)
+        for (i = 1; i < refSize; i++)
         {
-            adiTemp[(1 + i) * ADI_BUF_STRIDE] = roiTemp[0];
+            adiTemp[i * ADI_BUF_STRIDE] = roiTemp[0];
             roiTemp += picStride;
         }
-
-        // Fill top border with rec. samples
-        // Fill top right border with rec. samples
-        roiTemp = roiOrigin - picStride;
-        memcpy(&adiTemp[1], roiTemp, 2 * cuWidth * sizeof(*adiTemp));
     }
     else // reference samples are partially available
     {
+        const bool *bNeighborFlags = intraNeighbors.bNeighborFlags;
+        int aboveUnits       = intraNeighbors.aboveUnits;
+        int leftUnits        = intraNeighbors.leftUnits;
+        int unitWidth        = intraNeighbors.unitWidth;
+        int unitHeight       = intraNeighbors.unitHeight;
         int  totalSamples = (leftUnits * unitHeight) + ((aboveUnits + 1) * unitWidth);
         pixel pAdiLine[5 * MAX_CU_SIZE];
         pixel *pAdiLineTemp;
-        bool  *pNeighborFlags;
+        const bool  *pNeighborFlags;
         int   next, curr;
 
         // Initialize
@@ -449,11 +404,11 @@
         }
 
         // Copy processed samples
-        pAdiLineTemp = pAdiLine + height + unitWidth - 2;
-        memcpy(adiTemp, pAdiLineTemp, width * sizeof(*adiTemp));
+        pAdiLineTemp = pAdiLine + refSize + unitWidth - 2;
+        memcpy(adiTemp, pAdiLineTemp, refSize * sizeof(*adiTemp));
 
-        pAdiLineTemp = pAdiLine + height - 1;
-        for (i = 1; i < height; i++)
+        pAdiLineTemp = pAdiLine + refSize - 1;
+        for (i = 1; i < refSize; i++)
         {
             adiTemp[i * ADI_BUF_STRIDE] = pAdiLineTemp[-i];
         }
diff -r d0acf82a77f9 -r dc0599b4da9e source/Lib/TLibCommon/TComPattern.h
--- a/source/Lib/TLibCommon/TComPattern.h	Thu May 08 18:52:17 2014 +0900
+++ b/source/Lib/TLibCommon/TComPattern.h	Sat May 10 15:27:26 2014 +0900
@@ -52,15 +52,27 @@
 
 class TComDataCU;
 
+struct IntraNeighbors
+{
+    int  numIntraNeighbor;
+    int  totalUnits;
+    int  aboveUnits;
+    int  leftUnits;
+    int  tuSize;
+    int  unitWidth;
+    int  unitHeight;
+    bool bNeighborFlags[4 * MAX_NUM_SPU_W + 1];
+};
+
 /// neighboring pixel access class for all components
 class TComPattern
 {
 public:
 
     // access functions of ADI buffers
-    static pixel* getAdiChromaBuf(int chromaId, int cuHeight, pixel* adiBuf)
+    static pixel* getAdiChromaBuf(int chromaId, int tuSize, pixel* adiBuf)
     {
-        return adiBuf + (chromaId == 1 ? 0 : 2 * ADI_BUF_STRIDE * (cuHeight * 2 + 1));
+        return adiBuf + (chromaId == 1 ? 0 : 2 * ADI_BUF_STRIDE * (tuSize * 2 + 1));
     }
 
     // -------------------------------------------------------------------------------------------------------------------
@@ -69,21 +81,19 @@
 
     /// set parameters from pixel buffers for accessing neighboring pixels
     static void initAdiPattern(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, pixel* adiBuf,
-                               int strideOrig, int heightOrig, pixel* refAbove, pixel* refLeft,
-                               pixel* refAboveFlt, pixel* refLeftFlt);
-
-    /// set luma parameters from CU data for accessing ADI data
-    static void  initAdiPattern(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, pixel* adiBuf,
-                                int strideOrig, int heightOrig);
+                               pixel* refAbove, pixel* refLeft,
+                               pixel* refAboveFlt, pixel* refLeftFlt, int dirMode);
 
     /// set chroma parameters from CU data for accessing ADI data
-    static void  initAdiPatternChroma(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth,
-                                      pixel* adiBuf, int strideOrig, int heightOrig, int chromaId);
+    static void initAdiPatternChroma(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth,
+                                     pixel* adiBuf, int chromaId);
+
+    static void initIntraNeighbors(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, TextType cType, IntraNeighbors *IntraNeighbors);
 
 private:
 
     /// padding of unavailable reference samples for intra prediction
-    static void fillReferenceSamples(pixel* roiOrigin, pixel* adiTemp, bool* bNeighborFlags, int numIntraNeighbor, int unitWidth, int unitHeight, int aboveUnits, int leftUnits, uint32_t cuWidth, uint32_t cuHeight, uint32_t width, uint32_t height, int picStride);
+    static void fillReferenceSamples(pixel* roiOrigin, int picStride, pixel* adiTemp, const IntraNeighbors& intraNeighbors);
 
     /// constrained intra prediction
     static bool  isAboveLeftAvailable(TComDataCU* cu, uint32_t partIdxLT);
diff -r d0acf82a77f9 -r dc0599b4da9e source/Lib/TLibCommon/TComPrediction.cpp
--- a/source/Lib/TLibCommon/TComPrediction.cpp	Thu May 08 18:52:17 2014 +0900
+++ b/source/Lib/TLibCommon/TComPrediction.cpp	Sat May 10 15:27:26 2014 +0900
@@ -90,9 +90,9 @@
 
     if (m_predBuf == NULL)
     {
-        m_predBufHeight = ((MAX_CU_SIZE + 2) << 4);
-        m_predBufStride = ((MAX_CU_SIZE + 8) << 4);
-        m_predBuf = X265_MALLOC(pixel, m_predBufStride * m_predBufHeight);
+        int predBufHeight = ((MAX_CU_SIZE + 2) << 4);
+        int predBufStride = ((MAX_CU_SIZE + 8) << 4);
+        m_predBuf = X265_MALLOC(pixel, predBufStride * predBufHeight);
 
         m_refAbove = X265_MALLOC(pixel, 3 * MAX_CU_SIZE);
         m_refAboveFlt = X265_MALLOC(pixel, 3 * MAX_CU_SIZE);
@@ -113,7 +113,7 @@
 // Public member functions
 // ====================================================================================================================
 
-bool TComPrediction::filteringIntraReferenceSamples(uint32_t dirMode, uint32_t width)
+bool TComPrediction::filteringIntraReferenceSamples(uint32_t dirMode, uint32_t tuSize)
 {
     bool bFilter;
 
@@ -124,39 +124,38 @@
     else
     {
         int diff = std::min<int>(abs((int)dirMode - HOR_IDX), abs((int)dirMode - VER_IDX));
-        uint32_t sizeIndex = g_convertToBit[width];
+        uint32_t sizeIndex = g_convertToBit[tuSize];
         bFilter = diff > intraFilterThreshold[sizeIndex];
     }
 
     return bFilter;
 }
 
-void TComPrediction::predIntraLumaAng(uint32_t dirMode, pixel* dst, intptr_t stride, int width)
+void TComPrediction::predIntraLumaAng(uint32_t dirMode, pixel* dst, intptr_t stride, int tuSize)
 {
-    assert(width >= 4 && width <= 64);
-    int log2BlkSize = g_convertToBit[width];
-    bool bUseFilteredPredictions = TComPrediction::filteringIntraReferenceSamples(dirMode, width);
+    assert(tuSize >= 4 && tuSize <= 64);
+    int log2BlkSize = g_convertToBit[tuSize];
+    bool bUseFilteredPredictions = TComPrediction::filteringIntraReferenceSamples(dirMode, tuSize);
 
     pixel *refLft, *refAbv;
-    refLft = m_refLeft + width - 1;
-    refAbv = m_refAbove + width - 1;
+    refLft = m_refLeft + tuSize - 1;
+    refAbv = m_refAbove + tuSize - 1;
 
-    pixel *src = m_predBuf;
     if (bUseFilteredPredictions)
     {
-        src += ADI_BUF_STRIDE * (2 * width + 1);
-        refLft = m_refLeftFlt + width - 1;
-        refAbv = m_refAboveFlt + width - 1;
+        refLft = m_refLeftFlt + tuSize - 1;
+        refAbv = m_refAboveFlt + tuSize - 1;
     }
 
-    bool bFilter = width <= 16 && dirMode != PLANAR_IDX;
+    bool bFilter = tuSize <= 16 && dirMode != PLANAR_IDX;
     primitives.intra_pred[log2BlkSize][dirMode](dst, stride, refLft, refAbv, dirMode, bFilter);
 }
 
 // Angular chroma
-void TComPrediction::predIntraChromaAng(pixel* src, uint32_t dirMode, pixel* dst, intptr_t stride, int width, int height, int chFmt)
+void TComPrediction::predIntraChromaAng(pixel* src, uint32_t dirMode, pixel* dst, intptr_t stride, int tuSize, int chFmt)
 {
-    int log2BlkSize = g_convertToBit[width];
+    int log2BlkSize = g_convertToBit[tuSize];
+    uint32_t tuSize2 = tuSize << 1;
 
     // Create the prediction
     pixel refAbv[3 * MAX_CU_SIZE];
@@ -170,36 +169,32 @@
     }
     else
     {
-        assert(width >= 4 && height >= 4 && width < 128 && height < 128);
-        bUseFilteredPredictions = TComPrediction::filteringIntraReferenceSamples(dirMode, width);
+        assert(tuSize >= 4 && tuSize < 128);
+        bUseFilteredPredictions = TComPrediction::filteringIntraReferenceSamples(dirMode, tuSize);
     }
 
     if (bUseFilteredPredictions)
     {
-        uint32_t cuWidth2  = width << 1;
-        uint32_t cuHeight2 = height << 1;
         // generate filtered intra prediction samples
         // left and left above border + above and above right border + top left corner = length of 3. filter buffer
-        int bufSize = cuHeight2 + cuWidth2 + 1;
-        uint32_t wh = ADI_BUF_STRIDE * height;         // number of elements in one buffer
+        int bufSize = tuSize2 + tuSize2 + 1;
+        uint32_t wh = ADI_BUF_STRIDE * (tuSize2 + 1);         // number of elements in one buffer
 
-        pixel* filteredBuf1 = src + wh;             // 1. filter buffer
-        pixel* filteredBuf2 = filteredBuf1 + wh;    // 2. filter buffer
-        pixel* filterBuf    = filteredBuf2 + wh;    // buffer for 2. filtering (sequential)
-        pixel* filterBufN   = filterBuf + bufSize;  // buffer for 1. filtering (sequential)
+        pixel* filterBuf  = src + wh;            // buffer for 2. filtering (sequential)
+        pixel* filterBufN = filterBuf + bufSize; // buffer for 1. filtering (sequential)
 
         int l = 0;
         // left border from bottom to top
-        for (int i = 0; i < cuHeight2; i++)
+        for (int i = 0; i < tuSize2; i++)
         {
-            filterBuf[l++] = src[ADI_BUF_STRIDE * (cuHeight2 - i)];
+            filterBuf[l++] = src[ADI_BUF_STRIDE * (tuSize2 - i)];
         }
 
         // top left corner
         filterBuf[l++] = src[0];
 
         // above border from left to right
-        memcpy(&filterBuf[l], &src[1], cuWidth2 * sizeof(*filterBuf));
+        memcpy(&filterBuf[l], &src[1], tuSize2 * sizeof(*filterBuf));
 
         // 1. filtering with [1 2 1]
         filterBufN[0] = filterBuf[0];
@@ -209,35 +204,25 @@
             filterBufN[i] = (filterBuf[i - 1] + 2 * filterBuf[i] + filterBuf[i + 1] + 2) >> 2;
         }
 
-        // fill 1. filter buffer with filtered values
-        l = 0;
-        for (int i = 0; i < cuHeight2; i++)
-        {
-            filteredBuf1[ADI_BUF_STRIDE * (cuHeight2 - i)] = filterBufN[l++];
-        }
-
-        filteredBuf1[0] = filterBufN[l++];
-        memcpy(&filteredBuf1[1], &filterBufN[l], cuWidth2 * sizeof(*filteredBuf1));
-
-        int limit = (2 * width + 1);
-        src += wh;
-        memcpy(refAbv + width - 1, src, (limit) * sizeof(pixel));
+        // initialization of ADI buffers
+        int limit = tuSize2 + 1;
+        memcpy(refAbv + tuSize - 1, filterBufN + tuSize2, limit * sizeof(pixel));
         for (int k = 0; k < limit; k++)
         {
-            refLft[k + width - 1] = src[k * ADI_BUF_STRIDE];
+            refLft[k + tuSize - 1] = filterBufN[tuSize2 - k];   // Smoothened
         }
     }
     else
     {
-        int limit = (dirMode <= 25 && dirMode >= 11) ? (width + 1 + 1) : (2 * width + 1);
-        memcpy(refAbv + width - 1, src, (limit) * sizeof(pixel));
+        int limit = (dirMode <= 25 && dirMode >= 11) ? (tuSize + 1 + 1) : (tuSize2 + 1);
+        memcpy(refAbv + tuSize - 1, src, (limit) * sizeof(pixel));
         for (int k = 0; k < limit; k++)
         {
-            refLft[k + width - 1] = src[k * ADI_BUF_STRIDE];
+            refLft[k + tuSize - 1] = src[k * ADI_BUF_STRIDE];
         }
     }
 
-    primitives.intra_pred[log2BlkSize][dirMode](dst, stride, refLft + width - 1, refAbv + width - 1, dirMode, 0);
+    primitives.intra_pred[log2BlkSize][dirMode](dst, stride, refLft + tuSize - 1, refAbv + tuSize - 1, dirMode, 0);
 }
 
 /** Function for checking identical motion.
diff -r d0acf82a77f9 -r dc0599b4da9e source/Lib/TLibCommon/TComPrediction.h
--- a/source/Lib/TLibCommon/TComPrediction.h	Thu May 08 18:52:17 2014 +0900
+++ b/source/Lib/TLibCommon/TComPrediction.h	Sat May 10 15:27:26 2014 +0900
@@ -93,8 +93,6 @@
     pixel*    m_refAboveFlt;
     pixel*    m_refLeft;
     pixel*    m_refLeftFlt;
-    int       m_predBufStride;
-    int       m_predBufHeight;
 
     TComPrediction();
     virtual ~TComPrediction();
@@ -105,9 +103,9 @@
     void motionCompensation(TComDataCU* cu, TComYuv* predYuv, int picList = REF_PIC_LIST_X, int partIdx = -1, bool bLuma = true, bool bChroma = true);
 
     // Angular Intra
-    void predIntraLumaAng(uint32_t dirMode, pixel* pred, intptr_t stride, int width);
-    void predIntraChromaAng(pixel* src, uint32_t dirMode, pixel* pred, intptr_t stride, int width, int height, int chFmt);
-    bool filteringIntraReferenceSamples(uint32_t dirMode, uint32_t width);
+    void predIntraLumaAng(uint32_t dirMode, pixel* pred, intptr_t stride, int tuSize);
+    void predIntraChromaAng(pixel* src, uint32_t dirMode, pixel* pred, intptr_t stride, int tuSize, int chFmt);
+    static bool filteringIntraReferenceSamples(uint32_t dirMode, uint32_t tuSize);
 };
 }
 //! \}
diff -r d0acf82a77f9 -r dc0599b4da9e source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp	Thu May 08 18:52:17 2014 +0900
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp	Sat May 10 15:27:26 2014 +0900
@@ -396,13 +396,12 @@
                                      bool        bReusePred)
 {
     uint32_t fullDepth    = cu->getDepth(0)  + trDepth;
-    uint32_t width        = cu->getCUSize(0) >> trDepth;
-    uint32_t height       = cu->getCUSize(0) >> trDepth;
+    uint32_t tuSize       = cu->getCUSize(0) >> trDepth;
     uint32_t stride       = fencYuv->getStride();
     pixel*   fenc         = fencYuv->getLumaAddr(absPartIdx);
     pixel*   pred         = predYuv->getLumaAddr(absPartIdx);
     int16_t* residual     = resiYuv->getLumaAddr(absPartIdx);
-    int      part         = partitionFromSizes(width, height);
+    int      part         = partitionFromSizes(tuSize, tuSize);
 
     uint32_t trSizeLog2     = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize() >> fullDepth] + 2;
     uint32_t qtLayer        = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
@@ -421,23 +420,23 @@
     if (!bReusePred)
     {
         //===== init availability pattern =====
-        TComPattern::initAdiPattern(cu, absPartIdx, trDepth, m_predBuf, m_predBufStride, m_predBufHeight, m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt);
         uint32_t lumaPredMode = cu->getLumaIntraDir(absPartIdx);
+        TComPattern::initAdiPattern(cu, absPartIdx, trDepth, m_predBuf, m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt, lumaPredMode);
         //===== get prediction signal =====
-        predIntraLumaAng(lumaPredMode, pred, stride, width);
+        predIntraLumaAng(lumaPredMode, pred, stride, tuSize);
     }
 
     //===== get residual signal =====
-    assert(!((uint32_t)(size_t)fenc & (width - 1)));
-    assert(!((uint32_t)(size_t)pred & (width - 1)));
-    assert(!((uint32_t)(size_t)residual & (width - 1)));
-    primitives.calcresidual[(int)g_convertToBit[width]](fenc, pred, residual, stride);
+    assert(!((uint32_t)(size_t)fenc & (tuSize - 1)));
+    assert(!((uint32_t)(size_t)pred & (tuSize - 1)));
+    assert(!((uint32_t)(size_t)residual & (tuSize - 1)));
+    primitives.calcresidual[(int)g_convertToBit[tuSize]](fenc, pred, residual, stride);
 
     //===== transform and quantization =====
     //--- init rate estimation arrays for RDOQ ---
     if (useTransformSkip ? m_cfg->bEnableRDOQTS : m_cfg->bEnableRDOQ)
     {
-        m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac, width, TEXT_LUMA);
+        m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac, tuSize, TEXT_LUMA);
     }
 
     //--- transform and quantization ---
@@ -449,27 +448,27 @@
     m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
     m_trQuant->selectLambda(TEXT_LUMA);
 
-    absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, width, TEXT_LUMA, absPartIdx, &lastPos, useTransformSkip);
+    absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, tuSize, TEXT_LUMA, absPartIdx, &lastPos, useTransformSkip);
 
     //--- set coded block flag ---
     cu->setCbfSubParts((absSum ? 1 : 0) << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
 
     //--- inverse transform ---
-    int size = g_convertToBit[width];
+    int size = g_convertToBit[tuSize];
     if (absSum)
     {
         int scalingListType = 0 + TEXT_LUMA;
         assert(scalingListType < 6);
-        m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), cu->getLumaIntraDir(absPartIdx), residual, stride, coeff, width, scalingListType, useTransformSkip, lastPos);
+        m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), cu->getLumaIntraDir(absPartIdx), residual, stride, coeff, tuSize, scalingListType, useTransformSkip, lastPos);
     }
     else
     {
         int16_t* resiTmp = residual;
-        memset(coeff, 0, sizeof(coeff_t) * width * height);
+        memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);
         primitives.blockfill_s[size](resiTmp, stride, 0);
     }
 
-    assert(width <= 32);
+    assert(tuSize <= 32);
     //===== reconstruction =====
     primitives.calcrecon[size](pred, residual, reconQt, reconIPred, stride, MAX_CU_SIZE, reconIPredStride);
     //===== update distortion =====
@@ -507,8 +506,7 @@
     }
 
     TextType ttype          = (chromaId == 1) ? TEXT_CHROMA_U : TEXT_CHROMA_V;
-    uint32_t width          = cu->getCUSize(absPartIdx)  >> (trDepth + m_hChromaShift);
-    uint32_t height         = width;
+    uint32_t tuSize         = cu->getCUSize(0) >> (trDepth + m_hChromaShift);
     uint32_t stride         = fencYuv->getCStride();
     pixel*   fenc           = (chromaId == 1) ? fencYuv->getCbAddr(absPartIdx) : fencYuv->getCrAddr(absPartIdx);
     pixel*   pred           = (chromaId == 1) ? predYuv->getCbAddr(absPartIdx) : predYuv->getCrAddr(absPartIdx);
@@ -523,13 +521,13 @@
     pixel*   reconIPred       = (chromaId == 1) ? cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder) : cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);
     uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
     bool     useTransformSkipChroma = cu->getTransformSkip(absPartIdx, ttype);
-    int      part = partitionFromSizes(width, height);
+    int      part = partitionFromSizes(tuSize, tuSize);
 
     if (!bReusePred)
     {
         //===== init availability pattern =====
-        TComPattern::initAdiPatternChroma(cu, absPartIdx, trDepth, m_predBuf, m_predBufStride, m_predBufHeight, chromaId);
-        pixel* chromaPred = TComPattern::getAdiChromaBuf(chromaId, height, m_predBuf);
+        TComPattern::initAdiPatternChroma(cu, absPartIdx, trDepth, m_predBuf, chromaId);
+        pixel* chromaPred = TComPattern::getAdiChromaBuf(chromaId, tuSize, m_predBuf);
 
         uint32_t chromaPredMode = cu->getChromaIntraDir(absPartIdx);
         //===== update chroma mode =====
@@ -540,14 +538,14 @@
         }
         chromaPredMode = (chFmt == CHROMA_422) ? g_chroma422IntraAngleMappingTable[chromaPredMode] : chromaPredMode;
         //===== get prediction signal =====
-        predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, width, height, chFmt);
+        predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, tuSize, chFmt);
     }
 
     //===== get residual signal =====
-    assert(!((uint32_t)(size_t)fenc & (width - 1)));
-    assert(!((uint32_t)(size_t)pred & (width - 1)));
-    assert(!((uint32_t)(size_t)residual & (width - 1)));
-    int size = g_convertToBit[width];
+    assert(!((uint32_t)(size_t)fenc & (tuSize - 1)));
+    assert(!((uint32_t)(size_t)pred & (tuSize - 1)));
+    assert(!((uint32_t)(size_t)residual & (tuSize - 1)));
+    int size = g_convertToBit[tuSize];
     primitives.calcresidual[size](fenc, pred, residual, stride);
 
     //===== transform and quantization =====
@@ -555,7 +553,7 @@
         //--- init rate estimation arrays for RDOQ ---
         if (useTransformSkipChroma ? m_cfg->bEnableRDOQTS : m_cfg->bEnableRDOQ)
         {
-            m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac, width, ttype);
+            m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac, tuSize, ttype);
         }
         //--- transform and quantization ---
         uint32_t absSum = 0;
@@ -574,7 +572,7 @@
 
         m_trQuant->selectLambda(TEXT_CHROMA);
 
-        absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, width, ttype, absPartIdx, &lastPos, useTransformSkipChroma);
+        absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, tuSize, ttype, absPartIdx, &lastPos, useTransformSkipChroma);
 
         //--- set coded block flag ---
         cu->setCbfPartRange((((absSum > 0) ? 1 : 0) << origTrDepth), ttype, absPartIdx, absPartIdxStep);
@@ -584,18 +582,18 @@
         {
             int scalingListType = 0 + ttype;
             assert(scalingListType < 6);
-            m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, residual, stride, coeff, width, scalingListType, useTransformSkipChroma, lastPos);
+            m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, residual, stride, coeff, tuSize, scalingListType, useTransformSkipChroma, lastPos);
         }
         else
         {
             int16_t* resiTmp = residual;
-            memset(coeff, 0, sizeof(coeff_t) * width * height);
+            memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);
             primitives.blockfill_s[size](resiTmp, stride, 0);
         }
     }
 
-    assert(((intptr_t)residual & (width - 1)) == 0);
-    assert(width <= 32);
+    assert(((intptr_t)residual & (tuSize - 1)) == 0);
+    assert(tuSize <= 32);
     //===== reconstruction =====
     primitives.calcrecon[size](pred, residual, reconQt, reconIPred, stride, reconQtStride, reconIPredStride);
     //===== update distortion =====
@@ -852,8 +850,7 @@
 
         //----- code luma block with given intra prediction mode and store Cbf-----
         uint32_t lumaPredMode = cu->getLumaIntraDir(absPartIdx);
-        uint32_t width        = cu->getCUSize(0) >> trDepth;
-        uint32_t height       = cu->getCUSize(0) >> trDepth;
+        uint32_t tuSize       = cu->getCUSize(0) >> trDepth;
         int      chFmt        = cu->getChromaFormat();
         uint32_t stride       = fencYuv->getStride();
         pixel*   fenc         = fencYuv->getLumaAddr(absPartIdx);
@@ -871,16 +868,15 @@
         bool     useTransformSkip = cu->getTransformSkip(absPartIdx, TEXT_LUMA);
 
         //===== init availability pattern =====
-
-        TComPattern::initAdiPattern(cu, absPartIdx, trDepth, m_predBuf, m_predBufStride, m_predBufHeight, m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt);
+        TComPattern::initAdiPattern(cu, absPartIdx, trDepth, m_predBuf, m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt, lumaPredMode);
         //===== get prediction signal =====
-        predIntraLumaAng(lumaPredMode, pred, stride, width);
+        predIntraLumaAng(lumaPredMode, pred, stride, tuSize);
 
         //===== get residual signal =====
-        assert(!((uint32_t)(size_t)fenc & (width - 1)));
-        assert(!((uint32_t)(size_t)pred & (width - 1)));
-        assert(!((uint32_t)(size_t)residual & (width - 1)));
-        primitives.calcresidual[(int)g_convertToBit[width]](fenc, pred, residual, stride);
+        assert(!((uint32_t)(size_t)fenc & (tuSize - 1)));
+        assert(!((uint32_t)(size_t)pred & (tuSize - 1)));
+        assert(!((uint32_t)(size_t)residual & (tuSize - 1)));
+        primitives.calcresidual[(int)g_convertToBit[tuSize]](fenc, pred, residual, stride);
 
         //===== transform and quantization =====
         uint32_t absSum = 0;
@@ -889,31 +885,31 @@
 
         m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
         m_trQuant->selectLambda(TEXT_LUMA);
-        absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, width, TEXT_LUMA, absPartIdx, &lastPos, useTransformSkip);
+        absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, tuSize, TEXT_LUMA, absPartIdx, &lastPos, useTransformSkip);
 
         //--- set coded block flag ---
         cu->setCbfSubParts((absSum ? 1 : 0) << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
 
         //--- inverse transform ---
-        int size = g_convertToBit[width];
+        int size = g_convertToBit[tuSize];
         if (absSum)
         {
             int scalingListType = 0 + TEXT_LUMA;
             assert(scalingListType < 6);
-            m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), cu->getLumaIntraDir(absPartIdx), residual, stride, coeff, width, scalingListType, useTransformSkip, lastPos);
+            m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), cu->getLumaIntraDir(absPartIdx), residual, stride, coeff, tuSize, scalingListType, useTransformSkip, lastPos);
         }
         else
         {
             int16_t* resiTmp = residual;
-            memset(coeff, 0, sizeof(coeff_t) * width * height);
+            memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);
             primitives.blockfill_s[size](resiTmp, stride, 0);
         }
 
         //Generate Recon
-        assert(width <= 32);
-        int part = partitionFromSizes(width, height);
+        assert(tuSize <= 32);
+        int part = partitionFromSizes(tuSize, tuSize);
         primitives.luma_add_ps[part](recon, stride, pred, residual, stride, stride);
-        primitives.blockcpy_pp(width, height, reconIPred, reconIPredStride, recon, stride);
+        primitives.blockcpy_pp(tuSize, tuSize, reconIPred, reconIPredStride, recon, stride);
     }
 
     if (bCheckSplit && !bCheckFull)
@@ -1417,8 +1413,7 @@
             }
         }
 
-        uint32_t width  = cu->getCUSize(0) >> (actualTrDepth + m_hChromaShift);
-        uint32_t height = width;
+        uint32_t tuSize = cu->getCUSize(0) >> (actualTrDepth + m_hChromaShift);
         uint32_t stride = fencYuv->getCStride();
         const bool splitIntoSubTUs = (chFmt == CHROMA_422);
 
@@ -1455,17 +1450,17 @@
                 }
                 chromaPredMode = (chFmt == CHROMA_422) ? g_chroma422IntraAngleMappingTable[chromaPredMode] : chromaPredMode;
                 //===== init availability pattern =====
-                TComPattern::initAdiPatternChroma(cu, absTUPartIdxC, actualTrDepth, m_predBuf, m_predBufStride, m_predBufHeight, chromaId);
-                pixel* chromaPred = TComPattern::getAdiChromaBuf(chromaId, height, m_predBuf);
+                TComPattern::initAdiPatternChroma(cu, absTUPartIdxC, actualTrDepth, m_predBuf, chromaId);
+                pixel* chromaPred = TComPattern::getAdiChromaBuf(chromaId, tuSize, m_predBuf);
 
                 //===== get prediction signal =====
-                predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, width, height, chFmt);
+                predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, tuSize, chFmt);
 
                 //===== get residual signal =====
-                assert(!((uint32_t)(size_t)fenc & (width - 1)));
-                assert(!((uint32_t)(size_t)pred & (width - 1)));
-                assert(!((uint32_t)(size_t)residual & (width - 1)));
-                int size = g_convertToBit[width];
+                assert(!((uint32_t)(size_t)fenc & (tuSize - 1)));
+                assert(!((uint32_t)(size_t)pred & (tuSize - 1)));
+                assert(!((uint32_t)(size_t)residual & (tuSize - 1)));
+                int size = g_convertToBit[tuSize];
                 primitives.calcresidual[size](fenc, pred, residual, stride);
 
                 //--- transform and quantization ---
@@ -1485,7 +1480,7 @@
 
                 m_trQuant->selectLambda(TEXT_CHROMA);
 
-                absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, width, ttype, absTUPartIdxC, &lastPos, useTransformSkipChroma);
+                absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, tuSize, ttype, absTUPartIdxC, &lastPos, useTransformSkipChroma);
 
                 //--- set coded block flag ---
                 cu->setCbfPartRange((((absSum > 0) ? 1 : 0) << origTrDepth), ttype, absTUPartIdxC, tuIterator.m_absPartIdxStep);
@@ -1495,21 +1490,21 @@
                 {
                     int scalingListType = 0 + ttype;
                     assert(scalingListType < 6);
-                    m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absTUPartIdxC), REG_DCT, residual, stride, coeff, width, scalingListType, useTransformSkipChroma, lastPos);
+                    m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absTUPartIdxC), REG_DCT, residual, stride, coeff, tuSize, scalingListType, useTransformSkipChroma, lastPos);
                 }
                 else
                 {
                     int16_t* resiTmp = residual;
-                    memset(coeff, 0, sizeof(coeff_t) * width * height);
+                    memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);
                     primitives.blockfill_s[size](resiTmp, stride, 0);
                 }
 
                 //===== reconstruction =====
-                assert(((intptr_t)residual & (width - 1)) == 0);
-                assert(width <= 32);
+                assert(((intptr_t)residual & (tuSize - 1)) == 0);
+                assert(tuSize <= 32);
 
                 // use square primitive
-                int part = partitionFromSizes(width, width);
+                int part = partitionFromSizes(tuSize, tuSize);
                 primitives.chroma[CHROMA_444].add_ps[part](recon, stride, pred, residual, stride, stride);
                 primitives.chroma[CHROMA_444].copy_pp[part](reconIPred, reconIPredStride, recon, stride);
             }
@@ -1547,13 +1542,13 @@
     uint32_t depth        = cu->getDepth(0);
     uint32_t initTrDepth  = cu->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1;
     uint32_t numPU        = 1 << (2 * initTrDepth);
-    uint32_t puSize       = cu->getCUSize(0) >> initTrDepth;
+    uint32_t tuSize       = cu->getCUSize(0) >> initTrDepth;
     uint32_t qNumParts    = cu->getTotalNumPart() >> 2;
     uint32_t qPartNum     = cu->getPic()->getNumPartInCU() >> ((depth + initTrDepth) << 1);
     uint32_t overallDistY = 0;
     uint32_t candNum;
     uint64_t candCostList[FAST_UDI_MAX_RDMODE_NUM];
-    uint32_t puSizeIdx    = g_convertToBit[puSize]; // log2(puSize) - 2
+    uint32_t tuSizeIdx    = g_convertToBit[tuSize]; // log2(tuSize) - 2
     static const uint8_t intraModeNumFast[] = { 8, 8, 3, 3, 3 }; // 4x4, 8x8, 16x16, 32x32, 64x64
 
     //===== loop over partitions =====
@@ -1562,14 +1557,14 @@
     for (uint32_t pu = 0; pu < numPU; pu++, partOffset += qNumParts)
     {
         // Reference sample smoothing
-        TComPattern::initAdiPattern(cu, partOffset, initTrDepth, m_predBuf, m_predBufStride, m_predBufHeight, m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt);
+        TComPattern::initAdiPattern(cu, partOffset, initTrDepth, m_predBuf, m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt, ALL_IDX);
 
         //===== determine set of modes to be tested (using prediction signal only) =====
         const int numModesAvailable = 35; //total number of Intra modes
-        pixel*   fenc   = fencYuv->getLumaAddr(pu, puSize);
+        pixel*   fenc   = fencYuv->getLumaAddr(pu, tuSize);
         uint32_t stride = predYuv->getStride();
         uint32_t rdModeList[FAST_UDI_MAX_RDMODE_NUM];
-        int numModesForFullRD = intraModeNumFast[puSizeIdx];
+        int numModesForFullRD = intraModeNumFast[tuSizeIdx];
 
         bool doFastSearch = (numModesForFullRD != numModesAvailable);
         if (doFastSearch)
@@ -1584,10 +1579,10 @@
             candNum = 0;
             uint32_t modeCosts[35];
 
-            pixel *above         = m_refAbove    + puSize - 1;
-            pixel *aboveFiltered = m_refAboveFlt + puSize - 1;
-            pixel *left          = m_refLeft     + puSize - 1;
-            pixel *leftFiltered  = m_refLeftFlt  + puSize - 1;
+            pixel *above         = m_refAbove    + tuSize - 1;
+            pixel *aboveFiltered = m_refAboveFlt + tuSize - 1;
+            pixel *left          = m_refLeft     + tuSize - 1;
+            pixel *leftFiltered  = m_refLeftFlt  + tuSize - 1;
 
             // 33 Angle modes once
             ALIGN_VAR_32(pixel, buf_trans[32 * 32]);
@@ -1595,14 +1590,15 @@
             ALIGN_VAR_32(pixel, bufScale[32 * 32]);
             pixel _above[4 * 32 + 1];
             pixel _left[4 * 32 + 1];
-            pixel *aboveScale  = _above + 2 * 32;
-            pixel *leftScale   = _left + 2 * 32;
-            int scaleSize = puSize;
+            int scaleTuSize = tuSize;
             int scaleStride = stride;
             int costShift = 0;
 
-            if (puSize > 32)
+            if (tuSize > 32)
             {
+                pixel *aboveScale  = _above + 2 * 32;
+                pixel *leftScale   = _left + 2 * 32;
+
                 // origin is 64x64, we scale to 32x32 and setup required parameters
                 primitives.scale2D_64to32(bufScale, fenc, stride);
                 fenc = bufScale;
@@ -1613,7 +1609,7 @@
                 primitives.scale1D_128to64(aboveScale + 1, above + 1, 0);
                 primitives.scale1D_128to64(leftScale + 1, left + 1, 0);
 
-                scaleSize = 32;
+                scaleTuSize = 32;
                 scaleStride = 32;
                 costShift = 2;
 
@@ -1624,17 +1620,17 @@
                 leftFiltered  = leftScale;
             }
 
-            int log2SizeMinus2 = g_convertToBit[scaleSize];
+            int log2SizeMinus2 = g_convertToBit[scaleTuSize];
             pixelcmp_t sa8d = primitives.sa8d[log2SizeMinus2];
 
             // DC
-            primitives.intra_pred[log2SizeMinus2][DC_IDX](tmp, scaleStride, left, above, 0, (scaleSize <= 16));
+            primitives.intra_pred[log2SizeMinus2][DC_IDX](tmp, scaleStride, left, above, 0, (scaleTuSize <= 16));
             modeCosts[DC_IDX] = sa8d(fenc, scaleStride, tmp, scaleStride) << costShift;
 
             pixel *abovePlanar   = above;
             pixel *leftPlanar    = left;
 
-            if (puSize >= 8 && puSize <= 32)
+            if (tuSize >= 8 && tuSize <= 32)
             {
                 abovePlanar = aboveFiltered;
                 leftPlanar  = leftFiltered;
@@ -1647,14 +1643,14 @@
             // Transpose NxN
             primitives.transpose[log2SizeMinus2](buf_trans, fenc, scaleStride);
 
-            primitives.intra_pred_allangs[log2SizeMinus2](tmp, above, left, aboveFiltered, leftFiltered, (scaleSize <= 16));
+            primitives.intra_pred_allangs[log2SizeMinus2](tmp, above, left, aboveFiltered, leftFiltered, (scaleTuSize <= 16));
 
             for (uint32_t mode = 2; mode < numModesAvailable; mode++)
             {
                 bool modeHor = (mode < 18);
                 pixel *cmp = (modeHor ? buf_trans : fenc);
-                intptr_t srcStride = (modeHor ? scaleSize : scaleStride);
-                modeCosts[mode] = sa8d(cmp, srcStride, &tmp[(mode - 2) * (scaleSize * scaleSize)], scaleSize) << costShift;
+                intptr_t srcStride = (modeHor ? scaleTuSize : scaleStride);
+                modeCosts[mode] = sa8d(cmp, srcStride, &tmp[(mode - 2) * (scaleTuSize * scaleTuSize)], scaleTuSize) << costShift;
             }
 
             // Find N least cost modes. N = numModesForFullRD
@@ -1770,7 +1766,7 @@
         if (pu != numPU - 1)
         {
             uint32_t zorder      = cu->getZorderIdxInCU() + partOffset;
-            int      part        = partitionFromSizes(puSize, puSize);
+            int      part        = partitionFromSizes(tuSize, tuSize);
             pixel*   dst         = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);
             uint32_t dststride   = cu->getPic()->getPicYuvRec()->getStride();
             pixel*   src         = reconYuv->getLumaAddr(partOffset);
@@ -1817,44 +1813,44 @@
     uint32_t maxMode = NUM_CHROMA_MODE;
     uint32_t modeList[NUM_CHROMA_MODE];
 
-    uint32_t width          = cu->getCUSize(0) >> (trDepth + m_hChromaShift);
-    uint32_t height         = cu->getCUSize(0) >> (trDepth + m_vChromaShift);
+    uint32_t tuSize         = cu->getCUSize(0) >> (trDepth + m_hChromaShift);
     int      chFmt          = cu->getChromaFormat();
     uint32_t stride         = fencYuv->getCStride();
-    int scaleWidth = width;
+    int scaleTuSize = tuSize;
     int scaleStride = stride;
     int costShift = 0;
 
-    if (width > 32)
+    if (tuSize > 32)
     {
-        scaleWidth = 32;
+        scaleTuSize = 32;
         scaleStride = 32;
         costShift = 2;
     }
 
-    TComPattern::initAdiPatternChroma(cu, absPartIdx, trDepth, m_predBuf, m_predBufStride, m_predBufHeight, 1);
-    TComPattern::initAdiPatternChroma(cu, absPartIdx, trDepth, m_predBuf, m_predBufStride, m_predBufHeight, 2);
+    TComPattern::initAdiPatternChroma(cu, absPartIdx, trDepth, m_predBuf, 1);
+    TComPattern::initAdiPatternChroma(cu, absPartIdx, trDepth, m_predBuf, 2);
     cu->getAllowedChromaDir(0, modeList);
     //----- check chroma modes -----
     for (uint32_t mode = minMode; mode < maxMode; mode++)
     {
+        uint32_t chromaPredMode = modeList[mode];
+        if (chromaPredMode == DM_CHROMA_IDX)
+        {
+            chromaPredMode = cu->getLumaIntraDir(0);
+        }
+        chromaPredMode = (chFmt == CHROMA_422) ? g_chroma422IntraAngleMappingTable[chromaPredMode] : chromaPredMode;
         uint64_t cost = 0;
         for (int chromaId = 0; chromaId < 2; chromaId++)
         {
-            int sad = 0;
-            uint32_t chromaPredMode = modeList[mode];
-            if (chromaPredMode == DM_CHROMA_IDX)
-                chromaPredMode = cu->getLumaIntraDir(0);
             pixel* fenc = (chromaId > 0 ? fencYuv->getCrAddr(absPartIdx) : fencYuv->getCbAddr(absPartIdx));
             pixel* pred = (chromaId > 0 ? predYuv->getCrAddr(absPartIdx) : predYuv->getCbAddr(absPartIdx));
-            pixel* chromaPred = TComPattern::getAdiChromaBuf(chromaId + 1, height, m_predBuf);
+            pixel* chromaPred = TComPattern::getAdiChromaBuf(chromaId + 1, tuSize, m_predBuf);
 
             //===== get prediction signal =====
-            predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, width, height, chFmt);
-            int log2SizeMinus2 = g_convertToBit[scaleWidth];
+            predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, scaleTuSize, chFmt);
+            int log2SizeMinus2 = g_convertToBit[scaleTuSize];
             pixelcmp_t sa8d = primitives.sa8d[log2SizeMinus2];
-            sad = sa8d(fenc, scaleStride, pred, scaleStride) << costShift;
-            cost += sad;
+            cost += sa8d(fenc, stride, pred, stride) << costShift;
         }
 
         //----- compare -----
diff -r d0acf82a77f9 -r dc0599b4da9e source/encoder/compress.cpp
--- a/source/encoder/compress.cpp	Thu May 08 18:52:17 2014 +0900
+++ b/source/encoder/compress.cpp	Sat May 10 15:27:26 2014 +0900
@@ -82,21 +82,21 @@
     cu->setCUTransquantBypassSubParts(m_CUTransquantBypassFlagValue, 0, depth);
 
     uint32_t initTrDepth = cu->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1;
-    uint32_t width       = cu->getCUSize(0) >> initTrDepth;
-    uint32_t partOffset  = 0;
+    uint32_t tuSize      = cu->getCUSize(0) >> initTrDepth;
+    const uint32_t partOffset  = 0;
 
     // Reference sample smoothing
-    TComPattern::initAdiPattern(cu, partOffset, initTrDepth, m_search->m_predBuf, m_search->m_predBufStride,
-                                m_search->m_predBufHeight, m_search->m_refAbove, m_search->m_refLeft,
-                                m_search->m_refAboveFlt, m_search->m_refLeftFlt);
+    TComPattern::initAdiPattern(cu, partOffset, initTrDepth, m_search->m_predBuf,
+                                m_search->m_refAbove, m_search->m_refLeft,
+                                m_search->m_refAboveFlt, m_search->m_refLeftFlt, ALL_IDX);
 
     pixel* fenc     = m_origYuv[depth]->getLumaAddr();
     uint32_t stride = m_modePredYuv[5][depth]->getStride();
 
-    pixel *above         = m_search->m_refAbove    + width - 1;
-    pixel *aboveFiltered = m_search->m_refAboveFlt + width - 1;
-    pixel *left          = m_search->m_refLeft     + width - 1;
-    pixel *leftFiltered  = m_search->m_refLeftFlt  + width - 1;
+    pixel *above         = m_search->m_refAbove    + tuSize - 1;
+    pixel *aboveFiltered = m_search->m_refAboveFlt + tuSize - 1;
+    pixel *left          = m_search->m_refLeft     + tuSize - 1;
+    pixel *leftFiltered  = m_search->m_refLeftFlt  + tuSize - 1;
     int sad, bsad;
     uint32_t bits, bbits, mode, bmode;
     uint64_t cost, bcost;
@@ -104,11 +104,11 @@
     // 33 Angle modes once
     ALIGN_VAR_32(pixel, buf_trans[32 * 32]);
     ALIGN_VAR_32(pixel, tmp[33 * 32 * 32]);
-    int scaleWidth = width;
+    int scaleTuSize = tuSize;
     int scaleStride = stride;
     int costMultiplier = 1;
 
-    if (width > 32)
+    if (tuSize > 32)
     {
         // origin is 64x64, we scale to 32x32 and setup required parameters
         ALIGN_VAR_32(pixel, bufScale[32 * 32]);
@@ -125,7 +125,7 @@
         primitives.scale1D_128to64(aboveScale + 1, above + 1, 0);
         primitives.scale1D_128to64(leftScale + 1, left + 1, 0);
 
-        scaleWidth = 32;
+        scaleTuSize = 32;
         scaleStride = 32;
         costMultiplier = 4;
 
@@ -136,11 +136,11 @@
         leftFiltered  = leftScale;
     }
 
-    int log2SizeMinus2 = g_convertToBit[scaleWidth];
+    int log2SizeMinus2 = g_convertToBit[scaleTuSize];
     pixelcmp_t sa8d = primitives.sa8d[log2SizeMinus2];
 
     // DC
-    primitives.intra_pred[log2SizeMinus2][DC_IDX](tmp, scaleStride, left, above, 0, (scaleWidth <= 16));
+    primitives.intra_pred[log2SizeMinus2][DC_IDX](tmp, scaleStride, left, above, 0, (scaleTuSize <= 16));
     bsad = costMultiplier * sa8d(fenc, scaleStride, tmp, scaleStride);
     bmode = mode = DC_IDX;
     bbits  = m_search->xModeBitsIntra(cu, mode, partOffset, depth, initTrDepth);
@@ -149,7 +149,7 @@
     pixel *abovePlanar   = above;
     pixel *leftPlanar    = left;
 
-    if (width >= 8 && width <= 32)
+    if (tuSize >= 8 && tuSize <= 32)
     {
         abovePlanar = aboveFiltered;
         leftPlanar  = leftFiltered;
@@ -166,14 +166,14 @@
     // Transpose NxN
     primitives.transpose[log2SizeMinus2](buf_trans, fenc, scaleStride);
 
-    primitives.intra_pred_allangs[log2SizeMinus2](tmp, above, left, aboveFiltered, leftFiltered, (scaleWidth <= 16));
+    primitives.intra_pred_allangs[log2SizeMinus2](tmp, above, left, aboveFiltered, leftFiltered, (scaleTuSize <= 16));
 
     for (mode = 2; mode < 35; mode++)
     {
         bool modeHor = (mode < 18);
         pixel *cmp = (modeHor ? buf_trans : fenc);
-        intptr_t srcStride = (modeHor ? scaleWidth : scaleStride);
-        sad  = costMultiplier * sa8d(cmp, srcStride, &tmp[(mode - 2) * (scaleWidth * scaleWidth)], scaleWidth);
+        intptr_t srcStride = (modeHor ? scaleTuSize : scaleStride);
+        sad  = costMultiplier * sa8d(cmp, srcStride, &tmp[(mode - 2) * (scaleTuSize * scaleTuSize)], scaleTuSize);
         bits = m_search->xModeBitsIntra(cu, mode, partOffset, depth, initTrDepth);
         cost = m_rdCost->calcRdSADCost(sad, bits);
         COPY4_IF_LT(bcost, cost, bmode, mode, bsad, sad, bbits, bits);


More information about the x265-devel mailing list