[x265] refine YUV and coeff buffer

Steve Borho steve at borho.org
Wed May 28 17:07:28 CEST 2014


On Wed, May 28, 2014 at 2:50 AM, Satoshi Nakagawa <nakagawa424 at oki.com> wrote:
> # HG changeset patch
> # User Satoshi Nakagawa <nakagawa424 at oki.com>
> # Date 1401263327 -32400
> #      Wed May 28 16:48:47 2014 +0900
> # Node ID 8e2f16c13099ec0d4252055ae41523299b5b62da
> # Parent  807ee7f1597b3aea8ed0c09ee49cd0778f28e0d5
> refine YUV and coeff buffer

staged for testing, thanks

> diff -r 807ee7f1597b -r 8e2f16c13099 source/Lib/TLibCommon/TComDataCU.cpp
> --- a/source/Lib/TLibCommon/TComDataCU.cpp      Tue May 27 23:22:21 2014 +0530
> +++ b/source/Lib/TLibCommon/TComDataCU.cpp      Wed May 28 16:48:47 2014 +0900
> @@ -80,9 +80,9 @@
>      m_cbf[0] = NULL;
>      m_cbf[1] = NULL;
>      m_cbf[2] = NULL;
> -    m_trCoeffY = NULL;
> -    m_trCoeffCb = NULL;
> -    m_trCoeffCr = NULL;
> +    m_trCoeff[0] = NULL;
> +    m_trCoeff[1] = NULL;
> +    m_trCoeff[2] = NULL;
>      m_iPCMFlags = NULL;
>      m_iPCMSampleY = NULL;
>      m_iPCMSampleCb = NULL;
> @@ -148,9 +148,9 @@
>      CHECKED_MALLOC(m_mvpIdx[0], uint8_t, numPartition * 2);
>      m_mvpIdx[1] = m_mvpIdx[0] + numPartition;
>
> -    CHECKED_MALLOC(m_trCoeffY, coeff_t, sizeL + sizeC * 2);
> -    m_trCoeffCb = m_trCoeffY + sizeL;
> -    m_trCoeffCr = m_trCoeffY + sizeL + sizeC;
> +    CHECKED_MALLOC(m_trCoeff[0], coeff_t, sizeL + sizeC * 2);
> +    m_trCoeff[1] = m_trCoeff[0] + sizeL;
> +    m_trCoeff[2] = m_trCoeff[0] + sizeL + sizeC;
>
>      CHECKED_MALLOC(m_iPCMFlags, bool, numPartition);
>      CHECKED_MALLOC(m_iPCMSampleY, pixel, sizeL + sizeC * 2);
> @@ -177,7 +177,7 @@
>      X265_FREE(m_chromaIntraDir);
>      X265_FREE(m_trIdx);
>      X265_FREE(m_transformSkip[0]);
> -    X265_FREE(m_trCoeffY);
> +    X265_FREE(m_trCoeff[0]);
>      X265_FREE(m_iPCMFlags);
>      X265_FREE(m_iPCMSampleY);
>      X265_FREE(m_mvpIdx[0]);
> @@ -607,12 +607,12 @@
>
>      uint32_t tmp  = g_maxCUSize * g_maxCUSize >> (depth << 1);
>      uint32_t tmp2 = partUnitIdx * tmp;
> -    memcpy(m_trCoeffY  + tmp2, cu->getCoeffY(),  sizeof(coeff_t) * tmp);
> +    memcpy(m_trCoeff[0]  + tmp2, cu->getCoeffY(),  sizeof(coeff_t) * tmp);
>      memcpy(m_iPCMSampleY + tmp2, cu->getPCMSampleY(), sizeof(pixel) * tmp);
>      tmp  >>= m_hChromaShift + m_vChromaShift;
>      tmp2 >>= m_hChromaShift + m_vChromaShift;
> -    memcpy(m_trCoeffCb + tmp2, cu->getCoeffCb(), sizeof(coeff_t) * tmp);
> -    memcpy(m_trCoeffCr + tmp2, cu->getCoeffCr(), sizeof(coeff_t) * tmp);
> +    memcpy(m_trCoeff[1] + tmp2, cu->m_trCoeff[1], sizeof(coeff_t) * tmp);
> +    memcpy(m_trCoeff[2] + tmp2, cu->m_trCoeff[2], sizeof(coeff_t) * tmp);
>      memcpy(m_iPCMSampleCb + tmp2, cu->getPCMSampleCb(), sizeof(pixel) * tmp);
>      memcpy(m_iPCMSampleCr + tmp2, cu->getPCMSampleCr(), sizeof(pixel) * tmp);
>  }
> @@ -665,12 +665,12 @@
>
>      uint32_t tmp  = (g_maxCUSize * g_maxCUSize) >> (depth << 1);
>      uint32_t tmp2 = m_absIdxInLCU << m_pic->getLog2UnitSize() * 2;
> -    memcpy(rpcCU->getCoeffY()     + tmp2, m_trCoeffY,    sizeof(coeff_t) * tmp);
> +    memcpy(rpcCU->getCoeffY()     + tmp2, m_trCoeff[0],    sizeof(coeff_t) * tmp);
>      memcpy(rpcCU->getPCMSampleY() + tmp2, m_iPCMSampleY, sizeof(pixel) * tmp);
>      tmp  >>= m_hChromaShift + m_vChromaShift;
>      tmp2 >>= m_hChromaShift + m_vChromaShift;
> -    memcpy(rpcCU->getCoeffCb() + tmp2, m_trCoeffCb, sizeof(coeff_t) * tmp);
> -    memcpy(rpcCU->getCoeffCr() + tmp2, m_trCoeffCr, sizeof(coeff_t) * tmp);
> +    memcpy(rpcCU->m_trCoeff[1] + tmp2, m_trCoeff[1], sizeof(coeff_t) * tmp);
> +    memcpy(rpcCU->m_trCoeff[2] + tmp2, m_trCoeff[2], sizeof(coeff_t) * tmp);
>      memcpy(rpcCU->getPCMSampleCb() + tmp2, m_iPCMSampleCb, sizeof(pixel) * tmp);
>      memcpy(rpcCU->getPCMSampleCr() + tmp2, m_iPCMSampleCr, sizeof(pixel) * tmp);
>  }
> @@ -695,11 +695,11 @@
>
>      uint32_t tmp  = (g_maxCUSize * g_maxCUSize) >> (depth << 1);
>      uint32_t tmp2 = m_absIdxInLCU << m_pic->getLog2UnitSize() * 2;
> -    memcpy(rpcCU->getCoeffY() + tmp2, m_trCoeffY, sizeof(coeff_t) * tmp);
> +    memcpy(rpcCU->getCoeffY() + tmp2, m_trCoeff[0], sizeof(coeff_t) * tmp);
>      tmp  >>= m_hChromaShift + m_vChromaShift;
>      tmp2 >>= m_hChromaShift + m_vChromaShift;
> -    memcpy(rpcCU->getCoeffCb() + tmp2, m_trCoeffCb, sizeof(coeff_t) * tmp);
> -    memcpy(rpcCU->getCoeffCr() + tmp2, m_trCoeffCr, sizeof(coeff_t) * tmp);
> +    memcpy(rpcCU->m_trCoeff[1] + tmp2, m_trCoeff[1], sizeof(coeff_t) * tmp);
> +    memcpy(rpcCU->m_trCoeff[2] + tmp2, m_trCoeff[2], sizeof(coeff_t) * tmp);
>  }
>
>  void TComDataCU::copyToPic(uint8_t depth, uint32_t partIdx, uint32_t partDepth)
> @@ -748,12 +748,12 @@
>
>      uint32_t tmp  = (g_maxCUSize * g_maxCUSize) >> ((depth + partDepth) << 1);
>      uint32_t tmp2 = partOffset << m_pic->getLog2UnitSize() * 2;
> -    memcpy(cu->getCoeffY()  + tmp2, m_trCoeffY,  sizeof(coeff_t) * tmp);
> +    memcpy(cu->getCoeffY()  + tmp2, m_trCoeff[0],  sizeof(coeff_t) * tmp);
>      memcpy(cu->getPCMSampleY() + tmp2, m_iPCMSampleY, sizeof(pixel) * tmp);
>      tmp  >>= m_hChromaShift + m_vChromaShift;
>      tmp2 >>= m_hChromaShift + m_vChromaShift;
> -    memcpy(cu->getCoeffCb() + tmp2, m_trCoeffCb, sizeof(coeff_t) * tmp);
> -    memcpy(cu->getCoeffCr() + tmp2, m_trCoeffCr, sizeof(coeff_t) * tmp);
> +    memcpy(cu->m_trCoeff[1] + tmp2, m_trCoeff[1], sizeof(coeff_t) * tmp);
> +    memcpy(cu->m_trCoeff[2] + tmp2, m_trCoeff[2], sizeof(coeff_t) * tmp);
>      memcpy(cu->getPCMSampleCb() + tmp2, m_iPCMSampleCb, sizeof(pixel) * tmp);
>      memcpy(cu->getPCMSampleCr() + tmp2, m_iPCMSampleCr, sizeof(pixel) * tmp);
>  }
> diff -r 807ee7f1597b -r 8e2f16c13099 source/Lib/TLibCommon/TComDataCU.h
> --- a/source/Lib/TLibCommon/TComDataCU.h        Tue May 27 23:22:21 2014 +0530
> +++ b/source/Lib/TLibCommon/TComDataCU.h        Wed May 28 16:48:47 2014 +0900
> @@ -112,9 +112,7 @@
>      uint8_t*      m_transformSkip[3];   ///< array of transform skipping flags
>      uint8_t*      m_cbf[3];             ///< array of coded block flags (CBF)
>      TComCUMvField m_cuMvField[2];       ///< array of motion vectors
> -    coeff_t*      m_trCoeffY;           ///< transformed coefficient buffer (Y)
> -    coeff_t*      m_trCoeffCb;          ///< transformed coefficient buffer (Cb)
> -    coeff_t*      m_trCoeffCr;          ///< transformed coefficient buffer (Cr)
> +    coeff_t*      m_trCoeff[3];         ///< transformed coefficient buffer
>
>      pixel*        m_iPCMSampleY;        ///< PCM sample buffer (Y)
>      pixel*        m_iPCMSampleCb;       ///< PCM sample buffer (Cb)
> @@ -279,11 +277,13 @@
>
>      TComCUMvField* getCUMvField(int e)        { return &m_cuMvField[e]; }
>
> -    coeff_t*&     getCoeffY()                 { return m_trCoeffY; }
> +    coeff_t*      getCoeffY()                 { return m_trCoeff[0]; }
>
> -    coeff_t*&     getCoeffCb()                { return m_trCoeffCb; }
> +    coeff_t*      getCoeffCb()                { return m_trCoeff[1]; }
>
> -    coeff_t*&     getCoeffCr()                { return m_trCoeffCr; }
> +    coeff_t*      getCoeffCr()                { return m_trCoeff[2]; }
> +
> +    coeff_t*      getCoeff(TextType ttype)    { return m_trCoeff[ttype]; }
>
>      pixel*&       getPCMSampleY()             { return m_iPCMSampleY; }
>
> diff -r 807ee7f1597b -r 8e2f16c13099 source/Lib/TLibCommon/TComPattern.cpp
> --- a/source/Lib/TLibCommon/TComPattern.cpp     Tue May 27 23:22:21 2014 +0530
> +++ b/source/Lib/TLibCommon/TComPattern.cpp     Wed May 28 16:48:47 2014 +0900
> @@ -163,7 +163,7 @@
>      }
>  }
>
> -void TComPattern::initAdiPatternChroma(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, pixel* adiBuf, int chromaId)
> +void TComPattern::initAdiPatternChroma(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, pixel* adiBuf, uint32_t chromaId)
>  {
>      pixel*  roiOrigin;
>      pixel*  adiTemp;
> @@ -175,7 +175,7 @@
>      initIntraNeighbors(cu, zOrderIdxInPart, partDepth, TEXT_CHROMA, &intraNeighbors);
>      uint32_t tuSize = intraNeighbors.tuSize;
>
> -    roiOrigin = (chromaId == 1) ? cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), cu->getZorderIdxInCU() + zOrderIdxInPart) : cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), cu->getZorderIdxInCU() + zOrderIdxInPart);
> +    roiOrigin = cu->getPic()->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), cu->getZorderIdxInCU() + zOrderIdxInPart);
>      adiTemp   = getAdiChromaBuf(chromaId, tuSize, adiBuf);
>
>      fillReferenceSamples(roiOrigin, picStride, adiTemp, intraNeighbors);
> diff -r 807ee7f1597b -r 8e2f16c13099 source/Lib/TLibCommon/TComPattern.h
> --- a/source/Lib/TLibCommon/TComPattern.h       Tue May 27 23:22:21 2014 +0530
> +++ b/source/Lib/TLibCommon/TComPattern.h       Wed May 28 16:48:47 2014 +0900
> @@ -70,7 +70,7 @@
>  public:
>
>      // access functions of ADI buffers
> -    static pixel* getAdiChromaBuf(int chromaId, int tuSize, pixel* adiBuf)
> +    static pixel* getAdiChromaBuf(uint32_t chromaId, int tuSize, pixel* adiBuf)
>      {
>          return adiBuf + (chromaId == 1 ? 0 : 2 * ADI_BUF_STRIDE * (tuSize * 2 + 1));
>      }
> @@ -86,7 +86,7 @@
>
>      /// set chroma parameters from CU data for accessing ADI data
>      static void initAdiPatternChroma(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth,
> -                                     pixel* adiBuf, int chromaId);
> +                                     pixel* adiBuf, uint32_t chromaId);
>
>      static void initIntraNeighbors(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, TextType cType, IntraNeighbors *IntraNeighbors);
>
> diff -r 807ee7f1597b -r 8e2f16c13099 source/Lib/TLibCommon/TComPicYuv.cpp
> --- a/source/Lib/TLibCommon/TComPicYuv.cpp      Tue May 27 23:22:21 2014 +0530
> +++ b/source/Lib/TLibCommon/TComPicYuv.cpp      Wed May 28 16:48:47 2014 +0900
> @@ -46,13 +46,13 @@
>
>  TComPicYuv::TComPicYuv()
>  {
> -    m_picBufY = NULL; // Buffer (including margin)
> -    m_picBufU = NULL;
> -    m_picBufV = NULL;
> +    m_picBuf[0] = NULL; // Buffer (including margin)
> +    m_picBuf[1] = NULL;
> +    m_picBuf[2] = NULL;
>
> -    m_picOrgY = NULL;  // m_apiPicBufY + m_iMarginLuma*getStride() + m_iMarginLuma
> -    m_picOrgU = NULL;
> -    m_picOrgV = NULL;
> +    m_picOrg[0] = NULL;  // m_apiPicBufY + m_iMarginLuma*getStride() + m_iMarginLuma
> +    m_picOrg[1] = NULL;
> +    m_picOrg[2] = NULL;
>
>      m_cuOffsetY = NULL;
>      m_cuOffsetC = NULL;
> @@ -88,13 +88,13 @@
>      m_strideC = ((m_numCuInWidth * g_maxCUSize) >> m_hChromaShift) + (m_chromaMarginX * 2);
>      int maxHeight = m_numCuInHeight * g_maxCUSize;
>
> -    CHECKED_MALLOC(m_picBufY, pixel, m_stride * (maxHeight + (m_lumaMarginY * 2)));
> -    CHECKED_MALLOC(m_picBufU, pixel, m_strideC * ((maxHeight >> m_vChromaShift) + (m_chromaMarginY * 2)));
> -    CHECKED_MALLOC(m_picBufV, pixel, m_strideC * ((maxHeight >> m_vChromaShift) + (m_chromaMarginY * 2)));
> +    CHECKED_MALLOC(m_picBuf[0], pixel, m_stride * (maxHeight + (m_lumaMarginY * 2)));
> +    CHECKED_MALLOC(m_picBuf[1], pixel, m_strideC * ((maxHeight >> m_vChromaShift) + (m_chromaMarginY * 2)));
> +    CHECKED_MALLOC(m_picBuf[2], pixel, m_strideC * ((maxHeight >> m_vChromaShift) + (m_chromaMarginY * 2)));
>
> -    m_picOrgY = m_picBufY + m_lumaMarginY   * getStride()  + m_lumaMarginX;
> -    m_picOrgU = m_picBufU + m_chromaMarginY * getCStride() + m_chromaMarginX;
> -    m_picOrgV = m_picBufV + m_chromaMarginY * getCStride() + m_chromaMarginX;
> +    m_picOrg[0] = m_picBuf[0] + m_lumaMarginY   * getStride()  + m_lumaMarginX;
> +    m_picOrg[1] = m_picBuf[1] + m_chromaMarginY * getCStride() + m_chromaMarginX;
> +    m_picOrg[2] = m_picBuf[2] + m_chromaMarginY * getCStride() + m_chromaMarginX;
>
>      /* TODO: these four buffers are the same for every TComPicYuv in the encoder */
>      CHECKED_MALLOC(m_cuOffsetY, int, m_numCuInWidth * m_numCuInHeight);
> @@ -127,9 +127,9 @@
>
>  void TComPicYuv::destroy()
>  {
> -    X265_FREE(m_picBufY);
> -    X265_FREE(m_picBufU);
> -    X265_FREE(m_picBufV);
> +    X265_FREE(m_picBuf[0]);
> +    X265_FREE(m_picBuf[1]);
> +    X265_FREE(m_picBuf[2]);
>      X265_FREE(m_cuOffsetY);
>      X265_FREE(m_cuOffsetC);
>      X265_FREE(m_buOffsetY);
> diff -r 807ee7f1597b -r 8e2f16c13099 source/Lib/TLibCommon/TComPicYuv.h
> --- a/source/Lib/TLibCommon/TComPicYuv.h        Tue May 27 23:22:21 2014 +0530
> +++ b/source/Lib/TLibCommon/TComPicYuv.h        Wed May 28 16:48:47 2014 +0900
> @@ -64,13 +64,9 @@
>      //  YUV buffer
>      // ------------------------------------------------------------------------------------------------
>
> -    pixel*  m_picBufY;         ///< Buffer (including margin)
> -    pixel*  m_picBufU;
> -    pixel*  m_picBufV;
> +    pixel*  m_picBuf[3];        ///< Buffer (including margin)
>
> -    pixel*  m_picOrgY;          ///< m_apiPicBufY + m_iMarginLuma*getStride() + m_iMarginLuma
> -    pixel*  m_picOrgU;
> -    pixel*  m_picOrgV;
> +    pixel*  m_picOrg[3];        ///< m_apiPicBufY + m_iMarginLuma*getStride() + m_iMarginLuma
>
>      // ------------------------------------------------------------------------------------------------
>      //  Parameter for general YUV buffer usage
> @@ -131,32 +127,31 @@
>      //  Access function for picture buffer
>      // ------------------------------------------------------------------------------------------------
>
> -    //  Access starting position of picture buffer with margin
> -    pixel*  getBufY()     { return m_picBufY; }
> +    //  Access starting position of original picture
> +    pixel*  getLumaAddr()   { return m_picOrg[0]; }
>
> -    pixel*  getBufU()     { return m_picBufU; }
> +    pixel*  getCbAddr()     { return m_picOrg[1]; }
>
> -    pixel*  getBufV()     { return m_picBufV; }
> +    pixel*  getCrAddr()     { return m_picOrg[2]; }
>
> -    //  Access starting position of original picture
> -    pixel*  getLumaAddr()   { return m_picOrgY; }
> -
> -    pixel*  getCbAddr()     { return m_picOrgU; }
> -
> -    pixel*  getCrAddr()     { return m_picOrgV; }
> +    pixel*  getChromaAddr(uint32_t chromaId)     { return m_picOrg[chromaId]; }
>
>      //  Access starting position of original picture for specific coding unit (CU) or partition unit (PU)
> -    pixel*  getLumaAddr(int cuAddr) { return m_picOrgY + m_cuOffsetY[cuAddr]; }
> +    pixel*  getLumaAddr(int cuAddr) { return m_picOrg[0] + m_cuOffsetY[cuAddr]; }
>
> -    pixel*  getCbAddr(int cuAddr) { return m_picOrgU + m_cuOffsetC[cuAddr]; }
> +    pixel*  getCbAddr(int cuAddr) { return m_picOrg[1] + m_cuOffsetC[cuAddr]; }
>
> -    pixel*  getCrAddr(int cuAddr) { return m_picOrgV + m_cuOffsetC[cuAddr]; }
> +    pixel*  getCrAddr(int cuAddr) { return m_picOrg[2] + m_cuOffsetC[cuAddr]; }
>
> -    pixel*  getLumaAddr(int cuAddr, int absZOrderIdx) { return m_picOrgY + m_cuOffsetY[cuAddr] + m_buOffsetY[g_zscanToRaster[absZOrderIdx]]; }
> +    pixel*  getChromaAddr(uint32_t chromaId, int cuAddr) { return m_picOrg[chromaId] + m_cuOffsetC[cuAddr]; }
>
> -    pixel*  getCbAddr(int cuAddr, int absZOrderIdx) { return m_picOrgU + m_cuOffsetC[cuAddr] + m_buOffsetC[g_zscanToRaster[absZOrderIdx]]; }
> +    pixel*  getLumaAddr(int cuAddr, int absZOrderIdx) { return m_picOrg[0] + m_cuOffsetY[cuAddr] + m_buOffsetY[g_zscanToRaster[absZOrderIdx]]; }
>
> -    pixel*  getCrAddr(int cuAddr, int absZOrderIdx) { return m_picOrgV + m_cuOffsetC[cuAddr] + m_buOffsetC[g_zscanToRaster[absZOrderIdx]]; }
> +    pixel*  getCbAddr(int cuAddr, int absZOrderIdx) { return m_picOrg[1] + m_cuOffsetC[cuAddr] + m_buOffsetC[g_zscanToRaster[absZOrderIdx]]; }
> +
> +    pixel*  getCrAddr(int cuAddr, int absZOrderIdx) { return m_picOrg[2] + m_cuOffsetC[cuAddr] + m_buOffsetC[g_zscanToRaster[absZOrderIdx]]; }
> +
> +    pixel*  getChromaAddr(uint32_t chromaId, int cuAddr, int absZOrderIdx) { return m_picOrg[chromaId] + m_cuOffsetC[cuAddr] + m_buOffsetC[g_zscanToRaster[absZOrderIdx]]; }
>
>      uint32_t getCUHeight(int rowNum);
>
> diff -r 807ee7f1597b -r 8e2f16c13099 source/Lib/TLibCommon/TComSampleAdaptiveOffset.cpp
> --- a/source/Lib/TLibCommon/TComSampleAdaptiveOffset.cpp        Tue May 27 23:22:21 2014 +0530
> +++ b/source/Lib/TLibCommon/TComSampleAdaptiveOffset.cpp        Wed May 28 16:48:47 2014 +0900
> @@ -566,14 +566,9 @@
>          rec    = m_pic->getPicYuvRec()->getLumaAddr(addr);
>          stride = m_pic->getStride();
>      }
> -    else if (yCbCr == 1)
> -    {
> -        rec    = m_pic->getPicYuvRec()->getCbAddr(addr);
> -        stride = m_pic->getCStride();
> -    }
>      else
>      {
> -        rec    = m_pic->getPicYuvRec()->getCrAddr(addr);
> +        rec    = m_pic->getPicYuvRec()->getChromaAddr(yCbCr, addr);
>          stride = m_pic->getCStride();
>      }
>
> @@ -848,14 +843,9 @@
>          rec        = m_pic->getPicYuvRec()->getLumaAddr();
>          picWidthTmp = m_picWidth;
>      }
> -    else if (yCbCr == 1)
> -    {
> -        rec        = m_pic->getPicYuvRec()->getCbAddr();
> -        picWidthTmp = m_picWidth >> m_hChromaShift;
> -    }
>      else
>      {
> -        rec        = m_pic->getPicYuvRec()->getCrAddr();
> +        rec        = m_pic->getPicYuvRec()->getChromaAddr(yCbCr);
>          picWidthTmp = m_picWidth >> m_hChromaShift;
>      }
>
> @@ -892,15 +882,9 @@
>              stride = m_pic->getStride();
>              picWidthTmp = m_picWidth;
>          }
> -        else if (yCbCr == 1)
> -        {
> -            rec  = m_pic->getPicYuvRec()->getCbAddr(addr);
> -            stride = m_pic->getCStride();
> -            picWidthTmp = m_picWidth >> m_hChromaShift;
> -        }
>          else
>          {
> -            rec  = m_pic->getPicYuvRec()->getCrAddr(addr);
> +            rec  = m_pic->getPicYuvRec()->getChromaAddr(yCbCr, addr);
>              stride = m_pic->getCStride();
>              picWidthTmp = m_picWidth >> m_hChromaShift;
>          }
> @@ -977,14 +961,9 @@
>                          rec  = m_pic->getPicYuvRec()->getLumaAddr(addr);
>                          stride = m_pic->getStride();
>                      }
> -                    else if (yCbCr == 1)
> -                    {
> -                        rec  = m_pic->getPicYuvRec()->getCbAddr(addr);
> -                        stride = m_pic->getCStride();
> -                    }
>                      else
>                      {
> -                        rec  = m_pic->getPicYuvRec()->getCrAddr(addr);
> +                        rec  = m_pic->getPicYuvRec()->getChromaAddr(yCbCr, addr);
>                          stride = m_pic->getCStride();
>                      }
>
> @@ -1019,14 +998,9 @@
>          rec        = m_pic->getPicYuvRec()->getLumaAddr();
>          picWidthTmp = m_picWidth;
>      }
> -    else if (yCbCr == 1)
> -    {
> -        rec        = m_pic->getPicYuvRec()->getCbAddr();
> -        picWidthTmp = m_picWidth >> m_hChromaShift;
> -    }
>      else
>      {
> -        rec        = m_pic->getPicYuvRec()->getCrAddr();
> +        rec        = m_pic->getPicYuvRec()->getChromaAddr(yCbCr);
>          picWidthTmp = m_picWidth >> m_hChromaShift;
>      }
>
> @@ -1061,15 +1035,9 @@
>              stride = m_pic->getStride();
>              picWidthTmp = m_picWidth;
>          }
> -        else if (yCbCr == 1)
> -        {
> -            rec  = m_pic->getPicYuvRec()->getCbAddr(addr);
> -            stride = m_pic->getCStride();
> -            picWidthTmp = m_picWidth >> m_hChromaShift;
> -        }
>          else
>          {
> -            rec  = m_pic->getPicYuvRec()->getCrAddr(addr);
> +            rec  = m_pic->getPicYuvRec()->getChromaAddr(yCbCr, addr);
>              stride = m_pic->getCStride();
>              picWidthTmp = m_picWidth >> m_hChromaShift;
>          }
> @@ -1139,14 +1107,9 @@
>                          rec  = m_pic->getPicYuvRec()->getLumaAddr(addr);
>                          stride = m_pic->getStride();
>                      }
> -                    else if (yCbCr == 1)
> -                    {
> -                        rec  = m_pic->getPicYuvRec()->getCbAddr(addr);
> -                        stride = m_pic->getCStride();
> -                    }
>                      else
>                      {
> -                        rec  = m_pic->getPicYuvRec()->getCrAddr(addr);
> +                        rec  = m_pic->getPicYuvRec()->getChromaAddr(yCbCr, addr);
>                          stride = m_pic->getCStride();
>                      }
>
> diff -r 807ee7f1597b -r 8e2f16c13099 source/Lib/TLibCommon/TComYuv.cpp
> --- a/source/Lib/TLibCommon/TComYuv.cpp Tue May 27 23:22:21 2014 +0530
> +++ b/source/Lib/TLibCommon/TComYuv.cpp Wed May 28 16:48:47 2014 +0900
> @@ -49,13 +49,15 @@
>
>  TComYuv::TComYuv()
>  {
> -    m_bufY = NULL;
> -    m_bufU = NULL;
> -    m_bufV = NULL;
> +    m_buf[0] = NULL;
> +    m_buf[1] = NULL;
> +    m_buf[2] = NULL;
>  }
>
>  TComYuv::~TComYuv()
> -{}
> +{
> +    destroy();
> +}
>
>  bool TComYuv::create(uint32_t width, uint32_t height, int csp)
>  {
> @@ -72,10 +74,13 @@
>      m_csp = csp;
>      m_part = partitionFromSizes(m_width, m_height);
>
> +    uint32_t sizeL = width * height;
> +    uint32_t sizeC = m_cwidth * m_cheight;
> +    X265_CHECK((sizeC & 15) == 0, "invalid size");
>      // memory allocation (padded for SIMD reads)
> -    CHECKED_MALLOC(m_bufY, pixel, width * height);
> -    CHECKED_MALLOC(m_bufU, pixel, m_cwidth * m_cheight + 8);
> -    CHECKED_MALLOC(m_bufV, pixel, m_cwidth * m_cheight + 8);
> +    CHECKED_MALLOC(m_buf[0], pixel, sizeL + sizeC * 2 + 8);
> +    m_buf[1] = m_buf[0] + sizeL;
> +    m_buf[2] = m_buf[0] + sizeL + sizeC;
>      return true;
>
>  fail:
> @@ -85,55 +90,53 @@
>  void TComYuv::destroy()
>  {
>      // memory free
> -    X265_FREE(m_bufY);
> -    m_bufY = NULL;
> -    X265_FREE(m_bufU);
> -    m_bufU = NULL;
> -    X265_FREE(m_bufV);
> -    m_bufV = NULL;
> +    X265_FREE(m_buf[0]);
> +    m_buf[0] = NULL;
> +    m_buf[1] = NULL;
> +    m_buf[2] = NULL;
>  }
>
>  void TComYuv::clear()
>  {
> -    ::memset(m_bufY, 0, (m_width  * m_height) * sizeof(pixel));
> -    ::memset(m_bufU, 0, (m_cwidth * m_cheight) * sizeof(pixel));
> -    ::memset(m_bufV, 0, (m_cwidth * m_cheight) * sizeof(pixel));
> +    ::memset(m_buf[0], 0, (m_width  * m_height) * sizeof(pixel));
> +    ::memset(m_buf[1], 0, (m_cwidth * m_cheight) * sizeof(pixel));
> +    ::memset(m_buf[2], 0, (m_cwidth * m_cheight) * sizeof(pixel));
>  }
>
>  void TComYuv::copyToPicYuv(TComPicYuv* destPicYuv, uint32_t cuAddr, uint32_t absZOrderIdx)
>  {
>      pixel* dstY = destPicYuv->getLumaAddr(cuAddr, absZOrderIdx);
>
> -    primitives.luma_copy_pp[m_part](dstY, destPicYuv->getStride(), m_bufY, getStride());
> +    primitives.luma_copy_pp[m_part](dstY, destPicYuv->getStride(), m_buf[0], getStride());
>
>      pixel* dstU = destPicYuv->getCbAddr(cuAddr, absZOrderIdx);
>      pixel* dstV = destPicYuv->getCrAddr(cuAddr, absZOrderIdx);
> -    primitives.chroma[m_csp].copy_pp[m_part](dstU, destPicYuv->getCStride(), m_bufU, getCStride());
> -    primitives.chroma[m_csp].copy_pp[m_part](dstV, destPicYuv->getCStride(), m_bufV, getCStride());
> +    primitives.chroma[m_csp].copy_pp[m_part](dstU, destPicYuv->getCStride(), m_buf[1], getCStride());
> +    primitives.chroma[m_csp].copy_pp[m_part](dstV, destPicYuv->getCStride(), m_buf[2], getCStride());
>  }
>
>  void TComYuv::copyFromPicYuv(TComPicYuv* srcPicYuv, uint32_t cuAddr, uint32_t absZOrderIdx)
>  {
>      pixel* srcY = srcPicYuv->getLumaAddr(cuAddr, absZOrderIdx);
>
> -    primitives.luma_copy_pp[m_part](m_bufY, getStride(), srcY, srcPicYuv->getStride());
> +    primitives.luma_copy_pp[m_part](m_buf[0], getStride(), srcY, srcPicYuv->getStride());
>
>      pixel* srcU = srcPicYuv->getCbAddr(cuAddr, absZOrderIdx);
>      pixel* srcV = srcPicYuv->getCrAddr(cuAddr, absZOrderIdx);
> -    primitives.chroma[m_csp].copy_pp[m_part](m_bufU, getCStride(), srcU, srcPicYuv->getCStride());
> -    primitives.chroma[m_csp].copy_pp[m_part](m_bufV, getCStride(), srcV, srcPicYuv->getCStride());
> +    primitives.chroma[m_csp].copy_pp[m_part](m_buf[1], getCStride(), srcU, srcPicYuv->getCStride());
> +    primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], getCStride(), srcV, srcPicYuv->getCStride());
>  }
>
>  void TComYuv::copyToPartYuv(TComYuv* dstPicYuv, uint32_t partIdx)
>  {
>      pixel* dstY = dstPicYuv->getLumaAddr(partIdx);
>
> -    primitives.luma_copy_pp[m_part](dstY, dstPicYuv->getStride(), m_bufY, getStride());
> +    primitives.luma_copy_pp[m_part](dstY, dstPicYuv->getStride(), m_buf[0], getStride());
>
>      pixel* dstU = dstPicYuv->getCbAddr(partIdx);
>      pixel* dstV = dstPicYuv->getCrAddr(partIdx);
> -    primitives.chroma[m_csp].copy_pp[m_part](dstU, dstPicYuv->getCStride(), m_bufU, getCStride());
> -    primitives.chroma[m_csp].copy_pp[m_part](dstV, dstPicYuv->getCStride(), m_bufV, getCStride());
> +    primitives.chroma[m_csp].copy_pp[m_part](dstU, dstPicYuv->getCStride(), m_buf[1], getCStride());
> +    primitives.chroma[m_csp].copy_pp[m_part](dstV, dstPicYuv->getCStride(), m_buf[2], getCStride());
>  }
>
>  void TComYuv::copyPartToYuv(TComYuv* dstPicYuv, uint32_t partIdx)
> @@ -196,41 +199,17 @@
>
>  void TComYuv::copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId, const bool splitIntoSubTUs)
>  {
> +    X265_CHECK(chromaId == 1 || chromaId == 2, "invalid chroma id");
> +
>      int part = splitIntoSubTUs ? NUM_CHROMA_PARTITIONS422 : partitionFromSize(lumaSize);
>
> -    if (chromaId == 1)
> -    {
> -        pixel*   srcU = getCbAddr(partIdx);
> -        int16_t* dstU = dstPicYuv->getCbAddr(partIdx);
> +    pixel*   src = getChromaAddr(chromaId, partIdx);
> +    int16_t* dst = dstPicYuv->getChromaAddr(chromaId, partIdx);
>
> -        uint32_t srcstride = getCStride();
> -        uint32_t dststride = dstPicYuv->m_cwidth;
> +    uint32_t srcstride = getCStride();
> +    uint32_t dststride = dstPicYuv->m_cwidth;
>
> -        primitives.chroma[m_csp].copy_ps[part](dstU, dststride, srcU, srcstride);
> -    }
> -    else if (chromaId == 2)
> -    {
> -        pixel*   srcV = getCrAddr(partIdx);
> -        int16_t* dstV = dstPicYuv->getCrAddr(partIdx);
> -
> -        uint32_t srcstride = getCStride();
> -        uint32_t dststride = dstPicYuv->m_cwidth;
> -
> -        primitives.chroma[m_csp].copy_ps[part](dstV, dststride, srcV, srcstride);
> -    }
> -    else
> -    {
> -        pixel*   srcU = getCbAddr(partIdx);
> -        pixel*   srcV = getCrAddr(partIdx);
> -        int16_t* dstU = dstPicYuv->getCbAddr(partIdx);
> -        int16_t* dstV = dstPicYuv->getCrAddr(partIdx);
> -
> -        uint32_t srcstride = getCStride();
> -        uint32_t dststride = dstPicYuv->m_cwidth;
> -
> -        primitives.chroma[m_csp].copy_ps[part](dstU, dststride, srcU, srcstride);
> -        primitives.chroma[m_csp].copy_ps[part](dstV, dststride, srcV, srcstride);
> -    }
> +    primitives.chroma[m_csp].copy_ps[part](dst, dststride, src, srcstride);
>  }
>
>  void TComYuv::addClip(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t partSize)
> diff -r 807ee7f1597b -r 8e2f16c13099 source/Lib/TLibCommon/TComYuv.h
> --- a/source/Lib/TLibCommon/TComYuv.h   Tue May 27 23:22:21 2014 +0530
> +++ b/source/Lib/TLibCommon/TComYuv.h   Wed May 28 16:48:47 2014 +0900
> @@ -65,9 +65,7 @@
>      //  YUV buffer
>      // ------------------------------------------------------------------------------------------------------------------
>
> -    pixel* m_bufY;
> -    pixel* m_bufU;
> -    pixel* m_bufV;
> +    pixel* m_buf[3];
>
>      // ------------------------------------------------------------------------------------------------------------------
>      //  Parameter for general YUV buffer usage
> @@ -98,30 +96,12 @@
>          return blkX + blkY * width;
>      }
>
> -    static int getAddrOffset(uint32_t unitIdx, uint32_t size, uint32_t width)
> -    {
> -        int blkX = (unitIdx * size) &  (width - 1);
> -        int blkY = (unitIdx * size) & ~(width - 1);
> -
> -        return blkX + blkY * size;
> -    }
> -
> -    int getChromaAddrOffset(uint32_t unitIdx, uint32_t size, uint32_t width)
> -    {
> -        int blkX = (unitIdx * size) &  (width - 1);
> -        int blkY = (unitIdx * size) & ~(width - 1);
> -
> -        if (m_csp == CHROMA_422) blkY <<= 1;
> -
> -        return blkX + blkY * size;
> -    }
> -
>  public:
>
>      int m_part; // partitionFromSizes(m_width, m_height)
>
>      TComYuv();
> -    virtual ~TComYuv();
> +    ~TComYuv();
>
>      // ------------------------------------------------------------------------------------------------------------------
>      //  Memory management
> @@ -171,25 +151,22 @@
>      // ------------------------------------------------------------------------------------------------------------------
>
>      //  Access starting position of YUV buffer
> -    pixel* getLumaAddr()  { return m_bufY; }
> +    pixel* getLumaAddr()  { return m_buf[0]; }
>
> -    pixel* getCbAddr()    { return m_bufU; }
> +    pixel* getCbAddr()    { return m_buf[1]; }
>
> -    pixel* getCrAddr()    { return m_bufV; }
> +    pixel* getCrAddr()    { return m_buf[2]; }
> +
> +    pixel* getChromaAddr(uint32_t chromaId)    { return m_buf[chromaId]; }
>
>      //  Access starting position of YUV partition unit buffer
> -    pixel* getLumaAddr(uint32_t partUnitIdx) { return m_bufY + getAddrOffset(partUnitIdx, m_width); }
> +    pixel* getLumaAddr(uint32_t partUnitIdx) { return m_buf[0] + getAddrOffset(partUnitIdx, m_width); }
>
> -    pixel* getCbAddr(uint32_t partUnitIdx) { return m_bufU + getChromaAddrOffset(partUnitIdx, m_cwidth); }
> +    pixel* getCbAddr(uint32_t partUnitIdx) { return m_buf[1] + getChromaAddrOffset(partUnitIdx, m_cwidth); }
>
> -    pixel* getCrAddr(uint32_t partUnitIdx) { return m_bufV + getChromaAddrOffset(partUnitIdx, m_cwidth); }
> +    pixel* getCrAddr(uint32_t partUnitIdx) { return m_buf[2] + getChromaAddrOffset(partUnitIdx, m_cwidth); }
>
> -    //  Access starting position of YUV transform unit buffer
> -    pixel* getLumaAddr(uint32_t transUnitIdx, uint32_t blkSize) { return m_bufY + getAddrOffset(transUnitIdx, blkSize, m_width); }
> -
> -    pixel* getCbAddr(uint32_t transUnitIdx, uint32_t blkSize) { return m_bufU + getChromaAddrOffset(transUnitIdx, blkSize, m_cwidth); }
> -
> -    pixel* getCrAddr(uint32_t transUnitIdx, uint32_t blkSize) { return m_bufV + getChromaAddrOffset(transUnitIdx, blkSize, m_cwidth); }
> +    pixel* getChromaAddr(uint32_t chromaId, uint32_t partUnitIdx) { return m_buf[chromaId] + getChromaAddrOffset(partUnitIdx, m_cwidth); }
>
>      //  Get stride value of YUV buffer
>      uint32_t getStride()    { return m_width;   }
> @@ -209,6 +186,7 @@
>      // -------------------------------------------------------------------------------------------------------------------
>
>      int  getHorzChromaShift()  { return m_hChromaShift; }
> +
>      int  getVertChromaShift()  { return m_vChromaShift; }
>  };
>  }
> diff -r 807ee7f1597b -r 8e2f16c13099 source/Lib/TLibEncoder/TEncEntropy.cpp
> --- a/source/Lib/TLibEncoder/TEncEntropy.cpp    Tue May 27 23:22:21 2014 +0530
> +++ b/source/Lib/TLibEncoder/TEncEntropy.cpp    Wed May 28 16:48:47 2014 +0900
> @@ -376,11 +376,11 @@
>
>                  uint32_t curPartNum = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);
>
> -                for (uint32_t chromaId = TEXT_CHROMA; chromaId < MAX_NUM_COMPONENT; chromaId++)
> +                for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
>                  {
>                      TComTURecurse tuIterator;
>                      initTUEntropySection(&tuIterator, splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, m_bakAbsPartIdx);
> -                    coeff_t* coeffChroma = (chromaId == 1) ? cu->getCoeffCb() : cu->getCoeffCr();
> +                    coeff_t* coeffChroma = cu->getCoeff((TextType)chromaId);
>                      do
>                      {
>                          uint32_t cbf = cu->getCbf(tuIterator.m_absPartIdxTURelCU, (TextType)chromaId, trIdx + splitIntoSubTUs);
> @@ -399,11 +399,11 @@
>              uint32_t trSizeC  = tuSize >> hChromaShift;
>              const bool splitIntoSubTUs = (chFmt == CHROMA_422);
>              uint32_t curPartNum = cu->getPic()->getNumPartInCU() >> (depth << 1);
> -            for (uint32_t chromaId = TEXT_CHROMA; chromaId < MAX_NUM_COMPONENT; chromaId++)
> +            for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
>              {
>                  TComTURecurse tuIterator;
>                  initTUEntropySection(&tuIterator, splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdx);
> -                coeff_t* coeffChroma = (chromaId == 1) ? cu->getCoeffCb() : cu->getCoeffCr();
> +                coeff_t* coeffChroma = cu->getCoeff((TextType)chromaId);
>                  do
>                  {
>                      uint32_t cbf = cu->getCbf(tuIterator.m_absPartIdxTURelCU, (TextType)chromaId, trIdx + splitIntoSubTUs);
> diff -r 807ee7f1597b -r 8e2f16c13099 source/Lib/TLibEncoder/TEncSearch.cpp
> --- a/source/Lib/TLibEncoder/TEncSearch.cpp     Tue May 27 23:22:21 2014 +0530
> +++ b/source/Lib/TLibEncoder/TEncSearch.cpp     Wed May 28 16:48:47 2014 +0900
> @@ -52,14 +52,14 @@
>
>  TEncSearch::TEncSearch()
>  {
> -    m_qtTempCoeffY  = NULL;
> -    m_qtTempCoeffCb = NULL;
> -    m_qtTempCoeffCr = NULL;
> +    m_qtTempCoeff[0] = NULL;
> +    m_qtTempCoeff[1] = NULL;
> +    m_qtTempCoeff[2] = NULL;
>      m_qtTempTrIdx = NULL;
>      m_qtTempShortYuv = NULL;
> -    m_qtTempTUCoeffY  = NULL;
> -    m_qtTempTUCoeffCb = NULL;
> -    m_qtTempTUCoeffCr = NULL;
> +    m_qtTempTUCoeff[0] = NULL;
> +    m_qtTempTUCoeff[1] = NULL;
> +    m_qtTempTUCoeff[2] = NULL;
>      for (int i = 0; i < 3; i++)
>      {
>          m_qtTempTransformSkipFlag[i] = NULL;
> @@ -81,16 +81,16 @@
>          const uint32_t numLayersToAllocate = m_cfg->m_quadtreeTULog2MaxSize - m_cfg->m_quadtreeTULog2MinSize + 1;
>          for (uint32_t i = 0; i < numLayersToAllocate; ++i)
>          {
> -            X265_FREE(m_qtTempCoeffY[i]);
> +            X265_FREE(m_qtTempCoeff[0][i]);
>              m_qtTempShortYuv[i].destroy();
>          }
>      }
> -    X265_FREE(m_qtTempTUCoeffY);
> +    X265_FREE(m_qtTempTUCoeff[0]);
>      X265_FREE(m_qtTempTrIdx);
>      X265_FREE(m_qtTempCbf[0]);
>      X265_FREE(m_qtTempTransformSkipFlag[0]);
>
> -    delete[] m_qtTempCoeffY;
> +    delete[] m_qtTempCoeff[0];
>      delete[] m_qtTempShortYuv;
>      m_qtTempTransformSkipYuv.destroy();
>  }
> @@ -110,17 +110,17 @@
>      m_refLagPixels = cfg->param->frameNumThreads > 1 ? cfg->param->searchRange : cfg->param->sourceHeight;
>
>      const uint32_t numLayersToAllocate = cfg->m_quadtreeTULog2MaxSize - cfg->m_quadtreeTULog2MinSize + 1;
> -    m_qtTempCoeffY   = new coeff_t*[numLayersToAllocate * 3];
> -    m_qtTempCoeffCb  = m_qtTempCoeffY + numLayersToAllocate;
> -    m_qtTempCoeffCr  = m_qtTempCoeffY + numLayersToAllocate * 2;
> +    m_qtTempCoeff[0] = new coeff_t*[numLayersToAllocate * 3];
> +    m_qtTempCoeff[1] = m_qtTempCoeff[0] + numLayersToAllocate;
> +    m_qtTempCoeff[2] = m_qtTempCoeff[0] + numLayersToAllocate * 2;
>      m_qtTempShortYuv = new ShortYuv[numLayersToAllocate];
>      uint32_t sizeL = g_maxCUSize * g_maxCUSize;
>      uint32_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift);
>      for (uint32_t i = 0; i < numLayersToAllocate; ++i)
>      {
> -        m_qtTempCoeffY[i]  = X265_MALLOC(coeff_t, sizeL + sizeC * 2);
> -        m_qtTempCoeffCb[i] = m_qtTempCoeffY[i] + sizeL;
> -        m_qtTempCoeffCr[i] = m_qtTempCoeffY[i] + sizeL + sizeC;
> +        m_qtTempCoeff[0][i] = X265_MALLOC(coeff_t, sizeL + sizeC * 2);
> +        m_qtTempCoeff[1][i] = m_qtTempCoeff[0][i] + sizeL;
> +        m_qtTempCoeff[2][i] = m_qtTempCoeff[0][i] + sizeL + sizeC;
>          m_qtTempShortYuv[i].create(MAX_CU_SIZE, MAX_CU_SIZE, cfg->param->internalCsp);
>      }
>
> @@ -133,9 +133,9 @@
>      m_qtTempTransformSkipFlag[1] = m_qtTempTransformSkipFlag[0] + numPartitions;
>      m_qtTempTransformSkipFlag[2] = m_qtTempTransformSkipFlag[0] + numPartitions * 2;
>
> -    CHECKED_MALLOC(m_qtTempTUCoeffY, coeff_t, MAX_TS_SIZE * MAX_TS_SIZE * 3);
> -    m_qtTempTUCoeffCb = m_qtTempTUCoeffY + MAX_TS_SIZE * MAX_TS_SIZE;
> -    m_qtTempTUCoeffCr = m_qtTempTUCoeffY + MAX_TS_SIZE * MAX_TS_SIZE * 2;
> +    CHECKED_MALLOC(m_qtTempTUCoeff[0], coeff_t, MAX_TS_SIZE * MAX_TS_SIZE * 3);
> +    m_qtTempTUCoeff[1] = m_qtTempTUCoeff[0] + MAX_TS_SIZE * MAX_TS_SIZE;
> +    m_qtTempTUCoeff[2] = m_qtTempTUCoeff[0] + MAX_TS_SIZE * MAX_TS_SIZE * 2;
>
>      return m_qtTempTransformSkipYuv.create(g_maxCUSize, g_maxCUSize, cfg->param->internalCsp);
>
> @@ -268,19 +268,7 @@
>      height = splitIntoSubTUs ? height >> 1 : height;
>      uint32_t coeffOffset = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (cspx + cspy));
>      uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
> -    coeff_t* coeff = 0;
> -    switch (ttype)
> -    {
> -    case TEXT_LUMA:     coeff = m_qtTempCoeffY[qtLayer];
> -        break;
> -    case TEXT_CHROMA_U: coeff = m_qtTempCoeffCb[qtLayer];
> -        break;
> -    case TEXT_CHROMA_V: coeff = m_qtTempCoeffCr[qtLayer];
> -        break;
> -    default: X265_CHECK(0, "invalid texture type\n");
> -    }
> -
> -    coeff += coeffOffset;
> +    coeff_t* coeff = m_qtTempCoeff[ttype][qtLayer] + coeffOffset;
>
>      if (width == height)
>      {
> @@ -393,14 +381,7 @@
>  uint32_t TEncSearch::xGetIntraBitsQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t chromaId, const bool splitIntoSubTUs)
>  {
>      m_entropyCoder->resetBits();
> -    if (chromaId == TEXT_CHROMA_U)
> -    {
> -        xEncCoeffQT(cu, trDepth, absPartIdx, TEXT_CHROMA_U, splitIntoSubTUs);
> -    }
> -    else if (chromaId == TEXT_CHROMA_V)
> -    {
> -        xEncCoeffQT(cu, trDepth, absPartIdx, TEXT_CHROMA_V, splitIntoSubTUs);
> -    }
> +    xEncCoeffQT(cu, trDepth, absPartIdx, (TextType)chromaId, splitIntoSubTUs);
>      return m_entropyCoder->getNumberOfWrittenBits();
>  }
>
> @@ -424,7 +405,7 @@
>      uint32_t trSizeLog2     = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
>      uint32_t qtLayer        = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
>      uint32_t coeffOffsetY   = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
> -    coeff_t* coeff          = m_qtTempCoeffY[qtLayer] + coeffOffsetY;
> +    coeff_t* coeff          = m_qtTempCoeff[0][qtLayer] + coeffOffsetY;
>
>      int16_t* reconQt        = m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
>
> @@ -512,20 +493,20 @@
>          }
>      }
>
> -    TextType ttype          = (chromaId == 1) ? TEXT_CHROMA_U : TEXT_CHROMA_V;
> +    TextType ttype          = (TextType)chromaId;
>      uint32_t tuSize         = cu->getCUSize(0) >> (trDepth + m_hChromaShift);
>      uint32_t stride         = fencYuv->getCStride();
> -    pixel*   fenc           = (chromaId == 1) ? fencYuv->getCbAddr(absPartIdx) : fencYuv->getCrAddr(absPartIdx);
> -    pixel*   pred           = (chromaId == 1) ? predYuv->getCbAddr(absPartIdx) : predYuv->getCrAddr(absPartIdx);
> -    int16_t* residual       = (chromaId == 1) ? resiYuv->getCbAddr(absPartIdx) : resiYuv->getCrAddr(absPartIdx);
> +    pixel*   fenc           = fencYuv->getChromaAddr(chromaId, absPartIdx);
> +    pixel*   pred           = predYuv->getChromaAddr(chromaId, absPartIdx);
> +    int16_t* residual       = resiYuv->getChromaAddr(chromaId, absPartIdx);
>
>      uint32_t qtlayer        = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
>      uint32_t coeffOffsetC   = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
> -    coeff_t* coeff          = (chromaId == 1 ? m_qtTempCoeffCb[qtlayer] : m_qtTempCoeffCr[qtlayer]) + coeffOffsetC;
> -    int16_t* reconQt        = (chromaId == 1) ? m_qtTempShortYuv[qtlayer].getCbAddr(absPartIdx) : m_qtTempShortYuv[qtlayer].getCrAddr(absPartIdx);
> +    coeff_t* coeff          = m_qtTempCoeff[chromaId][qtlayer] + coeffOffsetC;
> +    int16_t* reconQt        = m_qtTempShortYuv[qtlayer].getChromaAddr(chromaId, absPartIdx);
>      uint32_t reconQtStride  = m_qtTempShortYuv[qtlayer].m_cwidth;
>      uint32_t zorder           = cu->getZorderIdxInCU() + absPartIdx;
> -    pixel*   reconIPred       = (chromaId == 1) ? cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder) : cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);
> +    pixel*   reconIPred       = cu->getPic()->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder);
>      uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
>      bool     useTransformSkipChroma = !!cu->getTransformSkip(absPartIdx, ttype);
>      int      part = partitionFromSize(tuSize);
> @@ -942,12 +923,12 @@
>          //===== copy transform coefficients =====
>          uint32_t numCoeffY    = 1 << (trSizeLog2 * 2);
>          uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
> -        coeff_t* coeffSrcY    = m_qtTempCoeffY[qtlayer] + coeffOffsetY;
> -        coeff_t* coeffDestY   = cu->getCoeffY()         + coeffOffsetY;
> +        coeff_t* coeffSrcY    = m_qtTempCoeff[0][qtlayer] + coeffOffsetY;
> +        coeff_t* coeffDestY   = cu->getCoeffY()           + coeffOffsetY;
>          ::memcpy(coeffDestY, coeffSrcY, sizeof(coeff_t) * numCoeffY);
>
>          //===== copy reconstruction =====
> -        m_qtTempShortYuv[qtlayer].copyPartToPartLuma(reconYuv, absPartIdx, 1 << trSizeLog2, 1 << trSizeLog2);
> +        m_qtTempShortYuv[qtlayer].copyPartToPartLuma(reconYuv, absPartIdx, 1 << trSizeLog2);
>      }
>      else
>      {
> @@ -968,13 +949,13 @@
>      //===== copy transform coefficients =====
>      uint32_t numCoeffY    = 1 << (trSizeLog2 * 2);
>      uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
> -    coeff_t* coeffSrcY = m_qtTempCoeffY[qtlayer] + coeffOffsetY;
> -    coeff_t* coeffDstY = m_qtTempTUCoeffY;
> +    coeff_t* coeffSrcY = m_qtTempCoeff[0][qtlayer] + coeffOffsetY;
> +    coeff_t* coeffDstY = m_qtTempTUCoeff[0];
>
>      ::memcpy(coeffDstY, coeffSrcY, sizeof(coeff_t) * numCoeffY);
>
>      //===== copy reconstruction =====
> -    m_qtTempShortYuv[qtlayer].copyPartToPartLuma(&m_qtTempTransformSkipYuv, absPartIdx, 1 << trSizeLog2, 1 << trSizeLog2);
> +    m_qtTempShortYuv[qtlayer].copyPartToPartLuma(&m_qtTempTransformSkipYuv, absPartIdx, 1 << trSizeLog2);
>  }
>
>  void TEncSearch::xLoadIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx)
> @@ -986,8 +967,8 @@
>      //===== copy transform coefficients =====
>      uint32_t numCoeffY    = 1 << (trSizeLog2 * 2);
>      uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
> -    coeff_t* coeffDstY = m_qtTempCoeffY[qtlayer] + coeffOffsetY;
> -    coeff_t* coeffSrcY = m_qtTempTUCoeffY;
> +    coeff_t* coeffDstY = m_qtTempCoeff[0][qtlayer] + coeffOffsetY;
> +    coeff_t* coeffSrcY = m_qtTempTUCoeff[0];
>
>      ::memcpy(coeffDstY, coeffSrcY, sizeof(coeff_t) * numCoeffY);
>
> @@ -1036,18 +1017,9 @@
>          uint32_t numCoeffC = width * height;
>          uint32_t coeffOffsetC = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
>
> -        if (chromaId == 1)
> -        {
> -            coeff_t* coeffSrcU = m_qtTempCoeffCb[qtlayer] + coeffOffsetC;
> -            coeff_t* coeffDstU = m_qtTempTUCoeffCb;
> -            ::memcpy(coeffDstU, coeffSrcU, sizeof(coeff_t) * numCoeffC);
> -        }
> -        if (chromaId == 2)
> -        {
> -            coeff_t* coeffSrcV = m_qtTempCoeffCr[qtlayer] + coeffOffsetC;
> -            coeff_t* coeffDstV = m_qtTempTUCoeffCr;
> -            ::memcpy(coeffDstV, coeffSrcV, sizeof(coeff_t) * numCoeffC);
> -        }
> +        coeff_t* coeffSrc = m_qtTempCoeff[chromaId][qtlayer] + coeffOffsetC;
> +        coeff_t* coeffDst = m_qtTempTUCoeff[chromaId];
> +        ::memcpy(coeffDst, coeffSrc, sizeof(coeff_t) * numCoeffC);
>
>          //===== copy reconstruction =====
>          uint32_t lumaSize = 1 << (bChromaSame ? trSizeLog2 + 1 : trSizeLog2);
> @@ -1055,9 +1027,9 @@
>      }
>  }
>
> -void TEncSearch::xLoadIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t chromaId, const bool splitIntoSubTUs)
> +void TEncSearch::xLoadIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t chromaId)
>  {
> -    assert(chromaId == 1 || chromaId == 2);
> +    X265_CHECK(chromaId == 1 || chromaId == 2, "invalid chroma id");
>
>      uint32_t fullDepth = cu->getDepth(0) + trDepth;
>      uint32_t trMode    = cu->getTransformIdx(absPartIdx);
> @@ -1067,12 +1039,15 @@
>          uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
>          uint32_t qtlayer    = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
>          int      chFmt      = cu->getChromaFormat();
> -
> +        const bool splitIntoSubTUs = (chFmt == CHROMA_422);
> +
> +        uint32_t trSizeCLog2 = trSizeLog2 - m_hChromaShift;
>          bool bChromaSame = false;
>          if (trSizeLog2 == 2 && !(chFmt == CHROMA_444))
>          {
>              X265_CHECK(trDepth > 0, "invalid trDepth\n");
>              trDepth--;
> +            trSizeCLog2++;
>              uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepth) << 1);
>              bool bFirstQ = ((absPartIdx & (qpdiv - 1)) == 0);
>              bool bSecondQ = (chFmt == CHROMA_422) ? ((absPartIdx & (qpdiv - 1)) == 2) : false;
> @@ -1084,24 +1059,13 @@
>          }
>
>          //===== copy transform coefficients =====
> -        uint32_t trWidthC  = cu->getCUSize(absPartIdx) >> (trDepth + m_hChromaShift);
> -        uint32_t trHeightC = cu->getCUSize(absPartIdx) >> (trDepth + m_vChromaShift);
> -        trHeightC = splitIntoSubTUs ? trHeightC >> 1 : trHeightC;
> -        uint32_t numCoeffC = trWidthC * trHeightC;
> +        uint32_t trSizeC  = 1 << trSizeCLog2;
> +        uint32_t numCoeffC = 1 << trSizeCLog2 * 2;
>          uint32_t coeffOffsetC = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
>
> -        if (chromaId == 1)
> -        {
> -            coeff_t* coeffDstU = m_qtTempCoeffCb[qtlayer] + coeffOffsetC;
> -            coeff_t* coeffSrcU = m_qtTempTUCoeffCb;
> -            ::memcpy(coeffDstU, coeffSrcU, sizeof(coeff_t) * numCoeffC);
> -        }
> -        if (chromaId == 2)
> -        {
> -            coeff_t* coeffDstV = m_qtTempCoeffCr[qtlayer] + coeffOffsetC;
> -            coeff_t* coeffSrcV = m_qtTempTUCoeffCr;
> -            ::memcpy(coeffDstV, coeffSrcV, sizeof(coeff_t) * numCoeffC);
> -        }
> +        coeff_t* coeffDst = m_qtTempCoeff[chromaId][qtlayer] + coeffOffsetC;
> +        coeff_t* coeffSrc = m_qtTempTUCoeff[chromaId];
> +        ::memcpy(coeffDst, coeffSrc, sizeof(coeff_t) * numCoeffC);
>
>          //===== copy reconstruction =====
>          uint32_t lumaSize = 1 << (bChromaSame ? trSizeLog2 + 1 : trSizeLog2);
> @@ -1111,18 +1075,9 @@
>          uint32_t reconQtStride    = m_qtTempShortYuv[qtlayer].m_cwidth;
>          uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
>
> -        if (chromaId == 1)
> -        {
> -            pixel* reconIPred = cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder);
> -            int16_t* reconQt  = m_qtTempShortYuv[qtlayer].getCbAddr(absPartIdx);
> -            primitives.blockcpy_ps(trWidthC, trHeightC, reconIPred, reconIPredStride, reconQt, reconQtStride);
> -        }
> -        if (chromaId == 2)
> -        {
> -            pixel* reconIPred = cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);
> -            int16_t* reconQt  = m_qtTempShortYuv[qtlayer].getCrAddr(absPartIdx);
> -            primitives.blockcpy_ps(trWidthC, trHeightC, reconIPred, reconIPredStride, reconQt, reconQtStride);
> -        }
> +        pixel* reconIPred = cu->getPic()->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder);
> +        int16_t* reconQt  = m_qtTempShortYuv[qtlayer].getChromaAddr(chromaId, absPartIdx);
> +        primitives.blockcpy_ps(trSizeC, trSizeC, reconIPred, reconIPredStride, reconQt, reconQtStride);
>      }
>  }
>
> @@ -1216,7 +1171,7 @@
>              }
>          }
>
> -        for (int chromaId = TEXT_CHROMA; chromaId < MAX_NUM_COMPONENT; chromaId++)
> +        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
>          {
>              TComTURecurse tuIterator;
>              uint32_t curPartNum = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) +  actualTrDepth) << 1);
> @@ -1225,7 +1180,7 @@
>              do
>              {
>                  uint32_t absPartIdxC = tuIterator.m_absPartIdxTURelCU;
> -                pixel*   pred        = (chromaId == 1) ? predYuv->getCbAddr(absPartIdxC) : predYuv->getCrAddr(absPartIdxC);
> +                pixel*   pred        = predYuv->getChromaAddr(chromaId, absPartIdxC);
>
>                  //===== init availability pattern =====
>                  TComPattern::initAdiPatternChroma(cu, absPartIdxC, actualTrDepth, m_predBuf, chromaId);
> @@ -1298,7 +1253,7 @@
>
>                      if (bestModeId == firstCheckId)
>                      {
> -                        xLoadIntraResultChromaQT(cu, trDepth, absPartIdxC, chromaId, splitIntoSubTUs);
> +                        xLoadIntraResultChromaQT(cu, trDepth, absPartIdxC, chromaId);
>                          cu->setCbfPartRange(singleCbfC << trDepth, (TextType)chromaId, absPartIdxC, tuIterator.m_absPartIdxStep);
>
>                          m_rdGoOnSbacCoder->load(m_rdSbacCoders[fullDepth][CI_TEMP_BEST]);
> @@ -1379,10 +1334,10 @@
>          uint32_t numCoeffC = width * height;
>          uint32_t coeffOffsetC = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
>
> -        coeff_t* coeffSrcU = m_qtTempCoeffCb[qtlayer] + coeffOffsetC;
> -        coeff_t* coeffSrcV = m_qtTempCoeffCr[qtlayer] + coeffOffsetC;
> -        coeff_t* coeffDstU = cu->getCoeffCb()         + coeffOffsetC;
> -        coeff_t* coeffDstV = cu->getCoeffCr()         + coeffOffsetC;
> +        coeff_t* coeffSrcU = m_qtTempCoeff[1][qtlayer] + coeffOffsetC;
> +        coeff_t* coeffSrcV = m_qtTempCoeff[2][qtlayer] + coeffOffsetC;
> +        coeff_t* coeffDstU = cu->getCoeffCb()          + coeffOffsetC;
> +        coeff_t* coeffDstV = cu->getCoeffCr()          + coeffOffsetC;
>          ::memcpy(coeffDstU, coeffSrcU, sizeof(coeff_t) * numCoeffC);
>          ::memcpy(coeffDstV, coeffSrcV, sizeof(coeff_t) * numCoeffC);
>
> @@ -1433,7 +1388,7 @@
>          const bool splitIntoSubTUs = (chFmt == CHROMA_422);
>          int sizeIdx = g_convertToBit[tuSize];
>
> -        for (int chromaId = TEXT_CHROMA; chromaId < MAX_NUM_COMPONENT; chromaId++)
> +        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
>          {
>              TComTURecurse tuIterator;
>              uint32_t curPartNum = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) +  actualTrDepth) << 1);
> @@ -1442,20 +1397,21 @@
>              do
>              {
>                  uint32_t absPartIdxC = tuIterator.m_absPartIdxTURelCU;
> -                cu->setTransformSkipPartRange(0, (TextType)chromaId, absPartIdxC, tuIterator.m_absPartIdxStep);
> -
> -                TextType ttype          = (chromaId == 1) ? TEXT_CHROMA_U : TEXT_CHROMA_V;
> -                pixel*   fenc           = (chromaId == 1) ? fencYuv->getCbAddr(absPartIdxC) : fencYuv->getCrAddr(absPartIdxC);
> -                pixel*   pred           = (chromaId == 1) ? predYuv->getCbAddr(absPartIdxC) : predYuv->getCrAddr(absPartIdxC);
> -                int16_t* residual       = (chromaId == 1) ? resiYuv->getCbAddr(absPartIdxC) : resiYuv->getCrAddr(absPartIdxC);
> -                pixel*   recon          = (chromaId == 1) ? reconYuv->getCbAddr(absPartIdxC) : reconYuv->getCrAddr(absPartIdxC);
> +
> +                TextType ttype          = (TextType)chromaId;
> +                pixel*   fenc           = fencYuv->getChromaAddr(chromaId, absPartIdxC);
> +                pixel*   pred           = predYuv->getChromaAddr(chromaId, absPartIdxC);
> +                int16_t* residual       = resiYuv->getChromaAddr(chromaId, absPartIdxC);
> +                pixel*   recon          = reconYuv->getChromaAddr(chromaId, absPartIdxC);
>                  uint32_t coeffOffsetC   = absPartIdxC << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
> -                coeff_t* coeff          = (chromaId == 1 ? cu->getCoeffCb() : cu->getCoeffCr()) + coeffOffsetC;
> +                coeff_t* coeff          = cu->getCoeff(ttype) + coeffOffsetC;
>                  uint32_t zorder         = cu->getZorderIdxInCU() + absPartIdxC;
> -                pixel*   reconIPred     = (chromaId == 1) ? cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder) : cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);
> +                pixel*   reconIPred     = cu->getPic()->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder);
>                  uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
> +
>                  //bool     useTransformSkipChroma = cu->getTransformSkip(absPartIdxC, ttype);
>                  const bool useTransformSkipChroma = false;
> +                cu->setTransformSkipPartRange(0, ttype, absPartIdxC, tuIterator.m_absPartIdxStep);
>
>                  uint32_t chromaPredMode = cu->getChromaIntraDir(absPartIdxC);
>                  //===== update chroma mode =====
> @@ -1576,7 +1532,7 @@
>
>          //===== determine set of modes to be tested (using prediction signal only) =====
>          const int numModesAvailable = 35; //total number of Intra modes
> -        pixel*   fenc   = fencYuv->getLumaAddr(pu, tuSize);
> +        pixel*   fenc   = fencYuv->getLumaAddr(partOffset);
>          uint32_t stride = predYuv->getStride();
>          uint32_t rdModeList[FAST_UDI_MAX_RDMODE_NUM];
>          int numModesForFullRD = intraModeNumFast[sizeIdx];
> @@ -1858,11 +1814,11 @@
>          }
>          chromaPredMode = (chFmt == CHROMA_422) ? g_chroma422IntraAngleMappingTable[chromaPredMode] : chromaPredMode;
>          uint64_t cost = 0;
> -        for (int chromaId = 0; chromaId < 2; chromaId++)
> +        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
>          {
> -            pixel* fenc = (chromaId > 0 ? fencYuv->getCrAddr(absPartIdx) : fencYuv->getCbAddr(absPartIdx));
> -            pixel* pred = (chromaId > 0 ? predYuv->getCrAddr(absPartIdx) : predYuv->getCbAddr(absPartIdx));
> -            pixel* chromaPred = TComPattern::getAdiChromaBuf(chromaId + 1, tuSize, m_predBuf);
> +            pixel* fenc = fencYuv->getChromaAddr(chromaId, absPartIdx);
> +            pixel* pred = predYuv->getChromaAddr(chromaId, absPartIdx);
> +            pixel* chromaPred = TComPattern::getAdiChromaBuf(chromaId, tuSize, m_predBuf);
>
>              //===== get prediction signal =====
>              predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, scaleTuSize, chFmt);
> @@ -2056,14 +2012,7 @@
>      else
>      {
>          reconStride = cu->getPic()->getPicYuvRec()->getCStride();
> -        if (eText == TEXT_CHROMA_U)
> -        {
> -            reconPic = cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), cu->getZorderIdxInCU() + absPartIdx);
> -        }
> -        else
> -        {
> -            reconPic = cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), cu->getZorderIdxInCU() + absPartIdx);
> -        }
> +        reconPic = cu->getPic()->getPicYuvRec()->getChromaAddr(eText, cu->getAddr(), cu->getZorderIdxInCU() + absPartIdx);
>          shiftPcm = X265_DEPTH - cu->getSlice()->getSPS()->getPCMBitDepthChroma();
>      }
>
> @@ -2877,7 +2826,7 @@
>
>          uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
>          uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
> -        coeff_t *coeffCurY = cu->getCoeffY() + coeffOffsetY;
> +        coeff_t *coeffCurY = cu->getCoeffY()  + coeffOffsetY;
>          coeff_t *coeffCurU = cu->getCoeffCb() + coeffOffsetC;
>          coeff_t *coeffCurV = cu->getCoeffCr() + coeffOffsetC;
>
> @@ -3081,9 +3030,9 @@
>          const uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
>          uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
>          uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
> -        coeff_t *coeffCurY = m_qtTempCoeffY[qtlayer] + coeffOffsetY;
> -        coeff_t *coeffCurU = m_qtTempCoeffCb[qtlayer] + coeffOffsetC;
> -        coeff_t *coeffCurV = m_qtTempCoeffCr[qtlayer] + coeffOffsetC;
> +        coeff_t *coeffCurY = m_qtTempCoeff[0][qtlayer] + coeffOffsetY;
> +        coeff_t *coeffCurU = m_qtTempCoeff[1][qtlayer] + coeffOffsetC;
> +        coeff_t *coeffCurV = m_qtTempCoeff[2][qtlayer] + coeffOffsetC;
>
>          cu->setTrIdxSubParts(depth - cu->getDepth(0), absPartIdx, depth);
>          bool checkTransformSkip   = cu->getSlice()->getPPS()->getUseTransformSkip() && !cu->getCUTransquantBypass(0);
> @@ -3677,7 +3626,7 @@
>          bestCBF[TEXT_LUMA] = cu->getCbf(absPartIdx, TEXT_LUMA, trMode);
>          if (bCodeChroma)
>          {
> -            for (uint32_t chromId = TEXT_CHROMA_U; chromId < MAX_NUM_COMPONENT; chromId++)
> +            for (uint32_t chromId = TEXT_CHROMA_U; chromId <= TEXT_CHROMA_V; chromId++)
>              {
>                  bestCBF[chromId] = cu->getCbf(absPartIdx, (TextType)chromId, trMode);
>                  if (splitIntoSubTUs)
> @@ -3707,7 +3656,7 @@
>          bestCBF[TEXT_LUMA] = cu->getCbf(absPartIdx, TEXT_LUMA, trMode);
>          if (bCodeChroma)
>          {
> -            for (uint32_t chromId = TEXT_CHROMA_U; chromId < MAX_NUM_COMPONENT; chromId++)
> +            for (uint32_t chromId = TEXT_CHROMA_U; chromId <= TEXT_CHROMA_V; chromId++)
>              {
>                  bestCBF[chromId] = cu->getCbf(absPartIdx, (TextType)chromId, trMode);
>                  if (splitIntoSubTUs)
> @@ -3796,7 +3745,7 @@
>          const uint32_t numberOfSections  = splitIntoSubTUs ? 2 : 1;
>          uint32_t partIdxesPerSubTU  = absPartIdxStep >> (splitIntoSubTUs ? 1 : 0);
>
> -        for (uint32_t chromId = TEXT_CHROMA_U; chromId < MAX_NUM_COMPONENT; chromId++)
> +        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
>          {
>              for (uint32_t subTUIndex = 0; subTUIndex < numberOfSections; subTUIndex++)
>              {
> @@ -3804,12 +3753,12 @@
>
>                  if (splitIntoSubTUs)
>                  {
> -                    const uint8_t combinedCBF = (bestsubTUCBF[chromId][subTUIndex] << subTUDepth) | (bestCBF[chromId] << trMode);
> -                    cu->setCbfPartRange(combinedCBF, (TextType)chromId, subTUPartIdx, partIdxesPerSubTU);
> +                    const uint8_t combinedCBF = (bestsubTUCBF[chromaId][subTUIndex] << subTUDepth) | (bestCBF[chromaId] << trMode);
> +                    cu->setCbfPartRange(combinedCBF, (TextType)chromaId, subTUPartIdx, partIdxesPerSubTU);
>                  }
>                  else
>                  {
> -                    cu->setCbfPartRange((bestCBF[chromId] << trMode), (TextType)chromId, subTUPartIdx, partIdxesPerSubTU);
> +                    cu->setCbfPartRange((bestCBF[chromaId] << trMode), (TextType)chromaId, subTUPartIdx, partIdxesPerSubTU);
>                  }
>              }
>          }
> @@ -3872,7 +3821,7 @@
>          //Luma
>          const uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
>          uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
> -        coeff_t *coeffCurY = m_qtTempCoeffY[qtlayer] + coeffOffsetY;
> +        coeff_t *coeffCurY = m_qtTempCoeff[0][qtlayer] + coeffOffsetY;
>
>          //Chroma
>          bool bCodeChroma = true;
> @@ -3898,8 +3847,8 @@
>              if (bCodeChroma)
>              {
>                  uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
> -                coeff_t *coeffCurU = m_qtTempCoeffCb[qtlayer] + coeffOffsetC;
> -                coeff_t *coeffCurV = m_qtTempCoeffCr[qtlayer] + coeffOffsetC;
> +                coeff_t *coeffCurU = m_qtTempCoeff[1][qtlayer] + coeffOffsetC;
> +                coeff_t *coeffCurV = m_qtTempCoeff[2][qtlayer] + coeffOffsetC;
>                  uint32_t trSizeC = 1 << trSizeCLog2;
>
>                  if (!splitIntoSubTUs)
> @@ -3976,7 +3925,7 @@
>          if (bSpatial)
>          {
>              uint32_t trSize = 1 << trSizeLog2;
> -            m_qtTempShortYuv[qtlayer].copyPartToPartLuma(resiYuv, absPartIdx, trSize, trSize);
> +            m_qtTempShortYuv[qtlayer].copyPartToPartLuma(resiYuv, absPartIdx, trSize);
>
>              if (bCodeChroma)
>              {
> @@ -3987,18 +3936,18 @@
>          {
>              uint32_t numCoeffY = 1 << (trSizeLog2 * 2);
>              uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
> -            coeff_t* coeffSrcY = m_qtTempCoeffY[qtlayer] + coeffOffsetY;
> -            coeff_t* coeffDstY = cu->getCoeffY() + coeffOffsetY;
> +            coeff_t* coeffSrcY = m_qtTempCoeff[0][qtlayer] + coeffOffsetY;
> +            coeff_t* coeffDstY = cu->getCoeffY()           + coeffOffsetY;
>              ::memcpy(coeffDstY, coeffSrcY, sizeof(coeff_t) * numCoeffY);
>              if (bCodeChroma)
>              {
>                  uint32_t numCoeffC = 1 << (trSizeCLog2 * 2 + (chFmt == CHROMA_422));
>                  uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
>
> -                coeff_t* coeffSrcU = m_qtTempCoeffCb[qtlayer] + coeffOffsetC;
> -                coeff_t* coeffSrcV = m_qtTempCoeffCr[qtlayer] + coeffOffsetC;
> -                coeff_t* coeffDstU = cu->getCoeffCb() + coeffOffsetC;
> -                coeff_t* coeffDstV = cu->getCoeffCr() + coeffOffsetC;
> +                coeff_t* coeffSrcU = m_qtTempCoeff[1][qtlayer] + coeffOffsetC;
> +                coeff_t* coeffSrcV = m_qtTempCoeff[2][qtlayer] + coeffOffsetC;
> +                coeff_t* coeffDstU = cu->getCoeffCb()          + coeffOffsetC;
> +                coeff_t* coeffDstV = cu->getCoeffCr()          + coeffOffsetC;
>                  ::memcpy(coeffDstU, coeffSrcU, sizeof(coeff_t) * numCoeffC);
>                  ::memcpy(coeffDstV, coeffSrcV, sizeof(coeff_t) * numCoeffC);
>              }
> diff -r 807ee7f1597b -r 8e2f16c13099 source/Lib/TLibEncoder/TEncSearch.h
> --- a/source/Lib/TLibEncoder/TEncSearch.h       Tue May 27 23:22:21 2014 +0530
> +++ b/source/Lib/TLibEncoder/TEncSearch.h       Wed May 28 16:48:47 2014 +0900
> @@ -115,15 +115,11 @@
>
>      ShortYuv*       m_qtTempShortYuv;
>
> -    coeff_t**       m_qtTempCoeffY;
> -    coeff_t**       m_qtTempCoeffCb;
> -    coeff_t**       m_qtTempCoeffCr;
> +    coeff_t**       m_qtTempCoeff[3];
>      uint8_t*        m_qtTempTrIdx;
>      uint8_t*        m_qtTempCbf[3];
>
> -    coeff_t*        m_qtTempTUCoeffY;
> -    coeff_t*        m_qtTempTUCoeffCb;
> -    coeff_t*        m_qtTempTUCoeffCr;
> +    coeff_t*        m_qtTempTUCoeff[3];
>      uint8_t*        m_qtTempTransformSkipFlag[3];
>      TComYuv         m_qtTempTransformSkipYuv;
>
> @@ -230,7 +226,7 @@
>      void xStoreIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx);
>      void xLoadIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx);
>      void xStoreIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t chromaId, const bool splitIntoSubTUs);
> -    void xLoadIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t chromaId, const bool splitIntoSubTUs);
> +    void xLoadIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t chromaId);
>
>      // --------------------------------------------------------------------------------------------
>      // Inter search (AMP)
> diff -r 807ee7f1597b -r 8e2f16c13099 source/common/shortyuv.cpp
> --- a/source/common/shortyuv.cpp        Tue May 27 23:22:21 2014 +0530
> +++ b/source/common/shortyuv.cpp        Wed May 28 16:48:47 2014 +0900
> @@ -35,13 +35,15 @@
>
>  ShortYuv::ShortYuv()
>  {
> -    m_bufY = NULL;
> -    m_bufCb = NULL;
> -    m_bufCr = NULL;
> +    m_buf[0] = NULL;
> +    m_buf[1] = NULL;
> +    m_buf[2] = NULL;
>  }
>
>  ShortYuv::~ShortYuv()
> -{}
> +{
> +    destroy();
> +}
>
>  bool ShortYuv::create(uint32_t width, uint32_t height, int csp)
>  {
> @@ -56,9 +58,12 @@
>      m_cwidth  = width  >> m_hChromaShift;
>      m_cheight = height >> m_vChromaShift;
>
> -    CHECKED_MALLOC(m_bufY, int16_t, width * height);
> -    CHECKED_MALLOC(m_bufCb, int16_t, m_cwidth * m_cheight);
> -    CHECKED_MALLOC(m_bufCr, int16_t, m_cwidth * m_cheight);
> +    uint32_t sizeL = width * height;
> +    uint32_t sizeC = m_cwidth * m_cheight;
> +    X265_CHECK((sizeC & 15) == 0, "invalid size");
> +    CHECKED_MALLOC(m_buf[0], int16_t, sizeL + sizeC * 2);
> +    m_buf[1] = m_buf[0] + sizeL;
> +    m_buf[2] = m_buf[0] + sizeL + sizeC;
>      return true;
>
>  fail:
> @@ -67,19 +72,17 @@
>
>  void ShortYuv::destroy()
>  {
> -    X265_FREE(m_bufY);
> -    m_bufY = NULL;
> -    X265_FREE(m_bufCb);
> -    m_bufCb = NULL;
> -    X265_FREE(m_bufCr);
> -    m_bufCr = NULL;
> +    X265_FREE(m_buf[0]);
> +    m_buf[0] = NULL;
> +    m_buf[1] = NULL;
> +    m_buf[2] = NULL;
>  }
>
>  void ShortYuv::clear()
>  {
> -    ::memset(m_bufY,  0, (m_width  * m_height) * sizeof(int16_t));
> -    ::memset(m_bufCb, 0, (m_cwidth * m_cheight) * sizeof(int16_t));
> -    ::memset(m_bufCr, 0, (m_cwidth * m_cheight) * sizeof(int16_t));
> +    ::memset(m_buf[0], 0, (m_width  * m_height) * sizeof(int16_t));
> +    ::memset(m_buf[1], 0, (m_cwidth * m_cheight) * sizeof(int16_t));
> +    ::memset(m_buf[2], 0, (m_cwidth * m_cheight) * sizeof(int16_t));
>  }
>
>  void ShortYuv::subtract(TComYuv* srcYuv0, TComYuv* srcYuv1, uint32_t partSize)
> @@ -116,6 +119,24 @@
>      primitives.pixeladd_ss(cpartSize, cpartSize, getCrAddr(), m_cwidth, srcV0, srcV1, srcYuv0->m_cwidth, srcYuv1->m_cwidth);
>  }
>
> +void ShortYuv::copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t partSize)
> +{
> +    int part = partitionFromSize(partSize);
> +    int16_t* src = getLumaAddr(partIdx);
> +    int16_t* dst = dstPicYuv->getLumaAddr(partIdx);
> +
> +    primitives.luma_copy_ss[part](dst, dstPicYuv->m_width, src, m_width);
> +}
> +
> +void ShortYuv::copyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t partSize)
> +{
> +    int part = partitionFromSize(partSize);
> +    int16_t* src = getLumaAddr(partIdx);
> +    pixel* dst = dstPicYuv->getLumaAddr(partIdx);
> +
> +    primitives.luma_copy_sp[part](dst, dstPicYuv->getStride(), src, m_width);
> +}
> +
>  void ShortYuv::copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height)
>  {
>      int part = partitionFromSizes(width, height);
> @@ -181,35 +202,15 @@
>
>  void ShortYuv::copyPartToPartShortChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId)
>  {
> +    X265_CHECK(chromaId == 1 || chromaId == 2, "invalid chroma id");
> +
>      int part = partitionFromSize(lumaSize);
>
> -    if (chromaId == 0)
> -    {
> -        int16_t* srcU = getCbAddr(partIdx);
> -        int16_t* dstU = dstPicYuv->getCbAddr(partIdx);
> -        uint32_t srcStride = m_cwidth;
> -        uint32_t dstStride = dstPicYuv->m_cwidth;
> -        primitives.chroma[m_csp].copy_ss[part](dstU, dstStride, srcU, srcStride);
> -    }
> -    else if (chromaId == 1)
> -    {
> -        int16_t* srcV = getCrAddr(partIdx);
> -        int16_t* dstV = dstPicYuv->getCrAddr(partIdx);
> -        uint32_t srcStride = m_cwidth;
> -        uint32_t dstStride = dstPicYuv->m_cwidth;
> -        primitives.chroma[m_csp].copy_ss[part](dstV, dstStride, srcV, srcStride);
> -    }
> -    else
> -    {
> -        int16_t* srcU = getCbAddr(partIdx);
> -        int16_t* srcV = getCrAddr(partIdx);
> -        int16_t* dstU = dstPicYuv->getCbAddr(partIdx);
> -        int16_t* dstV = dstPicYuv->getCrAddr(partIdx);
> -        uint32_t srcStride = m_cwidth;
> -        uint32_t dstStride = dstPicYuv->m_cwidth;
> -        primitives.chroma[m_csp].copy_ss[part](dstU, dstStride, srcU, srcStride);
> -        primitives.chroma[m_csp].copy_ss[part](dstV, dstStride, srcV, srcStride);
> -    }
> +    int16_t* src = getChromaAddr(chromaId, partIdx);
> +    int16_t* dst = dstPicYuv->getChromaAddr(chromaId, partIdx);
> +    uint32_t srcStride = m_cwidth;
> +    uint32_t dstStride = dstPicYuv->m_cwidth;
> +    primitives.chroma[m_csp].copy_ss[part](dst, dstStride, src, srcStride);
>  }
>
>  void ShortYuv::copyPartToPartYuvChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId, const bool splitIntoSubTUs)
> @@ -218,20 +219,9 @@
>
>      int part = splitIntoSubTUs ? NUM_CHROMA_PARTITIONS422 : partitionFromSize(lumaSize);
>
> -    if (chromaId == 1)
> -    {
> -        int16_t* srcU = getCbAddr(partIdx);
> -        pixel* dstU = dstPicYuv->getCbAddr(partIdx);
> -        uint32_t srcStride = m_cwidth;
> -        uint32_t dstStride = dstPicYuv->getCStride();
> -        primitives.chroma[m_csp].copy_sp[part](dstU, dstStride, srcU, srcStride);
> -    }
> -    else
> -    {
> -        int16_t* srcV = getCrAddr(partIdx);
> -        pixel* dstV = dstPicYuv->getCrAddr(partIdx);
> -        uint32_t srcStride = m_cwidth;
> -        uint32_t dstStride = dstPicYuv->getCStride();
> -        primitives.chroma[m_csp].copy_sp[part](dstV, dstStride, srcV, srcStride);
> -    }
> +    int16_t* src = getChromaAddr(chromaId, partIdx);
> +    pixel* dst = dstPicYuv->getChromaAddr(chromaId, partIdx);
> +    uint32_t srcStride = m_cwidth;
> +    uint32_t dstStride = dstPicYuv->getCStride();
> +    primitives.chroma[m_csp].copy_sp[part](dst, dstStride, src, srcStride);
>  }
> diff -r 807ee7f1597b -r 8e2f16c13099 source/common/shortyuv.h
> --- a/source/common/shortyuv.h  Tue May 27 23:22:21 2014 +0530
> +++ b/source/common/shortyuv.h  Wed May 28 16:48:47 2014 +0900
> @@ -37,9 +37,7 @@
>  {
>  public:
>
> -    int16_t* m_bufY;
> -    int16_t* m_bufCb;
> -    int16_t* m_bufCr;
> +    int16_t* m_buf[3];
>
>      uint32_t m_width;
>      uint32_t m_height;
> @@ -51,7 +49,7 @@
>      int m_vChromaShift;
>
>      ShortYuv();
> -    virtual ~ShortYuv();
> +    ~ShortYuv();
>
>      int getChromaAddrOffset(uint32_t partUnitIdx, uint32_t width)
>      {
> @@ -69,56 +67,37 @@
>          return blkX + blkY * width;
>      }
>
> -    static int getAddrOffset(uint32_t idx, uint32_t size, uint32_t width)
> -    {
> -        int blkX = (idx * size) &  (width - 1);
> -        int blkY = (idx * size) & ~(width - 1);
> -
> -        return blkX + blkY * size;
> -    }
> -
> -    int getChromaAddrOffset(uint32_t unitIdx, uint32_t size, uint32_t width)
> -    {
> -        int blkX = (unitIdx * size) &  (width - 1);
> -        int blkY = (unitIdx * size) & ~(width - 1);
> -
> -        if (m_csp == CHROMA_422) blkY <<= 1;
> -
> -        return blkX + blkY * size;
> -    }
> -
>      bool create(uint32_t width, uint32_t height, int csp);
>
>      void destroy();
>      void clear();
>
> -    int16_t* getLumaAddr()  { return m_bufY; }
> +    int16_t* getLumaAddr()  { return m_buf[0]; }
>
> -    int16_t* getCbAddr()    { return m_bufCb; }
> +    int16_t* getCbAddr()    { return m_buf[1]; }
>
> -    int16_t* getCrAddr()    { return m_bufCr; }
> +    int16_t* getCrAddr()    { return m_buf[2]; }
> +
> +    int16_t* getChromaAddr(uint32_t chromaId)    { return m_buf[chromaId]; }
>
>      // Access starting position of YUV partition unit buffer
> -    int16_t* getLumaAddr(uint32_t partUnitIdx) { return m_bufY + getAddrOffset(partUnitIdx, m_width); }
> +    int16_t* getLumaAddr(uint32_t partUnitIdx) { return m_buf[0] + getAddrOffset(partUnitIdx, m_width); }
>
> -    int16_t* getCbAddr(uint32_t partUnitIdx) { return m_bufCb + getChromaAddrOffset(partUnitIdx, m_cwidth); }
> +    int16_t* getCbAddr(uint32_t partUnitIdx) { return m_buf[1] + getChromaAddrOffset(partUnitIdx, m_cwidth); }
>
> -    int16_t* getCrAddr(uint32_t partUnitIdx) { return m_bufCr + getChromaAddrOffset(partUnitIdx, m_cwidth); }
> +    int16_t* getCrAddr(uint32_t partUnitIdx) { return m_buf[2] + getChromaAddrOffset(partUnitIdx, m_cwidth); }
>
> -    // Access starting position of YUV transform unit buffer
> -    int16_t* getLumaAddr(uint32_t partIdx, uint32_t size) { return m_bufY + getAddrOffset(partIdx, size, m_width); }
> -
> -    int16_t* getCbAddr(uint32_t partIdx, uint32_t size) { return m_bufCb + getChromaAddrOffset(partIdx, size, m_cwidth); }
> -
> -    int16_t* getCrAddr(uint32_t partIdx, uint32_t size) { return m_bufCr + getChromaAddrOffset(partIdx, size, m_cwidth); }
> +    int16_t* getChromaAddr(uint32_t chromaId, uint32_t partUnitIdx) { return m_buf[chromaId] + getChromaAddrOffset(partUnitIdx, m_cwidth); }
>
>      void subtract(TComYuv* srcYuv0, TComYuv* srcYuv1, uint32_t partSize);
>      void addClip(ShortYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t partSize);
>
> +    void copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t partSize);
>      void copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height);
>      void copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, bool bChromaSame);
>      void copyPartToPartShortChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId);
>
> +    void copyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t partSize);
>      void copyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height);
>      void copyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, bool bChromaSame);
>      void copyPartToPartYuvChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId, const bool splitIntoSubTUs);
> @@ -128,6 +107,7 @@
>      // -------------------------------------------------------------------------------------------------------------------
>
>      int  getHorzChromaShift()  { return m_hChromaShift; }
> +
>      int  getVertChromaShift()  { return m_vChromaShift; }
>  };
>  }
> diff -r 807ee7f1597b -r 8e2f16c13099 source/encoder/reference.cpp
> --- a/source/encoder/reference.cpp      Tue May 27 23:22:21 2014 +0530
> +++ b/source/encoder/reference.cpp      Wed May 28 16:48:47 2014 +0900
> @@ -43,7 +43,7 @@
>      intptr_t startpad = pic->m_lumaMarginY * lumaStride + pic->m_lumaMarginX;
>
>      /* directly reference the pre-extended integer pel plane */
> -    fpelPlane = pic->m_picBufY + startpad;
> +    fpelPlane = pic->m_picBuf[0] + startpad;
>      isWeighted = false;
>
>      if (w)
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel



-- 
Steve Borho


More information about the x265-devel mailing list