[x265] refine YUV and coeff buffer
Steve Borho
steve at borho.org
Wed May 28 17:07:28 CEST 2014
On Wed, May 28, 2014 at 2:50 AM, Satoshi Nakagawa <nakagawa424 at oki.com> wrote:
> # HG changeset patch
> # User Satoshi Nakagawa <nakagawa424 at oki.com>
> # Date 1401263327 -32400
> # Wed May 28 16:48:47 2014 +0900
> # Node ID 8e2f16c13099ec0d4252055ae41523299b5b62da
> # Parent 807ee7f1597b3aea8ed0c09ee49cd0778f28e0d5
> refine YUV and coeff buffer
Staged for testing, thanks.
> diff -r 807ee7f1597b -r 8e2f16c13099 source/Lib/TLibCommon/TComDataCU.cpp
> --- a/source/Lib/TLibCommon/TComDataCU.cpp Tue May 27 23:22:21 2014 +0530
> +++ b/source/Lib/TLibCommon/TComDataCU.cpp Wed May 28 16:48:47 2014 +0900
> @@ -80,9 +80,9 @@
> m_cbf[0] = NULL;
> m_cbf[1] = NULL;
> m_cbf[2] = NULL;
> - m_trCoeffY = NULL;
> - m_trCoeffCb = NULL;
> - m_trCoeffCr = NULL;
> + m_trCoeff[0] = NULL;
> + m_trCoeff[1] = NULL;
> + m_trCoeff[2] = NULL;
> m_iPCMFlags = NULL;
> m_iPCMSampleY = NULL;
> m_iPCMSampleCb = NULL;
> @@ -148,9 +148,9 @@
> CHECKED_MALLOC(m_mvpIdx[0], uint8_t, numPartition * 2);
> m_mvpIdx[1] = m_mvpIdx[0] + numPartition;
>
> - CHECKED_MALLOC(m_trCoeffY, coeff_t, sizeL + sizeC * 2);
> - m_trCoeffCb = m_trCoeffY + sizeL;
> - m_trCoeffCr = m_trCoeffY + sizeL + sizeC;
> + CHECKED_MALLOC(m_trCoeff[0], coeff_t, sizeL + sizeC * 2);
> + m_trCoeff[1] = m_trCoeff[0] + sizeL;
> + m_trCoeff[2] = m_trCoeff[0] + sizeL + sizeC;
>
> CHECKED_MALLOC(m_iPCMFlags, bool, numPartition);
> CHECKED_MALLOC(m_iPCMSampleY, pixel, sizeL + sizeC * 2);
> @@ -177,7 +177,7 @@
> X265_FREE(m_chromaIntraDir);
> X265_FREE(m_trIdx);
> X265_FREE(m_transformSkip[0]);
> - X265_FREE(m_trCoeffY);
> + X265_FREE(m_trCoeff[0]);
> X265_FREE(m_iPCMFlags);
> X265_FREE(m_iPCMSampleY);
> X265_FREE(m_mvpIdx[0]);
> @@ -607,12 +607,12 @@
>
> uint32_t tmp = g_maxCUSize * g_maxCUSize >> (depth << 1);
> uint32_t tmp2 = partUnitIdx * tmp;
> - memcpy(m_trCoeffY + tmp2, cu->getCoeffY(), sizeof(coeff_t) * tmp);
> + memcpy(m_trCoeff[0] + tmp2, cu->getCoeffY(), sizeof(coeff_t) * tmp);
> memcpy(m_iPCMSampleY + tmp2, cu->getPCMSampleY(), sizeof(pixel) * tmp);
> tmp >>= m_hChromaShift + m_vChromaShift;
> tmp2 >>= m_hChromaShift + m_vChromaShift;
> - memcpy(m_trCoeffCb + tmp2, cu->getCoeffCb(), sizeof(coeff_t) * tmp);
> - memcpy(m_trCoeffCr + tmp2, cu->getCoeffCr(), sizeof(coeff_t) * tmp);
> + memcpy(m_trCoeff[1] + tmp2, cu->m_trCoeff[1], sizeof(coeff_t) * tmp);
> + memcpy(m_trCoeff[2] + tmp2, cu->m_trCoeff[2], sizeof(coeff_t) * tmp);
> memcpy(m_iPCMSampleCb + tmp2, cu->getPCMSampleCb(), sizeof(pixel) * tmp);
> memcpy(m_iPCMSampleCr + tmp2, cu->getPCMSampleCr(), sizeof(pixel) * tmp);
> }
> @@ -665,12 +665,12 @@
>
> uint32_t tmp = (g_maxCUSize * g_maxCUSize) >> (depth << 1);
> uint32_t tmp2 = m_absIdxInLCU << m_pic->getLog2UnitSize() * 2;
> - memcpy(rpcCU->getCoeffY() + tmp2, m_trCoeffY, sizeof(coeff_t) * tmp);
> + memcpy(rpcCU->getCoeffY() + tmp2, m_trCoeff[0], sizeof(coeff_t) * tmp);
> memcpy(rpcCU->getPCMSampleY() + tmp2, m_iPCMSampleY, sizeof(pixel) * tmp);
> tmp >>= m_hChromaShift + m_vChromaShift;
> tmp2 >>= m_hChromaShift + m_vChromaShift;
> - memcpy(rpcCU->getCoeffCb() + tmp2, m_trCoeffCb, sizeof(coeff_t) * tmp);
> - memcpy(rpcCU->getCoeffCr() + tmp2, m_trCoeffCr, sizeof(coeff_t) * tmp);
> + memcpy(rpcCU->m_trCoeff[1] + tmp2, m_trCoeff[1], sizeof(coeff_t) * tmp);
> + memcpy(rpcCU->m_trCoeff[2] + tmp2, m_trCoeff[2], sizeof(coeff_t) * tmp);
> memcpy(rpcCU->getPCMSampleCb() + tmp2, m_iPCMSampleCb, sizeof(pixel) * tmp);
> memcpy(rpcCU->getPCMSampleCr() + tmp2, m_iPCMSampleCr, sizeof(pixel) * tmp);
> }
> @@ -695,11 +695,11 @@
>
> uint32_t tmp = (g_maxCUSize * g_maxCUSize) >> (depth << 1);
> uint32_t tmp2 = m_absIdxInLCU << m_pic->getLog2UnitSize() * 2;
> - memcpy(rpcCU->getCoeffY() + tmp2, m_trCoeffY, sizeof(coeff_t) * tmp);
> + memcpy(rpcCU->getCoeffY() + tmp2, m_trCoeff[0], sizeof(coeff_t) * tmp);
> tmp >>= m_hChromaShift + m_vChromaShift;
> tmp2 >>= m_hChromaShift + m_vChromaShift;
> - memcpy(rpcCU->getCoeffCb() + tmp2, m_trCoeffCb, sizeof(coeff_t) * tmp);
> - memcpy(rpcCU->getCoeffCr() + tmp2, m_trCoeffCr, sizeof(coeff_t) * tmp);
> + memcpy(rpcCU->m_trCoeff[1] + tmp2, m_trCoeff[1], sizeof(coeff_t) * tmp);
> + memcpy(rpcCU->m_trCoeff[2] + tmp2, m_trCoeff[2], sizeof(coeff_t) * tmp);
> }
>
> void TComDataCU::copyToPic(uint8_t depth, uint32_t partIdx, uint32_t partDepth)
> @@ -748,12 +748,12 @@
>
> uint32_t tmp = (g_maxCUSize * g_maxCUSize) >> ((depth + partDepth) << 1);
> uint32_t tmp2 = partOffset << m_pic->getLog2UnitSize() * 2;
> - memcpy(cu->getCoeffY() + tmp2, m_trCoeffY, sizeof(coeff_t) * tmp);
> + memcpy(cu->getCoeffY() + tmp2, m_trCoeff[0], sizeof(coeff_t) * tmp);
> memcpy(cu->getPCMSampleY() + tmp2, m_iPCMSampleY, sizeof(pixel) * tmp);
> tmp >>= m_hChromaShift + m_vChromaShift;
> tmp2 >>= m_hChromaShift + m_vChromaShift;
> - memcpy(cu->getCoeffCb() + tmp2, m_trCoeffCb, sizeof(coeff_t) * tmp);
> - memcpy(cu->getCoeffCr() + tmp2, m_trCoeffCr, sizeof(coeff_t) * tmp);
> + memcpy(cu->m_trCoeff[1] + tmp2, m_trCoeff[1], sizeof(coeff_t) * tmp);
> + memcpy(cu->m_trCoeff[2] + tmp2, m_trCoeff[2], sizeof(coeff_t) * tmp);
> memcpy(cu->getPCMSampleCb() + tmp2, m_iPCMSampleCb, sizeof(pixel) * tmp);
> memcpy(cu->getPCMSampleCr() + tmp2, m_iPCMSampleCr, sizeof(pixel) * tmp);
> }
> diff -r 807ee7f1597b -r 8e2f16c13099 source/Lib/TLibCommon/TComDataCU.h
> --- a/source/Lib/TLibCommon/TComDataCU.h Tue May 27 23:22:21 2014 +0530
> +++ b/source/Lib/TLibCommon/TComDataCU.h Wed May 28 16:48:47 2014 +0900
> @@ -112,9 +112,7 @@
> uint8_t* m_transformSkip[3]; ///< array of transform skipping flags
> uint8_t* m_cbf[3]; ///< array of coded block flags (CBF)
> TComCUMvField m_cuMvField[2]; ///< array of motion vectors
> - coeff_t* m_trCoeffY; ///< transformed coefficient buffer (Y)
> - coeff_t* m_trCoeffCb; ///< transformed coefficient buffer (Cb)
> - coeff_t* m_trCoeffCr; ///< transformed coefficient buffer (Cr)
> + coeff_t* m_trCoeff[3]; ///< transformed coefficient buffer
>
> pixel* m_iPCMSampleY; ///< PCM sample buffer (Y)
> pixel* m_iPCMSampleCb; ///< PCM sample buffer (Cb)
> @@ -279,11 +277,13 @@
>
> TComCUMvField* getCUMvField(int e) { return &m_cuMvField[e]; }
>
> - coeff_t*& getCoeffY() { return m_trCoeffY; }
> + coeff_t* getCoeffY() { return m_trCoeff[0]; }
>
> - coeff_t*& getCoeffCb() { return m_trCoeffCb; }
> + coeff_t* getCoeffCb() { return m_trCoeff[1]; }
>
> - coeff_t*& getCoeffCr() { return m_trCoeffCr; }
> + coeff_t* getCoeffCr() { return m_trCoeff[2]; }
> +
> + coeff_t* getCoeff(TextType ttype) { return m_trCoeff[ttype]; }
>
> pixel*& getPCMSampleY() { return m_iPCMSampleY; }
>
> diff -r 807ee7f1597b -r 8e2f16c13099 source/Lib/TLibCommon/TComPattern.cpp
> --- a/source/Lib/TLibCommon/TComPattern.cpp Tue May 27 23:22:21 2014 +0530
> +++ b/source/Lib/TLibCommon/TComPattern.cpp Wed May 28 16:48:47 2014 +0900
> @@ -163,7 +163,7 @@
> }
> }
>
> -void TComPattern::initAdiPatternChroma(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, pixel* adiBuf, int chromaId)
> +void TComPattern::initAdiPatternChroma(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, pixel* adiBuf, uint32_t chromaId)
> {
> pixel* roiOrigin;
> pixel* adiTemp;
> @@ -175,7 +175,7 @@
> initIntraNeighbors(cu, zOrderIdxInPart, partDepth, TEXT_CHROMA, &intraNeighbors);
> uint32_t tuSize = intraNeighbors.tuSize;
>
> - roiOrigin = (chromaId == 1) ? cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), cu->getZorderIdxInCU() + zOrderIdxInPart) : cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), cu->getZorderIdxInCU() + zOrderIdxInPart);
> + roiOrigin = cu->getPic()->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), cu->getZorderIdxInCU() + zOrderIdxInPart);
> adiTemp = getAdiChromaBuf(chromaId, tuSize, adiBuf);
>
> fillReferenceSamples(roiOrigin, picStride, adiTemp, intraNeighbors);
> diff -r 807ee7f1597b -r 8e2f16c13099 source/Lib/TLibCommon/TComPattern.h
> --- a/source/Lib/TLibCommon/TComPattern.h Tue May 27 23:22:21 2014 +0530
> +++ b/source/Lib/TLibCommon/TComPattern.h Wed May 28 16:48:47 2014 +0900
> @@ -70,7 +70,7 @@
> public:
>
> // access functions of ADI buffers
> - static pixel* getAdiChromaBuf(int chromaId, int tuSize, pixel* adiBuf)
> + static pixel* getAdiChromaBuf(uint32_t chromaId, int tuSize, pixel* adiBuf)
> {
> return adiBuf + (chromaId == 1 ? 0 : 2 * ADI_BUF_STRIDE * (tuSize * 2 + 1));
> }
> @@ -86,7 +86,7 @@
>
> /// set chroma parameters from CU data for accessing ADI data
> static void initAdiPatternChroma(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth,
> - pixel* adiBuf, int chromaId);
> + pixel* adiBuf, uint32_t chromaId);
>
> static void initIntraNeighbors(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, TextType cType, IntraNeighbors *IntraNeighbors);
>
> diff -r 807ee7f1597b -r 8e2f16c13099 source/Lib/TLibCommon/TComPicYuv.cpp
> --- a/source/Lib/TLibCommon/TComPicYuv.cpp Tue May 27 23:22:21 2014 +0530
> +++ b/source/Lib/TLibCommon/TComPicYuv.cpp Wed May 28 16:48:47 2014 +0900
> @@ -46,13 +46,13 @@
>
> TComPicYuv::TComPicYuv()
> {
> - m_picBufY = NULL; // Buffer (including margin)
> - m_picBufU = NULL;
> - m_picBufV = NULL;
> + m_picBuf[0] = NULL; // Buffer (including margin)
> + m_picBuf[1] = NULL;
> + m_picBuf[2] = NULL;
>
> - m_picOrgY = NULL; // m_apiPicBufY + m_iMarginLuma*getStride() + m_iMarginLuma
> - m_picOrgU = NULL;
> - m_picOrgV = NULL;
> + m_picOrg[0] = NULL; // m_apiPicBufY + m_iMarginLuma*getStride() + m_iMarginLuma
> + m_picOrg[1] = NULL;
> + m_picOrg[2] = NULL;
>
> m_cuOffsetY = NULL;
> m_cuOffsetC = NULL;
> @@ -88,13 +88,13 @@
> m_strideC = ((m_numCuInWidth * g_maxCUSize) >> m_hChromaShift) + (m_chromaMarginX * 2);
> int maxHeight = m_numCuInHeight * g_maxCUSize;
>
> - CHECKED_MALLOC(m_picBufY, pixel, m_stride * (maxHeight + (m_lumaMarginY * 2)));
> - CHECKED_MALLOC(m_picBufU, pixel, m_strideC * ((maxHeight >> m_vChromaShift) + (m_chromaMarginY * 2)));
> - CHECKED_MALLOC(m_picBufV, pixel, m_strideC * ((maxHeight >> m_vChromaShift) + (m_chromaMarginY * 2)));
> + CHECKED_MALLOC(m_picBuf[0], pixel, m_stride * (maxHeight + (m_lumaMarginY * 2)));
> + CHECKED_MALLOC(m_picBuf[1], pixel, m_strideC * ((maxHeight >> m_vChromaShift) + (m_chromaMarginY * 2)));
> + CHECKED_MALLOC(m_picBuf[2], pixel, m_strideC * ((maxHeight >> m_vChromaShift) + (m_chromaMarginY * 2)));
>
> - m_picOrgY = m_picBufY + m_lumaMarginY * getStride() + m_lumaMarginX;
> - m_picOrgU = m_picBufU + m_chromaMarginY * getCStride() + m_chromaMarginX;
> - m_picOrgV = m_picBufV + m_chromaMarginY * getCStride() + m_chromaMarginX;
> + m_picOrg[0] = m_picBuf[0] + m_lumaMarginY * getStride() + m_lumaMarginX;
> + m_picOrg[1] = m_picBuf[1] + m_chromaMarginY * getCStride() + m_chromaMarginX;
> + m_picOrg[2] = m_picBuf[2] + m_chromaMarginY * getCStride() + m_chromaMarginX;
>
> /* TODO: these four buffers are the same for every TComPicYuv in the encoder */
> CHECKED_MALLOC(m_cuOffsetY, int, m_numCuInWidth * m_numCuInHeight);
> @@ -127,9 +127,9 @@
>
> void TComPicYuv::destroy()
> {
> - X265_FREE(m_picBufY);
> - X265_FREE(m_picBufU);
> - X265_FREE(m_picBufV);
> + X265_FREE(m_picBuf[0]);
> + X265_FREE(m_picBuf[1]);
> + X265_FREE(m_picBuf[2]);
> X265_FREE(m_cuOffsetY);
> X265_FREE(m_cuOffsetC);
> X265_FREE(m_buOffsetY);
> diff -r 807ee7f1597b -r 8e2f16c13099 source/Lib/TLibCommon/TComPicYuv.h
> --- a/source/Lib/TLibCommon/TComPicYuv.h Tue May 27 23:22:21 2014 +0530
> +++ b/source/Lib/TLibCommon/TComPicYuv.h Wed May 28 16:48:47 2014 +0900
> @@ -64,13 +64,9 @@
> // YUV buffer
> // ------------------------------------------------------------------------------------------------
>
> - pixel* m_picBufY; ///< Buffer (including margin)
> - pixel* m_picBufU;
> - pixel* m_picBufV;
> + pixel* m_picBuf[3]; ///< Buffer (including margin)
>
> - pixel* m_picOrgY; ///< m_apiPicBufY + m_iMarginLuma*getStride() + m_iMarginLuma
> - pixel* m_picOrgU;
> - pixel* m_picOrgV;
> + pixel* m_picOrg[3]; ///< m_apiPicBufY + m_iMarginLuma*getStride() + m_iMarginLuma
>
> // ------------------------------------------------------------------------------------------------
> // Parameter for general YUV buffer usage
> @@ -131,32 +127,31 @@
> // Access function for picture buffer
> // ------------------------------------------------------------------------------------------------
>
> - // Access starting position of picture buffer with margin
> - pixel* getBufY() { return m_picBufY; }
> + // Access starting position of original picture
> + pixel* getLumaAddr() { return m_picOrg[0]; }
>
> - pixel* getBufU() { return m_picBufU; }
> + pixel* getCbAddr() { return m_picOrg[1]; }
>
> - pixel* getBufV() { return m_picBufV; }
> + pixel* getCrAddr() { return m_picOrg[2]; }
>
> - // Access starting position of original picture
> - pixel* getLumaAddr() { return m_picOrgY; }
> -
> - pixel* getCbAddr() { return m_picOrgU; }
> -
> - pixel* getCrAddr() { return m_picOrgV; }
> + pixel* getChromaAddr(uint32_t chromaId) { return m_picOrg[chromaId]; }
>
> // Access starting position of original picture for specific coding unit (CU) or partition unit (PU)
> - pixel* getLumaAddr(int cuAddr) { return m_picOrgY + m_cuOffsetY[cuAddr]; }
> + pixel* getLumaAddr(int cuAddr) { return m_picOrg[0] + m_cuOffsetY[cuAddr]; }
>
> - pixel* getCbAddr(int cuAddr) { return m_picOrgU + m_cuOffsetC[cuAddr]; }
> + pixel* getCbAddr(int cuAddr) { return m_picOrg[1] + m_cuOffsetC[cuAddr]; }
>
> - pixel* getCrAddr(int cuAddr) { return m_picOrgV + m_cuOffsetC[cuAddr]; }
> + pixel* getCrAddr(int cuAddr) { return m_picOrg[2] + m_cuOffsetC[cuAddr]; }
>
> - pixel* getLumaAddr(int cuAddr, int absZOrderIdx) { return m_picOrgY + m_cuOffsetY[cuAddr] + m_buOffsetY[g_zscanToRaster[absZOrderIdx]]; }
> + pixel* getChromaAddr(uint32_t chromaId, int cuAddr) { return m_picOrg[chromaId] + m_cuOffsetC[cuAddr]; }
>
> - pixel* getCbAddr(int cuAddr, int absZOrderIdx) { return m_picOrgU + m_cuOffsetC[cuAddr] + m_buOffsetC[g_zscanToRaster[absZOrderIdx]]; }
> + pixel* getLumaAddr(int cuAddr, int absZOrderIdx) { return m_picOrg[0] + m_cuOffsetY[cuAddr] + m_buOffsetY[g_zscanToRaster[absZOrderIdx]]; }
>
> - pixel* getCrAddr(int cuAddr, int absZOrderIdx) { return m_picOrgV + m_cuOffsetC[cuAddr] + m_buOffsetC[g_zscanToRaster[absZOrderIdx]]; }
> + pixel* getCbAddr(int cuAddr, int absZOrderIdx) { return m_picOrg[1] + m_cuOffsetC[cuAddr] + m_buOffsetC[g_zscanToRaster[absZOrderIdx]]; }
> +
> + pixel* getCrAddr(int cuAddr, int absZOrderIdx) { return m_picOrg[2] + m_cuOffsetC[cuAddr] + m_buOffsetC[g_zscanToRaster[absZOrderIdx]]; }
> +
> + pixel* getChromaAddr(uint32_t chromaId, int cuAddr, int absZOrderIdx) { return m_picOrg[chromaId] + m_cuOffsetC[cuAddr] + m_buOffsetC[g_zscanToRaster[absZOrderIdx]]; }
>
> uint32_t getCUHeight(int rowNum);
>
> diff -r 807ee7f1597b -r 8e2f16c13099 source/Lib/TLibCommon/TComSampleAdaptiveOffset.cpp
> --- a/source/Lib/TLibCommon/TComSampleAdaptiveOffset.cpp Tue May 27 23:22:21 2014 +0530
> +++ b/source/Lib/TLibCommon/TComSampleAdaptiveOffset.cpp Wed May 28 16:48:47 2014 +0900
> @@ -566,14 +566,9 @@
> rec = m_pic->getPicYuvRec()->getLumaAddr(addr);
> stride = m_pic->getStride();
> }
> - else if (yCbCr == 1)
> - {
> - rec = m_pic->getPicYuvRec()->getCbAddr(addr);
> - stride = m_pic->getCStride();
> - }
> else
> {
> - rec = m_pic->getPicYuvRec()->getCrAddr(addr);
> + rec = m_pic->getPicYuvRec()->getChromaAddr(yCbCr, addr);
> stride = m_pic->getCStride();
> }
>
> @@ -848,14 +843,9 @@
> rec = m_pic->getPicYuvRec()->getLumaAddr();
> picWidthTmp = m_picWidth;
> }
> - else if (yCbCr == 1)
> - {
> - rec = m_pic->getPicYuvRec()->getCbAddr();
> - picWidthTmp = m_picWidth >> m_hChromaShift;
> - }
> else
> {
> - rec = m_pic->getPicYuvRec()->getCrAddr();
> + rec = m_pic->getPicYuvRec()->getChromaAddr(yCbCr);
> picWidthTmp = m_picWidth >> m_hChromaShift;
> }
>
> @@ -892,15 +882,9 @@
> stride = m_pic->getStride();
> picWidthTmp = m_picWidth;
> }
> - else if (yCbCr == 1)
> - {
> - rec = m_pic->getPicYuvRec()->getCbAddr(addr);
> - stride = m_pic->getCStride();
> - picWidthTmp = m_picWidth >> m_hChromaShift;
> - }
> else
> {
> - rec = m_pic->getPicYuvRec()->getCrAddr(addr);
> + rec = m_pic->getPicYuvRec()->getChromaAddr(yCbCr, addr);
> stride = m_pic->getCStride();
> picWidthTmp = m_picWidth >> m_hChromaShift;
> }
> @@ -977,14 +961,9 @@
> rec = m_pic->getPicYuvRec()->getLumaAddr(addr);
> stride = m_pic->getStride();
> }
> - else if (yCbCr == 1)
> - {
> - rec = m_pic->getPicYuvRec()->getCbAddr(addr);
> - stride = m_pic->getCStride();
> - }
> else
> {
> - rec = m_pic->getPicYuvRec()->getCrAddr(addr);
> + rec = m_pic->getPicYuvRec()->getChromaAddr(yCbCr, addr);
> stride = m_pic->getCStride();
> }
>
> @@ -1019,14 +998,9 @@
> rec = m_pic->getPicYuvRec()->getLumaAddr();
> picWidthTmp = m_picWidth;
> }
> - else if (yCbCr == 1)
> - {
> - rec = m_pic->getPicYuvRec()->getCbAddr();
> - picWidthTmp = m_picWidth >> m_hChromaShift;
> - }
> else
> {
> - rec = m_pic->getPicYuvRec()->getCrAddr();
> + rec = m_pic->getPicYuvRec()->getChromaAddr(yCbCr);
> picWidthTmp = m_picWidth >> m_hChromaShift;
> }
>
> @@ -1061,15 +1035,9 @@
> stride = m_pic->getStride();
> picWidthTmp = m_picWidth;
> }
> - else if (yCbCr == 1)
> - {
> - rec = m_pic->getPicYuvRec()->getCbAddr(addr);
> - stride = m_pic->getCStride();
> - picWidthTmp = m_picWidth >> m_hChromaShift;
> - }
> else
> {
> - rec = m_pic->getPicYuvRec()->getCrAddr(addr);
> + rec = m_pic->getPicYuvRec()->getChromaAddr(yCbCr, addr);
> stride = m_pic->getCStride();
> picWidthTmp = m_picWidth >> m_hChromaShift;
> }
> @@ -1139,14 +1107,9 @@
> rec = m_pic->getPicYuvRec()->getLumaAddr(addr);
> stride = m_pic->getStride();
> }
> - else if (yCbCr == 1)
> - {
> - rec = m_pic->getPicYuvRec()->getCbAddr(addr);
> - stride = m_pic->getCStride();
> - }
> else
> {
> - rec = m_pic->getPicYuvRec()->getCrAddr(addr);
> + rec = m_pic->getPicYuvRec()->getChromaAddr(yCbCr, addr);
> stride = m_pic->getCStride();
> }
>
> diff -r 807ee7f1597b -r 8e2f16c13099 source/Lib/TLibCommon/TComYuv.cpp
> --- a/source/Lib/TLibCommon/TComYuv.cpp Tue May 27 23:22:21 2014 +0530
> +++ b/source/Lib/TLibCommon/TComYuv.cpp Wed May 28 16:48:47 2014 +0900
> @@ -49,13 +49,15 @@
>
> TComYuv::TComYuv()
> {
> - m_bufY = NULL;
> - m_bufU = NULL;
> - m_bufV = NULL;
> + m_buf[0] = NULL;
> + m_buf[1] = NULL;
> + m_buf[2] = NULL;
> }
>
> TComYuv::~TComYuv()
> -{}
> +{
> + destroy();
> +}
>
> bool TComYuv::create(uint32_t width, uint32_t height, int csp)
> {
> @@ -72,10 +74,13 @@
> m_csp = csp;
> m_part = partitionFromSizes(m_width, m_height);
>
> + uint32_t sizeL = width * height;
> + uint32_t sizeC = m_cwidth * m_cheight;
> + X265_CHECK((sizeC & 15) == 0, "invalid size");
> // memory allocation (padded for SIMD reads)
> - CHECKED_MALLOC(m_bufY, pixel, width * height);
> - CHECKED_MALLOC(m_bufU, pixel, m_cwidth * m_cheight + 8);
> - CHECKED_MALLOC(m_bufV, pixel, m_cwidth * m_cheight + 8);
> + CHECKED_MALLOC(m_buf[0], pixel, sizeL + sizeC * 2 + 8);
> + m_buf[1] = m_buf[0] + sizeL;
> + m_buf[2] = m_buf[0] + sizeL + sizeC;
> return true;
>
> fail:
> @@ -85,55 +90,53 @@
> void TComYuv::destroy()
> {
> // memory free
> - X265_FREE(m_bufY);
> - m_bufY = NULL;
> - X265_FREE(m_bufU);
> - m_bufU = NULL;
> - X265_FREE(m_bufV);
> - m_bufV = NULL;
> + X265_FREE(m_buf[0]);
> + m_buf[0] = NULL;
> + m_buf[1] = NULL;
> + m_buf[2] = NULL;
> }
>
> void TComYuv::clear()
> {
> - ::memset(m_bufY, 0, (m_width * m_height) * sizeof(pixel));
> - ::memset(m_bufU, 0, (m_cwidth * m_cheight) * sizeof(pixel));
> - ::memset(m_bufV, 0, (m_cwidth * m_cheight) * sizeof(pixel));
> + ::memset(m_buf[0], 0, (m_width * m_height) * sizeof(pixel));
> + ::memset(m_buf[1], 0, (m_cwidth * m_cheight) * sizeof(pixel));
> + ::memset(m_buf[2], 0, (m_cwidth * m_cheight) * sizeof(pixel));
> }
>
> void TComYuv::copyToPicYuv(TComPicYuv* destPicYuv, uint32_t cuAddr, uint32_t absZOrderIdx)
> {
> pixel* dstY = destPicYuv->getLumaAddr(cuAddr, absZOrderIdx);
>
> - primitives.luma_copy_pp[m_part](dstY, destPicYuv->getStride(), m_bufY, getStride());
> + primitives.luma_copy_pp[m_part](dstY, destPicYuv->getStride(), m_buf[0], getStride());
>
> pixel* dstU = destPicYuv->getCbAddr(cuAddr, absZOrderIdx);
> pixel* dstV = destPicYuv->getCrAddr(cuAddr, absZOrderIdx);
> - primitives.chroma[m_csp].copy_pp[m_part](dstU, destPicYuv->getCStride(), m_bufU, getCStride());
> - primitives.chroma[m_csp].copy_pp[m_part](dstV, destPicYuv->getCStride(), m_bufV, getCStride());
> + primitives.chroma[m_csp].copy_pp[m_part](dstU, destPicYuv->getCStride(), m_buf[1], getCStride());
> + primitives.chroma[m_csp].copy_pp[m_part](dstV, destPicYuv->getCStride(), m_buf[2], getCStride());
> }
>
> void TComYuv::copyFromPicYuv(TComPicYuv* srcPicYuv, uint32_t cuAddr, uint32_t absZOrderIdx)
> {
> pixel* srcY = srcPicYuv->getLumaAddr(cuAddr, absZOrderIdx);
>
> - primitives.luma_copy_pp[m_part](m_bufY, getStride(), srcY, srcPicYuv->getStride());
> + primitives.luma_copy_pp[m_part](m_buf[0], getStride(), srcY, srcPicYuv->getStride());
>
> pixel* srcU = srcPicYuv->getCbAddr(cuAddr, absZOrderIdx);
> pixel* srcV = srcPicYuv->getCrAddr(cuAddr, absZOrderIdx);
> - primitives.chroma[m_csp].copy_pp[m_part](m_bufU, getCStride(), srcU, srcPicYuv->getCStride());
> - primitives.chroma[m_csp].copy_pp[m_part](m_bufV, getCStride(), srcV, srcPicYuv->getCStride());
> + primitives.chroma[m_csp].copy_pp[m_part](m_buf[1], getCStride(), srcU, srcPicYuv->getCStride());
> + primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], getCStride(), srcV, srcPicYuv->getCStride());
> }
>
> void TComYuv::copyToPartYuv(TComYuv* dstPicYuv, uint32_t partIdx)
> {
> pixel* dstY = dstPicYuv->getLumaAddr(partIdx);
>
> - primitives.luma_copy_pp[m_part](dstY, dstPicYuv->getStride(), m_bufY, getStride());
> + primitives.luma_copy_pp[m_part](dstY, dstPicYuv->getStride(), m_buf[0], getStride());
>
> pixel* dstU = dstPicYuv->getCbAddr(partIdx);
> pixel* dstV = dstPicYuv->getCrAddr(partIdx);
> - primitives.chroma[m_csp].copy_pp[m_part](dstU, dstPicYuv->getCStride(), m_bufU, getCStride());
> - primitives.chroma[m_csp].copy_pp[m_part](dstV, dstPicYuv->getCStride(), m_bufV, getCStride());
> + primitives.chroma[m_csp].copy_pp[m_part](dstU, dstPicYuv->getCStride(), m_buf[1], getCStride());
> + primitives.chroma[m_csp].copy_pp[m_part](dstV, dstPicYuv->getCStride(), m_buf[2], getCStride());
> }
>
> void TComYuv::copyPartToYuv(TComYuv* dstPicYuv, uint32_t partIdx)
> @@ -196,41 +199,17 @@
>
> void TComYuv::copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId, const bool splitIntoSubTUs)
> {
> + X265_CHECK(chromaId == 1 || chromaId == 2, "invalid chroma id");
> +
> int part = splitIntoSubTUs ? NUM_CHROMA_PARTITIONS422 : partitionFromSize(lumaSize);
>
> - if (chromaId == 1)
> - {
> - pixel* srcU = getCbAddr(partIdx);
> - int16_t* dstU = dstPicYuv->getCbAddr(partIdx);
> + pixel* src = getChromaAddr(chromaId, partIdx);
> + int16_t* dst = dstPicYuv->getChromaAddr(chromaId, partIdx);
>
> - uint32_t srcstride = getCStride();
> - uint32_t dststride = dstPicYuv->m_cwidth;
> + uint32_t srcstride = getCStride();
> + uint32_t dststride = dstPicYuv->m_cwidth;
>
> - primitives.chroma[m_csp].copy_ps[part](dstU, dststride, srcU, srcstride);
> - }
> - else if (chromaId == 2)
> - {
> - pixel* srcV = getCrAddr(partIdx);
> - int16_t* dstV = dstPicYuv->getCrAddr(partIdx);
> -
> - uint32_t srcstride = getCStride();
> - uint32_t dststride = dstPicYuv->m_cwidth;
> -
> - primitives.chroma[m_csp].copy_ps[part](dstV, dststride, srcV, srcstride);
> - }
> - else
> - {
> - pixel* srcU = getCbAddr(partIdx);
> - pixel* srcV = getCrAddr(partIdx);
> - int16_t* dstU = dstPicYuv->getCbAddr(partIdx);
> - int16_t* dstV = dstPicYuv->getCrAddr(partIdx);
> -
> - uint32_t srcstride = getCStride();
> - uint32_t dststride = dstPicYuv->m_cwidth;
> -
> - primitives.chroma[m_csp].copy_ps[part](dstU, dststride, srcU, srcstride);
> - primitives.chroma[m_csp].copy_ps[part](dstV, dststride, srcV, srcstride);
> - }
> + primitives.chroma[m_csp].copy_ps[part](dst, dststride, src, srcstride);
> }
>
> void TComYuv::addClip(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t partSize)
> diff -r 807ee7f1597b -r 8e2f16c13099 source/Lib/TLibCommon/TComYuv.h
> --- a/source/Lib/TLibCommon/TComYuv.h Tue May 27 23:22:21 2014 +0530
> +++ b/source/Lib/TLibCommon/TComYuv.h Wed May 28 16:48:47 2014 +0900
> @@ -65,9 +65,7 @@
> // YUV buffer
> // ------------------------------------------------------------------------------------------------------------------
>
> - pixel* m_bufY;
> - pixel* m_bufU;
> - pixel* m_bufV;
> + pixel* m_buf[3];
>
> // ------------------------------------------------------------------------------------------------------------------
> // Parameter for general YUV buffer usage
> @@ -98,30 +96,12 @@
> return blkX + blkY * width;
> }
>
> - static int getAddrOffset(uint32_t unitIdx, uint32_t size, uint32_t width)
> - {
> - int blkX = (unitIdx * size) & (width - 1);
> - int blkY = (unitIdx * size) & ~(width - 1);
> -
> - return blkX + blkY * size;
> - }
> -
> - int getChromaAddrOffset(uint32_t unitIdx, uint32_t size, uint32_t width)
> - {
> - int blkX = (unitIdx * size) & (width - 1);
> - int blkY = (unitIdx * size) & ~(width - 1);
> -
> - if (m_csp == CHROMA_422) blkY <<= 1;
> -
> - return blkX + blkY * size;
> - }
> -
> public:
>
> int m_part; // partitionFromSizes(m_width, m_height)
>
> TComYuv();
> - virtual ~TComYuv();
> + ~TComYuv();
>
> // ------------------------------------------------------------------------------------------------------------------
> // Memory management
> @@ -171,25 +151,22 @@
> // ------------------------------------------------------------------------------------------------------------------
>
> // Access starting position of YUV buffer
> - pixel* getLumaAddr() { return m_bufY; }
> + pixel* getLumaAddr() { return m_buf[0]; }
>
> - pixel* getCbAddr() { return m_bufU; }
> + pixel* getCbAddr() { return m_buf[1]; }
>
> - pixel* getCrAddr() { return m_bufV; }
> + pixel* getCrAddr() { return m_buf[2]; }
> +
> + pixel* getChromaAddr(uint32_t chromaId) { return m_buf[chromaId]; }
>
> // Access starting position of YUV partition unit buffer
> - pixel* getLumaAddr(uint32_t partUnitIdx) { return m_bufY + getAddrOffset(partUnitIdx, m_width); }
> + pixel* getLumaAddr(uint32_t partUnitIdx) { return m_buf[0] + getAddrOffset(partUnitIdx, m_width); }
>
> - pixel* getCbAddr(uint32_t partUnitIdx) { return m_bufU + getChromaAddrOffset(partUnitIdx, m_cwidth); }
> + pixel* getCbAddr(uint32_t partUnitIdx) { return m_buf[1] + getChromaAddrOffset(partUnitIdx, m_cwidth); }
>
> - pixel* getCrAddr(uint32_t partUnitIdx) { return m_bufV + getChromaAddrOffset(partUnitIdx, m_cwidth); }
> + pixel* getCrAddr(uint32_t partUnitIdx) { return m_buf[2] + getChromaAddrOffset(partUnitIdx, m_cwidth); }
>
> - // Access starting position of YUV transform unit buffer
> - pixel* getLumaAddr(uint32_t transUnitIdx, uint32_t blkSize) { return m_bufY + getAddrOffset(transUnitIdx, blkSize, m_width); }
> -
> - pixel* getCbAddr(uint32_t transUnitIdx, uint32_t blkSize) { return m_bufU + getChromaAddrOffset(transUnitIdx, blkSize, m_cwidth); }
> -
> - pixel* getCrAddr(uint32_t transUnitIdx, uint32_t blkSize) { return m_bufV + getChromaAddrOffset(transUnitIdx, blkSize, m_cwidth); }
> + pixel* getChromaAddr(uint32_t chromaId, uint32_t partUnitIdx) { return m_buf[chromaId] + getChromaAddrOffset(partUnitIdx, m_cwidth); }
>
> // Get stride value of YUV buffer
> uint32_t getStride() { return m_width; }
> @@ -209,6 +186,7 @@
> // -------------------------------------------------------------------------------------------------------------------
>
> int getHorzChromaShift() { return m_hChromaShift; }
> +
> int getVertChromaShift() { return m_vChromaShift; }
> };
> }
> diff -r 807ee7f1597b -r 8e2f16c13099 source/Lib/TLibEncoder/TEncEntropy.cpp
> --- a/source/Lib/TLibEncoder/TEncEntropy.cpp Tue May 27 23:22:21 2014 +0530
> +++ b/source/Lib/TLibEncoder/TEncEntropy.cpp Wed May 28 16:48:47 2014 +0900
> @@ -376,11 +376,11 @@
>
> uint32_t curPartNum = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);
>
> - for (uint32_t chromaId = TEXT_CHROMA; chromaId < MAX_NUM_COMPONENT; chromaId++)
> + for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
> {
> TComTURecurse tuIterator;
> initTUEntropySection(&tuIterator, splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, m_bakAbsPartIdx);
> - coeff_t* coeffChroma = (chromaId == 1) ? cu->getCoeffCb() : cu->getCoeffCr();
> + coeff_t* coeffChroma = cu->getCoeff((TextType)chromaId);
> do
> {
> uint32_t cbf = cu->getCbf(tuIterator.m_absPartIdxTURelCU, (TextType)chromaId, trIdx + splitIntoSubTUs);
> @@ -399,11 +399,11 @@
> uint32_t trSizeC = tuSize >> hChromaShift;
> const bool splitIntoSubTUs = (chFmt == CHROMA_422);
> uint32_t curPartNum = cu->getPic()->getNumPartInCU() >> (depth << 1);
> - for (uint32_t chromaId = TEXT_CHROMA; chromaId < MAX_NUM_COMPONENT; chromaId++)
> + for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
> {
> TComTURecurse tuIterator;
> initTUEntropySection(&tuIterator, splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdx);
> - coeff_t* coeffChroma = (chromaId == 1) ? cu->getCoeffCb() : cu->getCoeffCr();
> + coeff_t* coeffChroma = cu->getCoeff((TextType)chromaId);
> do
> {
> uint32_t cbf = cu->getCbf(tuIterator.m_absPartIdxTURelCU, (TextType)chromaId, trIdx + splitIntoSubTUs);
> diff -r 807ee7f1597b -r 8e2f16c13099 source/Lib/TLibEncoder/TEncSearch.cpp
> --- a/source/Lib/TLibEncoder/TEncSearch.cpp Tue May 27 23:22:21 2014 +0530
> +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Wed May 28 16:48:47 2014 +0900
> @@ -52,14 +52,14 @@
>
> TEncSearch::TEncSearch()
> {
> - m_qtTempCoeffY = NULL;
> - m_qtTempCoeffCb = NULL;
> - m_qtTempCoeffCr = NULL;
> + m_qtTempCoeff[0] = NULL;
> + m_qtTempCoeff[1] = NULL;
> + m_qtTempCoeff[2] = NULL;
> m_qtTempTrIdx = NULL;
> m_qtTempShortYuv = NULL;
> - m_qtTempTUCoeffY = NULL;
> - m_qtTempTUCoeffCb = NULL;
> - m_qtTempTUCoeffCr = NULL;
> + m_qtTempTUCoeff[0] = NULL;
> + m_qtTempTUCoeff[1] = NULL;
> + m_qtTempTUCoeff[2] = NULL;
> for (int i = 0; i < 3; i++)
> {
> m_qtTempTransformSkipFlag[i] = NULL;
> @@ -81,16 +81,16 @@
> const uint32_t numLayersToAllocate = m_cfg->m_quadtreeTULog2MaxSize - m_cfg->m_quadtreeTULog2MinSize + 1;
> for (uint32_t i = 0; i < numLayersToAllocate; ++i)
> {
> - X265_FREE(m_qtTempCoeffY[i]);
> + X265_FREE(m_qtTempCoeff[0][i]);
> m_qtTempShortYuv[i].destroy();
> }
> }
> - X265_FREE(m_qtTempTUCoeffY);
> + X265_FREE(m_qtTempTUCoeff[0]);
> X265_FREE(m_qtTempTrIdx);
> X265_FREE(m_qtTempCbf[0]);
> X265_FREE(m_qtTempTransformSkipFlag[0]);
>
> - delete[] m_qtTempCoeffY;
> + delete[] m_qtTempCoeff[0];
> delete[] m_qtTempShortYuv;
> m_qtTempTransformSkipYuv.destroy();
> }
> @@ -110,17 +110,17 @@
> m_refLagPixels = cfg->param->frameNumThreads > 1 ? cfg->param->searchRange : cfg->param->sourceHeight;
>
> const uint32_t numLayersToAllocate = cfg->m_quadtreeTULog2MaxSize - cfg->m_quadtreeTULog2MinSize + 1;
> - m_qtTempCoeffY = new coeff_t*[numLayersToAllocate * 3];
> - m_qtTempCoeffCb = m_qtTempCoeffY + numLayersToAllocate;
> - m_qtTempCoeffCr = m_qtTempCoeffY + numLayersToAllocate * 2;
> + m_qtTempCoeff[0] = new coeff_t*[numLayersToAllocate * 3];
> + m_qtTempCoeff[1] = m_qtTempCoeff[0] + numLayersToAllocate;
> + m_qtTempCoeff[2] = m_qtTempCoeff[0] + numLayersToAllocate * 2;
> m_qtTempShortYuv = new ShortYuv[numLayersToAllocate];
> uint32_t sizeL = g_maxCUSize * g_maxCUSize;
> uint32_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift);
> for (uint32_t i = 0; i < numLayersToAllocate; ++i)
> {
> - m_qtTempCoeffY[i] = X265_MALLOC(coeff_t, sizeL + sizeC * 2);
> - m_qtTempCoeffCb[i] = m_qtTempCoeffY[i] + sizeL;
> - m_qtTempCoeffCr[i] = m_qtTempCoeffY[i] + sizeL + sizeC;
> + m_qtTempCoeff[0][i] = X265_MALLOC(coeff_t, sizeL + sizeC * 2);
> + m_qtTempCoeff[1][i] = m_qtTempCoeff[0][i] + sizeL;
> + m_qtTempCoeff[2][i] = m_qtTempCoeff[0][i] + sizeL + sizeC;
> m_qtTempShortYuv[i].create(MAX_CU_SIZE, MAX_CU_SIZE, cfg->param->internalCsp);
> }
>
> @@ -133,9 +133,9 @@
> m_qtTempTransformSkipFlag[1] = m_qtTempTransformSkipFlag[0] + numPartitions;
> m_qtTempTransformSkipFlag[2] = m_qtTempTransformSkipFlag[0] + numPartitions * 2;
>
> - CHECKED_MALLOC(m_qtTempTUCoeffY, coeff_t, MAX_TS_SIZE * MAX_TS_SIZE * 3);
> - m_qtTempTUCoeffCb = m_qtTempTUCoeffY + MAX_TS_SIZE * MAX_TS_SIZE;
> - m_qtTempTUCoeffCr = m_qtTempTUCoeffY + MAX_TS_SIZE * MAX_TS_SIZE * 2;
> + CHECKED_MALLOC(m_qtTempTUCoeff[0], coeff_t, MAX_TS_SIZE * MAX_TS_SIZE * 3);
> + m_qtTempTUCoeff[1] = m_qtTempTUCoeff[0] + MAX_TS_SIZE * MAX_TS_SIZE;
> + m_qtTempTUCoeff[2] = m_qtTempTUCoeff[0] + MAX_TS_SIZE * MAX_TS_SIZE * 2;
>
> return m_qtTempTransformSkipYuv.create(g_maxCUSize, g_maxCUSize, cfg->param->internalCsp);
>
> @@ -268,19 +268,7 @@
> height = splitIntoSubTUs ? height >> 1 : height;
> uint32_t coeffOffset = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (cspx + cspy));
> uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
> - coeff_t* coeff = 0;
> - switch (ttype)
> - {
> - case TEXT_LUMA: coeff = m_qtTempCoeffY[qtLayer];
> - break;
> - case TEXT_CHROMA_U: coeff = m_qtTempCoeffCb[qtLayer];
> - break;
> - case TEXT_CHROMA_V: coeff = m_qtTempCoeffCr[qtLayer];
> - break;
> - default: X265_CHECK(0, "invalid texture type\n");
> - }
> -
> - coeff += coeffOffset;
> + coeff_t* coeff = m_qtTempCoeff[ttype][qtLayer] + coeffOffset;
>
> if (width == height)
> {
> @@ -393,14 +381,7 @@
> uint32_t TEncSearch::xGetIntraBitsQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t chromaId, const bool splitIntoSubTUs)
> {
> m_entropyCoder->resetBits();
> - if (chromaId == TEXT_CHROMA_U)
> - {
> - xEncCoeffQT(cu, trDepth, absPartIdx, TEXT_CHROMA_U, splitIntoSubTUs);
> - }
> - else if (chromaId == TEXT_CHROMA_V)
> - {
> - xEncCoeffQT(cu, trDepth, absPartIdx, TEXT_CHROMA_V, splitIntoSubTUs);
> - }
> + xEncCoeffQT(cu, trDepth, absPartIdx, (TextType)chromaId, splitIntoSubTUs);
> return m_entropyCoder->getNumberOfWrittenBits();
> }
>
> @@ -424,7 +405,7 @@
> uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
> uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
> uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
> - coeff_t* coeff = m_qtTempCoeffY[qtLayer] + coeffOffsetY;
> + coeff_t* coeff = m_qtTempCoeff[0][qtLayer] + coeffOffsetY;
>
> int16_t* reconQt = m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
>
> @@ -512,20 +493,20 @@
> }
> }
>
> - TextType ttype = (chromaId == 1) ? TEXT_CHROMA_U : TEXT_CHROMA_V;
> + TextType ttype = (TextType)chromaId;
> uint32_t tuSize = cu->getCUSize(0) >> (trDepth + m_hChromaShift);
> uint32_t stride = fencYuv->getCStride();
> - pixel* fenc = (chromaId == 1) ? fencYuv->getCbAddr(absPartIdx) : fencYuv->getCrAddr(absPartIdx);
> - pixel* pred = (chromaId == 1) ? predYuv->getCbAddr(absPartIdx) : predYuv->getCrAddr(absPartIdx);
> - int16_t* residual = (chromaId == 1) ? resiYuv->getCbAddr(absPartIdx) : resiYuv->getCrAddr(absPartIdx);
> + pixel* fenc = fencYuv->getChromaAddr(chromaId, absPartIdx);
> + pixel* pred = predYuv->getChromaAddr(chromaId, absPartIdx);
> + int16_t* residual = resiYuv->getChromaAddr(chromaId, absPartIdx);
>
> uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
> uint32_t coeffOffsetC = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
> - coeff_t* coeff = (chromaId == 1 ? m_qtTempCoeffCb[qtlayer] : m_qtTempCoeffCr[qtlayer]) + coeffOffsetC;
> - int16_t* reconQt = (chromaId == 1) ? m_qtTempShortYuv[qtlayer].getCbAddr(absPartIdx) : m_qtTempShortYuv[qtlayer].getCrAddr(absPartIdx);
> + coeff_t* coeff = m_qtTempCoeff[chromaId][qtlayer] + coeffOffsetC;
> + int16_t* reconQt = m_qtTempShortYuv[qtlayer].getChromaAddr(chromaId, absPartIdx);
> uint32_t reconQtStride = m_qtTempShortYuv[qtlayer].m_cwidth;
> uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
> - pixel* reconIPred = (chromaId == 1) ? cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder) : cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);
> + pixel* reconIPred = cu->getPic()->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder);
> uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
> bool useTransformSkipChroma = !!cu->getTransformSkip(absPartIdx, ttype);
> int part = partitionFromSize(tuSize);
> @@ -942,12 +923,12 @@
> //===== copy transform coefficients =====
> uint32_t numCoeffY = 1 << (trSizeLog2 * 2);
> uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
> - coeff_t* coeffSrcY = m_qtTempCoeffY[qtlayer] + coeffOffsetY;
> - coeff_t* coeffDestY = cu->getCoeffY() + coeffOffsetY;
> + coeff_t* coeffSrcY = m_qtTempCoeff[0][qtlayer] + coeffOffsetY;
> + coeff_t* coeffDestY = cu->getCoeffY() + coeffOffsetY;
> ::memcpy(coeffDestY, coeffSrcY, sizeof(coeff_t) * numCoeffY);
>
> //===== copy reconstruction =====
> - m_qtTempShortYuv[qtlayer].copyPartToPartLuma(reconYuv, absPartIdx, 1 << trSizeLog2, 1 << trSizeLog2);
> + m_qtTempShortYuv[qtlayer].copyPartToPartLuma(reconYuv, absPartIdx, 1 << trSizeLog2);
> }
> else
> {
> @@ -968,13 +949,13 @@
> //===== copy transform coefficients =====
> uint32_t numCoeffY = 1 << (trSizeLog2 * 2);
> uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
> - coeff_t* coeffSrcY = m_qtTempCoeffY[qtlayer] + coeffOffsetY;
> - coeff_t* coeffDstY = m_qtTempTUCoeffY;
> + coeff_t* coeffSrcY = m_qtTempCoeff[0][qtlayer] + coeffOffsetY;
> + coeff_t* coeffDstY = m_qtTempTUCoeff[0];
>
> ::memcpy(coeffDstY, coeffSrcY, sizeof(coeff_t) * numCoeffY);
>
> //===== copy reconstruction =====
> - m_qtTempShortYuv[qtlayer].copyPartToPartLuma(&m_qtTempTransformSkipYuv, absPartIdx, 1 << trSizeLog2, 1 << trSizeLog2);
> + m_qtTempShortYuv[qtlayer].copyPartToPartLuma(&m_qtTempTransformSkipYuv, absPartIdx, 1 << trSizeLog2);
> }
>
> void TEncSearch::xLoadIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx)
> @@ -986,8 +967,8 @@
> //===== copy transform coefficients =====
> uint32_t numCoeffY = 1 << (trSizeLog2 * 2);
> uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
> - coeff_t* coeffDstY = m_qtTempCoeffY[qtlayer] + coeffOffsetY;
> - coeff_t* coeffSrcY = m_qtTempTUCoeffY;
> + coeff_t* coeffDstY = m_qtTempCoeff[0][qtlayer] + coeffOffsetY;
> + coeff_t* coeffSrcY = m_qtTempTUCoeff[0];
>
> ::memcpy(coeffDstY, coeffSrcY, sizeof(coeff_t) * numCoeffY);
>
> @@ -1036,18 +1017,9 @@
> uint32_t numCoeffC = width * height;
> uint32_t coeffOffsetC = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
>
> - if (chromaId == 1)
> - {
> - coeff_t* coeffSrcU = m_qtTempCoeffCb[qtlayer] + coeffOffsetC;
> - coeff_t* coeffDstU = m_qtTempTUCoeffCb;
> - ::memcpy(coeffDstU, coeffSrcU, sizeof(coeff_t) * numCoeffC);
> - }
> - if (chromaId == 2)
> - {
> - coeff_t* coeffSrcV = m_qtTempCoeffCr[qtlayer] + coeffOffsetC;
> - coeff_t* coeffDstV = m_qtTempTUCoeffCr;
> - ::memcpy(coeffDstV, coeffSrcV, sizeof(coeff_t) * numCoeffC);
> - }
> + coeff_t* coeffSrc = m_qtTempCoeff[chromaId][qtlayer] + coeffOffsetC;
> + coeff_t* coeffDst = m_qtTempTUCoeff[chromaId];
> + ::memcpy(coeffDst, coeffSrc, sizeof(coeff_t) * numCoeffC);
>
> //===== copy reconstruction =====
> uint32_t lumaSize = 1 << (bChromaSame ? trSizeLog2 + 1 : trSizeLog2);
> @@ -1055,9 +1027,9 @@
> }
> }
>
> -void TEncSearch::xLoadIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t chromaId, const bool splitIntoSubTUs)
> +void TEncSearch::xLoadIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t chromaId)
> {
> - assert(chromaId == 1 || chromaId == 2);
> + X265_CHECK(chromaId == 1 || chromaId == 2, "invalid chroma id");
>
> uint32_t fullDepth = cu->getDepth(0) + trDepth;
> uint32_t trMode = cu->getTransformIdx(absPartIdx);
> @@ -1067,12 +1039,15 @@
> uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
> uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
> int chFmt = cu->getChromaFormat();
> -
> + const bool splitIntoSubTUs = (chFmt == CHROMA_422);
> +
> + uint32_t trSizeCLog2 = trSizeLog2 - m_hChromaShift;
> bool bChromaSame = false;
> if (trSizeLog2 == 2 && !(chFmt == CHROMA_444))
> {
> X265_CHECK(trDepth > 0, "invalid trDepth\n");
> trDepth--;
> + trSizeCLog2++;
> uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepth) << 1);
> bool bFirstQ = ((absPartIdx & (qpdiv - 1)) == 0);
> bool bSecondQ = (chFmt == CHROMA_422) ? ((absPartIdx & (qpdiv - 1)) == 2) : false;
> @@ -1084,24 +1059,13 @@
> }
>
> //===== copy transform coefficients =====
> - uint32_t trWidthC = cu->getCUSize(absPartIdx) >> (trDepth + m_hChromaShift);
> - uint32_t trHeightC = cu->getCUSize(absPartIdx) >> (trDepth + m_vChromaShift);
> - trHeightC = splitIntoSubTUs ? trHeightC >> 1 : trHeightC;
> - uint32_t numCoeffC = trWidthC * trHeightC;
> + uint32_t trSizeC = 1 << trSizeCLog2;
> + uint32_t numCoeffC = 1 << trSizeCLog2 * 2;
> uint32_t coeffOffsetC = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
>
> - if (chromaId == 1)
> - {
> - coeff_t* coeffDstU = m_qtTempCoeffCb[qtlayer] + coeffOffsetC;
> - coeff_t* coeffSrcU = m_qtTempTUCoeffCb;
> - ::memcpy(coeffDstU, coeffSrcU, sizeof(coeff_t) * numCoeffC);
> - }
> - if (chromaId == 2)
> - {
> - coeff_t* coeffDstV = m_qtTempCoeffCr[qtlayer] + coeffOffsetC;
> - coeff_t* coeffSrcV = m_qtTempTUCoeffCr;
> - ::memcpy(coeffDstV, coeffSrcV, sizeof(coeff_t) * numCoeffC);
> - }
> + coeff_t* coeffDst = m_qtTempCoeff[chromaId][qtlayer] + coeffOffsetC;
> + coeff_t* coeffSrc = m_qtTempTUCoeff[chromaId];
> + ::memcpy(coeffDst, coeffSrc, sizeof(coeff_t) * numCoeffC);
>
> //===== copy reconstruction =====
> uint32_t lumaSize = 1 << (bChromaSame ? trSizeLog2 + 1 : trSizeLog2);
> @@ -1111,18 +1075,9 @@
> uint32_t reconQtStride = m_qtTempShortYuv[qtlayer].m_cwidth;
> uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
>
> - if (chromaId == 1)
> - {
> - pixel* reconIPred = cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder);
> - int16_t* reconQt = m_qtTempShortYuv[qtlayer].getCbAddr(absPartIdx);
> - primitives.blockcpy_ps(trWidthC, trHeightC, reconIPred, reconIPredStride, reconQt, reconQtStride);
> - }
> - if (chromaId == 2)
> - {
> - pixel* reconIPred = cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);
> - int16_t* reconQt = m_qtTempShortYuv[qtlayer].getCrAddr(absPartIdx);
> - primitives.blockcpy_ps(trWidthC, trHeightC, reconIPred, reconIPredStride, reconQt, reconQtStride);
> - }
> + pixel* reconIPred = cu->getPic()->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder);
> + int16_t* reconQt = m_qtTempShortYuv[qtlayer].getChromaAddr(chromaId, absPartIdx);
> + primitives.blockcpy_ps(trSizeC, trSizeC, reconIPred, reconIPredStride, reconQt, reconQtStride);
> }
> }
>
> @@ -1216,7 +1171,7 @@
> }
> }
>
> - for (int chromaId = TEXT_CHROMA; chromaId < MAX_NUM_COMPONENT; chromaId++)
> + for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
> {
> TComTURecurse tuIterator;
> uint32_t curPartNum = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + actualTrDepth) << 1);
> @@ -1225,7 +1180,7 @@
> do
> {
> uint32_t absPartIdxC = tuIterator.m_absPartIdxTURelCU;
> - pixel* pred = (chromaId == 1) ? predYuv->getCbAddr(absPartIdxC) : predYuv->getCrAddr(absPartIdxC);
> + pixel* pred = predYuv->getChromaAddr(chromaId, absPartIdxC);
>
> //===== init availability pattern =====
> TComPattern::initAdiPatternChroma(cu, absPartIdxC, actualTrDepth, m_predBuf, chromaId);
> @@ -1298,7 +1253,7 @@
>
> if (bestModeId == firstCheckId)
> {
> - xLoadIntraResultChromaQT(cu, trDepth, absPartIdxC, chromaId, splitIntoSubTUs);
> + xLoadIntraResultChromaQT(cu, trDepth, absPartIdxC, chromaId);
> cu->setCbfPartRange(singleCbfC << trDepth, (TextType)chromaId, absPartIdxC, tuIterator.m_absPartIdxStep);
>
> m_rdGoOnSbacCoder->load(m_rdSbacCoders[fullDepth][CI_TEMP_BEST]);
> @@ -1379,10 +1334,10 @@
> uint32_t numCoeffC = width * height;
> uint32_t coeffOffsetC = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
>
> - coeff_t* coeffSrcU = m_qtTempCoeffCb[qtlayer] + coeffOffsetC;
> - coeff_t* coeffSrcV = m_qtTempCoeffCr[qtlayer] + coeffOffsetC;
> - coeff_t* coeffDstU = cu->getCoeffCb() + coeffOffsetC;
> - coeff_t* coeffDstV = cu->getCoeffCr() + coeffOffsetC;
> + coeff_t* coeffSrcU = m_qtTempCoeff[1][qtlayer] + coeffOffsetC;
> + coeff_t* coeffSrcV = m_qtTempCoeff[2][qtlayer] + coeffOffsetC;
> + coeff_t* coeffDstU = cu->getCoeffCb() + coeffOffsetC;
> + coeff_t* coeffDstV = cu->getCoeffCr() + coeffOffsetC;
> ::memcpy(coeffDstU, coeffSrcU, sizeof(coeff_t) * numCoeffC);
> ::memcpy(coeffDstV, coeffSrcV, sizeof(coeff_t) * numCoeffC);
>
> @@ -1433,7 +1388,7 @@
> const bool splitIntoSubTUs = (chFmt == CHROMA_422);
> int sizeIdx = g_convertToBit[tuSize];
>
> - for (int chromaId = TEXT_CHROMA; chromaId < MAX_NUM_COMPONENT; chromaId++)
> + for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
> {
> TComTURecurse tuIterator;
> uint32_t curPartNum = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + actualTrDepth) << 1);
> @@ -1442,20 +1397,21 @@
> do
> {
> uint32_t absPartIdxC = tuIterator.m_absPartIdxTURelCU;
> - cu->setTransformSkipPartRange(0, (TextType)chromaId, absPartIdxC, tuIterator.m_absPartIdxStep);
> -
> - TextType ttype = (chromaId == 1) ? TEXT_CHROMA_U : TEXT_CHROMA_V;
> - pixel* fenc = (chromaId == 1) ? fencYuv->getCbAddr(absPartIdxC) : fencYuv->getCrAddr(absPartIdxC);
> - pixel* pred = (chromaId == 1) ? predYuv->getCbAddr(absPartIdxC) : predYuv->getCrAddr(absPartIdxC);
> - int16_t* residual = (chromaId == 1) ? resiYuv->getCbAddr(absPartIdxC) : resiYuv->getCrAddr(absPartIdxC);
> - pixel* recon = (chromaId == 1) ? reconYuv->getCbAddr(absPartIdxC) : reconYuv->getCrAddr(absPartIdxC);
> +
> + TextType ttype = (TextType)chromaId;
> + pixel* fenc = fencYuv->getChromaAddr(chromaId, absPartIdxC);
> + pixel* pred = predYuv->getChromaAddr(chromaId, absPartIdxC);
> + int16_t* residual = resiYuv->getChromaAddr(chromaId, absPartIdxC);
> + pixel* recon = reconYuv->getChromaAddr(chromaId, absPartIdxC);
> uint32_t coeffOffsetC = absPartIdxC << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
> - coeff_t* coeff = (chromaId == 1 ? cu->getCoeffCb() : cu->getCoeffCr()) + coeffOffsetC;
> + coeff_t* coeff = cu->getCoeff(ttype) + coeffOffsetC;
> uint32_t zorder = cu->getZorderIdxInCU() + absPartIdxC;
> - pixel* reconIPred = (chromaId == 1) ? cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder) : cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);
> + pixel* reconIPred = cu->getPic()->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder);
> uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
> +
> //bool useTransformSkipChroma = cu->getTransformSkip(absPartIdxC, ttype);
> const bool useTransformSkipChroma = false;
> + cu->setTransformSkipPartRange(0, ttype, absPartIdxC, tuIterator.m_absPartIdxStep);
>
> uint32_t chromaPredMode = cu->getChromaIntraDir(absPartIdxC);
> //===== update chroma mode =====
> @@ -1576,7 +1532,7 @@
>
> //===== determine set of modes to be tested (using prediction signal only) =====
> const int numModesAvailable = 35; //total number of Intra modes
> - pixel* fenc = fencYuv->getLumaAddr(pu, tuSize);
> + pixel* fenc = fencYuv->getLumaAddr(partOffset);
> uint32_t stride = predYuv->getStride();
> uint32_t rdModeList[FAST_UDI_MAX_RDMODE_NUM];
> int numModesForFullRD = intraModeNumFast[sizeIdx];
> @@ -1858,11 +1814,11 @@
> }
> chromaPredMode = (chFmt == CHROMA_422) ? g_chroma422IntraAngleMappingTable[chromaPredMode] : chromaPredMode;
> uint64_t cost = 0;
> - for (int chromaId = 0; chromaId < 2; chromaId++)
> + for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
> {
> - pixel* fenc = (chromaId > 0 ? fencYuv->getCrAddr(absPartIdx) : fencYuv->getCbAddr(absPartIdx));
> - pixel* pred = (chromaId > 0 ? predYuv->getCrAddr(absPartIdx) : predYuv->getCbAddr(absPartIdx));
> - pixel* chromaPred = TComPattern::getAdiChromaBuf(chromaId + 1, tuSize, m_predBuf);
> + pixel* fenc = fencYuv->getChromaAddr(chromaId, absPartIdx);
> + pixel* pred = predYuv->getChromaAddr(chromaId, absPartIdx);
> + pixel* chromaPred = TComPattern::getAdiChromaBuf(chromaId, tuSize, m_predBuf);
>
> //===== get prediction signal =====
> predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, scaleTuSize, chFmt);
> @@ -2056,14 +2012,7 @@
> else
> {
> reconStride = cu->getPic()->getPicYuvRec()->getCStride();
> - if (eText == TEXT_CHROMA_U)
> - {
> - reconPic = cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), cu->getZorderIdxInCU() + absPartIdx);
> - }
> - else
> - {
> - reconPic = cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), cu->getZorderIdxInCU() + absPartIdx);
> - }
> + reconPic = cu->getPic()->getPicYuvRec()->getChromaAddr(eText, cu->getAddr(), cu->getZorderIdxInCU() + absPartIdx);
> shiftPcm = X265_DEPTH - cu->getSlice()->getSPS()->getPCMBitDepthChroma();
> }
>
> @@ -2877,7 +2826,7 @@
>
> uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
> uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
> - coeff_t *coeffCurY = cu->getCoeffY() + coeffOffsetY;
> + coeff_t *coeffCurY = cu->getCoeffY() + coeffOffsetY;
> coeff_t *coeffCurU = cu->getCoeffCb() + coeffOffsetC;
> coeff_t *coeffCurV = cu->getCoeffCr() + coeffOffsetC;
>
> @@ -3081,9 +3030,9 @@
> const uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
> uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
> uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
> - coeff_t *coeffCurY = m_qtTempCoeffY[qtlayer] + coeffOffsetY;
> - coeff_t *coeffCurU = m_qtTempCoeffCb[qtlayer] + coeffOffsetC;
> - coeff_t *coeffCurV = m_qtTempCoeffCr[qtlayer] + coeffOffsetC;
> + coeff_t *coeffCurY = m_qtTempCoeff[0][qtlayer] + coeffOffsetY;
> + coeff_t *coeffCurU = m_qtTempCoeff[1][qtlayer] + coeffOffsetC;
> + coeff_t *coeffCurV = m_qtTempCoeff[2][qtlayer] + coeffOffsetC;
>
> cu->setTrIdxSubParts(depth - cu->getDepth(0), absPartIdx, depth);
> bool checkTransformSkip = cu->getSlice()->getPPS()->getUseTransformSkip() && !cu->getCUTransquantBypass(0);
> @@ -3677,7 +3626,7 @@
> bestCBF[TEXT_LUMA] = cu->getCbf(absPartIdx, TEXT_LUMA, trMode);
> if (bCodeChroma)
> {
> - for (uint32_t chromId = TEXT_CHROMA_U; chromId < MAX_NUM_COMPONENT; chromId++)
> + for (uint32_t chromId = TEXT_CHROMA_U; chromId <= TEXT_CHROMA_V; chromId++)
> {
> bestCBF[chromId] = cu->getCbf(absPartIdx, (TextType)chromId, trMode);
> if (splitIntoSubTUs)
> @@ -3707,7 +3656,7 @@
> bestCBF[TEXT_LUMA] = cu->getCbf(absPartIdx, TEXT_LUMA, trMode);
> if (bCodeChroma)
> {
> - for (uint32_t chromId = TEXT_CHROMA_U; chromId < MAX_NUM_COMPONENT; chromId++)
> + for (uint32_t chromId = TEXT_CHROMA_U; chromId <= TEXT_CHROMA_V; chromId++)
> {
> bestCBF[chromId] = cu->getCbf(absPartIdx, (TextType)chromId, trMode);
> if (splitIntoSubTUs)
> @@ -3796,7 +3745,7 @@
> const uint32_t numberOfSections = splitIntoSubTUs ? 2 : 1;
> uint32_t partIdxesPerSubTU = absPartIdxStep >> (splitIntoSubTUs ? 1 : 0);
>
> - for (uint32_t chromId = TEXT_CHROMA_U; chromId < MAX_NUM_COMPONENT; chromId++)
> + for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
> {
> for (uint32_t subTUIndex = 0; subTUIndex < numberOfSections; subTUIndex++)
> {
> @@ -3804,12 +3753,12 @@
>
> if (splitIntoSubTUs)
> {
> - const uint8_t combinedCBF = (bestsubTUCBF[chromId][subTUIndex] << subTUDepth) | (bestCBF[chromId] << trMode);
> - cu->setCbfPartRange(combinedCBF, (TextType)chromId, subTUPartIdx, partIdxesPerSubTU);
> + const uint8_t combinedCBF = (bestsubTUCBF[chromaId][subTUIndex] << subTUDepth) | (bestCBF[chromaId] << trMode);
> + cu->setCbfPartRange(combinedCBF, (TextType)chromaId, subTUPartIdx, partIdxesPerSubTU);
> }
> else
> {
> - cu->setCbfPartRange((bestCBF[chromId] << trMode), (TextType)chromId, subTUPartIdx, partIdxesPerSubTU);
> + cu->setCbfPartRange((bestCBF[chromaId] << trMode), (TextType)chromaId, subTUPartIdx, partIdxesPerSubTU);
> }
> }
> }
> @@ -3872,7 +3821,7 @@
> //Luma
> const uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
> uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
> - coeff_t *coeffCurY = m_qtTempCoeffY[qtlayer] + coeffOffsetY;
> + coeff_t *coeffCurY = m_qtTempCoeff[0][qtlayer] + coeffOffsetY;
>
> //Chroma
> bool bCodeChroma = true;
> @@ -3898,8 +3847,8 @@
> if (bCodeChroma)
> {
> uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
> - coeff_t *coeffCurU = m_qtTempCoeffCb[qtlayer] + coeffOffsetC;
> - coeff_t *coeffCurV = m_qtTempCoeffCr[qtlayer] + coeffOffsetC;
> + coeff_t *coeffCurU = m_qtTempCoeff[1][qtlayer] + coeffOffsetC;
> + coeff_t *coeffCurV = m_qtTempCoeff[2][qtlayer] + coeffOffsetC;
> uint32_t trSizeC = 1 << trSizeCLog2;
>
> if (!splitIntoSubTUs)
> @@ -3976,7 +3925,7 @@
> if (bSpatial)
> {
> uint32_t trSize = 1 << trSizeLog2;
> - m_qtTempShortYuv[qtlayer].copyPartToPartLuma(resiYuv, absPartIdx, trSize, trSize);
> + m_qtTempShortYuv[qtlayer].copyPartToPartLuma(resiYuv, absPartIdx, trSize);
>
> if (bCodeChroma)
> {
> @@ -3987,18 +3936,18 @@
> {
> uint32_t numCoeffY = 1 << (trSizeLog2 * 2);
> uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
> - coeff_t* coeffSrcY = m_qtTempCoeffY[qtlayer] + coeffOffsetY;
> - coeff_t* coeffDstY = cu->getCoeffY() + coeffOffsetY;
> + coeff_t* coeffSrcY = m_qtTempCoeff[0][qtlayer] + coeffOffsetY;
> + coeff_t* coeffDstY = cu->getCoeffY() + coeffOffsetY;
> ::memcpy(coeffDstY, coeffSrcY, sizeof(coeff_t) * numCoeffY);
> if (bCodeChroma)
> {
> uint32_t numCoeffC = 1 << (trSizeCLog2 * 2 + (chFmt == CHROMA_422));
> uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
>
> - coeff_t* coeffSrcU = m_qtTempCoeffCb[qtlayer] + coeffOffsetC;
> - coeff_t* coeffSrcV = m_qtTempCoeffCr[qtlayer] + coeffOffsetC;
> - coeff_t* coeffDstU = cu->getCoeffCb() + coeffOffsetC;
> - coeff_t* coeffDstV = cu->getCoeffCr() + coeffOffsetC;
> + coeff_t* coeffSrcU = m_qtTempCoeff[1][qtlayer] + coeffOffsetC;
> + coeff_t* coeffSrcV = m_qtTempCoeff[2][qtlayer] + coeffOffsetC;
> + coeff_t* coeffDstU = cu->getCoeffCb() + coeffOffsetC;
> + coeff_t* coeffDstV = cu->getCoeffCr() + coeffOffsetC;
> ::memcpy(coeffDstU, coeffSrcU, sizeof(coeff_t) * numCoeffC);
> ::memcpy(coeffDstV, coeffSrcV, sizeof(coeff_t) * numCoeffC);
> }
> diff -r 807ee7f1597b -r 8e2f16c13099 source/Lib/TLibEncoder/TEncSearch.h
> --- a/source/Lib/TLibEncoder/TEncSearch.h Tue May 27 23:22:21 2014 +0530
> +++ b/source/Lib/TLibEncoder/TEncSearch.h Wed May 28 16:48:47 2014 +0900
> @@ -115,15 +115,11 @@
>
> ShortYuv* m_qtTempShortYuv;
>
> - coeff_t** m_qtTempCoeffY;
> - coeff_t** m_qtTempCoeffCb;
> - coeff_t** m_qtTempCoeffCr;
> + coeff_t** m_qtTempCoeff[3];
> uint8_t* m_qtTempTrIdx;
> uint8_t* m_qtTempCbf[3];
>
> - coeff_t* m_qtTempTUCoeffY;
> - coeff_t* m_qtTempTUCoeffCb;
> - coeff_t* m_qtTempTUCoeffCr;
> + coeff_t* m_qtTempTUCoeff[3];
> uint8_t* m_qtTempTransformSkipFlag[3];
> TComYuv m_qtTempTransformSkipYuv;
>
> @@ -230,7 +226,7 @@
> void xStoreIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx);
> void xLoadIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx);
> void xStoreIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t chromaId, const bool splitIntoSubTUs);
> - void xLoadIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t chromaId, const bool splitIntoSubTUs);
> + void xLoadIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t chromaId);
>
> // --------------------------------------------------------------------------------------------
> // Inter search (AMP)
> diff -r 807ee7f1597b -r 8e2f16c13099 source/common/shortyuv.cpp
> --- a/source/common/shortyuv.cpp Tue May 27 23:22:21 2014 +0530
> +++ b/source/common/shortyuv.cpp Wed May 28 16:48:47 2014 +0900
> @@ -35,13 +35,15 @@
>
> ShortYuv::ShortYuv()
> {
> - m_bufY = NULL;
> - m_bufCb = NULL;
> - m_bufCr = NULL;
> + m_buf[0] = NULL;
> + m_buf[1] = NULL;
> + m_buf[2] = NULL;
> }
>
> ShortYuv::~ShortYuv()
> -{}
> +{
> + destroy();
> +}
>
> bool ShortYuv::create(uint32_t width, uint32_t height, int csp)
> {
> @@ -56,9 +58,12 @@
> m_cwidth = width >> m_hChromaShift;
> m_cheight = height >> m_vChromaShift;
>
> - CHECKED_MALLOC(m_bufY, int16_t, width * height);
> - CHECKED_MALLOC(m_bufCb, int16_t, m_cwidth * m_cheight);
> - CHECKED_MALLOC(m_bufCr, int16_t, m_cwidth * m_cheight);
> + uint32_t sizeL = width * height;
> + uint32_t sizeC = m_cwidth * m_cheight;
> + X265_CHECK((sizeC & 15) == 0, "invalid size");
> + CHECKED_MALLOC(m_buf[0], int16_t, sizeL + sizeC * 2);
> + m_buf[1] = m_buf[0] + sizeL;
> + m_buf[2] = m_buf[0] + sizeL + sizeC;
> return true;
>
> fail:
> @@ -67,19 +72,17 @@
>
> void ShortYuv::destroy()
> {
> - X265_FREE(m_bufY);
> - m_bufY = NULL;
> - X265_FREE(m_bufCb);
> - m_bufCb = NULL;
> - X265_FREE(m_bufCr);
> - m_bufCr = NULL;
> + X265_FREE(m_buf[0]);
> + m_buf[0] = NULL;
> + m_buf[1] = NULL;
> + m_buf[2] = NULL;
> }
>
> void ShortYuv::clear()
> {
> - ::memset(m_bufY, 0, (m_width * m_height) * sizeof(int16_t));
> - ::memset(m_bufCb, 0, (m_cwidth * m_cheight) * sizeof(int16_t));
> - ::memset(m_bufCr, 0, (m_cwidth * m_cheight) * sizeof(int16_t));
> + ::memset(m_buf[0], 0, (m_width * m_height) * sizeof(int16_t));
> + ::memset(m_buf[1], 0, (m_cwidth * m_cheight) * sizeof(int16_t));
> + ::memset(m_buf[2], 0, (m_cwidth * m_cheight) * sizeof(int16_t));
> }
>
> void ShortYuv::subtract(TComYuv* srcYuv0, TComYuv* srcYuv1, uint32_t partSize)
> @@ -116,6 +119,24 @@
> primitives.pixeladd_ss(cpartSize, cpartSize, getCrAddr(), m_cwidth, srcV0, srcV1, srcYuv0->m_cwidth, srcYuv1->m_cwidth);
> }
>
> +void ShortYuv::copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t partSize)
> +{
> + int part = partitionFromSize(partSize);
> + int16_t* src = getLumaAddr(partIdx);
> + int16_t* dst = dstPicYuv->getLumaAddr(partIdx);
> +
> + primitives.luma_copy_ss[part](dst, dstPicYuv->m_width, src, m_width);
> +}
> +
> +void ShortYuv::copyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t partSize)
> +{
> + int part = partitionFromSize(partSize);
> + int16_t* src = getLumaAddr(partIdx);
> + pixel* dst = dstPicYuv->getLumaAddr(partIdx);
> +
> + primitives.luma_copy_sp[part](dst, dstPicYuv->getStride(), src, m_width);
> +}
> +
> void ShortYuv::copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height)
> {
> int part = partitionFromSizes(width, height);
> @@ -181,35 +202,15 @@
>
> void ShortYuv::copyPartToPartShortChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId)
> {
> + X265_CHECK(chromaId == 1 || chromaId == 2, "invalid chroma id");
> +
> int part = partitionFromSize(lumaSize);
>
> - if (chromaId == 0)
> - {
> - int16_t* srcU = getCbAddr(partIdx);
> - int16_t* dstU = dstPicYuv->getCbAddr(partIdx);
> - uint32_t srcStride = m_cwidth;
> - uint32_t dstStride = dstPicYuv->m_cwidth;
> - primitives.chroma[m_csp].copy_ss[part](dstU, dstStride, srcU, srcStride);
> - }
> - else if (chromaId == 1)
> - {
> - int16_t* srcV = getCrAddr(partIdx);
> - int16_t* dstV = dstPicYuv->getCrAddr(partIdx);
> - uint32_t srcStride = m_cwidth;
> - uint32_t dstStride = dstPicYuv->m_cwidth;
> - primitives.chroma[m_csp].copy_ss[part](dstV, dstStride, srcV, srcStride);
> - }
> - else
> - {
> - int16_t* srcU = getCbAddr(partIdx);
> - int16_t* srcV = getCrAddr(partIdx);
> - int16_t* dstU = dstPicYuv->getCbAddr(partIdx);
> - int16_t* dstV = dstPicYuv->getCrAddr(partIdx);
> - uint32_t srcStride = m_cwidth;
> - uint32_t dstStride = dstPicYuv->m_cwidth;
> - primitives.chroma[m_csp].copy_ss[part](dstU, dstStride, srcU, srcStride);
> - primitives.chroma[m_csp].copy_ss[part](dstV, dstStride, srcV, srcStride);
> - }
> + int16_t* src = getChromaAddr(chromaId, partIdx);
> + int16_t* dst = dstPicYuv->getChromaAddr(chromaId, partIdx);
> + uint32_t srcStride = m_cwidth;
> + uint32_t dstStride = dstPicYuv->m_cwidth;
> + primitives.chroma[m_csp].copy_ss[part](dst, dstStride, src, srcStride);
> }
>
> void ShortYuv::copyPartToPartYuvChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId, const bool splitIntoSubTUs)
> @@ -218,20 +219,9 @@
>
> int part = splitIntoSubTUs ? NUM_CHROMA_PARTITIONS422 : partitionFromSize(lumaSize);
>
> - if (chromaId == 1)
> - {
> - int16_t* srcU = getCbAddr(partIdx);
> - pixel* dstU = dstPicYuv->getCbAddr(partIdx);
> - uint32_t srcStride = m_cwidth;
> - uint32_t dstStride = dstPicYuv->getCStride();
> - primitives.chroma[m_csp].copy_sp[part](dstU, dstStride, srcU, srcStride);
> - }
> - else
> - {
> - int16_t* srcV = getCrAddr(partIdx);
> - pixel* dstV = dstPicYuv->getCrAddr(partIdx);
> - uint32_t srcStride = m_cwidth;
> - uint32_t dstStride = dstPicYuv->getCStride();
> - primitives.chroma[m_csp].copy_sp[part](dstV, dstStride, srcV, srcStride);
> - }
> + int16_t* src = getChromaAddr(chromaId, partIdx);
> + pixel* dst = dstPicYuv->getChromaAddr(chromaId, partIdx);
> + uint32_t srcStride = m_cwidth;
> + uint32_t dstStride = dstPicYuv->getCStride();
> + primitives.chroma[m_csp].copy_sp[part](dst, dstStride, src, srcStride);
> }
> diff -r 807ee7f1597b -r 8e2f16c13099 source/common/shortyuv.h
> --- a/source/common/shortyuv.h Tue May 27 23:22:21 2014 +0530
> +++ b/source/common/shortyuv.h Wed May 28 16:48:47 2014 +0900
> @@ -37,9 +37,7 @@
> {
> public:
>
> - int16_t* m_bufY;
> - int16_t* m_bufCb;
> - int16_t* m_bufCr;
> + int16_t* m_buf[3];
>
> uint32_t m_width;
> uint32_t m_height;
> @@ -51,7 +49,7 @@
> int m_vChromaShift;
>
> ShortYuv();
> - virtual ~ShortYuv();
> + ~ShortYuv();
>
> int getChromaAddrOffset(uint32_t partUnitIdx, uint32_t width)
> {
> @@ -69,56 +67,37 @@
> return blkX + blkY * width;
> }
>
> - static int getAddrOffset(uint32_t idx, uint32_t size, uint32_t width)
> - {
> - int blkX = (idx * size) & (width - 1);
> - int blkY = (idx * size) & ~(width - 1);
> -
> - return blkX + blkY * size;
> - }
> -
> - int getChromaAddrOffset(uint32_t unitIdx, uint32_t size, uint32_t width)
> - {
> - int blkX = (unitIdx * size) & (width - 1);
> - int blkY = (unitIdx * size) & ~(width - 1);
> -
> - if (m_csp == CHROMA_422) blkY <<= 1;
> -
> - return blkX + blkY * size;
> - }
> -
> bool create(uint32_t width, uint32_t height, int csp);
>
> void destroy();
> void clear();
>
> - int16_t* getLumaAddr() { return m_bufY; }
> + int16_t* getLumaAddr() { return m_buf[0]; }
>
> - int16_t* getCbAddr() { return m_bufCb; }
> + int16_t* getCbAddr() { return m_buf[1]; }
>
> - int16_t* getCrAddr() { return m_bufCr; }
> + int16_t* getCrAddr() { return m_buf[2]; }
> +
> + int16_t* getChromaAddr(uint32_t chromaId) { return m_buf[chromaId]; }
>
> // Access starting position of YUV partition unit buffer
> - int16_t* getLumaAddr(uint32_t partUnitIdx) { return m_bufY + getAddrOffset(partUnitIdx, m_width); }
> + int16_t* getLumaAddr(uint32_t partUnitIdx) { return m_buf[0] + getAddrOffset(partUnitIdx, m_width); }
>
> - int16_t* getCbAddr(uint32_t partUnitIdx) { return m_bufCb + getChromaAddrOffset(partUnitIdx, m_cwidth); }
> + int16_t* getCbAddr(uint32_t partUnitIdx) { return m_buf[1] + getChromaAddrOffset(partUnitIdx, m_cwidth); }
>
> - int16_t* getCrAddr(uint32_t partUnitIdx) { return m_bufCr + getChromaAddrOffset(partUnitIdx, m_cwidth); }
> + int16_t* getCrAddr(uint32_t partUnitIdx) { return m_buf[2] + getChromaAddrOffset(partUnitIdx, m_cwidth); }
>
> - // Access starting position of YUV transform unit buffer
> - int16_t* getLumaAddr(uint32_t partIdx, uint32_t size) { return m_bufY + getAddrOffset(partIdx, size, m_width); }
> -
> - int16_t* getCbAddr(uint32_t partIdx, uint32_t size) { return m_bufCb + getChromaAddrOffset(partIdx, size, m_cwidth); }
> -
> - int16_t* getCrAddr(uint32_t partIdx, uint32_t size) { return m_bufCr + getChromaAddrOffset(partIdx, size, m_cwidth); }
> + int16_t* getChromaAddr(uint32_t chromaId, uint32_t partUnitIdx) { return m_buf[chromaId] + getChromaAddrOffset(partUnitIdx, m_cwidth); }
>
> void subtract(TComYuv* srcYuv0, TComYuv* srcYuv1, uint32_t partSize);
> void addClip(ShortYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t partSize);
>
> + void copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t partSize);
> void copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height);
> void copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, bool bChromaSame);
> void copyPartToPartShortChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId);
>
> + void copyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t partSize);
> void copyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height);
> void copyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, bool bChromaSame);
> void copyPartToPartYuvChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId, const bool splitIntoSubTUs);
> @@ -128,6 +107,7 @@
> // -------------------------------------------------------------------------------------------------------------------
>
> int getHorzChromaShift() { return m_hChromaShift; }
> +
> int getVertChromaShift() { return m_vChromaShift; }
> };
> }
> diff -r 807ee7f1597b -r 8e2f16c13099 source/encoder/reference.cpp
> --- a/source/encoder/reference.cpp Tue May 27 23:22:21 2014 +0530
> +++ b/source/encoder/reference.cpp Wed May 28 16:48:47 2014 +0900
> @@ -43,7 +43,7 @@
> intptr_t startpad = pic->m_lumaMarginY * lumaStride + pic->m_lumaMarginX;
>
> /* directly reference the pre-extended integer pel plane */
> - fpelPlane = pic->m_picBufY + startpad;
> + fpelPlane = pic->m_picBuf[0] + startpad;
> isWeighted = false;
>
> if (w)
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
--
Steve Borho
More information about the x265-devel mailing list