[x265] [PATCH REBASE] add support for Monochrome color space (X265_CSP_I400)
Deepthi Nandakumar
deepthi at multicorewareinc.com
Thu Oct 8 06:33:56 CEST 2015
Thanks, Steve. I have this monochrome patch from Fabrice, an SEI update
from Luca of libav, and numerous asm patches from the dev team waiting to
be pushed in.
Once the scenecut bug is solved, I plan to tag 1.8 and then push these in.
1.9 should probably be tagged in another 3-4 weeks, since there are many
features which will not make it into 1.8.
On Wed, Oct 7, 2015 at 10:20 PM, Steve Borho <steve at borho.org> wrote:
> # HG changeset patch
> # User Steve Borho <steve at borho.org>
> # Date 1442853618 18000
> # Mon Sep 21 11:40:18 2015 -0500
> # Node ID 5602b4bc1fec175e7c5bf14ef18978a50e3bc07f
> # Parent f8b8ebdc54578e6735216d8b9abce5ba80c05bd8
> add support for Monochrome color space (X265_CSP_I400)
>
> This patch was extracted from changes made by Fabrice Bellard for BPG
>
> diff -r f8b8ebdc5457 -r 5602b4bc1fec source/common/deblock.cpp
> --- a/source/common/deblock.cpp Mon Sep 28 14:34:41 2015 +0530
> +++ b/source/common/deblock.cpp Mon Sep 21 11:40:18 2015 -0500
> @@ -109,7 +109,7 @@
> for (uint32_t e = 0; e < numUnits; e += partIdxIncr)
> {
> edgeFilterLuma(cu, absPartIdx, depth, dir, e, blockStrength);
> - if (!((e0 + e) & chromaMask))
> + if (cu->m_chromaFormat != X265_CSP_I400 && !((e0 + e) &
> chromaMask))
> edgeFilterChroma(cu, absPartIdx, depth, dir, e,
> blockStrength);
> }
> }
> diff -r f8b8ebdc5457 -r 5602b4bc1fec source/common/frame.cpp
> --- a/source/common/frame.cpp Mon Sep 28 14:34:41 2015 +0530
> +++ b/source/common/frame.cpp Mon Sep 21 11:40:18 2015 -0500
> @@ -73,14 +73,20 @@
> * end of the picture accessing uninitialized pixels */
> int maxHeight = sps.numCuInHeight * g_maxCUSize;
> memset(m_reconPic->m_picOrg[0], 0, sizeof(pixel) *
> m_reconPic->m_stride * maxHeight);
> - memset(m_reconPic->m_picOrg[1], 0, sizeof(pixel) *
> m_reconPic->m_strideC * (maxHeight >> m_reconPic->m_vChromaShift));
> - memset(m_reconPic->m_picOrg[2], 0, sizeof(pixel) *
> m_reconPic->m_strideC * (maxHeight >> m_reconPic->m_vChromaShift));
> + if (m_reconPic->m_picCsp != X265_CSP_I400)
> + {
> + memset(m_reconPic->m_picOrg[1], 0, sizeof(pixel) *
> m_reconPic->m_strideC * (maxHeight >> m_reconPic->m_vChromaShift));
> + memset(m_reconPic->m_picOrg[2], 0, sizeof(pixel) *
> m_reconPic->m_strideC * (maxHeight >> m_reconPic->m_vChromaShift));
> + }
>
> /* use pre-calculated cu/pu offsets cached in the SPS structure */
> - m_reconPic->m_cuOffsetC = sps.cuOffsetC;
> m_reconPic->m_cuOffsetY = sps.cuOffsetY;
> - m_reconPic->m_buOffsetC = sps.buOffsetC;
> m_reconPic->m_buOffsetY = sps.buOffsetY;
> + if (m_reconPic->m_picCsp != X265_CSP_I400)
> + {
> + m_reconPic->m_cuOffsetC = sps.cuOffsetC;
> + m_reconPic->m_buOffsetC = sps.buOffsetC;
> + }
> }
> return ok;
> }
> diff -r f8b8ebdc5457 -r 5602b4bc1fec source/common/param.cpp
> --- a/source/common/param.cpp Mon Sep 28 14:34:41 2015 +0530
> +++ b/source/common/param.cpp Mon Sep 21 11:40:18 2015 -0500
> @@ -1069,7 +1069,7 @@
>
> CHECK(param->sourceWidth < (int)param->maxCUSize ||
> param->sourceHeight < (int)param->maxCUSize,
> "Picture size must be at least one CTU");
> - CHECK(param->internalCsp < X265_CSP_I420 || X265_CSP_I444 <
> param->internalCsp,
> + CHECK(param->internalCsp < X265_CSP_I400 || X265_CSP_I444 <
> param->internalCsp,
> "Color space must be i420, i422, or i444");
> CHECK(param->sourceWidth & !!CHROMA_H_SHIFT(param->internalCsp),
> "Picture width must be an integer multiple of the specified
> chroma subsampling");
> diff -r f8b8ebdc5457 -r 5602b4bc1fec source/common/picyuv.cpp
> --- a/source/common/picyuv.cpp Mon Sep 28 14:34:41 2015 +0530
> +++ b/source/common/picyuv.cpp Mon Sep 21 11:40:18 2015 -0500
> @@ -70,12 +70,16 @@
> int maxHeight = numCuInHeight * g_maxCUSize;
>
> CHECKED_MALLOC(m_picBuf[0], pixel, m_stride * (maxHeight +
> (m_lumaMarginY * 2)));
> - CHECKED_MALLOC(m_picBuf[1], pixel, m_strideC * ((maxHeight >>
> m_vChromaShift) + (m_chromaMarginY * 2)));
> - CHECKED_MALLOC(m_picBuf[2], pixel, m_strideC * ((maxHeight >>
> m_vChromaShift) + (m_chromaMarginY * 2)));
> + m_picOrg[0] = m_picBuf[0] + m_lumaMarginY * m_stride +
> m_lumaMarginX;
>
> - m_picOrg[0] = m_picBuf[0] + m_lumaMarginY * m_stride +
> m_lumaMarginX;
> - m_picOrg[1] = m_picBuf[1] + m_chromaMarginY * m_strideC +
> m_chromaMarginX;
> - m_picOrg[2] = m_picBuf[2] + m_chromaMarginY * m_strideC +
> m_chromaMarginX;
> + if (m_picCsp != X265_CSP_I400)
> + {
> + CHECKED_MALLOC(m_picBuf[1], pixel, m_strideC * ((maxHeight >>
> m_vChromaShift) + (m_chromaMarginY * 2)));
> + CHECKED_MALLOC(m_picBuf[2], pixel, m_strideC * ((maxHeight >>
> m_vChromaShift) + (m_chromaMarginY * 2)));
> +
> + m_picOrg[1] = m_picBuf[1] + m_chromaMarginY * m_strideC +
> m_chromaMarginX;
> + m_picOrg[2] = m_picBuf[2] + m_chromaMarginY * m_strideC +
> m_chromaMarginX;
> + }
>
> return true;
>
> @@ -90,24 +94,32 @@
> {
> uint32_t numPartitions = 1 << (g_unitSizeDepth * 2);
> CHECKED_MALLOC(m_cuOffsetY, intptr_t, sps.numCuInWidth *
> sps.numCuInHeight);
> - CHECKED_MALLOC(m_cuOffsetC, intptr_t, sps.numCuInWidth *
> sps.numCuInHeight);
> + if (m_picCsp != X265_CSP_I400)
> + {
> + CHECKED_MALLOC(m_cuOffsetC, intptr_t, sps.numCuInWidth *
> sps.numCuInHeight);
> + }
> for (uint32_t cuRow = 0; cuRow < sps.numCuInHeight; cuRow++)
> {
> for (uint32_t cuCol = 0; cuCol < sps.numCuInWidth; cuCol++)
> {
> m_cuOffsetY[cuRow * sps.numCuInWidth + cuCol] = m_stride *
> cuRow * g_maxCUSize + cuCol * g_maxCUSize;
> - m_cuOffsetC[cuRow * sps.numCuInWidth + cuCol] = m_strideC *
> cuRow * (g_maxCUSize >> m_vChromaShift) + cuCol * (g_maxCUSize >>
> m_hChromaShift);
> + if (m_picCsp != X265_CSP_I400)
> + m_cuOffsetC[cuRow * sps.numCuInWidth + cuCol] = m_strideC
> * cuRow * (g_maxCUSize >> m_vChromaShift) + cuCol * (g_maxCUSize >>
> m_hChromaShift);
> }
> }
>
> CHECKED_MALLOC(m_buOffsetY, intptr_t, (size_t)numPartitions);
> - CHECKED_MALLOC(m_buOffsetC, intptr_t, (size_t)numPartitions);
> + if (m_picCsp != X265_CSP_I400)
> + {
> + CHECKED_MALLOC(m_buOffsetC, intptr_t, (size_t)numPartitions);
> + }
> for (uint32_t idx = 0; idx < numPartitions; ++idx)
> {
> intptr_t x = g_zscanToPelX[idx];
> intptr_t y = g_zscanToPelY[idx];
> m_buOffsetY[idx] = m_stride * y + x;
> - m_buOffsetC[idx] = m_strideC * (y >> m_vChromaShift) + (x >>
> m_hChromaShift);
> + if (m_picCsp != X265_CSP_I400)
> + m_buOffsetC[idx] = m_strideC * (y >> m_vChromaShift) + (x >>
> m_hChromaShift);
> }
>
> return true;
> @@ -168,8 +180,11 @@
> int shift = (X265_DEPTH - 8);
>
> primitives.planecopy_cp(yChar, pic.stride[0] /
> sizeof(*yChar), yPixel, m_stride, width, height, shift);
> - primitives.planecopy_cp(uChar, pic.stride[1] /
> sizeof(*uChar), uPixel, m_strideC, width >> m_hChromaShift, height >>
> m_vChromaShift, shift);
> - primitives.planecopy_cp(vChar, pic.stride[2] /
> sizeof(*vChar), vPixel, m_strideC, width >> m_hChromaShift, height >>
> m_vChromaShift, shift);
> + if (m_picCsp != X265_CSP_I400)
> + {
> + primitives.planecopy_cp(uChar, pic.stride[1] /
> sizeof(*uChar), uPixel, m_strideC, width >> m_hChromaShift, height >>
> m_vChromaShift, shift);
> + primitives.planecopy_cp(vChar, pic.stride[2] /
> sizeof(*vChar), vPixel, m_strideC, width >> m_hChromaShift, height >>
> m_vChromaShift, shift);
> + }
> }
> #else /* Case for (X265_DEPTH == 8) */
> // TODO: Does we need this path? may merge into above in future
> @@ -190,15 +205,18 @@
> yChar += pic.stride[0] / sizeof(*yChar);
> }
>
> - for (int r = 0; r < height >> m_vChromaShift; r++)
> + if (m_picCsp != X265_CSP_I400)
> {
> - memcpy(uPixel, uChar, (width >> m_hChromaShift) *
> sizeof(pixel));
> - memcpy(vPixel, vChar, (width >> m_hChromaShift) *
> sizeof(pixel));
> + for (int r = 0; r < height >> m_vChromaShift; r++)
> + {
> + memcpy(uPixel, uChar, (width >> m_hChromaShift) *
> sizeof(pixel));
> + memcpy(vPixel, vChar, (width >> m_hChromaShift) *
> sizeof(pixel));
>
> - uPixel += m_strideC;
> - vPixel += m_strideC;
> - uChar += pic.stride[1] / sizeof(*uChar);
> - vChar += pic.stride[2] / sizeof(*vChar);
> + uPixel += m_strideC;
> + vPixel += m_strideC;
> + uChar += pic.stride[1] / sizeof(*uChar);
> + vChar += pic.stride[2] / sizeof(*vChar);
> + }
> }
> }
> #endif /* (X265_DEPTH > 8) */
> @@ -220,15 +238,21 @@
> {
> /* shift right and mask pixels to final size */
> primitives.planecopy_sp(yShort, pic.stride[0] /
> sizeof(*yShort), yPixel, m_stride, width, height, shift, mask);
> - primitives.planecopy_sp(uShort, pic.stride[1] /
> sizeof(*uShort), uPixel, m_strideC, width >> m_hChromaShift, height >>
> m_vChromaShift, shift, mask);
> - primitives.planecopy_sp(vShort, pic.stride[2] /
> sizeof(*vShort), vPixel, m_strideC, width >> m_hChromaShift, height >>
> m_vChromaShift, shift, mask);
> + if (m_picCsp != X265_CSP_I400)
> + {
> + primitives.planecopy_sp(uShort, pic.stride[1] /
> sizeof(*uShort), uPixel, m_strideC, width >> m_hChromaShift, height >>
> m_vChromaShift, shift, mask);
> + primitives.planecopy_sp(vShort, pic.stride[2] /
> sizeof(*vShort), vPixel, m_strideC, width >> m_hChromaShift, height >>
> m_vChromaShift, shift, mask);
> + }
> }
> else /* Case for (pic.bitDepth <= X265_DEPTH) */
> {
> /* shift left and mask pixels to final size */
> primitives.planecopy_sp_shl(yShort, pic.stride[0] /
> sizeof(*yShort), yPixel, m_stride, width, height, shift, mask);
> - primitives.planecopy_sp_shl(uShort, pic.stride[1] /
> sizeof(*uShort), uPixel, m_strideC, width >> m_hChromaShift, height >>
> m_vChromaShift, shift, mask);
> - primitives.planecopy_sp_shl(vShort, pic.stride[2] /
> sizeof(*vShort), vPixel, m_strideC, width >> m_hChromaShift, height >>
> m_vChromaShift, shift, mask);
> + if (m_picCsp != X265_CSP_I400)
> + {
> + primitives.planecopy_sp_shl(uShort, pic.stride[1] /
> sizeof(*uShort), uPixel, m_strideC, width >> m_hChromaShift, height >>
> m_vChromaShift, shift, mask);
> + primitives.planecopy_sp_shl(vShort, pic.stride[2] /
> sizeof(*vShort), vPixel, m_strideC, width >> m_hChromaShift, height >>
> m_vChromaShift, shift, mask);
> + }
> }
> }
>
> @@ -250,30 +274,36 @@
> Y += m_stride;
> }
>
> - for (int r = 0; r < height >> m_vChromaShift; r++)
> + if (m_picCsp != X265_CSP_I400)
> {
> - for (int x = 0; x < padx >> m_hChromaShift; x++)
> + for (int r = 0; r < height >> m_vChromaShift; r++)
> {
> - U[(width >> m_hChromaShift) + x] = U[(width >>
> m_hChromaShift) - 1];
> - V[(width >> m_hChromaShift) + x] = V[(width >>
> m_hChromaShift) - 1];
> + for (int x = 0; x < padx >> m_hChromaShift; x++)
> + {
> + U[(width >> m_hChromaShift) + x] = U[(width >>
> m_hChromaShift) - 1];
> + V[(width >> m_hChromaShift) + x] = V[(width >>
> m_hChromaShift) - 1];
> + }
> +
> + U += m_strideC;
> + V += m_strideC;
> }
> -
> - U += m_strideC;
> - V += m_strideC;
> }
>
> /* extend the bottom if height was not multiple of the minimum CU
> size */
> Y = m_picOrg[0] + (height - 1) * m_stride;
> - U = m_picOrg[1] + ((height >> m_vChromaShift) - 1) * m_strideC;
> - V = m_picOrg[2] + ((height >> m_vChromaShift) - 1) * m_strideC;
>
> for (int i = 1; i <= pady; i++)
> memcpy(Y + i * m_stride, Y, (width + padx) * sizeof(pixel));
>
> - for (int j = 1; j <= pady >> m_vChromaShift; j++)
> + if (m_picCsp != X265_CSP_I400)
> {
> - memcpy(U + j * m_strideC, U, ((width + padx) >> m_hChromaShift) *
> sizeof(pixel));
> - memcpy(V + j * m_strideC, V, ((width + padx) >> m_hChromaShift) *
> sizeof(pixel));
> + U = m_picOrg[1] + ((height >> m_vChromaShift) - 1) * m_strideC;
> + V = m_picOrg[2] + ((height >> m_vChromaShift) - 1) * m_strideC;
> + for (int j = 1; j <= pady >> m_vChromaShift; j++)
> + {
> + memcpy(U + j * m_strideC, U, ((width + padx) >>
> m_hChromaShift) * sizeof(pixel));
> + memcpy(V + j * m_strideC, V, ((width + padx) >>
> m_hChromaShift) * sizeof(pixel));
> + }
> }
> }
>
> diff -r f8b8ebdc5457 -r 5602b4bc1fec source/common/predict.cpp
> --- a/source/common/predict.cpp Mon Sep 28 14:34:41 2015 +0530
> +++ b/source/common/predict.cpp Mon Sep 21 11:40:18 2015 -0500
> @@ -85,6 +85,14 @@
> int refIdx0 = cu.m_refIdx[0][pu.puAbsPartIdx];
> int refIdx1 = cu.m_refIdx[1][pu.puAbsPartIdx];
>
> + /* XXX: disable chroma at a higher level ? */
> + if (cu.m_chromaFormat == X265_CSP_I400)
> + {
> + bChroma = false;
> + if (!bLuma)
> + return;
> + }
> +
> if (cu.m_slice->isInterP())
> {
> /* P Slice */
> @@ -99,7 +107,8 @@
>
> if (cu.m_slice->m_pps->bUseWeightPred && wp0->bPresentFlag)
> {
> - for (int plane = 0; plane < 3; plane++)
> + int numPlanes = cu.m_chromaFormat == X265_CSP_I400 ? 1 : 3;
> + for (int plane = 0; plane < numPlanes; plane++)
> {
> wv0[plane].w = wp0[plane].inputWeight;
> wv0[plane].offset = wp0[plane].inputOffset * (1 <<
> (X265_DEPTH - 8));
> @@ -136,13 +145,14 @@
>
> if (cu.m_slice->m_pps->bUseWeightedBiPred)
> {
> + int numPlanes = cu.m_chromaFormat == X265_CSP_I400 ? 1 : 3;
> +
> pwp0 = refIdx0 >= 0 ?
> cu.m_slice->m_weightPredTable[0][refIdx0] : NULL;
> pwp1 = refIdx1 >= 0 ?
> cu.m_slice->m_weightPredTable[1][refIdx1] : NULL;
> -
> if (pwp0 && pwp1 && (pwp0->bPresentFlag ||
> pwp1->bPresentFlag))
> {
> /* biprediction weighting */
> - for (int plane = 0; plane < 3; plane++)
> + for (int plane = 0; plane < numPlanes; plane++)
> {
> wv0[plane].w = pwp0[plane].inputWeight;
> wv0[plane].o = pwp0[plane].inputOffset * (1 <<
> (X265_DEPTH - 8));
> @@ -159,7 +169,7 @@
> {
> /* uniprediction weighting, always outputs to wv0 */
> const WeightParam* pwp = (refIdx0 >= 0) ? pwp0 : pwp1;
> - for (int plane = 0; plane < 3; plane++)
> + for (int plane = 0; plane < numPlanes; plane++)
> {
> wv0[plane].w = pwp[plane].inputWeight;
> wv0[plane].offset = pwp[plane].inputOffset * (1 <<
> (X265_DEPTH - 8));
> diff -r f8b8ebdc5457 -r 5602b4bc1fec source/common/shortyuv.cpp
> --- a/source/common/shortyuv.cpp Mon Sep 28 14:34:41 2015 +0530
> +++ b/source/common/shortyuv.cpp Mon Sep 21 11:40:18 2015 -0500
> @@ -40,19 +40,26 @@
> bool ShortYuv::create(uint32_t size, int csp)
> {
> m_csp = csp;
> - m_hChromaShift = CHROMA_H_SHIFT(csp);
> - m_vChromaShift = CHROMA_V_SHIFT(csp);
> -
> m_size = size;
> - m_csize = size >> m_hChromaShift;
>
> size_t sizeL = size * size;
> - size_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift);
> - X265_CHECK((sizeC & 15) == 0, "invalid size");
> -
> - CHECKED_MALLOC(m_buf[0], int16_t, sizeL + sizeC * 2);
> - m_buf[1] = m_buf[0] + sizeL;
> - m_buf[2] = m_buf[0] + sizeL + sizeC;
> + if (m_csp != X265_CSP_I400)
> + {
> + m_hChromaShift = CHROMA_H_SHIFT(csp);
> + m_vChromaShift = CHROMA_V_SHIFT(csp);
> + m_csize = size >> m_hChromaShift;
> + size_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift);
> + X265_CHECK((sizeC & 15) == 0, "invalid size");
> + CHECKED_MALLOC(m_buf[0], int16_t, sizeL + sizeC * 2);
> + m_buf[1] = m_buf[0] + sizeL;
> + m_buf[2] = m_buf[0] + sizeL + sizeC;
> + }
> + else
> + {
> + m_csize = 0;
> + CHECKED_MALLOC(m_buf[0], int16_t, sizeL);
> + }
> +
> return true;
>
> fail:
> @@ -67,16 +74,22 @@
> void ShortYuv::clear()
> {
> memset(m_buf[0], 0, (m_size * m_size) * sizeof(int16_t));
> - memset(m_buf[1], 0, (m_csize * m_csize) * sizeof(int16_t));
> - memset(m_buf[2], 0, (m_csize * m_csize) * sizeof(int16_t));
> + if (m_csp != X265_CSP_I400)
> + {
> + memset(m_buf[1], 0, (m_csize * m_csize) * sizeof(int16_t));
> + memset(m_buf[2], 0, (m_csize * m_csize) * sizeof(int16_t));
> + }
> }
>
> void ShortYuv::subtract(const Yuv& srcYuv0, const Yuv& srcYuv1, uint32_t
> log2Size)
> {
> const int sizeIdx = log2Size - 2;
> primitives.cu[sizeIdx].sub_ps(m_buf[0], m_size, srcYuv0.m_buf[0],
> srcYuv1.m_buf[0], srcYuv0.m_size, srcYuv1.m_size);
> - primitives.chroma[m_csp].cu[sizeIdx].sub_ps(m_buf[1], m_csize,
> srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize);
> - primitives.chroma[m_csp].cu[sizeIdx].sub_ps(m_buf[2], m_csize,
> srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize);
> + if (m_csp != X265_CSP_I400)
> + {
> + primitives.chroma[m_csp].cu[sizeIdx].sub_ps(m_buf[1], m_csize,
> srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize);
> + primitives.chroma[m_csp].cu[sizeIdx].sub_ps(m_buf[2], m_csize,
> srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize);
> + }
> }
>
> void ShortYuv::copyPartToPartLuma(ShortYuv& dstYuv, uint32_t absPartIdx,
> uint32_t log2Size) const
> diff -r f8b8ebdc5457 -r 5602b4bc1fec source/common/yuv.cpp
> --- a/source/common/yuv.cpp Mon Sep 28 14:34:41 2015 +0530
> +++ b/source/common/yuv.cpp Mon Sep 21 11:40:18 2015 -0500
> @@ -84,10 +84,13 @@
> pixel* dstY = dstPic.getLumaAddr(cuAddr, absPartIdx);
> primitives.cu[m_part].copy_pp(dstY, dstPic.m_stride, m_buf[0],
> m_size);
>
> - pixel* dstU = dstPic.getCbAddr(cuAddr, absPartIdx);
> - pixel* dstV = dstPic.getCrAddr(cuAddr, absPartIdx);
> - primitives.chroma[m_csp].cu[m_part].copy_pp(dstU, dstPic.m_strideC,
> m_buf[1], m_csize);
> - primitives.chroma[m_csp].cu[m_part].copy_pp(dstV, dstPic.m_strideC,
> m_buf[2], m_csize);
> + if (m_csp != X265_CSP_I400)
> + {
> + pixel* dstU = dstPic.getCbAddr(cuAddr, absPartIdx);
> + pixel* dstV = dstPic.getCrAddr(cuAddr, absPartIdx);
> + primitives.chroma[m_csp].cu[m_part].copy_pp(dstU,
> dstPic.m_strideC, m_buf[1], m_csize);
> + primitives.chroma[m_csp].cu[m_part].copy_pp(dstV,
> dstPic.m_strideC, m_buf[2], m_csize);
> + }
> }
>
> void Yuv::copyFromPicYuv(const PicYuv& srcPic, uint32_t cuAddr, uint32_t
> absPartIdx)
> @@ -95,10 +98,13 @@
> const pixel* srcY = srcPic.getLumaAddr(cuAddr, absPartIdx);
> primitives.cu[m_part].copy_pp(m_buf[0], m_size, srcY,
> srcPic.m_stride);
>
> - const pixel* srcU = srcPic.getCbAddr(cuAddr, absPartIdx);
> - const pixel* srcV = srcPic.getCrAddr(cuAddr, absPartIdx);
> - primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize, srcU,
> srcPic.m_strideC);
> - primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize, srcV,
> srcPic.m_strideC);
> + if (m_csp != X265_CSP_I400)
> + {
> + const pixel* srcU = srcPic.getCbAddr(cuAddr, absPartIdx);
> + const pixel* srcV = srcPic.getCrAddr(cuAddr, absPartIdx);
> + primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize,
> srcU, srcPic.m_strideC);
> + primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize,
> srcV, srcPic.m_strideC);
> + }
> }
>
> void Yuv::copyFromYuv(const Yuv& srcYuv)
> @@ -106,8 +112,11 @@
> X265_CHECK(m_size >= srcYuv.m_size, "invalid size\n");
>
> primitives.cu[m_part].copy_pp(m_buf[0], m_size, srcYuv.m_buf[0],
> srcYuv.m_size);
> - primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize,
> srcYuv.m_buf[1], srcYuv.m_csize);
> - primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize,
> srcYuv.m_buf[2], srcYuv.m_csize);
> + if (m_csp != X265_CSP_I400)
> + {
> + primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize,
> srcYuv.m_buf[1], srcYuv.m_csize);
> + primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize,
> srcYuv.m_buf[2], srcYuv.m_csize);
> + }
> }
>
> /* This version is intended for use by ME, which required FENC_STRIDE for
> luma fenc pixels */
> @@ -132,10 +141,13 @@
> pixel* dstY = dstYuv.getLumaAddr(absPartIdx);
> primitives.cu[m_part].copy_pp(dstY, dstYuv.m_size, m_buf[0], m_size);
>
> - pixel* dstU = dstYuv.getCbAddr(absPartIdx);
> - pixel* dstV = dstYuv.getCrAddr(absPartIdx);
> - primitives.chroma[m_csp].cu[m_part].copy_pp(dstU, dstYuv.m_csize,
> m_buf[1], m_csize);
> - primitives.chroma[m_csp].cu[m_part].copy_pp(dstV, dstYuv.m_csize,
> m_buf[2], m_csize);
> + if (m_csp != X265_CSP_I400)
> + {
> + pixel* dstU = dstYuv.getCbAddr(absPartIdx);
> + pixel* dstV = dstYuv.getCrAddr(absPartIdx);
> + primitives.chroma[m_csp].cu[m_part].copy_pp(dstU, dstYuv.m_csize,
> m_buf[1], m_csize);
> + primitives.chroma[m_csp].cu[m_part].copy_pp(dstV, dstYuv.m_csize,
> m_buf[2], m_csize);
> + }
> }
>
> void Yuv::copyPartToYuv(Yuv& dstYuv, uint32_t absPartIdx) const
> @@ -144,19 +156,25 @@
> pixel* dstY = dstYuv.m_buf[0];
> primitives.cu[dstYuv.m_part].copy_pp(dstY, dstYuv.m_size, srcY,
> m_size);
>
> - pixel* srcU = m_buf[1] + getChromaAddrOffset(absPartIdx);
> - pixel* srcV = m_buf[2] + getChromaAddrOffset(absPartIdx);
> - pixel* dstU = dstYuv.m_buf[1];
> - pixel* dstV = dstYuv.m_buf[2];
> - primitives.chroma[m_csp].cu[dstYuv.m_part].copy_pp(dstU,
> dstYuv.m_csize, srcU, m_csize);
> - primitives.chroma[m_csp].cu[dstYuv.m_part].copy_pp(dstV,
> dstYuv.m_csize, srcV, m_csize);
> + if (m_csp != X265_CSP_I400)
> + {
> + pixel* srcU = m_buf[1] + getChromaAddrOffset(absPartIdx);
> + pixel* srcV = m_buf[2] + getChromaAddrOffset(absPartIdx);
> + pixel* dstU = dstYuv.m_buf[1];
> + pixel* dstV = dstYuv.m_buf[2];
> + primitives.chroma[m_csp].cu[dstYuv.m_part].copy_pp(dstU,
> dstYuv.m_csize, srcU, m_csize);
> + primitives.chroma[m_csp].cu[dstYuv.m_part].copy_pp(dstV,
> dstYuv.m_csize, srcV, m_csize);
> + }
> }
>
> void Yuv::addClip(const Yuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t
> log2SizeL)
> {
> primitives.cu[log2SizeL - 2].add_ps(m_buf[0], m_size,
> srcYuv0.m_buf[0], srcYuv1.m_buf[0], srcYuv0.m_size, srcYuv1.m_size);
> - primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps(m_buf[1], m_csize,
> srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize);
> - primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps(m_buf[2], m_csize,
> srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize);
> + if (m_csp != X265_CSP_I400)
> + {
> + primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps(m_buf[1],
> m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize,
> srcYuv1.m_csize);
> + primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps(m_buf[2],
> m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize,
> srcYuv1.m_csize);
> + }
> }
>
> void Yuv::addAvg(const ShortYuv& srcYuv0, const ShortYuv& srcYuv1,
> uint32_t absPartIdx, uint32_t width, uint32_t height, bool bLuma, bool
> bChroma)
> diff -r f8b8ebdc5457 -r 5602b4bc1fec source/encoder/entropy.cpp
> --- a/source/encoder/entropy.cpp Mon Sep 28 14:34:41 2015 +0530
> +++ b/source/encoder/entropy.cpp Mon Sep 21 11:40:18 2015 -0500
> @@ -430,7 +430,8 @@
> if (slice.m_sps->bUseSAO)
> {
> WRITE_FLAG(saoParam->bSaoFlag[0], "slice_sao_luma_flag");
> - WRITE_FLAG(saoParam->bSaoFlag[1], "slice_sao_chroma_flag");
> + if (slice.m_sps->chromaFormatIdc != X265_CSP_I400)
> + WRITE_FLAG(saoParam->bSaoFlag[1], "slice_sao_chroma_flag");
> }
>
> // check if numRefIdx match the defaults (1, hard-coded in PPS). If
> not, override
> @@ -723,19 +724,23 @@
> uint32_t hChromaShift = cu.m_hChromaShift;
> uint32_t vChromaShift = cu.m_vChromaShift;
> bool bSmallChroma = (log2CurSize - hChromaShift) < 2;
> - if (!curDepth || !bSmallChroma)
> +
> + if (cu.m_chromaFormat != X265_CSP_I400)
> {
> - if (!curDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_U, curDepth -
> 1))
> - codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, curDepth,
> !subdiv);
> - if (!curDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_V, curDepth -
> 1))
> - codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, curDepth,
> !subdiv);
> + if (!curDepth || !bSmallChroma)
> + {
> + if (!curDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_U,
> curDepth - 1))
> + codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, curDepth,
> !subdiv);
> + if (!curDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_V,
> curDepth - 1))
> + codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, curDepth,
> !subdiv);
> + }
> + else
> + {
> + X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_U, curDepth) ==
> cu.getCbf(absPartIdx, TEXT_CHROMA_U, curDepth - 1), "chroma xform size
> match failure\n");
> + X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_V, curDepth) ==
> cu.getCbf(absPartIdx, TEXT_CHROMA_V, curDepth - 1), "chroma xform size
> match failure\n");
> + }
> }
> - else
> - {
> - X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_U, curDepth) ==
> cu.getCbf(absPartIdx, TEXT_CHROMA_U, curDepth - 1), "chroma xform size
> match failure\n");
> - X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_V, curDepth) ==
> cu.getCbf(absPartIdx, TEXT_CHROMA_V, curDepth - 1), "chroma xform size
> match failure\n");
> - }
> -
> +
> if (subdiv)
> {
> --log2CurSize;
> @@ -782,6 +787,9 @@
> return;
> }
>
> + if (cu.m_chromaFormat == X265_CSP_I400)
> + return;
> +
> if (bSmallChroma)
> {
> if ((absPartIdx & 3) != 3)
> @@ -1011,7 +1019,7 @@
> void Entropy::codePredWeightTable(const Slice& slice)
> {
> const WeightParam *wp;
> - bool bChroma = true; // 4:0:0 not yet supported
> + bool bChroma = (slice.m_sps->chromaFormatIdc !=
> X265_CSP_I400);
> bool bDenomCoded = false;
> int numRefDirs = slice.m_sliceType == B_SLICE ? 2 : 1;
> uint32_t totalSignalledWeightFlags = 0;
> diff -r f8b8ebdc5457 -r 5602b4bc1fec source/encoder/framefilter.cpp
> --- a/source/encoder/framefilter.cpp Mon Sep 28 14:34:41 2015 +0530
> +++ b/source/encoder/framefilter.cpp Mon Sep 21 11:40:18 2015 -0500
> @@ -167,25 +167,31 @@
>
> // Border extend Left and Right
> primitives.extendRowBorder(reconPic->getLumaAddr(lineStartCUAddr),
> reconPic->m_stride, reconPic->m_picWidth, realH, reconPic->m_lumaMarginX);
> - primitives.extendRowBorder(reconPic->getCbAddr(lineStartCUAddr),
> reconPic->m_strideC, reconPic->m_picWidth >> m_hChromaShift, realH >>
> m_vChromaShift, reconPic->m_chromaMarginX);
> - primitives.extendRowBorder(reconPic->getCrAddr(lineStartCUAddr),
> reconPic->m_strideC, reconPic->m_picWidth >> m_hChromaShift, realH >>
> m_vChromaShift, reconPic->m_chromaMarginX);
> + if (reconPic->m_picCsp != X265_CSP_I400)
> + {
> + primitives.extendRowBorder(reconPic->getCbAddr(lineStartCUAddr),
> reconPic->m_strideC, reconPic->m_picWidth >> m_hChromaShift, realH >>
> m_vChromaShift, reconPic->m_chromaMarginX);
> + primitives.extendRowBorder(reconPic->getCrAddr(lineStartCUAddr),
> reconPic->m_strideC, reconPic->m_picWidth >> m_hChromaShift, realH >>
> m_vChromaShift, reconPic->m_chromaMarginX);
> + }
>
> // Border extend Top
> if (!row)
> {
> const intptr_t stride = reconPic->m_stride;
> - const intptr_t strideC = reconPic->m_strideC;
> pixel *pixY = reconPic->getLumaAddr(lineStartCUAddr) -
> reconPic->m_lumaMarginX;
> - pixel *pixU = reconPic->getCbAddr(lineStartCUAddr) -
> reconPic->m_chromaMarginX;
> - pixel *pixV = reconPic->getCrAddr(lineStartCUAddr) -
> reconPic->m_chromaMarginX;
>
> for (uint32_t y = 0; y < reconPic->m_lumaMarginY; y++)
> memcpy(pixY - (y + 1) * stride, pixY, stride * sizeof(pixel));
>
> - for (uint32_t y = 0; y < reconPic->m_chromaMarginY; y++)
> + if (reconPic->m_picCsp != X265_CSP_I400)
> {
> - memcpy(pixU - (y + 1) * strideC, pixU, strideC *
> sizeof(pixel));
> - memcpy(pixV - (y + 1) * strideC, pixV, strideC *
> sizeof(pixel));
> + const intptr_t strideC = reconPic->m_strideC;
> + pixel *pixU = reconPic->getCbAddr(lineStartCUAddr) -
> reconPic->m_chromaMarginX;
> + pixel *pixV = reconPic->getCrAddr(lineStartCUAddr) -
> reconPic->m_chromaMarginX;
> + for (uint32_t y = 0; y < reconPic->m_chromaMarginY; y++)
> + {
> + memcpy(pixU - (y + 1) * strideC, pixU, strideC *
> sizeof(pixel));
> + memcpy(pixV - (y + 1) * strideC, pixV, strideC *
> sizeof(pixel));
> + }
> }
> }
>
> @@ -193,17 +199,20 @@
> if (row == m_numRows - 1)
> {
> const intptr_t stride = reconPic->m_stride;
> - const intptr_t strideC = reconPic->m_strideC;
> pixel *pixY = reconPic->getLumaAddr(lineStartCUAddr) -
> reconPic->m_lumaMarginX + (realH - 1) * stride;
> - pixel *pixU = reconPic->getCbAddr(lineStartCUAddr) -
> reconPic->m_chromaMarginX + ((realH >> m_vChromaShift) - 1) * strideC;
> - pixel *pixV = reconPic->getCrAddr(lineStartCUAddr) -
> reconPic->m_chromaMarginX + ((realH >> m_vChromaShift) - 1) * strideC;
> for (uint32_t y = 0; y < reconPic->m_lumaMarginY; y++)
> memcpy(pixY + (y + 1) * stride, pixY, stride * sizeof(pixel));
>
> - for (uint32_t y = 0; y < reconPic->m_chromaMarginY; y++)
> + if (reconPic->m_picCsp != X265_CSP_I400)
> {
> - memcpy(pixU + (y + 1) * strideC, pixU, strideC *
> sizeof(pixel));
> - memcpy(pixV + (y + 1) * strideC, pixV, strideC *
> sizeof(pixel));
> + const intptr_t strideC = reconPic->m_strideC;
> + pixel *pixU = reconPic->getCbAddr(lineStartCUAddr) -
> reconPic->m_chromaMarginX + ((realH >> m_vChromaShift) - 1) * strideC;
> + pixel *pixV = reconPic->getCrAddr(lineStartCUAddr) -
> reconPic->m_chromaMarginX + ((realH >> m_vChromaShift) - 1) * strideC;
> + for (uint32_t y = 0; y < reconPic->m_chromaMarginY; y++)
> + {
> + memcpy(pixU + (y + 1) * strideC, pixU, strideC *
> sizeof(pixel));
> + memcpy(pixV + (y + 1) * strideC, pixV, strideC *
> sizeof(pixel));
> + }
> }
> }
>
> @@ -220,16 +229,19 @@
> uint32_t height = getCUHeight(row);
>
> uint64_t ssdY = computeSSD(fencPic->getLumaAddr(cuAddr),
> reconPic->getLumaAddr(cuAddr), stride, width, height);
> - height >>= m_vChromaShift;
> - width >>= m_hChromaShift;
> - stride = reconPic->m_strideC;
> + m_frameEncoder->m_SSDY += ssdY;
> + if (reconPic->m_picCsp != X265_CSP_I400)
> + {
> + height >>= m_vChromaShift;
> + width >>= m_hChromaShift;
> + stride = reconPic->m_strideC;
> +
> + uint64_t ssdU = computeSSD(fencPic->getCbAddr(cuAddr),
> reconPic->getCbAddr(cuAddr), stride, width, height);
> + uint64_t ssdV = computeSSD(fencPic->getCrAddr(cuAddr),
> reconPic->getCrAddr(cuAddr), stride, width, height);
>
> - uint64_t ssdU = computeSSD(fencPic->getCbAddr(cuAddr),
> reconPic->getCbAddr(cuAddr), stride, width, height);
> - uint64_t ssdV = computeSSD(fencPic->getCrAddr(cuAddr),
> reconPic->getCrAddr(cuAddr), stride, width, height);
> -
> - m_frameEncoder->m_SSDY += ssdY;
> - m_frameEncoder->m_SSDU += ssdU;
> - m_frameEncoder->m_SSDV += ssdV;
> + m_frameEncoder->m_SSDU += ssdU;
> + m_frameEncoder->m_SSDV += ssdV;
> + }
> }
> if (m_param->bEnableSsim && m_ssimBuf)
> {
> @@ -264,12 +276,15 @@
> }
>
> updateMD5Plane(m_frameEncoder->m_state[0],
> reconPic->getLumaAddr(cuAddr), width, height, stride);
> - width >>= m_hChromaShift;
> - height >>= m_vChromaShift;
> - stride = reconPic->m_strideC;
> -
> - updateMD5Plane(m_frameEncoder->m_state[1],
> reconPic->getCbAddr(cuAddr), width, height, stride);
> - updateMD5Plane(m_frameEncoder->m_state[2],
> reconPic->getCrAddr(cuAddr), width, height, stride);
> + if (reconPic->m_picCsp != X265_CSP_I400)
> + {
> + width >>= m_hChromaShift;
> + height >>= m_vChromaShift;
> + stride = reconPic->m_strideC;
> +
> + updateMD5Plane(m_frameEncoder->m_state[1],
> reconPic->getCbAddr(cuAddr), width, height, stride);
> + updateMD5Plane(m_frameEncoder->m_state[2],
> reconPic->getCrAddr(cuAddr), width, height, stride);
> + }
> }
> else if (m_param->decodedPictureHashSEI == 2)
> {
> @@ -279,12 +294,15 @@
> if (!row)
> m_frameEncoder->m_crc[0] = m_frameEncoder->m_crc[1] =
> m_frameEncoder->m_crc[2] = 0xffff;
> updateCRC(reconPic->getLumaAddr(cuAddr),
> m_frameEncoder->m_crc[0], height, width, stride);
> - width >>= m_hChromaShift;
> - height >>= m_vChromaShift;
> - stride = reconPic->m_strideC;
> -
> - updateCRC(reconPic->getCbAddr(cuAddr), m_frameEncoder->m_crc[1],
> height, width, stride);
> - updateCRC(reconPic->getCrAddr(cuAddr), m_frameEncoder->m_crc[2],
> height, width, stride);
> + if (reconPic->m_picCsp != X265_CSP_I400)
> + {
> + width >>= m_hChromaShift;
> + height >>= m_vChromaShift;
> + stride = reconPic->m_strideC;
> +
> + updateCRC(reconPic->getCbAddr(cuAddr),
> m_frameEncoder->m_crc[1], height, width, stride);
> + updateCRC(reconPic->getCrAddr(cuAddr),
> m_frameEncoder->m_crc[2], height, width, stride);
> + }
> }
> else if (m_param->decodedPictureHashSEI == 3)
> {
> @@ -295,13 +313,16 @@
> if (!row)
> m_frameEncoder->m_checksum[0] = m_frameEncoder->m_checksum[1]
> = m_frameEncoder->m_checksum[2] = 0;
> updateChecksum(reconPic->m_picOrg[0],
> m_frameEncoder->m_checksum[0], height, width, stride, row, cuHeight);
> - width >>= m_hChromaShift;
> - height >>= m_vChromaShift;
> - stride = reconPic->m_strideC;
> - cuHeight >>= m_vChromaShift;
> -
> - updateChecksum(reconPic->m_picOrg[1],
> m_frameEncoder->m_checksum[1], height, width, stride, row, cuHeight);
> - updateChecksum(reconPic->m_picOrg[2],
> m_frameEncoder->m_checksum[2], height, width, stride, row, cuHeight);
> + if (reconPic->m_picCsp != X265_CSP_I400)
> + {
> + width >>= m_hChromaShift;
> + height >>= m_vChromaShift;
> + stride = reconPic->m_strideC;
> + cuHeight >>= m_vChromaShift;
> +
> + updateChecksum(reconPic->m_picOrg[1],
> m_frameEncoder->m_checksum[1], height, width, stride, row, cuHeight);
> + updateChecksum(reconPic->m_picOrg[2],
> m_frameEncoder->m_checksum[2], height, width, stride, row, cuHeight);
> + }
> }
>
> if (ATOMIC_INC(&m_frameEncoder->m_completionCount) == 2 *
> (int)m_frameEncoder->m_numRows)
> @@ -415,15 +436,18 @@
>
> primitives.cu[size].copy_pp(dst, reconPic->m_stride, src,
> fencPic->m_stride);
>
> - pixel* dstCb = reconPic->getCbAddr(cuAddr, absPartIdx);
> - pixel* srcCb = fencPic->getCbAddr(cuAddr, absPartIdx);
> + int csp = fencPic->m_picCsp;
> + if (csp != X265_CSP_I400)
> + {
> + pixel* dstCb = reconPic->getCbAddr(cuAddr, absPartIdx);
> + pixel* srcCb = fencPic->getCbAddr(cuAddr, absPartIdx);
>
> - pixel* dstCr = reconPic->getCrAddr(cuAddr, absPartIdx);
> - pixel* srcCr = fencPic->getCrAddr(cuAddr, absPartIdx);
> + pixel* dstCr = reconPic->getCrAddr(cuAddr, absPartIdx);
> + pixel* srcCr = fencPic->getCrAddr(cuAddr, absPartIdx);
>
> - int csp = fencPic->m_picCsp;
> - primitives.chroma[csp].cu[size].copy_pp(dstCb, reconPic->m_strideC,
> srcCb, fencPic->m_strideC);
> - primitives.chroma[csp].cu[size].copy_pp(dstCr, reconPic->m_strideC,
> srcCr, fencPic->m_strideC);
> + primitives.chroma[csp].cu[size].copy_pp(dstCb,
> reconPic->m_strideC, srcCb, fencPic->m_strideC);
> + primitives.chroma[csp].cu[size].copy_pp(dstCr,
> reconPic->m_strideC, srcCr, fencPic->m_strideC);
> + }
> }
>
> /* Original YUV restoration for CU in lossless coding */
> diff -r f8b8ebdc5457 -r 5602b4bc1fec source/encoder/sao.cpp
> --- a/source/encoder/sao.cpp Mon Sep 28 14:34:41 2015 +0530
> +++ b/source/encoder/sao.cpp Mon Sep 21 11:40:18 2015 -0500
> @@ -106,9 +106,15 @@
> bool SAO::create(x265_param* param)
> {
> m_param = param;
> - m_hChromaShift = CHROMA_H_SHIFT(param->internalCsp);
> - m_vChromaShift = CHROMA_V_SHIFT(param->internalCsp);
> -
> + if (param->internalCsp != X265_CSP_I400)
> + {
> + m_hChromaShift = CHROMA_H_SHIFT(param->internalCsp);
> + m_vChromaShift = CHROMA_V_SHIFT(param->internalCsp);
> + m_numPlanes = 3;
> + }
> + else
> + m_numPlanes = 1;
> +
> m_numCuInWidth = (m_param->sourceWidth + g_maxCUSize - 1) /
> g_maxCUSize;
> m_numCuInHeight = (m_param->sourceHeight + g_maxCUSize - 1) /
> g_maxCUSize;
>
> @@ -224,7 +230,7 @@
> }
>
> saoParam->bSaoFlag[0] = true;
> - saoParam->bSaoFlag[1] = true;
> + saoParam->bSaoFlag[1] = (m_numPlanes > 1);
>
> m_numNoSao[0] = 0; // Luma
> m_numNoSao[1] = 0; // Chroma
> @@ -1132,7 +1138,7 @@
> m_entropyCoder.codeSaoMerge(0);
> m_entropyCoder.store(m_rdContexts.temp);
> // reset stats Y, Cb, Cr
> - for (int plane = 0; plane < 3; plane++)
> + for (int plane = 0; plane < m_numPlanes; plane++)
> {
> for (int j = 0; j < MAX_NUM_SAO_TYPE; j++)
> {
> @@ -1161,7 +1167,8 @@
>
> saoComponentParamDist(saoParam, addr, addrUp, addrLeft,
> &mergeSaoParam[0][0], mergeDist);
>
> - sao2ChromaParamDist(saoParam, addr, addrUp, addrLeft,
> mergeSaoParam, mergeDist);
> + if (m_numPlanes > 1)
> + sao2ChromaParamDist(saoParam, addr, addrUp, addrLeft,
> mergeSaoParam, mergeDist);
>
> if (saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1])
> {
> @@ -1172,7 +1179,7 @@
> m_entropyCoder.codeSaoMerge(0);
> if (allowMerge[1])
> m_entropyCoder.codeSaoMerge(0);
> - for (int plane = 0; plane < 3; plane++)
> + for (int plane = 0; plane < m_numPlanes; plane++)
> {
> if (saoParam->bSaoFlag[plane > 0])
>
> m_entropyCoder.codeSaoOffset(saoParam->ctuParam[plane][addr], plane);
> @@ -1202,7 +1209,7 @@
> SaoMergeMode mergeMode = mergeIdx ? SAO_MERGE_UP :
> SAO_MERGE_LEFT;
> bestCost = mergeCost;
> m_entropyCoder.store(m_rdContexts.temp);
> - for (int plane = 0; plane < 3; plane++)
> + for (int plane = 0; plane < m_numPlanes; plane++)
> {
> mergeSaoParam[plane][mergeIdx].mergeMode =
> mergeMode;
> if (saoParam->bSaoFlag[plane > 0])
> diff -r f8b8ebdc5457 -r 5602b4bc1fec source/encoder/sao.h
> --- a/source/encoder/sao.h Mon Sep 28 14:34:41 2015 +0530
> +++ b/source/encoder/sao.h Mon Sep 21 11:40:18 2015 -0500
> @@ -85,6 +85,7 @@
>
> int m_numCuInWidth;
> int m_numCuInHeight;
> + int m_numPlanes;
> int m_hChromaShift;
> int m_vChromaShift;
>
> diff -r f8b8ebdc5457 -r 5602b4bc1fec source/encoder/search.cpp
> --- a/source/encoder/search.cpp Mon Sep 28 14:34:41 2015 +0530
> +++ b/source/encoder/search.cpp Mon Sep 21 11:40:18 2015 -0500
> @@ -1169,7 +1169,8 @@
>
> intraMode.initCosts();
> intraMode.lumaDistortion += estIntraPredQT(intraMode, cuGeom,
> tuDepthRange, sharedModes);
> - intraMode.chromaDistortion += estIntraPredChromaQT(intraMode, cuGeom,
> sharedChromaModes);
> + if (m_csp != X265_CSP_I400)
> + intraMode.chromaDistortion += estIntraPredChromaQT(intraMode,
> cuGeom, sharedChromaModes);
> intraMode.distortion += intraMode.lumaDistortion +
> intraMode.chromaDistortion;
>
> m_entropyCoder.resetBits();
> @@ -2499,9 +2500,14 @@
> // Luma
> int part = partitionFromLog2Size(cu.m_log2CUSize[0]);
> interMode.lumaDistortion = primitives.cu[part].sse_pp(fencYuv->m_buf[0],
> fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
> - // Chroma
> - interMode.chromaDistortion = m_rdCost.scaleChromaDist(1,
> primitives.chroma[m_csp].cu[part].sse_pp(fencYuv->m_buf[1],
> fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
> - interMode.chromaDistortion += m_rdCost.scaleChromaDist(2,
> primitives.chroma[m_csp].cu[part].sse_pp(fencYuv->m_buf[2],
> fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
> + if (m_csp != X265_CSP_I400)
> + {
> + // Chroma
> + interMode.chromaDistortion = m_rdCost.scaleChromaDist(1,
> primitives.chroma[m_csp].cu[part].sse_pp(fencYuv->m_buf[1],
> fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
> + interMode.chromaDistortion += m_rdCost.scaleChromaDist(2,
> primitives.chroma[m_csp].cu[part].sse_pp(fencYuv->m_buf[2],
> fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
> + }
> + else
> + interMode.chromaDistortion = 0;
> interMode.distortion = interMode.lumaDistortion +
> interMode.chromaDistortion;
>
> m_entropyCoder.load(m_rqt[depth].cur);
> @@ -2553,9 +2559,12 @@
> if (!tqBypass)
> {
> sse_ret_t cbf0Dist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0],
> fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
> - cbf0Dist += m_rdCost.scaleChromaDist(1,
> primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1],
> predYuv->m_csize, predYuv->m_buf[1], predYuv->m_csize));
> - cbf0Dist += m_rdCost.scaleChromaDist(2,
> primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2],
> predYuv->m_csize, predYuv->m_buf[2], predYuv->m_csize));
> -
> + if (m_csp != X265_CSP_I400)
> + {
> + cbf0Dist += m_rdCost.scaleChromaDist(1,
> primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1],
> predYuv->m_csize, predYuv->m_buf[1], predYuv->m_csize));
> + cbf0Dist += m_rdCost.scaleChromaDist(2,
> primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2],
> predYuv->m_csize, predYuv->m_buf[2], predYuv->m_csize));
> + }
> +
> /* Consider the RD cost of not signaling any residual */
> m_entropyCoder.load(m_rqt[depth].cur);
> m_entropyCoder.resetBits();
> @@ -2624,8 +2633,14 @@
>
> // update with clipped distortion and cost (qp estimation loop uses
> unclipped values)
> sse_ret_t bestLumaDist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0],
> fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
> - sse_ret_t bestChromaDist = m_rdCost.scaleChromaDist(1,
> primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1],
> fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
> - bestChromaDist += m_rdCost.scaleChromaDist(2,
> primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2],
> fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
> + sse_ret_t bestChromaDist;
> + if (m_csp != X265_CSP_I400)
> + {
> + bestChromaDist = m_rdCost.scaleChromaDist(1,
> primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1],
> fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
> + bestChromaDist += m_rdCost.scaleChromaDist(2,
> primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2],
> fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
> + }
> + else
> + bestChromaDist = 0;
> if (m_rdCost.m_psyRd)
> interMode.psyEnergy = m_rdCost.psyCost(sizeIdx,
> fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
> interMode.resEnergy = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0],
> fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
> @@ -2798,15 +2813,22 @@
> X265_CHECK(bCheckFull || bCheckSplit, "check-full or check-split must
> be set\n");
>
> uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
> - bool bCodeChroma = true;
> + bool bCodeChroma;
> uint32_t tuDepthC = tuDepth;
> - if (log2TrSizeC < 2)
> +
> + if (m_csp != X265_CSP_I400)
> {
> - X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && tuDepth,
> "invalid tuDepth\n");
> - log2TrSizeC = 2;
> - tuDepthC--;
> - bCodeChroma = !(absPartIdx & 3);
> + bCodeChroma = true;
> + if (log2TrSizeC < 2)
> + {
> + X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 &&
> tuDepth, "invalid tuDepth\n");
> + log2TrSizeC = 2;
> + tuDepthC--;
> + bCodeChroma = !(absPartIdx & 3);
> + }
> }
> + else
> + bCodeChroma = false;
>
> // code full block
> Cost fullCost;
> @@ -3383,15 +3405,22 @@
> const uint32_t qtLayer = log2TrSize - 2;
>
> uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
> - bool bCodeChroma = true;
> + bool bCodeChroma;
> uint32_t tuDepthC = tuDepth;
> - if (log2TrSizeC < 2)
> +
> + if (m_csp != X265_CSP_I400)
> {
> - X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && tuDepth,
> "invalid tuDepth\n");
> - log2TrSizeC = 2;
> - tuDepthC--;
> - bCodeChroma = !(absPartIdx & 3);
> + bCodeChroma = true;
> + if (log2TrSizeC < 2)
> + {
> + X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 &&
> tuDepth, "invalid tuDepth\n");
> + log2TrSizeC = 2;
> + tuDepthC--;
> + bCodeChroma = !(absPartIdx & 3);
> + }
> }
> + else
> + bCodeChroma = false;
>
> m_rqt[qtLayer].resiQtYuv.copyPartToPartLuma(resiYuv, absPartIdx,
> log2TrSize);
>
> diff -r f8b8ebdc5457 -r 5602b4bc1fec source/encoder/slicetype.cpp
> --- a/source/encoder/slicetype.cpp Mon Sep 28 14:34:41 2015 +0530
> +++ b/source/encoder/slicetype.cpp Mon Sep 21 11:40:18 2015 -0500
> @@ -74,17 +74,18 @@
> uint32_t LookaheadTLD::acEnergyCu(Frame* curFrame, uint32_t blockX,
> uint32_t blockY, int csp)
> {
> intptr_t stride = curFrame->m_fencPic->m_stride;
> - intptr_t cStride = curFrame->m_fencPic->m_strideC;
> intptr_t blockOffsetLuma = blockX + (blockY * stride);
> - int hShift = CHROMA_H_SHIFT(csp);
> - int vShift = CHROMA_V_SHIFT(csp);
> - intptr_t blockOffsetChroma = (blockX >> hShift) + ((blockY >> vShift)
> * cStride);
>
> - uint32_t var;
> -
> - var = acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[0] +
> blockOffsetLuma, stride, 0, csp);
> - var += acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[1] +
> blockOffsetChroma, cStride, 1, csp);
> - var += acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[2] +
> blockOffsetChroma, cStride, 2, csp);
> + uint32_t var = acEnergyPlane(curFrame,
> curFrame->m_fencPic->m_picOrg[0] + blockOffsetLuma, stride, 0, csp);
> + if (csp != X265_CSP_I400)
> + {
> + intptr_t cStride = curFrame->m_fencPic->m_strideC;
> + int hShift = CHROMA_H_SHIFT(csp);
> + int vShift = CHROMA_V_SHIFT(csp);
> + intptr_t blockOffsetChroma = (blockX >> hShift) + ((blockY >>
> vShift) * cStride);
> + var += acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[1] +
> blockOffsetChroma, cStride, 1, csp);
> + var += acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[2] +
> blockOffsetChroma, cStride, 2, csp);
> + }
> x265_emms();
> return var;
> }
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20151008/6051552e/attachment-0001.html>
More information about the x265-devel mailing list