[x265] [PATCH REBASE] add support for Monochrome color space (X265_CSP_I400)

Deepthi Nandakumar deepthi at multicorewareinc.com
Thu Oct 8 06:33:56 CEST 2015


Thanks, Steve. I have this monochrome patch from Fabrice, an SEI update
from Luca of libav, and numerous asm patches from the dev team waiting to
be pushed in.

Once the scenecut bug is solved, I plan to tag 1.8 and then push these in.
1.9 should probably be tagged in another 3-4 weeks, since there are many
features which will not make it into 1.8.

On Wed, Oct 7, 2015 at 10:20 PM, Steve Borho <steve at borho.org> wrote:

> # HG changeset patch
> # User Steve Borho <steve at borho.org>
> # Date 1442853618 18000
> #      Mon Sep 21 11:40:18 2015 -0500
> # Node ID 5602b4bc1fec175e7c5bf14ef18978a50e3bc07f
> # Parent  f8b8ebdc54578e6735216d8b9abce5ba80c05bd8
> add support for Monochrome color space (X265_CSP_I400)
>
> This patch was extracted from changes made by Fabrice Bellard for BPG
>
> diff -r f8b8ebdc5457 -r 5602b4bc1fec source/common/deblock.cpp
> --- a/source/common/deblock.cpp Mon Sep 28 14:34:41 2015 +0530
> +++ b/source/common/deblock.cpp Mon Sep 21 11:40:18 2015 -0500
> @@ -109,7 +109,7 @@
>      for (uint32_t e = 0; e < numUnits; e += partIdxIncr)
>      {
>          edgeFilterLuma(cu, absPartIdx, depth, dir, e, blockStrength);
> -        if (!((e0 + e) & chromaMask))
> +        if (cu->m_chromaFormat != X265_CSP_I400 && !((e0 + e) &
> chromaMask))
>              edgeFilterChroma(cu, absPartIdx, depth, dir, e,
> blockStrength);
>      }
>  }
> diff -r f8b8ebdc5457 -r 5602b4bc1fec source/common/frame.cpp
> --- a/source/common/frame.cpp   Mon Sep 28 14:34:41 2015 +0530
> +++ b/source/common/frame.cpp   Mon Sep 21 11:40:18 2015 -0500
> @@ -73,14 +73,20 @@
>           * end of the picture accessing uninitialized pixels */
>          int maxHeight = sps.numCuInHeight * g_maxCUSize;
>          memset(m_reconPic->m_picOrg[0], 0, sizeof(pixel) *
> m_reconPic->m_stride * maxHeight);
> -        memset(m_reconPic->m_picOrg[1], 0, sizeof(pixel) *
> m_reconPic->m_strideC * (maxHeight >> m_reconPic->m_vChromaShift));
> -        memset(m_reconPic->m_picOrg[2], 0, sizeof(pixel) *
> m_reconPic->m_strideC * (maxHeight >> m_reconPic->m_vChromaShift));
> +        if (m_reconPic->m_picCsp != X265_CSP_I400)
> +        {
> +            memset(m_reconPic->m_picOrg[1], 0, sizeof(pixel) *
> m_reconPic->m_strideC * (maxHeight >> m_reconPic->m_vChromaShift));
> +            memset(m_reconPic->m_picOrg[2], 0, sizeof(pixel) *
> m_reconPic->m_strideC * (maxHeight >> m_reconPic->m_vChromaShift));
> +        }
>
>          /* use pre-calculated cu/pu offsets cached in the SPS structure */
> -        m_reconPic->m_cuOffsetC = sps.cuOffsetC;
>          m_reconPic->m_cuOffsetY = sps.cuOffsetY;
> -        m_reconPic->m_buOffsetC = sps.buOffsetC;
>          m_reconPic->m_buOffsetY = sps.buOffsetY;
> +        if (m_reconPic->m_picCsp != X265_CSP_I400)
> +        {
> +            m_reconPic->m_cuOffsetC = sps.cuOffsetC;
> +            m_reconPic->m_buOffsetC = sps.buOffsetC;
> +        }
>      }
>      return ok;
>  }
> diff -r f8b8ebdc5457 -r 5602b4bc1fec source/common/param.cpp
> --- a/source/common/param.cpp   Mon Sep 28 14:34:41 2015 +0530
> +++ b/source/common/param.cpp   Mon Sep 21 11:40:18 2015 -0500
> @@ -1069,7 +1069,7 @@
>
>      CHECK(param->sourceWidth < (int)param->maxCUSize ||
> param->sourceHeight < (int)param->maxCUSize,
>            "Picture size must be at least one CTU");
> -    CHECK(param->internalCsp < X265_CSP_I420 || X265_CSP_I444 <
> param->internalCsp,
> +    CHECK(param->internalCsp < X265_CSP_I400 || X265_CSP_I444 <
> param->internalCsp,
>            "Color space must be i420, i422, or i444");
>      CHECK(param->sourceWidth & !!CHROMA_H_SHIFT(param->internalCsp),
>            "Picture width must be an integer multiple of the specified
> chroma subsampling");
> diff -r f8b8ebdc5457 -r 5602b4bc1fec source/common/picyuv.cpp
> --- a/source/common/picyuv.cpp  Mon Sep 28 14:34:41 2015 +0530
> +++ b/source/common/picyuv.cpp  Mon Sep 21 11:40:18 2015 -0500
> @@ -70,12 +70,16 @@
>      int maxHeight = numCuInHeight * g_maxCUSize;
>
>      CHECKED_MALLOC(m_picBuf[0], pixel, m_stride * (maxHeight +
> (m_lumaMarginY * 2)));
> -    CHECKED_MALLOC(m_picBuf[1], pixel, m_strideC * ((maxHeight >>
> m_vChromaShift) + (m_chromaMarginY * 2)));
> -    CHECKED_MALLOC(m_picBuf[2], pixel, m_strideC * ((maxHeight >>
> m_vChromaShift) + (m_chromaMarginY * 2)));
> +    m_picOrg[0] = m_picBuf[0] + m_lumaMarginY   * m_stride  +
> m_lumaMarginX;
>
> -    m_picOrg[0] = m_picBuf[0] + m_lumaMarginY   * m_stride  +
> m_lumaMarginX;
> -    m_picOrg[1] = m_picBuf[1] + m_chromaMarginY * m_strideC +
> m_chromaMarginX;
> -    m_picOrg[2] = m_picBuf[2] + m_chromaMarginY * m_strideC +
> m_chromaMarginX;
> +    if (m_picCsp != X265_CSP_I400)
> +    {
> +        CHECKED_MALLOC(m_picBuf[1], pixel, m_strideC * ((maxHeight >>
> m_vChromaShift) + (m_chromaMarginY * 2)));
> +        CHECKED_MALLOC(m_picBuf[2], pixel, m_strideC * ((maxHeight >>
> m_vChromaShift) + (m_chromaMarginY * 2)));
> +
> +        m_picOrg[1] = m_picBuf[1] + m_chromaMarginY * m_strideC +
> m_chromaMarginX;
> +        m_picOrg[2] = m_picBuf[2] + m_chromaMarginY * m_strideC +
> m_chromaMarginX;
> +    }
>
>      return true;
>
> @@ -90,24 +94,32 @@
>  {
>      uint32_t numPartitions = 1 << (g_unitSizeDepth * 2);
>      CHECKED_MALLOC(m_cuOffsetY, intptr_t, sps.numCuInWidth *
> sps.numCuInHeight);
> -    CHECKED_MALLOC(m_cuOffsetC, intptr_t, sps.numCuInWidth *
> sps.numCuInHeight);
> +    if (m_picCsp != X265_CSP_I400)
> +    {
> +        CHECKED_MALLOC(m_cuOffsetC, intptr_t, sps.numCuInWidth *
> sps.numCuInHeight);
> +    }
>      for (uint32_t cuRow = 0; cuRow < sps.numCuInHeight; cuRow++)
>      {
>          for (uint32_t cuCol = 0; cuCol < sps.numCuInWidth; cuCol++)
>          {
>              m_cuOffsetY[cuRow * sps.numCuInWidth + cuCol] = m_stride *
> cuRow * g_maxCUSize + cuCol * g_maxCUSize;
> -            m_cuOffsetC[cuRow * sps.numCuInWidth + cuCol] = m_strideC *
> cuRow * (g_maxCUSize >> m_vChromaShift) + cuCol * (g_maxCUSize >>
> m_hChromaShift);
> +            if (m_picCsp != X265_CSP_I400)
> +                m_cuOffsetC[cuRow * sps.numCuInWidth + cuCol] = m_strideC
> * cuRow * (g_maxCUSize >> m_vChromaShift) + cuCol * (g_maxCUSize >>
> m_hChromaShift);
>          }
>      }
>
>      CHECKED_MALLOC(m_buOffsetY, intptr_t, (size_t)numPartitions);
> -    CHECKED_MALLOC(m_buOffsetC, intptr_t, (size_t)numPartitions);
> +    if (m_picCsp != X265_CSP_I400)
> +    {
> +        CHECKED_MALLOC(m_buOffsetC, intptr_t, (size_t)numPartitions);
> +    }
>      for (uint32_t idx = 0; idx < numPartitions; ++idx)
>      {
>          intptr_t x = g_zscanToPelX[idx];
>          intptr_t y = g_zscanToPelY[idx];
>          m_buOffsetY[idx] = m_stride * y + x;
> -        m_buOffsetC[idx] = m_strideC * (y >> m_vChromaShift) + (x >>
> m_hChromaShift);
> +        if (m_picCsp != X265_CSP_I400)
> +            m_buOffsetC[idx] = m_strideC * (y >> m_vChromaShift) + (x >>
> m_hChromaShift);
>      }
>
>      return true;
> @@ -168,8 +180,11 @@
>              int shift = (X265_DEPTH - 8);
>
>              primitives.planecopy_cp(yChar, pic.stride[0] /
> sizeof(*yChar), yPixel, m_stride, width, height, shift);
> -            primitives.planecopy_cp(uChar, pic.stride[1] /
> sizeof(*uChar), uPixel, m_strideC, width >> m_hChromaShift, height >>
> m_vChromaShift, shift);
> -            primitives.planecopy_cp(vChar, pic.stride[2] /
> sizeof(*vChar), vPixel, m_strideC, width >> m_hChromaShift, height >>
> m_vChromaShift, shift);
> +            if (m_picCsp != X265_CSP_I400)
> +            {
> +                primitives.planecopy_cp(uChar, pic.stride[1] /
> sizeof(*uChar), uPixel, m_strideC, width >> m_hChromaShift, height >>
> m_vChromaShift, shift);
> +                primitives.planecopy_cp(vChar, pic.stride[2] /
> sizeof(*vChar), vPixel, m_strideC, width >> m_hChromaShift, height >>
> m_vChromaShift, shift);
> +            }
>          }
>  #else /* Case for (X265_DEPTH == 8) */
>          // TODO: Does we need this path? may merge into above in future
> @@ -190,15 +205,18 @@
>                  yChar += pic.stride[0] / sizeof(*yChar);
>              }
>
> -            for (int r = 0; r < height >> m_vChromaShift; r++)
> +            if (m_picCsp != X265_CSP_I400)
>              {
> -                memcpy(uPixel, uChar, (width >> m_hChromaShift) *
> sizeof(pixel));
> -                memcpy(vPixel, vChar, (width >> m_hChromaShift) *
> sizeof(pixel));
> +                for (int r = 0; r < height >> m_vChromaShift; r++)
> +                {
> +                    memcpy(uPixel, uChar, (width >> m_hChromaShift) *
> sizeof(pixel));
> +                    memcpy(vPixel, vChar, (width >> m_hChromaShift) *
> sizeof(pixel));
>
> -                uPixel += m_strideC;
> -                vPixel += m_strideC;
> -                uChar += pic.stride[1] / sizeof(*uChar);
> -                vChar += pic.stride[2] / sizeof(*vChar);
> +                    uPixel += m_strideC;
> +                    vPixel += m_strideC;
> +                    uChar += pic.stride[1] / sizeof(*uChar);
> +                    vChar += pic.stride[2] / sizeof(*vChar);
> +                }
>              }
>          }
>  #endif /* (X265_DEPTH > 8) */
> @@ -220,15 +238,21 @@
>          {
>              /* shift right and mask pixels to final size */
>              primitives.planecopy_sp(yShort, pic.stride[0] /
> sizeof(*yShort), yPixel, m_stride, width, height, shift, mask);
> -            primitives.planecopy_sp(uShort, pic.stride[1] /
> sizeof(*uShort), uPixel, m_strideC, width >> m_hChromaShift, height >>
> m_vChromaShift, shift, mask);
> -            primitives.planecopy_sp(vShort, pic.stride[2] /
> sizeof(*vShort), vPixel, m_strideC, width >> m_hChromaShift, height >>
> m_vChromaShift, shift, mask);
> +            if (m_picCsp != X265_CSP_I400)
> +            {
> +                primitives.planecopy_sp(uShort, pic.stride[1] /
> sizeof(*uShort), uPixel, m_strideC, width >> m_hChromaShift, height >>
> m_vChromaShift, shift, mask);
> +                primitives.planecopy_sp(vShort, pic.stride[2] /
> sizeof(*vShort), vPixel, m_strideC, width >> m_hChromaShift, height >>
> m_vChromaShift, shift, mask);
> +            }
>          }
>          else /* Case for (pic.bitDepth <= X265_DEPTH) */
>          {
>              /* shift left and mask pixels to final size */
>              primitives.planecopy_sp_shl(yShort, pic.stride[0] /
> sizeof(*yShort), yPixel, m_stride, width, height, shift, mask);
> -            primitives.planecopy_sp_shl(uShort, pic.stride[1] /
> sizeof(*uShort), uPixel, m_strideC, width >> m_hChromaShift, height >>
> m_vChromaShift, shift, mask);
> -            primitives.planecopy_sp_shl(vShort, pic.stride[2] /
> sizeof(*vShort), vPixel, m_strideC, width >> m_hChromaShift, height >>
> m_vChromaShift, shift, mask);
> +            if (m_picCsp != X265_CSP_I400)
> +            {
> +                primitives.planecopy_sp_shl(uShort, pic.stride[1] /
> sizeof(*uShort), uPixel, m_strideC, width >> m_hChromaShift, height >>
> m_vChromaShift, shift, mask);
> +                primitives.planecopy_sp_shl(vShort, pic.stride[2] /
> sizeof(*vShort), vPixel, m_strideC, width >> m_hChromaShift, height >>
> m_vChromaShift, shift, mask);
> +            }
>          }
>      }
>
> @@ -250,30 +274,36 @@
>          Y += m_stride;
>      }
>
> -    for (int r = 0; r < height >> m_vChromaShift; r++)
> +    if (m_picCsp != X265_CSP_I400)
>      {
> -        for (int x = 0; x < padx >> m_hChromaShift; x++)
> +        for (int r = 0; r < height >> m_vChromaShift; r++)
>          {
> -            U[(width >> m_hChromaShift) + x] = U[(width >>
> m_hChromaShift) - 1];
> -            V[(width >> m_hChromaShift) + x] = V[(width >>
> m_hChromaShift) - 1];
> +            for (int x = 0; x < padx >> m_hChromaShift; x++)
> +            {
> +                U[(width >> m_hChromaShift) + x] = U[(width >>
> m_hChromaShift) - 1];
> +                V[(width >> m_hChromaShift) + x] = V[(width >>
> m_hChromaShift) - 1];
> +            }
> +
> +            U += m_strideC;
> +            V += m_strideC;
>          }
> -
> -        U += m_strideC;
> -        V += m_strideC;
>      }
>
>      /* extend the bottom if height was not multiple of the minimum CU
> size */
>      Y = m_picOrg[0] + (height - 1) * m_stride;
> -    U = m_picOrg[1] + ((height >> m_vChromaShift) - 1) * m_strideC;
> -    V = m_picOrg[2] + ((height >> m_vChromaShift) - 1) * m_strideC;
>
>      for (int i = 1; i <= pady; i++)
>          memcpy(Y + i * m_stride, Y, (width + padx) * sizeof(pixel));
>
> -    for (int j = 1; j <= pady >> m_vChromaShift; j++)
> +    if (m_picCsp != X265_CSP_I400)
>      {
> -        memcpy(U + j * m_strideC, U, ((width + padx) >> m_hChromaShift) *
> sizeof(pixel));
> -        memcpy(V + j * m_strideC, V, ((width + padx) >> m_hChromaShift) *
> sizeof(pixel));
> +        U = m_picOrg[1] + ((height >> m_vChromaShift) - 1) * m_strideC;
> +        V = m_picOrg[2] + ((height >> m_vChromaShift) - 1) * m_strideC;
> +        for (int j = 1; j <= pady >> m_vChromaShift; j++)
> +        {
> +            memcpy(U + j * m_strideC, U, ((width + padx) >>
> m_hChromaShift) * sizeof(pixel));
> +            memcpy(V + j * m_strideC, V, ((width + padx) >>
> m_hChromaShift) * sizeof(pixel));
> +        }
>      }
>  }
>
> diff -r f8b8ebdc5457 -r 5602b4bc1fec source/common/predict.cpp
> --- a/source/common/predict.cpp Mon Sep 28 14:34:41 2015 +0530
> +++ b/source/common/predict.cpp Mon Sep 21 11:40:18 2015 -0500
> @@ -85,6 +85,14 @@
>      int refIdx0 = cu.m_refIdx[0][pu.puAbsPartIdx];
>      int refIdx1 = cu.m_refIdx[1][pu.puAbsPartIdx];
>
> +    /* XXX: disable chroma at a higher level ? */
> +    if (cu.m_chromaFormat == X265_CSP_I400)
> +    {
> +        bChroma = false;
> +        if (!bLuma)
> +            return;
> +    }
> +
>      if (cu.m_slice->isInterP())
>      {
>          /* P Slice */
> @@ -99,7 +107,8 @@
>
>          if (cu.m_slice->m_pps->bUseWeightPred && wp0->bPresentFlag)
>          {
> -            for (int plane = 0; plane < 3; plane++)
> +            int numPlanes = cu.m_chromaFormat == X265_CSP_I400 ? 1 : 3;
> +            for (int plane = 0; plane < numPlanes; plane++)
>              {
>                  wv0[plane].w      = wp0[plane].inputWeight;
>                  wv0[plane].offset = wp0[plane].inputOffset * (1 <<
> (X265_DEPTH - 8));
> @@ -136,13 +145,14 @@
>
>          if (cu.m_slice->m_pps->bUseWeightedBiPred)
>          {
> +            int numPlanes = cu.m_chromaFormat == X265_CSP_I400 ? 1 : 3;
> +
>              pwp0 = refIdx0 >= 0 ?
> cu.m_slice->m_weightPredTable[0][refIdx0] : NULL;
>              pwp1 = refIdx1 >= 0 ?
> cu.m_slice->m_weightPredTable[1][refIdx1] : NULL;
> -
>              if (pwp0 && pwp1 && (pwp0->bPresentFlag ||
> pwp1->bPresentFlag))
>              {
>                  /* biprediction weighting */
> -                for (int plane = 0; plane < 3; plane++)
> +                for (int plane = 0; plane < numPlanes; plane++)
>                  {
>                      wv0[plane].w = pwp0[plane].inputWeight;
>                      wv0[plane].o = pwp0[plane].inputOffset * (1 <<
> (X265_DEPTH - 8));
> @@ -159,7 +169,7 @@
>              {
>                  /* uniprediction weighting, always outputs to wv0 */
>                  const WeightParam* pwp = (refIdx0 >= 0) ? pwp0 : pwp1;
> -                for (int plane = 0; plane < 3; plane++)
> +                for (int plane = 0; plane < numPlanes; plane++)
>                  {
>                      wv0[plane].w = pwp[plane].inputWeight;
>                      wv0[plane].offset = pwp[plane].inputOffset * (1 <<
> (X265_DEPTH - 8));
> diff -r f8b8ebdc5457 -r 5602b4bc1fec source/common/shortyuv.cpp
> --- a/source/common/shortyuv.cpp        Mon Sep 28 14:34:41 2015 +0530
> +++ b/source/common/shortyuv.cpp        Mon Sep 21 11:40:18 2015 -0500
> @@ -40,19 +40,26 @@
>  bool ShortYuv::create(uint32_t size, int csp)
>  {
>      m_csp = csp;
> -    m_hChromaShift = CHROMA_H_SHIFT(csp);
> -    m_vChromaShift = CHROMA_V_SHIFT(csp);
> -
>      m_size = size;
> -    m_csize = size >> m_hChromaShift;
>
>      size_t sizeL = size * size;
> -    size_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift);
> -    X265_CHECK((sizeC & 15) == 0, "invalid size");
> -
> -    CHECKED_MALLOC(m_buf[0], int16_t, sizeL + sizeC * 2);
> -    m_buf[1] = m_buf[0] + sizeL;
> -    m_buf[2] = m_buf[0] + sizeL + sizeC;
> +    if (m_csp != X265_CSP_I400)
> +    {
> +        m_hChromaShift = CHROMA_H_SHIFT(csp);
> +        m_vChromaShift = CHROMA_V_SHIFT(csp);
> +        m_csize = size >> m_hChromaShift;
> +        size_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift);
> +        X265_CHECK((sizeC & 15) == 0, "invalid size");
> +        CHECKED_MALLOC(m_buf[0], int16_t, sizeL + sizeC * 2);
> +        m_buf[1] = m_buf[0] + sizeL;
> +        m_buf[2] = m_buf[0] + sizeL + sizeC;
> +    }
> +    else
> +    {
> +        m_csize = 0;
> +        CHECKED_MALLOC(m_buf[0], int16_t, sizeL);
> +    }
> +
>      return true;
>
>  fail:
> @@ -67,16 +74,22 @@
>  void ShortYuv::clear()
>  {
>      memset(m_buf[0], 0, (m_size  * m_size) *  sizeof(int16_t));
> -    memset(m_buf[1], 0, (m_csize * m_csize) * sizeof(int16_t));
> -    memset(m_buf[2], 0, (m_csize * m_csize) * sizeof(int16_t));
> +    if (m_csp != X265_CSP_I400)
> +    {
> +        memset(m_buf[1], 0, (m_csize * m_csize) * sizeof(int16_t));
> +        memset(m_buf[2], 0, (m_csize * m_csize) * sizeof(int16_t));
> +    }
>  }
>
>  void ShortYuv::subtract(const Yuv& srcYuv0, const Yuv& srcYuv1, uint32_t
> log2Size)
>  {
>      const int sizeIdx = log2Size - 2;
>      primitives.cu[sizeIdx].sub_ps(m_buf[0], m_size, srcYuv0.m_buf[0],
> srcYuv1.m_buf[0], srcYuv0.m_size, srcYuv1.m_size);
> -    primitives.chroma[m_csp].cu[sizeIdx].sub_ps(m_buf[1], m_csize,
> srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize);
> -    primitives.chroma[m_csp].cu[sizeIdx].sub_ps(m_buf[2], m_csize,
> srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize);
> +    if (m_csp != X265_CSP_I400)
> +    {
> +        primitives.chroma[m_csp].cu[sizeIdx].sub_ps(m_buf[1], m_csize,
> srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize);
> +        primitives.chroma[m_csp].cu[sizeIdx].sub_ps(m_buf[2], m_csize,
> srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize);
> +    }
>  }
>
>  void ShortYuv::copyPartToPartLuma(ShortYuv& dstYuv, uint32_t absPartIdx,
> uint32_t log2Size) const
> diff -r f8b8ebdc5457 -r 5602b4bc1fec source/common/yuv.cpp
> --- a/source/common/yuv.cpp     Mon Sep 28 14:34:41 2015 +0530
> +++ b/source/common/yuv.cpp     Mon Sep 21 11:40:18 2015 -0500
> @@ -84,10 +84,13 @@
>      pixel* dstY = dstPic.getLumaAddr(cuAddr, absPartIdx);
>      primitives.cu[m_part].copy_pp(dstY, dstPic.m_stride, m_buf[0],
> m_size);
>
> -    pixel* dstU = dstPic.getCbAddr(cuAddr, absPartIdx);
> -    pixel* dstV = dstPic.getCrAddr(cuAddr, absPartIdx);
> -    primitives.chroma[m_csp].cu[m_part].copy_pp(dstU, dstPic.m_strideC,
> m_buf[1], m_csize);
> -    primitives.chroma[m_csp].cu[m_part].copy_pp(dstV, dstPic.m_strideC,
> m_buf[2], m_csize);
> +    if (m_csp != X265_CSP_I400)
> +    {
> +        pixel* dstU = dstPic.getCbAddr(cuAddr, absPartIdx);
> +        pixel* dstV = dstPic.getCrAddr(cuAddr, absPartIdx);
> +        primitives.chroma[m_csp].cu[m_part].copy_pp(dstU,
> dstPic.m_strideC, m_buf[1], m_csize);
> +        primitives.chroma[m_csp].cu[m_part].copy_pp(dstV,
> dstPic.m_strideC, m_buf[2], m_csize);
> +    }
>  }
>
>  void Yuv::copyFromPicYuv(const PicYuv& srcPic, uint32_t cuAddr, uint32_t
> absPartIdx)
> @@ -95,10 +98,13 @@
>      const pixel* srcY = srcPic.getLumaAddr(cuAddr, absPartIdx);
>      primitives.cu[m_part].copy_pp(m_buf[0], m_size, srcY,
> srcPic.m_stride);
>
> -    const pixel* srcU = srcPic.getCbAddr(cuAddr, absPartIdx);
> -    const pixel* srcV = srcPic.getCrAddr(cuAddr, absPartIdx);
> -    primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize, srcU,
> srcPic.m_strideC);
> -    primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize, srcV,
> srcPic.m_strideC);
> +    if (m_csp != X265_CSP_I400)
> +    {
> +        const pixel* srcU = srcPic.getCbAddr(cuAddr, absPartIdx);
> +        const pixel* srcV = srcPic.getCrAddr(cuAddr, absPartIdx);
> +        primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize,
> srcU, srcPic.m_strideC);
> +        primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize,
> srcV, srcPic.m_strideC);
> +    }
>  }
>
>  void Yuv::copyFromYuv(const Yuv& srcYuv)
> @@ -106,8 +112,11 @@
>      X265_CHECK(m_size >= srcYuv.m_size, "invalid size\n");
>
>      primitives.cu[m_part].copy_pp(m_buf[0], m_size, srcYuv.m_buf[0],
> srcYuv.m_size);
> -    primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize,
> srcYuv.m_buf[1], srcYuv.m_csize);
> -    primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize,
> srcYuv.m_buf[2], srcYuv.m_csize);
> +    if (m_csp != X265_CSP_I400)
> +    {
> +        primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize,
> srcYuv.m_buf[1], srcYuv.m_csize);
> +        primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize,
> srcYuv.m_buf[2], srcYuv.m_csize);
> +    }
>  }
>
>  /* This version is intended for use by ME, which required FENC_STRIDE for
> luma fenc pixels */
> @@ -132,10 +141,13 @@
>      pixel* dstY = dstYuv.getLumaAddr(absPartIdx);
>      primitives.cu[m_part].copy_pp(dstY, dstYuv.m_size, m_buf[0], m_size);
>
> -    pixel* dstU = dstYuv.getCbAddr(absPartIdx);
> -    pixel* dstV = dstYuv.getCrAddr(absPartIdx);
> -    primitives.chroma[m_csp].cu[m_part].copy_pp(dstU, dstYuv.m_csize,
> m_buf[1], m_csize);
> -    primitives.chroma[m_csp].cu[m_part].copy_pp(dstV, dstYuv.m_csize,
> m_buf[2], m_csize);
> +    if (m_csp != X265_CSP_I400)
> +    {
> +        pixel* dstU = dstYuv.getCbAddr(absPartIdx);
> +        pixel* dstV = dstYuv.getCrAddr(absPartIdx);
> +        primitives.chroma[m_csp].cu[m_part].copy_pp(dstU, dstYuv.m_csize,
> m_buf[1], m_csize);
> +        primitives.chroma[m_csp].cu[m_part].copy_pp(dstV, dstYuv.m_csize,
> m_buf[2], m_csize);
> +    }
>  }
>
>  void Yuv::copyPartToYuv(Yuv& dstYuv, uint32_t absPartIdx) const
> @@ -144,19 +156,25 @@
>      pixel* dstY = dstYuv.m_buf[0];
>      primitives.cu[dstYuv.m_part].copy_pp(dstY, dstYuv.m_size, srcY,
> m_size);
>
> -    pixel* srcU = m_buf[1] + getChromaAddrOffset(absPartIdx);
> -    pixel* srcV = m_buf[2] + getChromaAddrOffset(absPartIdx);
> -    pixel* dstU = dstYuv.m_buf[1];
> -    pixel* dstV = dstYuv.m_buf[2];
> -    primitives.chroma[m_csp].cu[dstYuv.m_part].copy_pp(dstU,
> dstYuv.m_csize, srcU, m_csize);
> -    primitives.chroma[m_csp].cu[dstYuv.m_part].copy_pp(dstV,
> dstYuv.m_csize, srcV, m_csize);
> +    if (m_csp != X265_CSP_I400)
> +    {
> +        pixel* srcU = m_buf[1] + getChromaAddrOffset(absPartIdx);
> +        pixel* srcV = m_buf[2] + getChromaAddrOffset(absPartIdx);
> +        pixel* dstU = dstYuv.m_buf[1];
> +        pixel* dstV = dstYuv.m_buf[2];
> +        primitives.chroma[m_csp].cu[dstYuv.m_part].copy_pp(dstU,
> dstYuv.m_csize, srcU, m_csize);
> +        primitives.chroma[m_csp].cu[dstYuv.m_part].copy_pp(dstV,
> dstYuv.m_csize, srcV, m_csize);
> +    }
>  }
>
>  void Yuv::addClip(const Yuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t
> log2SizeL)
>  {
>      primitives.cu[log2SizeL - 2].add_ps(m_buf[0], m_size,
> srcYuv0.m_buf[0], srcYuv1.m_buf[0], srcYuv0.m_size, srcYuv1.m_size);
> -    primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps(m_buf[1], m_csize,
> srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize);
> -    primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps(m_buf[2], m_csize,
> srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize);
> +    if (m_csp != X265_CSP_I400)
> +    {
> +        primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps(m_buf[1],
> m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize,
> srcYuv1.m_csize);
> +        primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps(m_buf[2],
> m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize,
> srcYuv1.m_csize);
> +    }
>  }
>
>  void Yuv::addAvg(const ShortYuv& srcYuv0, const ShortYuv& srcYuv1,
> uint32_t absPartIdx, uint32_t width, uint32_t height, bool bLuma, bool
> bChroma)
> diff -r f8b8ebdc5457 -r 5602b4bc1fec source/encoder/entropy.cpp
> --- a/source/encoder/entropy.cpp        Mon Sep 28 14:34:41 2015 +0530
> +++ b/source/encoder/entropy.cpp        Mon Sep 21 11:40:18 2015 -0500
> @@ -430,7 +430,8 @@
>      if (slice.m_sps->bUseSAO)
>      {
>          WRITE_FLAG(saoParam->bSaoFlag[0], "slice_sao_luma_flag");
> -        WRITE_FLAG(saoParam->bSaoFlag[1], "slice_sao_chroma_flag");
> +        if (slice.m_sps->chromaFormatIdc != X265_CSP_I400)
> +            WRITE_FLAG(saoParam->bSaoFlag[1], "slice_sao_chroma_flag");
>      }
>
>      // check if numRefIdx match the defaults (1, hard-coded in PPS). If
> not, override
> @@ -723,19 +724,23 @@
>      uint32_t hChromaShift = cu.m_hChromaShift;
>      uint32_t vChromaShift = cu.m_vChromaShift;
>      bool bSmallChroma = (log2CurSize - hChromaShift) < 2;
> -    if (!curDepth || !bSmallChroma)
> +
> +    if (cu.m_chromaFormat != X265_CSP_I400)
>      {
> -        if (!curDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_U, curDepth -
> 1))
> -            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, curDepth,
> !subdiv);
> -        if (!curDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_V, curDepth -
> 1))
> -            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, curDepth,
> !subdiv);
> +        if (!curDepth || !bSmallChroma)
> +        {
> +            if (!curDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_U,
> curDepth - 1))
> +                codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, curDepth,
> !subdiv);
> +            if (!curDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_V,
> curDepth - 1))
> +                codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, curDepth,
> !subdiv);
> +        }
> +        else
> +        {
> +            X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_U, curDepth) ==
> cu.getCbf(absPartIdx, TEXT_CHROMA_U, curDepth - 1), "chroma xform size
> match failure\n");
> +            X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_V, curDepth) ==
> cu.getCbf(absPartIdx, TEXT_CHROMA_V, curDepth - 1), "chroma xform size
> match failure\n");
> +        }
>      }
> -    else
> -    {
> -        X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_U, curDepth) ==
> cu.getCbf(absPartIdx, TEXT_CHROMA_U, curDepth - 1), "chroma xform size
> match failure\n");
> -        X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_V, curDepth) ==
> cu.getCbf(absPartIdx, TEXT_CHROMA_V, curDepth - 1), "chroma xform size
> match failure\n");
> -    }
> -
> +
>      if (subdiv)
>      {
>          --log2CurSize;
> @@ -782,6 +787,9 @@
>              return;
>      }
>
> +    if (cu.m_chromaFormat == X265_CSP_I400)
> +        return;
> +
>      if (bSmallChroma)
>      {
>          if ((absPartIdx & 3) != 3)
> @@ -1011,7 +1019,7 @@
>  void Entropy::codePredWeightTable(const Slice& slice)
>  {
>      const WeightParam *wp;
> -    bool            bChroma      = true; // 4:0:0 not yet supported
> +    bool            bChroma      = (slice.m_sps->chromaFormatIdc !=
> X265_CSP_I400);
>      bool            bDenomCoded  = false;
>      int             numRefDirs   = slice.m_sliceType == B_SLICE ? 2 : 1;
>      uint32_t        totalSignalledWeightFlags = 0;
> diff -r f8b8ebdc5457 -r 5602b4bc1fec source/encoder/framefilter.cpp
> --- a/source/encoder/framefilter.cpp    Mon Sep 28 14:34:41 2015 +0530
> +++ b/source/encoder/framefilter.cpp    Mon Sep 21 11:40:18 2015 -0500
> @@ -167,25 +167,31 @@
>
>      // Border extend Left and Right
>      primitives.extendRowBorder(reconPic->getLumaAddr(lineStartCUAddr),
> reconPic->m_stride, reconPic->m_picWidth, realH, reconPic->m_lumaMarginX);
> -    primitives.extendRowBorder(reconPic->getCbAddr(lineStartCUAddr),
> reconPic->m_strideC, reconPic->m_picWidth >> m_hChromaShift, realH >>
> m_vChromaShift, reconPic->m_chromaMarginX);
> -    primitives.extendRowBorder(reconPic->getCrAddr(lineStartCUAddr),
> reconPic->m_strideC, reconPic->m_picWidth >> m_hChromaShift, realH >>
> m_vChromaShift, reconPic->m_chromaMarginX);
> +    if (reconPic->m_picCsp != X265_CSP_I400)
> +    {
> +        primitives.extendRowBorder(reconPic->getCbAddr(lineStartCUAddr),
> reconPic->m_strideC, reconPic->m_picWidth >> m_hChromaShift, realH >>
> m_vChromaShift, reconPic->m_chromaMarginX);
> +        primitives.extendRowBorder(reconPic->getCrAddr(lineStartCUAddr),
> reconPic->m_strideC, reconPic->m_picWidth >> m_hChromaShift, realH >>
> m_vChromaShift, reconPic->m_chromaMarginX);
> +    }
>
>      // Border extend Top
>      if (!row)
>      {
>          const intptr_t stride = reconPic->m_stride;
> -        const intptr_t strideC = reconPic->m_strideC;
>          pixel *pixY = reconPic->getLumaAddr(lineStartCUAddr) -
> reconPic->m_lumaMarginX;
> -        pixel *pixU = reconPic->getCbAddr(lineStartCUAddr) -
> reconPic->m_chromaMarginX;
> -        pixel *pixV = reconPic->getCrAddr(lineStartCUAddr) -
> reconPic->m_chromaMarginX;
>
>          for (uint32_t y = 0; y < reconPic->m_lumaMarginY; y++)
>              memcpy(pixY - (y + 1) * stride, pixY, stride * sizeof(pixel));
>
> -        for (uint32_t y = 0; y < reconPic->m_chromaMarginY; y++)
> +        if (reconPic->m_picCsp != X265_CSP_I400)
>          {
> -            memcpy(pixU - (y + 1) * strideC, pixU, strideC *
> sizeof(pixel));
> -            memcpy(pixV - (y + 1) * strideC, pixV, strideC *
> sizeof(pixel));
> +            const intptr_t strideC = reconPic->m_strideC;
> +            pixel *pixU = reconPic->getCbAddr(lineStartCUAddr) -
> reconPic->m_chromaMarginX;
> +            pixel *pixV = reconPic->getCrAddr(lineStartCUAddr) -
> reconPic->m_chromaMarginX;
> +            for (uint32_t y = 0; y < reconPic->m_chromaMarginY; y++)
> +            {
> +                memcpy(pixU - (y + 1) * strideC, pixU, strideC *
> sizeof(pixel));
> +                memcpy(pixV - (y + 1) * strideC, pixV, strideC *
> sizeof(pixel));
> +            }
>          }
>      }
>
> @@ -193,17 +199,20 @@
>      if (row == m_numRows - 1)
>      {
>          const intptr_t stride = reconPic->m_stride;
> -        const intptr_t strideC = reconPic->m_strideC;
>          pixel *pixY = reconPic->getLumaAddr(lineStartCUAddr) -
> reconPic->m_lumaMarginX + (realH - 1) * stride;
> -        pixel *pixU = reconPic->getCbAddr(lineStartCUAddr) -
> reconPic->m_chromaMarginX + ((realH >> m_vChromaShift) - 1) * strideC;
> -        pixel *pixV = reconPic->getCrAddr(lineStartCUAddr) -
> reconPic->m_chromaMarginX + ((realH >> m_vChromaShift) - 1) * strideC;
>          for (uint32_t y = 0; y < reconPic->m_lumaMarginY; y++)
>              memcpy(pixY + (y + 1) * stride, pixY, stride * sizeof(pixel));
>
> -        for (uint32_t y = 0; y < reconPic->m_chromaMarginY; y++)
> +        if (reconPic->m_picCsp != X265_CSP_I400)
>          {
> -            memcpy(pixU + (y + 1) * strideC, pixU, strideC *
> sizeof(pixel));
> -            memcpy(pixV + (y + 1) * strideC, pixV, strideC *
> sizeof(pixel));
> +            const intptr_t strideC = reconPic->m_strideC;
> +            pixel *pixU = reconPic->getCbAddr(lineStartCUAddr) -
> reconPic->m_chromaMarginX + ((realH >> m_vChromaShift) - 1) * strideC;
> +            pixel *pixV = reconPic->getCrAddr(lineStartCUAddr) -
> reconPic->m_chromaMarginX + ((realH >> m_vChromaShift) - 1) * strideC;
> +            for (uint32_t y = 0; y < reconPic->m_chromaMarginY; y++)
> +            {
> +                memcpy(pixU + (y + 1) * strideC, pixU, strideC *
> sizeof(pixel));
> +                memcpy(pixV + (y + 1) * strideC, pixV, strideC *
> sizeof(pixel));
> +            }
>          }
>      }
>
> @@ -220,16 +229,19 @@
>          uint32_t height = getCUHeight(row);
>
>          uint64_t ssdY = computeSSD(fencPic->getLumaAddr(cuAddr),
> reconPic->getLumaAddr(cuAddr), stride, width, height);
> -        height >>= m_vChromaShift;
> -        width  >>= m_hChromaShift;
> -        stride = reconPic->m_strideC;
> +        m_frameEncoder->m_SSDY += ssdY;
> +        if (reconPic->m_picCsp != X265_CSP_I400)
> +        {
> +            height >>= m_vChromaShift;
> +            width  >>= m_hChromaShift;
> +            stride = reconPic->m_strideC;
> +
> +            uint64_t ssdU = computeSSD(fencPic->getCbAddr(cuAddr),
> reconPic->getCbAddr(cuAddr), stride, width, height);
> +            uint64_t ssdV = computeSSD(fencPic->getCrAddr(cuAddr),
> reconPic->getCrAddr(cuAddr), stride, width, height);
>
> -        uint64_t ssdU = computeSSD(fencPic->getCbAddr(cuAddr),
> reconPic->getCbAddr(cuAddr), stride, width, height);
> -        uint64_t ssdV = computeSSD(fencPic->getCrAddr(cuAddr),
> reconPic->getCrAddr(cuAddr), stride, width, height);
> -
> -        m_frameEncoder->m_SSDY += ssdY;
> -        m_frameEncoder->m_SSDU += ssdU;
> -        m_frameEncoder->m_SSDV += ssdV;
> +            m_frameEncoder->m_SSDU += ssdU;
> +            m_frameEncoder->m_SSDV += ssdV;
> +        }
>      }
>      if (m_param->bEnableSsim && m_ssimBuf)
>      {
> @@ -264,12 +276,15 @@
>          }
>
>          updateMD5Plane(m_frameEncoder->m_state[0],
> reconPic->getLumaAddr(cuAddr), width, height, stride);
> -        width  >>= m_hChromaShift;
> -        height >>= m_vChromaShift;
> -        stride = reconPic->m_strideC;
> -
> -        updateMD5Plane(m_frameEncoder->m_state[1],
> reconPic->getCbAddr(cuAddr), width, height, stride);
> -        updateMD5Plane(m_frameEncoder->m_state[2],
> reconPic->getCrAddr(cuAddr), width, height, stride);
> +        if (reconPic->m_picCsp != X265_CSP_I400)
> +        {
> +            width  >>= m_hChromaShift;
> +            height >>= m_vChromaShift;
> +            stride = reconPic->m_strideC;
> +
> +            updateMD5Plane(m_frameEncoder->m_state[1],
> reconPic->getCbAddr(cuAddr), width, height, stride);
> +            updateMD5Plane(m_frameEncoder->m_state[2],
> reconPic->getCrAddr(cuAddr), width, height, stride);
> +        }
>      }
>      else if (m_param->decodedPictureHashSEI == 2)
>      {
> @@ -279,12 +294,15 @@
>          if (!row)
>              m_frameEncoder->m_crc[0] = m_frameEncoder->m_crc[1] =
> m_frameEncoder->m_crc[2] = 0xffff;
>          updateCRC(reconPic->getLumaAddr(cuAddr),
> m_frameEncoder->m_crc[0], height, width, stride);
> -        width  >>= m_hChromaShift;
> -        height >>= m_vChromaShift;
> -        stride = reconPic->m_strideC;
> -
> -        updateCRC(reconPic->getCbAddr(cuAddr), m_frameEncoder->m_crc[1],
> height, width, stride);
> -        updateCRC(reconPic->getCrAddr(cuAddr), m_frameEncoder->m_crc[2],
> height, width, stride);
> +        if (reconPic->m_picCsp != X265_CSP_I400)
> +        {
> +            width  >>= m_hChromaShift;
> +            height >>= m_vChromaShift;
> +            stride = reconPic->m_strideC;
> +
> +            updateCRC(reconPic->getCbAddr(cuAddr),
> m_frameEncoder->m_crc[1], height, width, stride);
> +            updateCRC(reconPic->getCrAddr(cuAddr),
> m_frameEncoder->m_crc[2], height, width, stride);
> +        }
>      }
>      else if (m_param->decodedPictureHashSEI == 3)
>      {
> @@ -295,13 +313,16 @@
>          if (!row)
>              m_frameEncoder->m_checksum[0] = m_frameEncoder->m_checksum[1]
> = m_frameEncoder->m_checksum[2] = 0;
>          updateChecksum(reconPic->m_picOrg[0],
> m_frameEncoder->m_checksum[0], height, width, stride, row, cuHeight);
> -        width  >>= m_hChromaShift;
> -        height >>= m_vChromaShift;
> -        stride = reconPic->m_strideC;
> -        cuHeight >>= m_vChromaShift;
> -
> -        updateChecksum(reconPic->m_picOrg[1],
> m_frameEncoder->m_checksum[1], height, width, stride, row, cuHeight);
> -        updateChecksum(reconPic->m_picOrg[2],
> m_frameEncoder->m_checksum[2], height, width, stride, row, cuHeight);
> +        if (reconPic->m_picCsp != X265_CSP_I400)
> +        {
> +            width  >>= m_hChromaShift;
> +            height >>= m_vChromaShift;
> +            stride = reconPic->m_strideC;
> +            cuHeight >>= m_vChromaShift;
> +
> +            updateChecksum(reconPic->m_picOrg[1],
> m_frameEncoder->m_checksum[1], height, width, stride, row, cuHeight);
> +            updateChecksum(reconPic->m_picOrg[2],
> m_frameEncoder->m_checksum[2], height, width, stride, row, cuHeight);
> +        }
>      }
>
>      if (ATOMIC_INC(&m_frameEncoder->m_completionCount) == 2 *
> (int)m_frameEncoder->m_numRows)
> @@ -415,15 +436,18 @@
>
>      primitives.cu[size].copy_pp(dst, reconPic->m_stride, src,
> fencPic->m_stride);
>
> -    pixel* dstCb = reconPic->getCbAddr(cuAddr, absPartIdx);
> -    pixel* srcCb = fencPic->getCbAddr(cuAddr, absPartIdx);
> +    int csp = fencPic->m_picCsp;
> +    if (csp != X265_CSP_I400)
> +    {
> +        pixel* dstCb = reconPic->getCbAddr(cuAddr, absPartIdx);
> +        pixel* srcCb = fencPic->getCbAddr(cuAddr, absPartIdx);
>
> -    pixel* dstCr = reconPic->getCrAddr(cuAddr, absPartIdx);
> -    pixel* srcCr = fencPic->getCrAddr(cuAddr, absPartIdx);
> +        pixel* dstCr = reconPic->getCrAddr(cuAddr, absPartIdx);
> +        pixel* srcCr = fencPic->getCrAddr(cuAddr, absPartIdx);
>
> -    int csp = fencPic->m_picCsp;
> -    primitives.chroma[csp].cu[size].copy_pp(dstCb, reconPic->m_strideC,
> srcCb, fencPic->m_strideC);
> -    primitives.chroma[csp].cu[size].copy_pp(dstCr, reconPic->m_strideC,
> srcCr, fencPic->m_strideC);
> +        primitives.chroma[csp].cu[size].copy_pp(dstCb,
> reconPic->m_strideC, srcCb, fencPic->m_strideC);
> +        primitives.chroma[csp].cu[size].copy_pp(dstCr,
> reconPic->m_strideC, srcCr, fencPic->m_strideC);
> +    }
>  }
>
>  /* Original YUV restoration for CU in lossless coding */
> diff -r f8b8ebdc5457 -r 5602b4bc1fec source/encoder/sao.cpp
> --- a/source/encoder/sao.cpp    Mon Sep 28 14:34:41 2015 +0530
> +++ b/source/encoder/sao.cpp    Mon Sep 21 11:40:18 2015 -0500
> @@ -106,9 +106,15 @@
>  bool SAO::create(x265_param* param)
>  {
>      m_param = param;
> -    m_hChromaShift = CHROMA_H_SHIFT(param->internalCsp);
> -    m_vChromaShift = CHROMA_V_SHIFT(param->internalCsp);
> -
> +    if (param->internalCsp != X265_CSP_I400)
> +    {
> +        m_hChromaShift = CHROMA_H_SHIFT(param->internalCsp);
> +        m_vChromaShift = CHROMA_V_SHIFT(param->internalCsp);
> +        m_numPlanes = 3;
> +    }
> +    else
> +        m_numPlanes = 1;
> +
>      m_numCuInWidth =  (m_param->sourceWidth + g_maxCUSize - 1) /
> g_maxCUSize;
>      m_numCuInHeight = (m_param->sourceHeight + g_maxCUSize - 1) /
> g_maxCUSize;
>
> @@ -224,7 +230,7 @@
>      }
>
>      saoParam->bSaoFlag[0] = true;
> -    saoParam->bSaoFlag[1] = true;
> +    saoParam->bSaoFlag[1] = (m_numPlanes > 1);
>
>      m_numNoSao[0] = 0; // Luma
>      m_numNoSao[1] = 0; // Chroma
> @@ -1132,7 +1138,7 @@
>              m_entropyCoder.codeSaoMerge(0);
>          m_entropyCoder.store(m_rdContexts.temp);
>          // reset stats Y, Cb, Cr
> -        for (int plane = 0; plane < 3; plane++)
> +        for (int plane = 0; plane < m_numPlanes; plane++)
>          {
>              for (int j = 0; j < MAX_NUM_SAO_TYPE; j++)
>              {
> @@ -1161,7 +1167,8 @@
>
>          saoComponentParamDist(saoParam, addr, addrUp, addrLeft,
> &mergeSaoParam[0][0], mergeDist);
>
> -        sao2ChromaParamDist(saoParam, addr, addrUp, addrLeft,
> mergeSaoParam, mergeDist);
> +        if (m_numPlanes > 1)
> +            sao2ChromaParamDist(saoParam, addr, addrUp, addrLeft,
> mergeSaoParam, mergeDist);
>
>          if (saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1])
>          {
> @@ -1172,7 +1179,7 @@
>                  m_entropyCoder.codeSaoMerge(0);
>              if (allowMerge[1])
>                  m_entropyCoder.codeSaoMerge(0);
> -            for (int plane = 0; plane < 3; plane++)
> +            for (int plane = 0; plane < m_numPlanes; plane++)
>              {
>                  if (saoParam->bSaoFlag[plane > 0])
>                      m_entropyCoder.codeSaoOffset(saoParam->ctuParam[plane][addr], plane);
> @@ -1202,7 +1209,7 @@
>                      SaoMergeMode mergeMode = mergeIdx ? SAO_MERGE_UP :
> SAO_MERGE_LEFT;
>                      bestCost = mergeCost;
>                      m_entropyCoder.store(m_rdContexts.temp);
> -                    for (int plane = 0; plane < 3; plane++)
> +                    for (int plane = 0; plane < m_numPlanes; plane++)
>                      {
>                          mergeSaoParam[plane][mergeIdx].mergeMode =
> mergeMode;
>                          if (saoParam->bSaoFlag[plane > 0])
> diff -r f8b8ebdc5457 -r 5602b4bc1fec source/encoder/sao.h
> --- a/source/encoder/sao.h      Mon Sep 28 14:34:41 2015 +0530
> +++ b/source/encoder/sao.h      Mon Sep 21 11:40:18 2015 -0500
> @@ -85,6 +85,7 @@
>
>      int         m_numCuInWidth;
>      int         m_numCuInHeight;
> +    int         m_numPlanes;
>      int         m_hChromaShift;
>      int         m_vChromaShift;
>
> diff -r f8b8ebdc5457 -r 5602b4bc1fec source/encoder/search.cpp
> --- a/source/encoder/search.cpp Mon Sep 28 14:34:41 2015 +0530
> +++ b/source/encoder/search.cpp Mon Sep 21 11:40:18 2015 -0500
> @@ -1169,7 +1169,8 @@
>
>      intraMode.initCosts();
>      intraMode.lumaDistortion += estIntraPredQT(intraMode, cuGeom,
> tuDepthRange, sharedModes);
> -    intraMode.chromaDistortion += estIntraPredChromaQT(intraMode, cuGeom,
> sharedChromaModes);
> +    if (m_csp != X265_CSP_I400)
> +        intraMode.chromaDistortion += estIntraPredChromaQT(intraMode,
> cuGeom, sharedChromaModes);
>      intraMode.distortion += intraMode.lumaDistortion +
> intraMode.chromaDistortion;
>
>      m_entropyCoder.resetBits();
> @@ -2499,9 +2500,14 @@
>      // Luma
>      int part = partitionFromLog2Size(cu.m_log2CUSize[0]);
>      interMode.lumaDistortion = primitives.cu[part].sse_pp(fencYuv->m_buf[0],
> fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
> -    // Chroma
> -    interMode.chromaDistortion = m_rdCost.scaleChromaDist(1,
> primitives.chroma[m_csp].cu[part].sse_pp(fencYuv->m_buf[1],
> fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
> -    interMode.chromaDistortion += m_rdCost.scaleChromaDist(2,
> primitives.chroma[m_csp].cu[part].sse_pp(fencYuv->m_buf[2],
> fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
> +    if (m_csp != X265_CSP_I400)
> +    {
> +        // Chroma
> +        interMode.chromaDistortion = m_rdCost.scaleChromaDist(1,
> primitives.chroma[m_csp].cu[part].sse_pp(fencYuv->m_buf[1],
> fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
> +        interMode.chromaDistortion += m_rdCost.scaleChromaDist(2,
> primitives.chroma[m_csp].cu[part].sse_pp(fencYuv->m_buf[2],
> fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
> +    }
> +    else
> +        interMode.chromaDistortion = 0;
>      interMode.distortion = interMode.lumaDistortion +
> interMode.chromaDistortion;
>
>      m_entropyCoder.load(m_rqt[depth].cur);
> @@ -2553,9 +2559,12 @@
>      if (!tqBypass)
>      {
>          sse_ret_t cbf0Dist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0],
> fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
> -        cbf0Dist += m_rdCost.scaleChromaDist(1,
> primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1],
> predYuv->m_csize, predYuv->m_buf[1], predYuv->m_csize));
> -        cbf0Dist += m_rdCost.scaleChromaDist(2,
> primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2],
> predYuv->m_csize, predYuv->m_buf[2], predYuv->m_csize));
> -
> +        if (m_csp != X265_CSP_I400)
> +        {
> +            cbf0Dist += m_rdCost.scaleChromaDist(1,
> primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1],
> predYuv->m_csize, predYuv->m_buf[1], predYuv->m_csize));
> +            cbf0Dist += m_rdCost.scaleChromaDist(2,
> primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2],
> predYuv->m_csize, predYuv->m_buf[2], predYuv->m_csize));
> +        }
> +
>          /* Consider the RD cost of not signaling any residual */
>          m_entropyCoder.load(m_rqt[depth].cur);
>          m_entropyCoder.resetBits();
> @@ -2624,8 +2633,14 @@
>
>      // update with clipped distortion and cost (qp estimation loop uses
> unclipped values)
>      sse_ret_t bestLumaDist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0],
> fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
> -    sse_ret_t bestChromaDist = m_rdCost.scaleChromaDist(1,
> primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1],
> fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
> -    bestChromaDist += m_rdCost.scaleChromaDist(2,
> primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2],
> fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
> +    sse_ret_t bestChromaDist;
> +    if (m_csp != X265_CSP_I400)
> +    {
> +        bestChromaDist = m_rdCost.scaleChromaDist(1,
> primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1],
> fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
> +        bestChromaDist += m_rdCost.scaleChromaDist(2,
> primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2],
> fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
> +    }
> +    else
> +        bestChromaDist = 0;
>      if (m_rdCost.m_psyRd)
>          interMode.psyEnergy = m_rdCost.psyCost(sizeIdx,
> fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
>      interMode.resEnergy = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0],
> fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
> @@ -2798,15 +2813,22 @@
>      X265_CHECK(bCheckFull || bCheckSplit, "check-full or check-split must
> be set\n");
>
>      uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
> -    bool bCodeChroma = true;
> +    bool bCodeChroma;
>      uint32_t tuDepthC = tuDepth;
> -    if (log2TrSizeC < 2)
> +
> +    if (m_csp != X265_CSP_I400)
>      {
> -        X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && tuDepth,
> "invalid tuDepth\n");
> -        log2TrSizeC = 2;
> -        tuDepthC--;
> -        bCodeChroma = !(absPartIdx & 3);
> +        bCodeChroma = true;
> +        if (log2TrSizeC < 2)
> +        {
> +            X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 &&
> tuDepth, "invalid tuDepth\n");
> +            log2TrSizeC = 2;
> +            tuDepthC--;
> +            bCodeChroma = !(absPartIdx & 3);
> +        }
>      }
> +    else
> +        bCodeChroma = false;
>
>      // code full block
>      Cost fullCost;
> @@ -3383,15 +3405,22 @@
>      const uint32_t qtLayer = log2TrSize - 2;
>
>      uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
> -    bool bCodeChroma = true;
> +    bool bCodeChroma;
>      uint32_t tuDepthC = tuDepth;
> -    if (log2TrSizeC < 2)
> +
> +    if (m_csp != X265_CSP_I400)
>      {
> -        X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && tuDepth,
> "invalid tuDepth\n");
> -        log2TrSizeC = 2;
> -        tuDepthC--;
> -        bCodeChroma = !(absPartIdx & 3);
> +        bCodeChroma = true;
> +        if (log2TrSizeC < 2)
> +        {
> +            X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 &&
> tuDepth, "invalid tuDepth\n");
> +            log2TrSizeC = 2;
> +            tuDepthC--;
> +            bCodeChroma = !(absPartIdx & 3);
> +        }
>      }
> +    else
> +        bCodeChroma = false;
>
>      m_rqt[qtLayer].resiQtYuv.copyPartToPartLuma(resiYuv, absPartIdx,
> log2TrSize);
>
> diff -r f8b8ebdc5457 -r 5602b4bc1fec source/encoder/slicetype.cpp
> --- a/source/encoder/slicetype.cpp      Mon Sep 28 14:34:41 2015 +0530
> +++ b/source/encoder/slicetype.cpp      Mon Sep 21 11:40:18 2015 -0500
> @@ -74,17 +74,18 @@
>  uint32_t LookaheadTLD::acEnergyCu(Frame* curFrame, uint32_t blockX,
> uint32_t blockY, int csp)
>  {
>      intptr_t stride = curFrame->m_fencPic->m_stride;
> -    intptr_t cStride = curFrame->m_fencPic->m_strideC;
>      intptr_t blockOffsetLuma = blockX + (blockY * stride);
> -    int hShift = CHROMA_H_SHIFT(csp);
> -    int vShift = CHROMA_V_SHIFT(csp);
> -    intptr_t blockOffsetChroma = (blockX >> hShift) + ((blockY >> vShift)
> * cStride);
>
> -    uint32_t var;
> -
> -    var  = acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[0] +
> blockOffsetLuma, stride, 0, csp);
> -    var += acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[1] +
> blockOffsetChroma, cStride, 1, csp);
> -    var += acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[2] +
> blockOffsetChroma, cStride, 2, csp);
> +    uint32_t var = acEnergyPlane(curFrame,
> curFrame->m_fencPic->m_picOrg[0] + blockOffsetLuma, stride, 0, csp);
> +    if (csp != X265_CSP_I400)
> +    {
> +        intptr_t cStride = curFrame->m_fencPic->m_strideC;
> +        int hShift = CHROMA_H_SHIFT(csp);
> +        int vShift = CHROMA_V_SHIFT(csp);
> +        intptr_t blockOffsetChroma = (blockX >> hShift) + ((blockY >>
> vShift) * cStride);
> +        var += acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[1] +
> blockOffsetChroma, cStride, 1, csp);
> +        var += acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[2] +
> blockOffsetChroma, cStride, 2, csp);
> +    }
>      x265_emms();
>      return var;
>  }
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20151008/6051552e/attachment-0001.html>


More information about the x265-devel mailing list