[x265] [PATCH] add support for Monochrome color space (X265_CSP_I400)

Steve Borho steve at borho.org
Mon Sep 21 20:13:44 CEST 2015


On 09/21, Steve Borho wrote:
> # HG changeset patch
> # User Steve Borho <steve at borho.org>
> # Date 1442853618 18000
> #      Mon Sep 21 11:40:18 2015 -0500
> # Node ID 9eaf43c0d001706bce9ae8d287e3ec104208749f
> # Parent  57f027dfb3088eabef5f442be2a2e38fb0d4052f
> add support for Monochrome color space (X265_CSP_I400)
> 
> This patch was extracted from changes made by Fabrice Bellard for BPG

Note that Fabrice has added monochrome to an x265 fork that is kept
under the BPG source, and this patch back-ports those changes to the
mainline tree under the x265 dual license.

BPG only uses I and P frames, so we will need to do some validation to
determine which features are compatible with 4:0:0, and then disable,
prevent, or fix those features which are not compatible. We'll also
need to add some monochrome clips to the automated tests.

BPG uses monochrome for both greyscale images and for the optional
fourth (alpha) channel. http://bellard.org/bpg/

> diff -r 57f027dfb308 -r 9eaf43c0d001 source/common/deblock.cpp
> --- a/source/common/deblock.cpp	Fri Sep 18 18:53:46 2015 -0500
> +++ b/source/common/deblock.cpp	Mon Sep 21 11:40:18 2015 -0500
> @@ -108,7 +108,7 @@
>      for (uint32_t e = 0; e < numUnits; e += partIdxIncr)
>      {
>          edgeFilterLuma(cu, absPartIdx, depth, dir, e, blockStrength);
> -        if (!((e0 + e) & chromaMask))
> +        if (cu->m_chromaFormat != X265_CSP_I400 && !((e0 + e) & chromaMask))
>              edgeFilterChroma(cu, absPartIdx, depth, dir, e, blockStrength);
>      }
>  }
> diff -r 57f027dfb308 -r 9eaf43c0d001 source/common/frame.cpp
> --- a/source/common/frame.cpp	Fri Sep 18 18:53:46 2015 -0500
> +++ b/source/common/frame.cpp	Mon Sep 21 11:40:18 2015 -0500
> @@ -73,14 +73,20 @@
>           * end of the picture accessing uninitialized pixels */
>          int maxHeight = sps.numCuInHeight * g_maxCUSize;
>          memset(m_reconPic->m_picOrg[0], 0, sizeof(pixel) * m_reconPic->m_stride * maxHeight);
> -        memset(m_reconPic->m_picOrg[1], 0, sizeof(pixel) * m_reconPic->m_strideC * (maxHeight >> m_reconPic->m_vChromaShift));
> -        memset(m_reconPic->m_picOrg[2], 0, sizeof(pixel) * m_reconPic->m_strideC * (maxHeight >> m_reconPic->m_vChromaShift));
> +        if (m_reconPic->m_picCsp != X265_CSP_I400)
> +        {
> +            memset(m_reconPic->m_picOrg[1], 0, sizeof(pixel) * m_reconPic->m_strideC * (maxHeight >> m_reconPic->m_vChromaShift));
> +            memset(m_reconPic->m_picOrg[2], 0, sizeof(pixel) * m_reconPic->m_strideC * (maxHeight >> m_reconPic->m_vChromaShift));
> +        }
>  
>          /* use pre-calculated cu/pu offsets cached in the SPS structure */
> -        m_reconPic->m_cuOffsetC = sps.cuOffsetC;
>          m_reconPic->m_cuOffsetY = sps.cuOffsetY;
> -        m_reconPic->m_buOffsetC = sps.buOffsetC;
>          m_reconPic->m_buOffsetY = sps.buOffsetY;
> +        if (m_reconPic->m_picCsp != X265_CSP_I400)
> +        {
> +            m_reconPic->m_cuOffsetC = sps.cuOffsetC;
> +            m_reconPic->m_buOffsetC = sps.buOffsetC;
> +        }
>      }
>      return ok;
>  }
> diff -r 57f027dfb308 -r 9eaf43c0d001 source/common/param.cpp
> --- a/source/common/param.cpp	Fri Sep 18 18:53:46 2015 -0500
> +++ b/source/common/param.cpp	Mon Sep 21 11:40:18 2015 -0500
> @@ -1069,7 +1069,7 @@
>  
>      CHECK(param->sourceWidth < (int)param->maxCUSize || param->sourceHeight < (int)param->maxCUSize,
>            "Picture size must be at least one CTU");
> -    CHECK(param->internalCsp < X265_CSP_I420 || X265_CSP_I444 < param->internalCsp,
> +    CHECK(param->internalCsp < X265_CSP_I400 || X265_CSP_I444 < param->internalCsp,
>            "Color space must be i420, i422, or i444");
>      CHECK(param->sourceWidth & !!CHROMA_H_SHIFT(param->internalCsp),
>            "Picture width must be an integer multiple of the specified chroma subsampling");
> diff -r 57f027dfb308 -r 9eaf43c0d001 source/common/picyuv.cpp
> --- a/source/common/picyuv.cpp	Fri Sep 18 18:53:46 2015 -0500
> +++ b/source/common/picyuv.cpp	Mon Sep 21 11:40:18 2015 -0500
> @@ -69,12 +69,16 @@
>      int maxHeight = numCuInHeight * g_maxCUSize;
>  
>      CHECKED_MALLOC(m_picBuf[0], pixel, m_stride * (maxHeight + (m_lumaMarginY * 2)));
> -    CHECKED_MALLOC(m_picBuf[1], pixel, m_strideC * ((maxHeight >> m_vChromaShift) + (m_chromaMarginY * 2)));
> -    CHECKED_MALLOC(m_picBuf[2], pixel, m_strideC * ((maxHeight >> m_vChromaShift) + (m_chromaMarginY * 2)));
> +    m_picOrg[0] = m_picBuf[0] + m_lumaMarginY   * m_stride  + m_lumaMarginX;
>  
> -    m_picOrg[0] = m_picBuf[0] + m_lumaMarginY   * m_stride  + m_lumaMarginX;
> -    m_picOrg[1] = m_picBuf[1] + m_chromaMarginY * m_strideC + m_chromaMarginX;
> -    m_picOrg[2] = m_picBuf[2] + m_chromaMarginY * m_strideC + m_chromaMarginX;
> +    if (m_picCsp != X265_CSP_I400)
> +    {
> +        CHECKED_MALLOC(m_picBuf[1], pixel, m_strideC * ((maxHeight >> m_vChromaShift) + (m_chromaMarginY * 2)));
> +        CHECKED_MALLOC(m_picBuf[2], pixel, m_strideC * ((maxHeight >> m_vChromaShift) + (m_chromaMarginY * 2)));
> +        
> +        m_picOrg[1] = m_picBuf[1] + m_chromaMarginY * m_strideC + m_chromaMarginX;
> +        m_picOrg[2] = m_picBuf[2] + m_chromaMarginY * m_strideC + m_chromaMarginX;
> +    }
>  
>      return true;
>  
> @@ -89,24 +93,32 @@
>  {
>      uint32_t numPartitions = 1 << (g_unitSizeDepth * 2);
>      CHECKED_MALLOC(m_cuOffsetY, intptr_t, sps.numCuInWidth * sps.numCuInHeight);
> -    CHECKED_MALLOC(m_cuOffsetC, intptr_t, sps.numCuInWidth * sps.numCuInHeight);
> +    if (m_picCsp != X265_CSP_I400)
> +    {
> +        CHECKED_MALLOC(m_cuOffsetC, intptr_t, sps.numCuInWidth * sps.numCuInHeight);
> +    }
>      for (uint32_t cuRow = 0; cuRow < sps.numCuInHeight; cuRow++)
>      {
>          for (uint32_t cuCol = 0; cuCol < sps.numCuInWidth; cuCol++)
>          {
>              m_cuOffsetY[cuRow * sps.numCuInWidth + cuCol] = m_stride * cuRow * g_maxCUSize + cuCol * g_maxCUSize;
> -            m_cuOffsetC[cuRow * sps.numCuInWidth + cuCol] = m_strideC * cuRow * (g_maxCUSize >> m_vChromaShift) + cuCol * (g_maxCUSize >> m_hChromaShift);
> +            if (m_picCsp != X265_CSP_I400)
> +                m_cuOffsetC[cuRow * sps.numCuInWidth + cuCol] = m_strideC * cuRow * (g_maxCUSize >> m_vChromaShift) + cuCol * (g_maxCUSize >> m_hChromaShift);
>          }
>      }
>  
>      CHECKED_MALLOC(m_buOffsetY, intptr_t, (size_t)numPartitions);
> -    CHECKED_MALLOC(m_buOffsetC, intptr_t, (size_t)numPartitions);
> +    if (m_picCsp != X265_CSP_I400)
> +    {
> +        CHECKED_MALLOC(m_buOffsetC, intptr_t, (size_t)numPartitions);
> +    }
>      for (uint32_t idx = 0; idx < numPartitions; ++idx)
>      {
>          intptr_t x = g_zscanToPelX[idx];
>          intptr_t y = g_zscanToPelY[idx];
>          m_buOffsetY[idx] = m_stride * y + x;
> -        m_buOffsetC[idx] = m_strideC * (y >> m_vChromaShift) + (x >> m_hChromaShift);
> +        if (m_picCsp != X265_CSP_I400)
> +            m_buOffsetC[idx] = m_strideC * (y >> m_vChromaShift) + (x >> m_hChromaShift);
>      }
>  
>      return true;
> @@ -167,8 +179,11 @@
>              int shift = (X265_DEPTH - 8);
>  
>              primitives.planecopy_cp(yChar, pic.stride[0] / sizeof(*yChar), yPixel, m_stride, width, height, shift);
> -            primitives.planecopy_cp(uChar, pic.stride[1] / sizeof(*uChar), uPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift);
> -            primitives.planecopy_cp(vChar, pic.stride[2] / sizeof(*vChar), vPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift);
> +            if (m_picCsp != X265_CSP_I400)
> +            {
> +                primitives.planecopy_cp(uChar, pic.stride[1] / sizeof(*uChar), uPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift);
> +                primitives.planecopy_cp(vChar, pic.stride[2] / sizeof(*vChar), vPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift);
> +            }
>          }
>  #else /* Case for (X265_DEPTH == 8) */
>          // TODO: Does we need this path? may merge into above in future
> @@ -189,15 +204,18 @@
>                  yChar += pic.stride[0] / sizeof(*yChar);
>              }
>  
> -            for (int r = 0; r < height >> m_vChromaShift; r++)
> +            if (m_picCsp != X265_CSP_I400)
>              {
> -                memcpy(uPixel, uChar, (width >> m_hChromaShift) * sizeof(pixel));
> -                memcpy(vPixel, vChar, (width >> m_hChromaShift) * sizeof(pixel));
> +                for (int r = 0; r < height >> m_vChromaShift; r++)
> +                {
> +                    memcpy(uPixel, uChar, (width >> m_hChromaShift) * sizeof(pixel));
> +                    memcpy(vPixel, vChar, (width >> m_hChromaShift) * sizeof(pixel));
>  
> -                uPixel += m_strideC;
> -                vPixel += m_strideC;
> -                uChar += pic.stride[1] / sizeof(*uChar);
> -                vChar += pic.stride[2] / sizeof(*vChar);
> +                    uPixel += m_strideC;
> +                    vPixel += m_strideC;
> +                    uChar += pic.stride[1] / sizeof(*uChar);
> +                    vChar += pic.stride[2] / sizeof(*vChar);
> +                }
>              }
>          }
>  #endif /* (X265_DEPTH > 8) */
> @@ -219,15 +237,21 @@
>          {
>              /* shift right and mask pixels to final size */
>              primitives.planecopy_sp(yShort, pic.stride[0] / sizeof(*yShort), yPixel, m_stride, width, height, shift, mask);
> -            primitives.planecopy_sp(uShort, pic.stride[1] / sizeof(*uShort), uPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift, mask);
> -            primitives.planecopy_sp(vShort, pic.stride[2] / sizeof(*vShort), vPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift, mask);
> +            if (m_picCsp != X265_CSP_I400)
> +            {
> +                primitives.planecopy_sp(uShort, pic.stride[1] / sizeof(*uShort), uPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift, mask);
> +                primitives.planecopy_sp(vShort, pic.stride[2] / sizeof(*vShort), vPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift, mask);
> +            }
>          }
>          else /* Case for (pic.bitDepth <= X265_DEPTH) */
>          {
>              /* shift left and mask pixels to final size */
>              primitives.planecopy_sp_shl(yShort, pic.stride[0] / sizeof(*yShort), yPixel, m_stride, width, height, shift, mask);
> -            primitives.planecopy_sp_shl(uShort, pic.stride[1] / sizeof(*uShort), uPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift, mask);
> -            primitives.planecopy_sp_shl(vShort, pic.stride[2] / sizeof(*vShort), vPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift, mask);
> +            if (m_picCsp != X265_CSP_I400)
> +            {
> +                primitives.planecopy_sp_shl(uShort, pic.stride[1] / sizeof(*uShort), uPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift, mask);
> +                primitives.planecopy_sp_shl(vShort, pic.stride[2] / sizeof(*vShort), vPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift, mask);
> +            }
>          }
>      }
>  
> @@ -249,30 +273,36 @@
>          Y += m_stride;
>      }
>  
> -    for (int r = 0; r < height >> m_vChromaShift; r++)
> +    if (m_picCsp != X265_CSP_I400)
>      {
> -        for (int x = 0; x < padx >> m_hChromaShift; x++)
> +        for (int r = 0; r < height >> m_vChromaShift; r++)
>          {
> -            U[(width >> m_hChromaShift) + x] = U[(width >> m_hChromaShift) - 1];
> -            V[(width >> m_hChromaShift) + x] = V[(width >> m_hChromaShift) - 1];
> +            for (int x = 0; x < padx >> m_hChromaShift; x++)
> +            {
> +                U[(width >> m_hChromaShift) + x] = U[(width >> m_hChromaShift) - 1];
> +                V[(width >> m_hChromaShift) + x] = V[(width >> m_hChromaShift) - 1];
> +            }
> +
> +            U += m_strideC;
> +            V += m_strideC;
>          }
> -
> -        U += m_strideC;
> -        V += m_strideC;
>      }
>  
>      /* extend the bottom if height was not multiple of the minimum CU size */
>      Y = m_picOrg[0] + (height - 1) * m_stride;
> -    U = m_picOrg[1] + ((height >> m_vChromaShift) - 1) * m_strideC;
> -    V = m_picOrg[2] + ((height >> m_vChromaShift) - 1) * m_strideC;
>  
>      for (int i = 1; i <= pady; i++)
>          memcpy(Y + i * m_stride, Y, (width + padx) * sizeof(pixel));
>  
> -    for (int j = 1; j <= pady >> m_vChromaShift; j++)
> +    if (m_picCsp != X265_CSP_I400)
>      {
> -        memcpy(U + j * m_strideC, U, ((width + padx) >> m_hChromaShift) * sizeof(pixel));
> -        memcpy(V + j * m_strideC, V, ((width + padx) >> m_hChromaShift) * sizeof(pixel));
> +        U = m_picOrg[1] + ((height >> m_vChromaShift) - 1) * m_strideC;
> +        V = m_picOrg[2] + ((height >> m_vChromaShift) - 1) * m_strideC;
> +        for (int j = 1; j <= pady >> m_vChromaShift; j++)
> +        {
> +            memcpy(U + j * m_strideC, U, ((width + padx) >> m_hChromaShift) * sizeof(pixel));
> +            memcpy(V + j * m_strideC, V, ((width + padx) >> m_hChromaShift) * sizeof(pixel));
> +        }
>      }
>  }
>  
> diff -r 57f027dfb308 -r 9eaf43c0d001 source/common/predict.cpp
> --- a/source/common/predict.cpp	Fri Sep 18 18:53:46 2015 -0500
> +++ b/source/common/predict.cpp	Mon Sep 21 11:40:18 2015 -0500
> @@ -84,6 +84,14 @@
>      int refIdx0 = cu.m_refIdx[0][pu.puAbsPartIdx];
>      int refIdx1 = cu.m_refIdx[1][pu.puAbsPartIdx];
>  
> +    /* XXX: disable chroma at a higher level ? */
> +    if (cu.m_chromaFormat == X265_CSP_I400)
> +    {
> +        bChroma = false;
> +        if (!bLuma)
> +            return;
> +    }
> +    
>      if (cu.m_slice->isInterP())
>      {
>          /* P Slice */
> @@ -98,7 +106,8 @@
>  
>          if (cu.m_slice->m_pps->bUseWeightPred && wp0->bPresentFlag)
>          {
> -            for (int plane = 0; plane < 3; plane++)
> +            int numPlanes = cu.m_chromaFormat == X265_CSP_I400 ? 1 : 3;
> +            for (int plane = 0; plane < numPlanes; plane++)
>              {
>                  wv0[plane].w      = wp0[plane].inputWeight;
>                  wv0[plane].offset = wp0[plane].inputOffset * (1 << (X265_DEPTH - 8));
> @@ -135,13 +144,14 @@
>  
>          if (cu.m_slice->m_pps->bUseWeightedBiPred)
>          {
> +            int numPlanes = cu.m_chromaFormat == X265_CSP_I400 ? 1 : 3;
> +            
>              pwp0 = refIdx0 >= 0 ? cu.m_slice->m_weightPredTable[0][refIdx0] : NULL;
>              pwp1 = refIdx1 >= 0 ? cu.m_slice->m_weightPredTable[1][refIdx1] : NULL;
> -
>              if (pwp0 && pwp1 && (pwp0->bPresentFlag || pwp1->bPresentFlag))
>              {
>                  /* biprediction weighting */
> -                for (int plane = 0; plane < 3; plane++)
> +                for (int plane = 0; plane < numPlanes; plane++)
>                  {
>                      wv0[plane].w = pwp0[plane].inputWeight;
>                      wv0[plane].o = pwp0[plane].inputOffset * (1 << (X265_DEPTH - 8));
> @@ -158,7 +168,7 @@
>              {
>                  /* uniprediction weighting, always outputs to wv0 */
>                  const WeightParam* pwp = (refIdx0 >= 0) ? pwp0 : pwp1;
> -                for (int plane = 0; plane < 3; plane++)
> +                for (int plane = 0; plane < numPlanes; plane++)
>                  {
>                      wv0[plane].w = pwp[plane].inputWeight;
>                      wv0[plane].offset = pwp[plane].inputOffset * (1 << (X265_DEPTH - 8));
> diff -r 57f027dfb308 -r 9eaf43c0d001 source/common/shortyuv.cpp
> --- a/source/common/shortyuv.cpp	Fri Sep 18 18:53:46 2015 -0500
> +++ b/source/common/shortyuv.cpp	Mon Sep 21 11:40:18 2015 -0500
> @@ -40,19 +40,26 @@
>  bool ShortYuv::create(uint32_t size, int csp)
>  {
>      m_csp = csp;
> -    m_hChromaShift = CHROMA_H_SHIFT(csp);
> -    m_vChromaShift = CHROMA_V_SHIFT(csp);
> -
>      m_size = size;
> -    m_csize = size >> m_hChromaShift;
>  
>      size_t sizeL = size * size;
> -    size_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift);
> -    X265_CHECK((sizeC & 15) == 0, "invalid size");
> -
> -    CHECKED_MALLOC(m_buf[0], int16_t, sizeL + sizeC * 2);
> -    m_buf[1] = m_buf[0] + sizeL;
> -    m_buf[2] = m_buf[0] + sizeL + sizeC;
> +    if (m_csp != X265_CSP_I400)
> +    {
> +        m_hChromaShift = CHROMA_H_SHIFT(csp);
> +        m_vChromaShift = CHROMA_V_SHIFT(csp);
> +        m_csize = size >> m_hChromaShift;
> +        size_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift);
> +        X265_CHECK((sizeC & 15) == 0, "invalid size");
> +        CHECKED_MALLOC(m_buf[0], int16_t, sizeL + sizeC * 2);
> +        m_buf[1] = m_buf[0] + sizeL;
> +        m_buf[2] = m_buf[0] + sizeL + sizeC;
> +    }
> +    else
> +    {
> +        m_csize = 0;
> +        CHECKED_MALLOC(m_buf[0], int16_t, sizeL);
> +    }
> +    
>      return true;
>  
>  fail:
> @@ -67,16 +74,22 @@
>  void ShortYuv::clear()
>  {
>      memset(m_buf[0], 0, (m_size  * m_size) *  sizeof(int16_t));
> -    memset(m_buf[1], 0, (m_csize * m_csize) * sizeof(int16_t));
> -    memset(m_buf[2], 0, (m_csize * m_csize) * sizeof(int16_t));
> +    if (m_csp != X265_CSP_I400)
> +    {
> +        memset(m_buf[1], 0, (m_csize * m_csize) * sizeof(int16_t));
> +        memset(m_buf[2], 0, (m_csize * m_csize) * sizeof(int16_t));
> +    }
>  }
>  
>  void ShortYuv::subtract(const Yuv& srcYuv0, const Yuv& srcYuv1, uint32_t log2Size)
>  {
>      const int sizeIdx = log2Size - 2;
>      primitives.cu[sizeIdx].sub_ps(m_buf[0], m_size, srcYuv0.m_buf[0], srcYuv1.m_buf[0], srcYuv0.m_size, srcYuv1.m_size);
> -    primitives.chroma[m_csp].cu[sizeIdx].sub_ps(m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize);
> -    primitives.chroma[m_csp].cu[sizeIdx].sub_ps(m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize);
> +    if (m_csp != X265_CSP_I400)
> +    {
> +        primitives.chroma[m_csp].cu[sizeIdx].sub_ps(m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize);
> +        primitives.chroma[m_csp].cu[sizeIdx].sub_ps(m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize);
> +    }
>  }
>  
>  void ShortYuv::copyPartToPartLuma(ShortYuv& dstYuv, uint32_t absPartIdx, uint32_t log2Size) const
> diff -r 57f027dfb308 -r 9eaf43c0d001 source/common/yuv.cpp
> --- a/source/common/yuv.cpp	Fri Sep 18 18:53:46 2015 -0500
> +++ b/source/common/yuv.cpp	Mon Sep 21 11:40:18 2015 -0500
> @@ -83,10 +83,13 @@
>      pixel* dstY = dstPic.getLumaAddr(cuAddr, absPartIdx);
>      primitives.cu[m_part].copy_pp(dstY, dstPic.m_stride, m_buf[0], m_size);
>  
> -    pixel* dstU = dstPic.getCbAddr(cuAddr, absPartIdx);
> -    pixel* dstV = dstPic.getCrAddr(cuAddr, absPartIdx);
> -    primitives.chroma[m_csp].cu[m_part].copy_pp(dstU, dstPic.m_strideC, m_buf[1], m_csize);
> -    primitives.chroma[m_csp].cu[m_part].copy_pp(dstV, dstPic.m_strideC, m_buf[2], m_csize);
> +    if (m_csp != X265_CSP_I400)
> +    {
> +        pixel* dstU = dstPic.getCbAddr(cuAddr, absPartIdx);
> +        pixel* dstV = dstPic.getCrAddr(cuAddr, absPartIdx);
> +        primitives.chroma[m_csp].cu[m_part].copy_pp(dstU, dstPic.m_strideC, m_buf[1], m_csize);
> +        primitives.chroma[m_csp].cu[m_part].copy_pp(dstV, dstPic.m_strideC, m_buf[2], m_csize);
> +    }
>  }
>  
>  void Yuv::copyFromPicYuv(const PicYuv& srcPic, uint32_t cuAddr, uint32_t absPartIdx)
> @@ -94,10 +97,13 @@
>      const pixel* srcY = srcPic.getLumaAddr(cuAddr, absPartIdx);
>      primitives.cu[m_part].copy_pp(m_buf[0], m_size, srcY, srcPic.m_stride);
>  
> -    const pixel* srcU = srcPic.getCbAddr(cuAddr, absPartIdx);
> -    const pixel* srcV = srcPic.getCrAddr(cuAddr, absPartIdx);
> -    primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize, srcU, srcPic.m_strideC);
> -    primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize, srcV, srcPic.m_strideC);
> +    if (m_csp != X265_CSP_I400)
> +    {
> +        const pixel* srcU = srcPic.getCbAddr(cuAddr, absPartIdx);
> +        const pixel* srcV = srcPic.getCrAddr(cuAddr, absPartIdx);
> +        primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize, srcU, srcPic.m_strideC);
> +        primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize, srcV, srcPic.m_strideC);
> +    }
>  }
>  
>  void Yuv::copyFromYuv(const Yuv& srcYuv)
> @@ -105,8 +111,11 @@
>      X265_CHECK(m_size >= srcYuv.m_size, "invalid size\n");
>  
>      primitives.cu[m_part].copy_pp(m_buf[0], m_size, srcYuv.m_buf[0], srcYuv.m_size);
> -    primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize, srcYuv.m_buf[1], srcYuv.m_csize);
> -    primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize, srcYuv.m_buf[2], srcYuv.m_csize);
> +    if (m_csp != X265_CSP_I400)
> +    {
> +        primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize, srcYuv.m_buf[1], srcYuv.m_csize);
> +        primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize, srcYuv.m_buf[2], srcYuv.m_csize);
> +    }
>  }
>  
>  /* This version is intended for use by ME, which required FENC_STRIDE for luma fenc pixels */
> @@ -131,10 +140,13 @@
>      pixel* dstY = dstYuv.getLumaAddr(absPartIdx);
>      primitives.cu[m_part].copy_pp(dstY, dstYuv.m_size, m_buf[0], m_size);
>  
> -    pixel* dstU = dstYuv.getCbAddr(absPartIdx);
> -    pixel* dstV = dstYuv.getCrAddr(absPartIdx);
> -    primitives.chroma[m_csp].cu[m_part].copy_pp(dstU, dstYuv.m_csize, m_buf[1], m_csize);
> -    primitives.chroma[m_csp].cu[m_part].copy_pp(dstV, dstYuv.m_csize, m_buf[2], m_csize);
> +    if (m_csp != X265_CSP_I400)
> +    {
> +        pixel* dstU = dstYuv.getCbAddr(absPartIdx);
> +        pixel* dstV = dstYuv.getCrAddr(absPartIdx);
> +        primitives.chroma[m_csp].cu[m_part].copy_pp(dstU, dstYuv.m_csize, m_buf[1], m_csize);
> +        primitives.chroma[m_csp].cu[m_part].copy_pp(dstV, dstYuv.m_csize, m_buf[2], m_csize);
> +    }
>  }
>  
>  void Yuv::copyPartToYuv(Yuv& dstYuv, uint32_t absPartIdx) const
> @@ -143,19 +155,25 @@
>      pixel* dstY = dstYuv.m_buf[0];
>      primitives.cu[dstYuv.m_part].copy_pp(dstY, dstYuv.m_size, srcY, m_size);
>  
> -    pixel* srcU = m_buf[1] + getChromaAddrOffset(absPartIdx);
> -    pixel* srcV = m_buf[2] + getChromaAddrOffset(absPartIdx);
> -    pixel* dstU = dstYuv.m_buf[1];
> -    pixel* dstV = dstYuv.m_buf[2];
> -    primitives.chroma[m_csp].cu[dstYuv.m_part].copy_pp(dstU, dstYuv.m_csize, srcU, m_csize);
> -    primitives.chroma[m_csp].cu[dstYuv.m_part].copy_pp(dstV, dstYuv.m_csize, srcV, m_csize);
> +    if (m_csp != X265_CSP_I400)
> +    {
> +        pixel* srcU = m_buf[1] + getChromaAddrOffset(absPartIdx);
> +        pixel* srcV = m_buf[2] + getChromaAddrOffset(absPartIdx);
> +        pixel* dstU = dstYuv.m_buf[1];
> +        pixel* dstV = dstYuv.m_buf[2];
> +        primitives.chroma[m_csp].cu[dstYuv.m_part].copy_pp(dstU, dstYuv.m_csize, srcU, m_csize);
> +        primitives.chroma[m_csp].cu[dstYuv.m_part].copy_pp(dstV, dstYuv.m_csize, srcV, m_csize);
> +    }
>  }
>  
>  void Yuv::addClip(const Yuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t log2SizeL)
>  {
>      primitives.cu[log2SizeL - 2].add_ps(m_buf[0], m_size, srcYuv0.m_buf[0], srcYuv1.m_buf[0], srcYuv0.m_size, srcYuv1.m_size);
> -    primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps(m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize);
> -    primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps(m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize);
> +    if (m_csp != X265_CSP_I400)
> +    {
> +        primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps(m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize);
> +        primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps(m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize);
> +    }
>  }
>  
>  void Yuv::addAvg(const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t absPartIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma)
> diff -r 57f027dfb308 -r 9eaf43c0d001 source/encoder/entropy.cpp
> --- a/source/encoder/entropy.cpp	Fri Sep 18 18:53:46 2015 -0500
> +++ b/source/encoder/entropy.cpp	Mon Sep 21 11:40:18 2015 -0500
> @@ -429,7 +429,8 @@
>      if (slice.m_sps->bUseSAO)
>      {
>          WRITE_FLAG(saoParam->bSaoFlag[0], "slice_sao_luma_flag");
> -        WRITE_FLAG(saoParam->bSaoFlag[1], "slice_sao_chroma_flag");
> +        if (slice.m_sps->chromaFormatIdc != X265_CSP_I400)
> +            WRITE_FLAG(saoParam->bSaoFlag[1], "slice_sao_chroma_flag");
>      }
>  
>      // check if numRefIdx match the defaults (1, hard-coded in PPS). If not, override
> @@ -722,19 +723,23 @@
>      uint32_t hChromaShift = cu.m_hChromaShift;
>      uint32_t vChromaShift = cu.m_vChromaShift;
>      bool bSmallChroma = (log2CurSize - hChromaShift) < 2;
> -    if (!curDepth || !bSmallChroma)
> +    
> +    if (cu.m_chromaFormat != X265_CSP_I400)
>      {
> -        if (!curDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_U, curDepth - 1))
> -            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, curDepth, !subdiv);
> -        if (!curDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_V, curDepth - 1))
> -            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, curDepth, !subdiv);
> +        if (!curDepth || !bSmallChroma)
> +        {
> +            if (!curDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_U, curDepth - 1))
> +                codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, curDepth, !subdiv);
> +            if (!curDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_V, curDepth - 1))
> +                codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, curDepth, !subdiv);
> +        }
> +        else
> +        {
> +            X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_U, curDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_U, curDepth - 1), "chroma xform size match failure\n");
> +            X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_V, curDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_V, curDepth - 1), "chroma xform size match failure\n");
> +        }
>      }
> -    else
> -    {
> -        X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_U, curDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_U, curDepth - 1), "chroma xform size match failure\n");
> -        X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_V, curDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_V, curDepth - 1), "chroma xform size match failure\n");
> -    }
> -
> +    
>      if (subdiv)
>      {
>          --log2CurSize;
> @@ -781,6 +786,9 @@
>              return;
>      }
>  
> +    if (cu.m_chromaFormat == X265_CSP_I400)
> +        return;
> +
>      if (bSmallChroma)
>      {
>          if ((absPartIdx & 3) != 3)
> @@ -1010,7 +1018,7 @@
>  void Entropy::codePredWeightTable(const Slice& slice)
>  {
>      const WeightParam *wp;
> -    bool            bChroma      = true; // 4:0:0 not yet supported
> +    bool            bChroma      = (slice.m_sps->chromaFormatIdc != X265_CSP_I400);
>      bool            bDenomCoded  = false;
>      int             numRefDirs   = slice.m_sliceType == B_SLICE ? 2 : 1;
>      uint32_t        totalSignalledWeightFlags = 0;
> diff -r 57f027dfb308 -r 9eaf43c0d001 source/encoder/framefilter.cpp
> --- a/source/encoder/framefilter.cpp	Fri Sep 18 18:53:46 2015 -0500
> +++ b/source/encoder/framefilter.cpp	Mon Sep 21 11:40:18 2015 -0500
> @@ -167,25 +167,31 @@
>  
>      // Border extend Left and Right
>      primitives.extendRowBorder(reconPic->getLumaAddr(lineStartCUAddr), reconPic->m_stride, reconPic->m_picWidth, realH, reconPic->m_lumaMarginX);
> -    primitives.extendRowBorder(reconPic->getCbAddr(lineStartCUAddr), reconPic->m_strideC, reconPic->m_picWidth >> m_hChromaShift, realH >> m_vChromaShift, reconPic->m_chromaMarginX);
> -    primitives.extendRowBorder(reconPic->getCrAddr(lineStartCUAddr), reconPic->m_strideC, reconPic->m_picWidth >> m_hChromaShift, realH >> m_vChromaShift, reconPic->m_chromaMarginX);
> +    if (reconPic->m_picCsp != X265_CSP_I400)
> +    {
> +        primitives.extendRowBorder(reconPic->getCbAddr(lineStartCUAddr), reconPic->m_strideC, reconPic->m_picWidth >> m_hChromaShift, realH >> m_vChromaShift, reconPic->m_chromaMarginX);
> +        primitives.extendRowBorder(reconPic->getCrAddr(lineStartCUAddr), reconPic->m_strideC, reconPic->m_picWidth >> m_hChromaShift, realH >> m_vChromaShift, reconPic->m_chromaMarginX);
> +    }
>  
>      // Border extend Top
>      if (!row)
>      {
>          const intptr_t stride = reconPic->m_stride;
> -        const intptr_t strideC = reconPic->m_strideC;
>          pixel *pixY = reconPic->getLumaAddr(lineStartCUAddr) - reconPic->m_lumaMarginX;
> -        pixel *pixU = reconPic->getCbAddr(lineStartCUAddr) - reconPic->m_chromaMarginX;
> -        pixel *pixV = reconPic->getCrAddr(lineStartCUAddr) - reconPic->m_chromaMarginX;
>  
>          for (uint32_t y = 0; y < reconPic->m_lumaMarginY; y++)
>              memcpy(pixY - (y + 1) * stride, pixY, stride * sizeof(pixel));
>  
> -        for (uint32_t y = 0; y < reconPic->m_chromaMarginY; y++)
> +        if (reconPic->m_picCsp != X265_CSP_I400)
>          {
> -            memcpy(pixU - (y + 1) * strideC, pixU, strideC * sizeof(pixel));
> -            memcpy(pixV - (y + 1) * strideC, pixV, strideC * sizeof(pixel));
> +            const intptr_t strideC = reconPic->m_strideC;
> +            pixel *pixU = reconPic->getCbAddr(lineStartCUAddr) - reconPic->m_chromaMarginX;
> +            pixel *pixV = reconPic->getCrAddr(lineStartCUAddr) - reconPic->m_chromaMarginX;
> +            for (uint32_t y = 0; y < reconPic->m_chromaMarginY; y++)
> +            {
> +                memcpy(pixU - (y + 1) * strideC, pixU, strideC * sizeof(pixel));
> +                memcpy(pixV - (y + 1) * strideC, pixV, strideC * sizeof(pixel));
> +            }
>          }
>      }
>  
> @@ -193,17 +199,20 @@
>      if (row == m_numRows - 1)
>      {
>          const intptr_t stride = reconPic->m_stride;
> -        const intptr_t strideC = reconPic->m_strideC;
>          pixel *pixY = reconPic->getLumaAddr(lineStartCUAddr) - reconPic->m_lumaMarginX + (realH - 1) * stride;
> -        pixel *pixU = reconPic->getCbAddr(lineStartCUAddr) - reconPic->m_chromaMarginX + ((realH >> m_vChromaShift) - 1) * strideC;
> -        pixel *pixV = reconPic->getCrAddr(lineStartCUAddr) - reconPic->m_chromaMarginX + ((realH >> m_vChromaShift) - 1) * strideC;
>          for (uint32_t y = 0; y < reconPic->m_lumaMarginY; y++)
>              memcpy(pixY + (y + 1) * stride, pixY, stride * sizeof(pixel));
>  
> -        for (uint32_t y = 0; y < reconPic->m_chromaMarginY; y++)
> +        if (reconPic->m_picCsp != X265_CSP_I400)
>          {
> -            memcpy(pixU + (y + 1) * strideC, pixU, strideC * sizeof(pixel));
> -            memcpy(pixV + (y + 1) * strideC, pixV, strideC * sizeof(pixel));
> +            const intptr_t strideC = reconPic->m_strideC;
> +            pixel *pixU = reconPic->getCbAddr(lineStartCUAddr) - reconPic->m_chromaMarginX + ((realH >> m_vChromaShift) - 1) * strideC;
> +            pixel *pixV = reconPic->getCrAddr(lineStartCUAddr) - reconPic->m_chromaMarginX + ((realH >> m_vChromaShift) - 1) * strideC;
> +            for (uint32_t y = 0; y < reconPic->m_chromaMarginY; y++)
> +            {
> +                memcpy(pixU + (y + 1) * strideC, pixU, strideC * sizeof(pixel));
> +                memcpy(pixV + (y + 1) * strideC, pixV, strideC * sizeof(pixel));
> +            }
>          }
>      }
>  
> @@ -220,16 +229,19 @@
>          uint32_t height = getCUHeight(row);
>  
>          uint64_t ssdY = computeSSD(fencPic->getLumaAddr(cuAddr), reconPic->getLumaAddr(cuAddr), stride, width, height);
> -        height >>= m_vChromaShift;
> -        width  >>= m_hChromaShift;
> -        stride = reconPic->m_strideC;
> +        m_frameEncoder->m_SSDY += ssdY;
> +        if (reconPic->m_picCsp != X265_CSP_I400)
> +        {
> +            height >>= m_vChromaShift;
> +            width  >>= m_hChromaShift;
> +            stride = reconPic->m_strideC;
> +            
> +            uint64_t ssdU = computeSSD(fencPic->getCbAddr(cuAddr), reconPic->getCbAddr(cuAddr), stride, width, height);
> +            uint64_t ssdV = computeSSD(fencPic->getCrAddr(cuAddr), reconPic->getCrAddr(cuAddr), stride, width, height);
>  
> -        uint64_t ssdU = computeSSD(fencPic->getCbAddr(cuAddr), reconPic->getCbAddr(cuAddr), stride, width, height);
> -        uint64_t ssdV = computeSSD(fencPic->getCrAddr(cuAddr), reconPic->getCrAddr(cuAddr), stride, width, height);
> -
> -        m_frameEncoder->m_SSDY += ssdY;
> -        m_frameEncoder->m_SSDU += ssdU;
> -        m_frameEncoder->m_SSDV += ssdV;
> +            m_frameEncoder->m_SSDU += ssdU;
> +            m_frameEncoder->m_SSDV += ssdV;
> +        }
>      }
>      if (m_param->bEnableSsim && m_ssimBuf)
>      {
> @@ -264,12 +276,15 @@
>          }
>  
>          updateMD5Plane(m_frameEncoder->m_state[0], reconPic->getLumaAddr(cuAddr), width, height, stride);
> -        width  >>= m_hChromaShift;
> -        height >>= m_vChromaShift;
> -        stride = reconPic->m_strideC;
> -
> -        updateMD5Plane(m_frameEncoder->m_state[1], reconPic->getCbAddr(cuAddr), width, height, stride);
> -        updateMD5Plane(m_frameEncoder->m_state[2], reconPic->getCrAddr(cuAddr), width, height, stride);
> +        if (reconPic->m_picCsp != X265_CSP_I400)
> +        {
> +            width  >>= m_hChromaShift;
> +            height >>= m_vChromaShift;
> +            stride = reconPic->m_strideC;
> +            
> +            updateMD5Plane(m_frameEncoder->m_state[1], reconPic->getCbAddr(cuAddr), width, height, stride);
> +            updateMD5Plane(m_frameEncoder->m_state[2], reconPic->getCrAddr(cuAddr), width, height, stride);
> +        }
>      }
>      else if (m_param->decodedPictureHashSEI == 2)
>      {
> @@ -279,12 +294,15 @@
>          if (!row)
>              m_frameEncoder->m_crc[0] = m_frameEncoder->m_crc[1] = m_frameEncoder->m_crc[2] = 0xffff;
>          updateCRC(reconPic->getLumaAddr(cuAddr), m_frameEncoder->m_crc[0], height, width, stride);
> -        width  >>= m_hChromaShift;
> -        height >>= m_vChromaShift;
> -        stride = reconPic->m_strideC;
> -
> -        updateCRC(reconPic->getCbAddr(cuAddr), m_frameEncoder->m_crc[1], height, width, stride);
> -        updateCRC(reconPic->getCrAddr(cuAddr), m_frameEncoder->m_crc[2], height, width, stride);
> +        if (reconPic->m_picCsp != X265_CSP_I400)
> +        {
> +            width  >>= m_hChromaShift;
> +            height >>= m_vChromaShift;
> +            stride = reconPic->m_strideC;
> +            
> +            updateCRC(reconPic->getCbAddr(cuAddr), m_frameEncoder->m_crc[1], height, width, stride);
> +            updateCRC(reconPic->getCrAddr(cuAddr), m_frameEncoder->m_crc[2], height, width, stride);
> +        }
>      }
>      else if (m_param->decodedPictureHashSEI == 3)
>      {
> @@ -295,13 +313,16 @@
>          if (!row)
>              m_frameEncoder->m_checksum[0] = m_frameEncoder->m_checksum[1] = m_frameEncoder->m_checksum[2] = 0;
>          updateChecksum(reconPic->m_picOrg[0], m_frameEncoder->m_checksum[0], height, width, stride, row, cuHeight);
> -        width  >>= m_hChromaShift;
> -        height >>= m_vChromaShift;
> -        stride = reconPic->m_strideC;
> -        cuHeight >>= m_vChromaShift;
> -
> -        updateChecksum(reconPic->m_picOrg[1], m_frameEncoder->m_checksum[1], height, width, stride, row, cuHeight);
> -        updateChecksum(reconPic->m_picOrg[2], m_frameEncoder->m_checksum[2], height, width, stride, row, cuHeight);
> +        if (reconPic->m_picCsp != X265_CSP_I400)
> +        {
> +            width  >>= m_hChromaShift;
> +            height >>= m_vChromaShift;
> +            stride = reconPic->m_strideC;
> +            cuHeight >>= m_vChromaShift;
> +            
> +            updateChecksum(reconPic->m_picOrg[1], m_frameEncoder->m_checksum[1], height, width, stride, row, cuHeight);
> +            updateChecksum(reconPic->m_picOrg[2], m_frameEncoder->m_checksum[2], height, width, stride, row, cuHeight);
> +        }
>      }
>  
>      if (ATOMIC_INC(&m_frameEncoder->m_completionCount) == 2 * (int)m_frameEncoder->m_numRows)
> @@ -415,15 +436,18 @@
>  
>      primitives.cu[size].copy_pp(dst, reconPic->m_stride, src, fencPic->m_stride);
>     
> -    pixel* dstCb = reconPic->getCbAddr(cuAddr, absPartIdx);
> -    pixel* srcCb = fencPic->getCbAddr(cuAddr, absPartIdx);
> +    int csp = fencPic->m_picCsp;
> +    if (csp != X265_CSP_I400)
> +    {
> +        pixel* dstCb = reconPic->getCbAddr(cuAddr, absPartIdx);
> +        pixel* srcCb = fencPic->getCbAddr(cuAddr, absPartIdx);
>  
> -    pixel* dstCr = reconPic->getCrAddr(cuAddr, absPartIdx);
> -    pixel* srcCr = fencPic->getCrAddr(cuAddr, absPartIdx);
> +        pixel* dstCr = reconPic->getCrAddr(cuAddr, absPartIdx);
> +        pixel* srcCr = fencPic->getCrAddr(cuAddr, absPartIdx);
>  
> -    int csp = fencPic->m_picCsp;
> -    primitives.chroma[csp].cu[size].copy_pp(dstCb, reconPic->m_strideC, srcCb, fencPic->m_strideC);
> -    primitives.chroma[csp].cu[size].copy_pp(dstCr, reconPic->m_strideC, srcCr, fencPic->m_strideC);
> +        primitives.chroma[csp].cu[size].copy_pp(dstCb, reconPic->m_strideC, srcCb, fencPic->m_strideC);
> +        primitives.chroma[csp].cu[size].copy_pp(dstCr, reconPic->m_strideC, srcCr, fencPic->m_strideC);
> +    }
>  }
>  
>  /* Original YUV restoration for CU in lossless coding */
> diff -r 57f027dfb308 -r 9eaf43c0d001 source/encoder/sao.cpp
> --- a/source/encoder/sao.cpp	Fri Sep 18 18:53:46 2015 -0500
> +++ b/source/encoder/sao.cpp	Mon Sep 21 11:40:18 2015 -0500
> @@ -106,9 +106,15 @@
>  bool SAO::create(x265_param* param)
>  {
>      m_param = param;
> -    m_hChromaShift = CHROMA_H_SHIFT(param->internalCsp);
> -    m_vChromaShift = CHROMA_V_SHIFT(param->internalCsp);
> -
> +    if (param->internalCsp != X265_CSP_I400)
> +    {
> +        m_hChromaShift = CHROMA_H_SHIFT(param->internalCsp);
> +        m_vChromaShift = CHROMA_V_SHIFT(param->internalCsp);
> +        m_numPlanes = 3;
> +    }
> +    else
> +        m_numPlanes = 1;
> +  
>      m_numCuInWidth =  (m_param->sourceWidth + g_maxCUSize - 1) / g_maxCUSize;
>      m_numCuInHeight = (m_param->sourceHeight + g_maxCUSize - 1) / g_maxCUSize;
>  
> @@ -224,7 +230,7 @@
>      }
>  
>      saoParam->bSaoFlag[0] = true;
> -    saoParam->bSaoFlag[1] = true;
> +    saoParam->bSaoFlag[1] = (m_numPlanes > 1);
>  
>      m_numNoSao[0] = 0; // Luma
>      m_numNoSao[1] = 0; // Chroma
> @@ -1132,7 +1138,7 @@
>              m_entropyCoder.codeSaoMerge(0);
>          m_entropyCoder.store(m_rdContexts.temp);
>          // reset stats Y, Cb, Cr
> -        for (int plane = 0; plane < 3; plane++)
> +        for (int plane = 0; plane < m_numPlanes; plane++)
>          {
>              for (int j = 0; j < MAX_NUM_SAO_TYPE; j++)
>              {
> @@ -1161,7 +1167,8 @@
>  
>          saoComponentParamDist(saoParam, addr, addrUp, addrLeft, &mergeSaoParam[0][0], mergeDist);
>  
> -        sao2ChromaParamDist(saoParam, addr, addrUp, addrLeft, mergeSaoParam, mergeDist);
> +        if (m_numPlanes > 1)
> +            sao2ChromaParamDist(saoParam, addr, addrUp, addrLeft, mergeSaoParam, mergeDist);
>  
>          if (saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1])
>          {
> @@ -1172,7 +1179,7 @@
>                  m_entropyCoder.codeSaoMerge(0);
>              if (allowMerge[1])
>                  m_entropyCoder.codeSaoMerge(0);
> -            for (int plane = 0; plane < 3; plane++)
> +            for (int plane = 0; plane < m_numPlanes; plane++)
>              {
>                  if (saoParam->bSaoFlag[plane > 0])
>                      m_entropyCoder.codeSaoOffset(saoParam->ctuParam[plane][addr], plane);
> @@ -1202,7 +1209,7 @@
>                      SaoMergeMode mergeMode = mergeIdx ? SAO_MERGE_UP : SAO_MERGE_LEFT;
>                      bestCost = mergeCost;
>                      m_entropyCoder.store(m_rdContexts.temp);
> -                    for (int plane = 0; plane < 3; plane++)
> +                    for (int plane = 0; plane < m_numPlanes; plane++)
>                      {
>                          mergeSaoParam[plane][mergeIdx].mergeMode = mergeMode;
>                          if (saoParam->bSaoFlag[plane > 0])
> diff -r 57f027dfb308 -r 9eaf43c0d001 source/encoder/sao.h
> --- a/source/encoder/sao.h	Fri Sep 18 18:53:46 2015 -0500
> +++ b/source/encoder/sao.h	Mon Sep 21 11:40:18 2015 -0500
> @@ -85,6 +85,7 @@
>  
>      int         m_numCuInWidth;
>      int         m_numCuInHeight;
> +    int         m_numPlanes;
>      int         m_hChromaShift;
>      int         m_vChromaShift;
>  
> diff -r 57f027dfb308 -r 9eaf43c0d001 source/encoder/search.cpp
> --- a/source/encoder/search.cpp	Fri Sep 18 18:53:46 2015 -0500
> +++ b/source/encoder/search.cpp	Mon Sep 21 11:40:18 2015 -0500
> @@ -1168,7 +1168,8 @@
>  
>      intraMode.initCosts();
>      intraMode.lumaDistortion += estIntraPredQT(intraMode, cuGeom, tuDepthRange, sharedModes);
> -    intraMode.chromaDistortion += estIntraPredChromaQT(intraMode, cuGeom, sharedChromaModes);
> +    if (m_csp != X265_CSP_I400)
> +        intraMode.chromaDistortion += estIntraPredChromaQT(intraMode, cuGeom, sharedChromaModes);
>      intraMode.distortion += intraMode.lumaDistortion + intraMode.chromaDistortion;
>  
>      m_entropyCoder.resetBits();
> @@ -2496,9 +2497,14 @@
>      // Luma
>      int part = partitionFromLog2Size(cu.m_log2CUSize[0]);
>      interMode.lumaDistortion = primitives.cu[part].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
> -    // Chroma
> -    interMode.chromaDistortion = m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[part].sse_pp(fencYuv->m_buf[1], fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
> -    interMode.chromaDistortion += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[part].sse_pp(fencYuv->m_buf[2], fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
> +    if (m_csp != X265_CSP_I400)
> +    {
> +        // Chroma
> +        interMode.chromaDistortion = m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[part].sse_pp(fencYuv->m_buf[1], fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
> +        interMode.chromaDistortion += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[part].sse_pp(fencYuv->m_buf[2], fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
> +    }
> +    else
> +        interMode.chromaDistortion = 0;
>      interMode.distortion = interMode.lumaDistortion + interMode.chromaDistortion;
>  
>      m_entropyCoder.load(m_rqt[depth].cur);
> @@ -2550,9 +2556,12 @@
>      if (!tqBypass)
>      {
>          sse_ret_t cbf0Dist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
> -        cbf0Dist += m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], predYuv->m_csize, predYuv->m_buf[1], predYuv->m_csize));
> -        cbf0Dist += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2], predYuv->m_csize, predYuv->m_buf[2], predYuv->m_csize));
> -
> +        if (m_csp != X265_CSP_I400)
> +        {
> +            cbf0Dist += m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], predYuv->m_csize, predYuv->m_buf[1], predYuv->m_csize));
> +            cbf0Dist += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2], predYuv->m_csize, predYuv->m_buf[2], predYuv->m_csize));
> +        }
> +        
>          /* Consider the RD cost of not signaling any residual */
>          m_entropyCoder.load(m_rqt[depth].cur);
>          m_entropyCoder.resetBits();
> @@ -2621,8 +2630,14 @@
>  
>      // update with clipped distortion and cost (qp estimation loop uses unclipped values)
>      sse_ret_t bestLumaDist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
> -    sse_ret_t bestChromaDist = m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
> -    bestChromaDist += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2], fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
> +    sse_ret_t bestChromaDist;
> +    if (m_csp != X265_CSP_I400)
> +    {
> +        bestChromaDist = m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
> +        bestChromaDist += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2], fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
> +    }
> +    else
> +        bestChromaDist = 0;
>      if (m_rdCost.m_psyRd)
>          interMode.psyEnergy = m_rdCost.psyCost(sizeIdx, fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
>      interMode.resEnergy = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
> @@ -2795,15 +2810,22 @@
>      X265_CHECK(bCheckFull || bCheckSplit, "check-full or check-split must be set\n");
>  
>      uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
> -    bool bCodeChroma = true;
> +    bool bCodeChroma;
>      uint32_t tuDepthC = tuDepth;
> -    if (log2TrSizeC < 2)
> +    
> +    if (m_csp != X265_CSP_I400)
>      {
> -        X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && tuDepth, "invalid tuDepth\n");
> -        log2TrSizeC = 2;
> -        tuDepthC--;
> -        bCodeChroma = !(absPartIdx & 3);
> +        bCodeChroma = true;
> +        if (log2TrSizeC < 2)
> +        {
> +            X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && tuDepth, "invalid tuDepth\n");
> +            log2TrSizeC = 2;
> +            tuDepthC--;
> +            bCodeChroma = !(absPartIdx & 3);
> +        }
>      }
> +    else
> +        bCodeChroma = false;
>  
>      // code full block
>      Cost fullCost;
> @@ -3380,15 +3402,22 @@
>      const uint32_t qtLayer = log2TrSize - 2;
>  
>      uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
> -    bool bCodeChroma = true;
> +    bool bCodeChroma;
>      uint32_t tuDepthC = tuDepth;
> -    if (log2TrSizeC < 2)
> +
> +    if (m_csp != X265_CSP_I400)
>      {
> -        X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && tuDepth, "invalid tuDepth\n");
> -        log2TrSizeC = 2;
> -        tuDepthC--;
> -        bCodeChroma = !(absPartIdx & 3);
> +        bCodeChroma = true;
> +        if (log2TrSizeC < 2)
> +        {
> +            X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && tuDepth, "invalid tuDepth\n");
> +            log2TrSizeC = 2;
> +            tuDepthC--;
> +            bCodeChroma = !(absPartIdx & 3);
> +        }
>      }
> +    else
> +        bCodeChroma = false;
>  
>      m_rqt[qtLayer].resiQtYuv.copyPartToPartLuma(resiYuv, absPartIdx, log2TrSize);
>  
> diff -r 57f027dfb308 -r 9eaf43c0d001 source/encoder/slicetype.cpp
> --- a/source/encoder/slicetype.cpp	Fri Sep 18 18:53:46 2015 -0500
> +++ b/source/encoder/slicetype.cpp	Mon Sep 21 11:40:18 2015 -0500
> @@ -74,17 +74,18 @@
>  uint32_t LookaheadTLD::acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, int csp)
>  {
>      intptr_t stride = curFrame->m_fencPic->m_stride;
> -    intptr_t cStride = curFrame->m_fencPic->m_strideC;
>      intptr_t blockOffsetLuma = blockX + (blockY * stride);
> -    int hShift = CHROMA_H_SHIFT(csp);
> -    int vShift = CHROMA_V_SHIFT(csp);
> -    intptr_t blockOffsetChroma = (blockX >> hShift) + ((blockY >> vShift) * cStride);
>  
> -    uint32_t var;
> -
> -    var  = acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[0] + blockOffsetLuma, stride, 0, csp);
> -    var += acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[1] + blockOffsetChroma, cStride, 1, csp);
> -    var += acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[2] + blockOffsetChroma, cStride, 2, csp);
> +    uint32_t var = acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[0] + blockOffsetLuma, stride, 0, csp);
> +    if (csp != X265_CSP_I400)
> +    {
> +        intptr_t cStride = curFrame->m_fencPic->m_strideC;
> +        int hShift = CHROMA_H_SHIFT(csp);
> +        int vShift = CHROMA_V_SHIFT(csp);
> +        intptr_t blockOffsetChroma = (blockX >> hShift) + ((blockY >> vShift) * cStride);
> +        var += acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[1] + blockOffsetChroma, cStride, 1, csp);
> +        var += acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[2] + blockOffsetChroma, cStride, 2, csp);
> +    }
>      x265_emms();
>      return var;
>  }

-- 
Steve Borho



More information about the x265-devel mailing list