[x265] [PATCH REBASE] add support for Monochrome color space (X265_CSP_I400)

Steve Borho steve at borho.org
Wed Oct 7 18:50:12 CEST 2015


# HG changeset patch
# User Steve Borho <steve at borho.org>
# Date 1442853618 18000
#      Mon Sep 21 11:40:18 2015 -0500
# Node ID 5602b4bc1fec175e7c5bf14ef18978a50e3bc07f
# Parent  f8b8ebdc54578e6735216d8b9abce5ba80c05bd8
add support for Monochrome color space (X265_CSP_I400)

This patch was extracted from changes made by Fabrice Bellard for BPG (Better Portable Graphics).

diff -r f8b8ebdc5457 -r 5602b4bc1fec source/common/deblock.cpp
--- a/source/common/deblock.cpp	Mon Sep 28 14:34:41 2015 +0530
+++ b/source/common/deblock.cpp	Mon Sep 21 11:40:18 2015 -0500
@@ -109,7 +109,7 @@
     for (uint32_t e = 0; e < numUnits; e += partIdxIncr)
     {
         edgeFilterLuma(cu, absPartIdx, depth, dir, e, blockStrength);
-        if (!((e0 + e) & chromaMask))
+        if (cu->m_chromaFormat != X265_CSP_I400 && !((e0 + e) & chromaMask))
             edgeFilterChroma(cu, absPartIdx, depth, dir, e, blockStrength);
     }
 }
diff -r f8b8ebdc5457 -r 5602b4bc1fec source/common/frame.cpp
--- a/source/common/frame.cpp	Mon Sep 28 14:34:41 2015 +0530
+++ b/source/common/frame.cpp	Mon Sep 21 11:40:18 2015 -0500
@@ -73,14 +73,20 @@
          * end of the picture accessing uninitialized pixels */
         int maxHeight = sps.numCuInHeight * g_maxCUSize;
         memset(m_reconPic->m_picOrg[0], 0, sizeof(pixel) * m_reconPic->m_stride * maxHeight);
-        memset(m_reconPic->m_picOrg[1], 0, sizeof(pixel) * m_reconPic->m_strideC * (maxHeight >> m_reconPic->m_vChromaShift));
-        memset(m_reconPic->m_picOrg[2], 0, sizeof(pixel) * m_reconPic->m_strideC * (maxHeight >> m_reconPic->m_vChromaShift));
+        if (m_reconPic->m_picCsp != X265_CSP_I400)
+        {
+            memset(m_reconPic->m_picOrg[1], 0, sizeof(pixel) * m_reconPic->m_strideC * (maxHeight >> m_reconPic->m_vChromaShift));
+            memset(m_reconPic->m_picOrg[2], 0, sizeof(pixel) * m_reconPic->m_strideC * (maxHeight >> m_reconPic->m_vChromaShift));
+        }
 
         /* use pre-calculated cu/pu offsets cached in the SPS structure */
-        m_reconPic->m_cuOffsetC = sps.cuOffsetC;
         m_reconPic->m_cuOffsetY = sps.cuOffsetY;
-        m_reconPic->m_buOffsetC = sps.buOffsetC;
         m_reconPic->m_buOffsetY = sps.buOffsetY;
+        if (m_reconPic->m_picCsp != X265_CSP_I400)
+        {
+            m_reconPic->m_cuOffsetC = sps.cuOffsetC;
+            m_reconPic->m_buOffsetC = sps.buOffsetC;
+        }
     }
     return ok;
 }
diff -r f8b8ebdc5457 -r 5602b4bc1fec source/common/param.cpp
--- a/source/common/param.cpp	Mon Sep 28 14:34:41 2015 +0530
+++ b/source/common/param.cpp	Mon Sep 21 11:40:18 2015 -0500
@@ -1069,7 +1069,7 @@
 
     CHECK(param->sourceWidth < (int)param->maxCUSize || param->sourceHeight < (int)param->maxCUSize,
           "Picture size must be at least one CTU");
-    CHECK(param->internalCsp < X265_CSP_I420 || X265_CSP_I444 < param->internalCsp,
+    CHECK(param->internalCsp < X265_CSP_I400 || X265_CSP_I444 < param->internalCsp,
           "Color space must be i420, i422, or i444");
     CHECK(param->sourceWidth & !!CHROMA_H_SHIFT(param->internalCsp),
           "Picture width must be an integer multiple of the specified chroma subsampling");
diff -r f8b8ebdc5457 -r 5602b4bc1fec source/common/picyuv.cpp
--- a/source/common/picyuv.cpp	Mon Sep 28 14:34:41 2015 +0530
+++ b/source/common/picyuv.cpp	Mon Sep 21 11:40:18 2015 -0500
@@ -70,12 +70,16 @@
     int maxHeight = numCuInHeight * g_maxCUSize;
 
     CHECKED_MALLOC(m_picBuf[0], pixel, m_stride * (maxHeight + (m_lumaMarginY * 2)));
-    CHECKED_MALLOC(m_picBuf[1], pixel, m_strideC * ((maxHeight >> m_vChromaShift) + (m_chromaMarginY * 2)));
-    CHECKED_MALLOC(m_picBuf[2], pixel, m_strideC * ((maxHeight >> m_vChromaShift) + (m_chromaMarginY * 2)));
+    m_picOrg[0] = m_picBuf[0] + m_lumaMarginY   * m_stride  + m_lumaMarginX;
 
-    m_picOrg[0] = m_picBuf[0] + m_lumaMarginY   * m_stride  + m_lumaMarginX;
-    m_picOrg[1] = m_picBuf[1] + m_chromaMarginY * m_strideC + m_chromaMarginX;
-    m_picOrg[2] = m_picBuf[2] + m_chromaMarginY * m_strideC + m_chromaMarginX;
+    if (m_picCsp != X265_CSP_I400)
+    {
+        CHECKED_MALLOC(m_picBuf[1], pixel, m_strideC * ((maxHeight >> m_vChromaShift) + (m_chromaMarginY * 2)));
+        CHECKED_MALLOC(m_picBuf[2], pixel, m_strideC * ((maxHeight >> m_vChromaShift) + (m_chromaMarginY * 2)));
+        
+        m_picOrg[1] = m_picBuf[1] + m_chromaMarginY * m_strideC + m_chromaMarginX;
+        m_picOrg[2] = m_picBuf[2] + m_chromaMarginY * m_strideC + m_chromaMarginX;
+    }
 
     return true;
 
@@ -90,24 +94,32 @@
 {
     uint32_t numPartitions = 1 << (g_unitSizeDepth * 2);
     CHECKED_MALLOC(m_cuOffsetY, intptr_t, sps.numCuInWidth * sps.numCuInHeight);
-    CHECKED_MALLOC(m_cuOffsetC, intptr_t, sps.numCuInWidth * sps.numCuInHeight);
+    if (m_picCsp != X265_CSP_I400)
+    {
+        CHECKED_MALLOC(m_cuOffsetC, intptr_t, sps.numCuInWidth * sps.numCuInHeight);
+    }
     for (uint32_t cuRow = 0; cuRow < sps.numCuInHeight; cuRow++)
     {
         for (uint32_t cuCol = 0; cuCol < sps.numCuInWidth; cuCol++)
         {
             m_cuOffsetY[cuRow * sps.numCuInWidth + cuCol] = m_stride * cuRow * g_maxCUSize + cuCol * g_maxCUSize;
-            m_cuOffsetC[cuRow * sps.numCuInWidth + cuCol] = m_strideC * cuRow * (g_maxCUSize >> m_vChromaShift) + cuCol * (g_maxCUSize >> m_hChromaShift);
+            if (m_picCsp != X265_CSP_I400)
+                m_cuOffsetC[cuRow * sps.numCuInWidth + cuCol] = m_strideC * cuRow * (g_maxCUSize >> m_vChromaShift) + cuCol * (g_maxCUSize >> m_hChromaShift);
         }
     }
 
     CHECKED_MALLOC(m_buOffsetY, intptr_t, (size_t)numPartitions);
-    CHECKED_MALLOC(m_buOffsetC, intptr_t, (size_t)numPartitions);
+    if (m_picCsp != X265_CSP_I400)
+    {
+        CHECKED_MALLOC(m_buOffsetC, intptr_t, (size_t)numPartitions);
+    }
     for (uint32_t idx = 0; idx < numPartitions; ++idx)
     {
         intptr_t x = g_zscanToPelX[idx];
         intptr_t y = g_zscanToPelY[idx];
         m_buOffsetY[idx] = m_stride * y + x;
-        m_buOffsetC[idx] = m_strideC * (y >> m_vChromaShift) + (x >> m_hChromaShift);
+        if (m_picCsp != X265_CSP_I400)
+            m_buOffsetC[idx] = m_strideC * (y >> m_vChromaShift) + (x >> m_hChromaShift);
     }
 
     return true;
@@ -168,8 +180,11 @@
             int shift = (X265_DEPTH - 8);
 
             primitives.planecopy_cp(yChar, pic.stride[0] / sizeof(*yChar), yPixel, m_stride, width, height, shift);
-            primitives.planecopy_cp(uChar, pic.stride[1] / sizeof(*uChar), uPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift);
-            primitives.planecopy_cp(vChar, pic.stride[2] / sizeof(*vChar), vPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift);
+            if (m_picCsp != X265_CSP_I400)
+            {
+                primitives.planecopy_cp(uChar, pic.stride[1] / sizeof(*uChar), uPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift);
+                primitives.planecopy_cp(vChar, pic.stride[2] / sizeof(*vChar), vPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift);
+            }
         }
 #else /* Case for (X265_DEPTH == 8) */
         // TODO: Does we need this path? may merge into above in future
@@ -190,15 +205,18 @@
                 yChar += pic.stride[0] / sizeof(*yChar);
             }
 
-            for (int r = 0; r < height >> m_vChromaShift; r++)
+            if (m_picCsp != X265_CSP_I400)
             {
-                memcpy(uPixel, uChar, (width >> m_hChromaShift) * sizeof(pixel));
-                memcpy(vPixel, vChar, (width >> m_hChromaShift) * sizeof(pixel));
+                for (int r = 0; r < height >> m_vChromaShift; r++)
+                {
+                    memcpy(uPixel, uChar, (width >> m_hChromaShift) * sizeof(pixel));
+                    memcpy(vPixel, vChar, (width >> m_hChromaShift) * sizeof(pixel));
 
-                uPixel += m_strideC;
-                vPixel += m_strideC;
-                uChar += pic.stride[1] / sizeof(*uChar);
-                vChar += pic.stride[2] / sizeof(*vChar);
+                    uPixel += m_strideC;
+                    vPixel += m_strideC;
+                    uChar += pic.stride[1] / sizeof(*uChar);
+                    vChar += pic.stride[2] / sizeof(*vChar);
+                }
             }
         }
 #endif /* (X265_DEPTH > 8) */
@@ -220,15 +238,21 @@
         {
             /* shift right and mask pixels to final size */
             primitives.planecopy_sp(yShort, pic.stride[0] / sizeof(*yShort), yPixel, m_stride, width, height, shift, mask);
-            primitives.planecopy_sp(uShort, pic.stride[1] / sizeof(*uShort), uPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift, mask);
-            primitives.planecopy_sp(vShort, pic.stride[2] / sizeof(*vShort), vPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift, mask);
+            if (m_picCsp != X265_CSP_I400)
+            {
+                primitives.planecopy_sp(uShort, pic.stride[1] / sizeof(*uShort), uPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift, mask);
+                primitives.planecopy_sp(vShort, pic.stride[2] / sizeof(*vShort), vPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift, mask);
+            }
         }
         else /* Case for (pic.bitDepth <= X265_DEPTH) */
         {
             /* shift left and mask pixels to final size */
             primitives.planecopy_sp_shl(yShort, pic.stride[0] / sizeof(*yShort), yPixel, m_stride, width, height, shift, mask);
-            primitives.planecopy_sp_shl(uShort, pic.stride[1] / sizeof(*uShort), uPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift, mask);
-            primitives.planecopy_sp_shl(vShort, pic.stride[2] / sizeof(*vShort), vPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift, mask);
+            if (m_picCsp != X265_CSP_I400)
+            {
+                primitives.planecopy_sp_shl(uShort, pic.stride[1] / sizeof(*uShort), uPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift, mask);
+                primitives.planecopy_sp_shl(vShort, pic.stride[2] / sizeof(*vShort), vPixel, m_strideC, width >> m_hChromaShift, height >> m_vChromaShift, shift, mask);
+            }
         }
     }
 
@@ -250,30 +274,36 @@
         Y += m_stride;
     }
 
-    for (int r = 0; r < height >> m_vChromaShift; r++)
+    if (m_picCsp != X265_CSP_I400)
     {
-        for (int x = 0; x < padx >> m_hChromaShift; x++)
+        for (int r = 0; r < height >> m_vChromaShift; r++)
         {
-            U[(width >> m_hChromaShift) + x] = U[(width >> m_hChromaShift) - 1];
-            V[(width >> m_hChromaShift) + x] = V[(width >> m_hChromaShift) - 1];
+            for (int x = 0; x < padx >> m_hChromaShift; x++)
+            {
+                U[(width >> m_hChromaShift) + x] = U[(width >> m_hChromaShift) - 1];
+                V[(width >> m_hChromaShift) + x] = V[(width >> m_hChromaShift) - 1];
+            }
+
+            U += m_strideC;
+            V += m_strideC;
         }
-
-        U += m_strideC;
-        V += m_strideC;
     }
 
     /* extend the bottom if height was not multiple of the minimum CU size */
     Y = m_picOrg[0] + (height - 1) * m_stride;
-    U = m_picOrg[1] + ((height >> m_vChromaShift) - 1) * m_strideC;
-    V = m_picOrg[2] + ((height >> m_vChromaShift) - 1) * m_strideC;
 
     for (int i = 1; i <= pady; i++)
         memcpy(Y + i * m_stride, Y, (width + padx) * sizeof(pixel));
 
-    for (int j = 1; j <= pady >> m_vChromaShift; j++)
+    if (m_picCsp != X265_CSP_I400)
     {
-        memcpy(U + j * m_strideC, U, ((width + padx) >> m_hChromaShift) * sizeof(pixel));
-        memcpy(V + j * m_strideC, V, ((width + padx) >> m_hChromaShift) * sizeof(pixel));
+        U = m_picOrg[1] + ((height >> m_vChromaShift) - 1) * m_strideC;
+        V = m_picOrg[2] + ((height >> m_vChromaShift) - 1) * m_strideC;
+        for (int j = 1; j <= pady >> m_vChromaShift; j++)
+        {
+            memcpy(U + j * m_strideC, U, ((width + padx) >> m_hChromaShift) * sizeof(pixel));
+            memcpy(V + j * m_strideC, V, ((width + padx) >> m_hChromaShift) * sizeof(pixel));
+        }
     }
 }
 
diff -r f8b8ebdc5457 -r 5602b4bc1fec source/common/predict.cpp
--- a/source/common/predict.cpp	Mon Sep 28 14:34:41 2015 +0530
+++ b/source/common/predict.cpp	Mon Sep 21 11:40:18 2015 -0500
@@ -85,6 +85,14 @@
     int refIdx0 = cu.m_refIdx[0][pu.puAbsPartIdx];
     int refIdx1 = cu.m_refIdx[1][pu.puAbsPartIdx];
 
+    /* XXX: should chroma be disabled at a higher level instead? */
+    if (cu.m_chromaFormat == X265_CSP_I400)
+    {
+        bChroma = false;
+        if (!bLuma)
+            return;
+    }
+    
     if (cu.m_slice->isInterP())
     {
         /* P Slice */
@@ -99,7 +107,8 @@
 
         if (cu.m_slice->m_pps->bUseWeightPred && wp0->bPresentFlag)
         {
-            for (int plane = 0; plane < 3; plane++)
+            int numPlanes = cu.m_chromaFormat == X265_CSP_I400 ? 1 : 3;
+            for (int plane = 0; plane < numPlanes; plane++)
             {
                 wv0[plane].w      = wp0[plane].inputWeight;
                 wv0[plane].offset = wp0[plane].inputOffset * (1 << (X265_DEPTH - 8));
@@ -136,13 +145,14 @@
 
         if (cu.m_slice->m_pps->bUseWeightedBiPred)
         {
+            int numPlanes = cu.m_chromaFormat == X265_CSP_I400 ? 1 : 3;
+            
             pwp0 = refIdx0 >= 0 ? cu.m_slice->m_weightPredTable[0][refIdx0] : NULL;
             pwp1 = refIdx1 >= 0 ? cu.m_slice->m_weightPredTable[1][refIdx1] : NULL;
-
             if (pwp0 && pwp1 && (pwp0->bPresentFlag || pwp1->bPresentFlag))
             {
                 /* biprediction weighting */
-                for (int plane = 0; plane < 3; plane++)
+                for (int plane = 0; plane < numPlanes; plane++)
                 {
                     wv0[plane].w = pwp0[plane].inputWeight;
                     wv0[plane].o = pwp0[plane].inputOffset * (1 << (X265_DEPTH - 8));
@@ -159,7 +169,7 @@
             {
                 /* uniprediction weighting, always outputs to wv0 */
                 const WeightParam* pwp = (refIdx0 >= 0) ? pwp0 : pwp1;
-                for (int plane = 0; plane < 3; plane++)
+                for (int plane = 0; plane < numPlanes; plane++)
                 {
                     wv0[plane].w = pwp[plane].inputWeight;
                     wv0[plane].offset = pwp[plane].inputOffset * (1 << (X265_DEPTH - 8));
diff -r f8b8ebdc5457 -r 5602b4bc1fec source/common/shortyuv.cpp
--- a/source/common/shortyuv.cpp	Mon Sep 28 14:34:41 2015 +0530
+++ b/source/common/shortyuv.cpp	Mon Sep 21 11:40:18 2015 -0500
@@ -40,19 +40,26 @@
 bool ShortYuv::create(uint32_t size, int csp)
 {
     m_csp = csp;
-    m_hChromaShift = CHROMA_H_SHIFT(csp);
-    m_vChromaShift = CHROMA_V_SHIFT(csp);
-
     m_size = size;
-    m_csize = size >> m_hChromaShift;
 
     size_t sizeL = size * size;
-    size_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift);
-    X265_CHECK((sizeC & 15) == 0, "invalid size");
-
-    CHECKED_MALLOC(m_buf[0], int16_t, sizeL + sizeC * 2);
-    m_buf[1] = m_buf[0] + sizeL;
-    m_buf[2] = m_buf[0] + sizeL + sizeC;
+    if (m_csp != X265_CSP_I400)
+    {
+        m_hChromaShift = CHROMA_H_SHIFT(csp);
+        m_vChromaShift = CHROMA_V_SHIFT(csp);
+        m_csize = size >> m_hChromaShift;
+        size_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift);
+        X265_CHECK((sizeC & 15) == 0, "invalid size");
+        CHECKED_MALLOC(m_buf[0], int16_t, sizeL + sizeC * 2);
+        m_buf[1] = m_buf[0] + sizeL;
+        m_buf[2] = m_buf[0] + sizeL + sizeC;
+    }
+    else
+    {
+        m_csize = 0;
+        CHECKED_MALLOC(m_buf[0], int16_t, sizeL);
+    }
+    
     return true;
 
 fail:
@@ -67,16 +74,22 @@
 void ShortYuv::clear()
 {
     memset(m_buf[0], 0, (m_size  * m_size) *  sizeof(int16_t));
-    memset(m_buf[1], 0, (m_csize * m_csize) * sizeof(int16_t));
-    memset(m_buf[2], 0, (m_csize * m_csize) * sizeof(int16_t));
+    if (m_csp != X265_CSP_I400)
+    {
+        memset(m_buf[1], 0, (m_csize * m_csize) * sizeof(int16_t));
+        memset(m_buf[2], 0, (m_csize * m_csize) * sizeof(int16_t));
+    }
 }
 
 void ShortYuv::subtract(const Yuv& srcYuv0, const Yuv& srcYuv1, uint32_t log2Size)
 {
     const int sizeIdx = log2Size - 2;
     primitives.cu[sizeIdx].sub_ps(m_buf[0], m_size, srcYuv0.m_buf[0], srcYuv1.m_buf[0], srcYuv0.m_size, srcYuv1.m_size);
-    primitives.chroma[m_csp].cu[sizeIdx].sub_ps(m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize);
-    primitives.chroma[m_csp].cu[sizeIdx].sub_ps(m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize);
+    if (m_csp != X265_CSP_I400)
+    {
+        primitives.chroma[m_csp].cu[sizeIdx].sub_ps(m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize);
+        primitives.chroma[m_csp].cu[sizeIdx].sub_ps(m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize);
+    }
 }
 
 void ShortYuv::copyPartToPartLuma(ShortYuv& dstYuv, uint32_t absPartIdx, uint32_t log2Size) const
diff -r f8b8ebdc5457 -r 5602b4bc1fec source/common/yuv.cpp
--- a/source/common/yuv.cpp	Mon Sep 28 14:34:41 2015 +0530
+++ b/source/common/yuv.cpp	Mon Sep 21 11:40:18 2015 -0500
@@ -84,10 +84,13 @@
     pixel* dstY = dstPic.getLumaAddr(cuAddr, absPartIdx);
     primitives.cu[m_part].copy_pp(dstY, dstPic.m_stride, m_buf[0], m_size);
 
-    pixel* dstU = dstPic.getCbAddr(cuAddr, absPartIdx);
-    pixel* dstV = dstPic.getCrAddr(cuAddr, absPartIdx);
-    primitives.chroma[m_csp].cu[m_part].copy_pp(dstU, dstPic.m_strideC, m_buf[1], m_csize);
-    primitives.chroma[m_csp].cu[m_part].copy_pp(dstV, dstPic.m_strideC, m_buf[2], m_csize);
+    if (m_csp != X265_CSP_I400)
+    {
+        pixel* dstU = dstPic.getCbAddr(cuAddr, absPartIdx);
+        pixel* dstV = dstPic.getCrAddr(cuAddr, absPartIdx);
+        primitives.chroma[m_csp].cu[m_part].copy_pp(dstU, dstPic.m_strideC, m_buf[1], m_csize);
+        primitives.chroma[m_csp].cu[m_part].copy_pp(dstV, dstPic.m_strideC, m_buf[2], m_csize);
+    }
 }
 
 void Yuv::copyFromPicYuv(const PicYuv& srcPic, uint32_t cuAddr, uint32_t absPartIdx)
@@ -95,10 +98,13 @@
     const pixel* srcY = srcPic.getLumaAddr(cuAddr, absPartIdx);
     primitives.cu[m_part].copy_pp(m_buf[0], m_size, srcY, srcPic.m_stride);
 
-    const pixel* srcU = srcPic.getCbAddr(cuAddr, absPartIdx);
-    const pixel* srcV = srcPic.getCrAddr(cuAddr, absPartIdx);
-    primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize, srcU, srcPic.m_strideC);
-    primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize, srcV, srcPic.m_strideC);
+    if (m_csp != X265_CSP_I400)
+    {
+        const pixel* srcU = srcPic.getCbAddr(cuAddr, absPartIdx);
+        const pixel* srcV = srcPic.getCrAddr(cuAddr, absPartIdx);
+        primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize, srcU, srcPic.m_strideC);
+        primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize, srcV, srcPic.m_strideC);
+    }
 }
 
 void Yuv::copyFromYuv(const Yuv& srcYuv)
@@ -106,8 +112,11 @@
     X265_CHECK(m_size >= srcYuv.m_size, "invalid size\n");
 
     primitives.cu[m_part].copy_pp(m_buf[0], m_size, srcYuv.m_buf[0], srcYuv.m_size);
-    primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize, srcYuv.m_buf[1], srcYuv.m_csize);
-    primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize, srcYuv.m_buf[2], srcYuv.m_csize);
+    if (m_csp != X265_CSP_I400)
+    {
+        primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize, srcYuv.m_buf[1], srcYuv.m_csize);
+        primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize, srcYuv.m_buf[2], srcYuv.m_csize);
+    }
 }
 
 /* This version is intended for use by ME, which required FENC_STRIDE for luma fenc pixels */
@@ -132,10 +141,13 @@
     pixel* dstY = dstYuv.getLumaAddr(absPartIdx);
     primitives.cu[m_part].copy_pp(dstY, dstYuv.m_size, m_buf[0], m_size);
 
-    pixel* dstU = dstYuv.getCbAddr(absPartIdx);
-    pixel* dstV = dstYuv.getCrAddr(absPartIdx);
-    primitives.chroma[m_csp].cu[m_part].copy_pp(dstU, dstYuv.m_csize, m_buf[1], m_csize);
-    primitives.chroma[m_csp].cu[m_part].copy_pp(dstV, dstYuv.m_csize, m_buf[2], m_csize);
+    if (m_csp != X265_CSP_I400)
+    {
+        pixel* dstU = dstYuv.getCbAddr(absPartIdx);
+        pixel* dstV = dstYuv.getCrAddr(absPartIdx);
+        primitives.chroma[m_csp].cu[m_part].copy_pp(dstU, dstYuv.m_csize, m_buf[1], m_csize);
+        primitives.chroma[m_csp].cu[m_part].copy_pp(dstV, dstYuv.m_csize, m_buf[2], m_csize);
+    }
 }
 
 void Yuv::copyPartToYuv(Yuv& dstYuv, uint32_t absPartIdx) const
@@ -144,19 +156,25 @@
     pixel* dstY = dstYuv.m_buf[0];
     primitives.cu[dstYuv.m_part].copy_pp(dstY, dstYuv.m_size, srcY, m_size);
 
-    pixel* srcU = m_buf[1] + getChromaAddrOffset(absPartIdx);
-    pixel* srcV = m_buf[2] + getChromaAddrOffset(absPartIdx);
-    pixel* dstU = dstYuv.m_buf[1];
-    pixel* dstV = dstYuv.m_buf[2];
-    primitives.chroma[m_csp].cu[dstYuv.m_part].copy_pp(dstU, dstYuv.m_csize, srcU, m_csize);
-    primitives.chroma[m_csp].cu[dstYuv.m_part].copy_pp(dstV, dstYuv.m_csize, srcV, m_csize);
+    if (m_csp != X265_CSP_I400)
+    {
+        pixel* srcU = m_buf[1] + getChromaAddrOffset(absPartIdx);
+        pixel* srcV = m_buf[2] + getChromaAddrOffset(absPartIdx);
+        pixel* dstU = dstYuv.m_buf[1];
+        pixel* dstV = dstYuv.m_buf[2];
+        primitives.chroma[m_csp].cu[dstYuv.m_part].copy_pp(dstU, dstYuv.m_csize, srcU, m_csize);
+        primitives.chroma[m_csp].cu[dstYuv.m_part].copy_pp(dstV, dstYuv.m_csize, srcV, m_csize);
+    }
 }
 
 void Yuv::addClip(const Yuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t log2SizeL)
 {
     primitives.cu[log2SizeL - 2].add_ps(m_buf[0], m_size, srcYuv0.m_buf[0], srcYuv1.m_buf[0], srcYuv0.m_size, srcYuv1.m_size);
-    primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps(m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize);
-    primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps(m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize);
+    if (m_csp != X265_CSP_I400)
+    {
+        primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps(m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize);
+        primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps(m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize);
+    }
 }
 
 void Yuv::addAvg(const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t absPartIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma)
diff -r f8b8ebdc5457 -r 5602b4bc1fec source/encoder/entropy.cpp
--- a/source/encoder/entropy.cpp	Mon Sep 28 14:34:41 2015 +0530
+++ b/source/encoder/entropy.cpp	Mon Sep 21 11:40:18 2015 -0500
@@ -430,7 +430,8 @@
     if (slice.m_sps->bUseSAO)
     {
         WRITE_FLAG(saoParam->bSaoFlag[0], "slice_sao_luma_flag");
-        WRITE_FLAG(saoParam->bSaoFlag[1], "slice_sao_chroma_flag");
+        if (slice.m_sps->chromaFormatIdc != X265_CSP_I400)
+            WRITE_FLAG(saoParam->bSaoFlag[1], "slice_sao_chroma_flag");
     }
 
     // check if numRefIdx match the defaults (1, hard-coded in PPS). If not, override
@@ -723,19 +724,23 @@
     uint32_t hChromaShift = cu.m_hChromaShift;
     uint32_t vChromaShift = cu.m_vChromaShift;
     bool bSmallChroma = (log2CurSize - hChromaShift) < 2;
-    if (!curDepth || !bSmallChroma)
+    
+    if (cu.m_chromaFormat != X265_CSP_I400)
     {
-        if (!curDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_U, curDepth - 1))
-            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, curDepth, !subdiv);
-        if (!curDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_V, curDepth - 1))
-            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, curDepth, !subdiv);
+        if (!curDepth || !bSmallChroma)
+        {
+            if (!curDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_U, curDepth - 1))
+                codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, curDepth, !subdiv);
+            if (!curDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_V, curDepth - 1))
+                codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, curDepth, !subdiv);
+        }
+        else
+        {
+            X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_U, curDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_U, curDepth - 1), "chroma xform size match failure\n");
+            X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_V, curDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_V, curDepth - 1), "chroma xform size match failure\n");
+        }
     }
-    else
-    {
-        X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_U, curDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_U, curDepth - 1), "chroma xform size match failure\n");
-        X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_V, curDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_V, curDepth - 1), "chroma xform size match failure\n");
-    }
-
+    
     if (subdiv)
     {
         --log2CurSize;
@@ -782,6 +787,9 @@
             return;
     }
 
+    if (cu.m_chromaFormat == X265_CSP_I400)
+        return;
+
     if (bSmallChroma)
     {
         if ((absPartIdx & 3) != 3)
@@ -1011,7 +1019,7 @@
 void Entropy::codePredWeightTable(const Slice& slice)
 {
     const WeightParam *wp;
-    bool            bChroma      = true; // 4:0:0 not yet supported
+    bool            bChroma      = (slice.m_sps->chromaFormatIdc != X265_CSP_I400);
     bool            bDenomCoded  = false;
     int             numRefDirs   = slice.m_sliceType == B_SLICE ? 2 : 1;
     uint32_t        totalSignalledWeightFlags = 0;
diff -r f8b8ebdc5457 -r 5602b4bc1fec source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp	Mon Sep 28 14:34:41 2015 +0530
+++ b/source/encoder/framefilter.cpp	Mon Sep 21 11:40:18 2015 -0500
@@ -167,25 +167,31 @@
 
     // Border extend Left and Right
     primitives.extendRowBorder(reconPic->getLumaAddr(lineStartCUAddr), reconPic->m_stride, reconPic->m_picWidth, realH, reconPic->m_lumaMarginX);
-    primitives.extendRowBorder(reconPic->getCbAddr(lineStartCUAddr), reconPic->m_strideC, reconPic->m_picWidth >> m_hChromaShift, realH >> m_vChromaShift, reconPic->m_chromaMarginX);
-    primitives.extendRowBorder(reconPic->getCrAddr(lineStartCUAddr), reconPic->m_strideC, reconPic->m_picWidth >> m_hChromaShift, realH >> m_vChromaShift, reconPic->m_chromaMarginX);
+    if (reconPic->m_picCsp != X265_CSP_I400)
+    {
+        primitives.extendRowBorder(reconPic->getCbAddr(lineStartCUAddr), reconPic->m_strideC, reconPic->m_picWidth >> m_hChromaShift, realH >> m_vChromaShift, reconPic->m_chromaMarginX);
+        primitives.extendRowBorder(reconPic->getCrAddr(lineStartCUAddr), reconPic->m_strideC, reconPic->m_picWidth >> m_hChromaShift, realH >> m_vChromaShift, reconPic->m_chromaMarginX);
+    }
 
     // Border extend Top
     if (!row)
     {
         const intptr_t stride = reconPic->m_stride;
-        const intptr_t strideC = reconPic->m_strideC;
         pixel *pixY = reconPic->getLumaAddr(lineStartCUAddr) - reconPic->m_lumaMarginX;
-        pixel *pixU = reconPic->getCbAddr(lineStartCUAddr) - reconPic->m_chromaMarginX;
-        pixel *pixV = reconPic->getCrAddr(lineStartCUAddr) - reconPic->m_chromaMarginX;
 
         for (uint32_t y = 0; y < reconPic->m_lumaMarginY; y++)
             memcpy(pixY - (y + 1) * stride, pixY, stride * sizeof(pixel));
 
-        for (uint32_t y = 0; y < reconPic->m_chromaMarginY; y++)
+        if (reconPic->m_picCsp != X265_CSP_I400)
         {
-            memcpy(pixU - (y + 1) * strideC, pixU, strideC * sizeof(pixel));
-            memcpy(pixV - (y + 1) * strideC, pixV, strideC * sizeof(pixel));
+            const intptr_t strideC = reconPic->m_strideC;
+            pixel *pixU = reconPic->getCbAddr(lineStartCUAddr) - reconPic->m_chromaMarginX;
+            pixel *pixV = reconPic->getCrAddr(lineStartCUAddr) - reconPic->m_chromaMarginX;
+            for (uint32_t y = 0; y < reconPic->m_chromaMarginY; y++)
+            {
+                memcpy(pixU - (y + 1) * strideC, pixU, strideC * sizeof(pixel));
+                memcpy(pixV - (y + 1) * strideC, pixV, strideC * sizeof(pixel));
+            }
         }
     }
 
@@ -193,17 +199,20 @@
     if (row == m_numRows - 1)
     {
         const intptr_t stride = reconPic->m_stride;
-        const intptr_t strideC = reconPic->m_strideC;
         pixel *pixY = reconPic->getLumaAddr(lineStartCUAddr) - reconPic->m_lumaMarginX + (realH - 1) * stride;
-        pixel *pixU = reconPic->getCbAddr(lineStartCUAddr) - reconPic->m_chromaMarginX + ((realH >> m_vChromaShift) - 1) * strideC;
-        pixel *pixV = reconPic->getCrAddr(lineStartCUAddr) - reconPic->m_chromaMarginX + ((realH >> m_vChromaShift) - 1) * strideC;
         for (uint32_t y = 0; y < reconPic->m_lumaMarginY; y++)
             memcpy(pixY + (y + 1) * stride, pixY, stride * sizeof(pixel));
 
-        for (uint32_t y = 0; y < reconPic->m_chromaMarginY; y++)
+        if (reconPic->m_picCsp != X265_CSP_I400)
         {
-            memcpy(pixU + (y + 1) * strideC, pixU, strideC * sizeof(pixel));
-            memcpy(pixV + (y + 1) * strideC, pixV, strideC * sizeof(pixel));
+            const intptr_t strideC = reconPic->m_strideC;
+            pixel *pixU = reconPic->getCbAddr(lineStartCUAddr) - reconPic->m_chromaMarginX + ((realH >> m_vChromaShift) - 1) * strideC;
+            pixel *pixV = reconPic->getCrAddr(lineStartCUAddr) - reconPic->m_chromaMarginX + ((realH >> m_vChromaShift) - 1) * strideC;
+            for (uint32_t y = 0; y < reconPic->m_chromaMarginY; y++)
+            {
+                memcpy(pixU + (y + 1) * strideC, pixU, strideC * sizeof(pixel));
+                memcpy(pixV + (y + 1) * strideC, pixV, strideC * sizeof(pixel));
+            }
         }
     }
 
@@ -220,16 +229,19 @@
         uint32_t height = getCUHeight(row);
 
         uint64_t ssdY = computeSSD(fencPic->getLumaAddr(cuAddr), reconPic->getLumaAddr(cuAddr), stride, width, height);
-        height >>= m_vChromaShift;
-        width  >>= m_hChromaShift;
-        stride = reconPic->m_strideC;
+        m_frameEncoder->m_SSDY += ssdY;
+        if (reconPic->m_picCsp != X265_CSP_I400)
+        {
+            height >>= m_vChromaShift;
+            width  >>= m_hChromaShift;
+            stride = reconPic->m_strideC;
+            
+            uint64_t ssdU = computeSSD(fencPic->getCbAddr(cuAddr), reconPic->getCbAddr(cuAddr), stride, width, height);
+            uint64_t ssdV = computeSSD(fencPic->getCrAddr(cuAddr), reconPic->getCrAddr(cuAddr), stride, width, height);
 
-        uint64_t ssdU = computeSSD(fencPic->getCbAddr(cuAddr), reconPic->getCbAddr(cuAddr), stride, width, height);
-        uint64_t ssdV = computeSSD(fencPic->getCrAddr(cuAddr), reconPic->getCrAddr(cuAddr), stride, width, height);
-
-        m_frameEncoder->m_SSDY += ssdY;
-        m_frameEncoder->m_SSDU += ssdU;
-        m_frameEncoder->m_SSDV += ssdV;
+            m_frameEncoder->m_SSDU += ssdU;
+            m_frameEncoder->m_SSDV += ssdV;
+        }
     }
     if (m_param->bEnableSsim && m_ssimBuf)
     {
@@ -264,12 +276,15 @@
         }
 
         updateMD5Plane(m_frameEncoder->m_state[0], reconPic->getLumaAddr(cuAddr), width, height, stride);
-        width  >>= m_hChromaShift;
-        height >>= m_vChromaShift;
-        stride = reconPic->m_strideC;
-
-        updateMD5Plane(m_frameEncoder->m_state[1], reconPic->getCbAddr(cuAddr), width, height, stride);
-        updateMD5Plane(m_frameEncoder->m_state[2], reconPic->getCrAddr(cuAddr), width, height, stride);
+        if (reconPic->m_picCsp != X265_CSP_I400)
+        {
+            width  >>= m_hChromaShift;
+            height >>= m_vChromaShift;
+            stride = reconPic->m_strideC;
+            
+            updateMD5Plane(m_frameEncoder->m_state[1], reconPic->getCbAddr(cuAddr), width, height, stride);
+            updateMD5Plane(m_frameEncoder->m_state[2], reconPic->getCrAddr(cuAddr), width, height, stride);
+        }
     }
     else if (m_param->decodedPictureHashSEI == 2)
     {
@@ -279,12 +294,15 @@
         if (!row)
             m_frameEncoder->m_crc[0] = m_frameEncoder->m_crc[1] = m_frameEncoder->m_crc[2] = 0xffff;
         updateCRC(reconPic->getLumaAddr(cuAddr), m_frameEncoder->m_crc[0], height, width, stride);
-        width  >>= m_hChromaShift;
-        height >>= m_vChromaShift;
-        stride = reconPic->m_strideC;
-
-        updateCRC(reconPic->getCbAddr(cuAddr), m_frameEncoder->m_crc[1], height, width, stride);
-        updateCRC(reconPic->getCrAddr(cuAddr), m_frameEncoder->m_crc[2], height, width, stride);
+        if (reconPic->m_picCsp != X265_CSP_I400)
+        {
+            width  >>= m_hChromaShift;
+            height >>= m_vChromaShift;
+            stride = reconPic->m_strideC;
+            
+            updateCRC(reconPic->getCbAddr(cuAddr), m_frameEncoder->m_crc[1], height, width, stride);
+            updateCRC(reconPic->getCrAddr(cuAddr), m_frameEncoder->m_crc[2], height, width, stride);
+        }
     }
     else if (m_param->decodedPictureHashSEI == 3)
     {
@@ -295,13 +313,16 @@
         if (!row)
             m_frameEncoder->m_checksum[0] = m_frameEncoder->m_checksum[1] = m_frameEncoder->m_checksum[2] = 0;
         updateChecksum(reconPic->m_picOrg[0], m_frameEncoder->m_checksum[0], height, width, stride, row, cuHeight);
-        width  >>= m_hChromaShift;
-        height >>= m_vChromaShift;
-        stride = reconPic->m_strideC;
-        cuHeight >>= m_vChromaShift;
-
-        updateChecksum(reconPic->m_picOrg[1], m_frameEncoder->m_checksum[1], height, width, stride, row, cuHeight);
-        updateChecksum(reconPic->m_picOrg[2], m_frameEncoder->m_checksum[2], height, width, stride, row, cuHeight);
+        if (reconPic->m_picCsp != X265_CSP_I400)
+        {
+            width  >>= m_hChromaShift;
+            height >>= m_vChromaShift;
+            stride = reconPic->m_strideC;
+            cuHeight >>= m_vChromaShift;
+            
+            updateChecksum(reconPic->m_picOrg[1], m_frameEncoder->m_checksum[1], height, width, stride, row, cuHeight);
+            updateChecksum(reconPic->m_picOrg[2], m_frameEncoder->m_checksum[2], height, width, stride, row, cuHeight);
+        }
     }
 
     if (ATOMIC_INC(&m_frameEncoder->m_completionCount) == 2 * (int)m_frameEncoder->m_numRows)
@@ -415,15 +436,18 @@
 
     primitives.cu[size].copy_pp(dst, reconPic->m_stride, src, fencPic->m_stride);
    
-    pixel* dstCb = reconPic->getCbAddr(cuAddr, absPartIdx);
-    pixel* srcCb = fencPic->getCbAddr(cuAddr, absPartIdx);
+    int csp = fencPic->m_picCsp;
+    if (csp != X265_CSP_I400)
+    {
+        pixel* dstCb = reconPic->getCbAddr(cuAddr, absPartIdx);
+        pixel* srcCb = fencPic->getCbAddr(cuAddr, absPartIdx);
 
-    pixel* dstCr = reconPic->getCrAddr(cuAddr, absPartIdx);
-    pixel* srcCr = fencPic->getCrAddr(cuAddr, absPartIdx);
+        pixel* dstCr = reconPic->getCrAddr(cuAddr, absPartIdx);
+        pixel* srcCr = fencPic->getCrAddr(cuAddr, absPartIdx);
 
-    int csp = fencPic->m_picCsp;
-    primitives.chroma[csp].cu[size].copy_pp(dstCb, reconPic->m_strideC, srcCb, fencPic->m_strideC);
-    primitives.chroma[csp].cu[size].copy_pp(dstCr, reconPic->m_strideC, srcCr, fencPic->m_strideC);
+        primitives.chroma[csp].cu[size].copy_pp(dstCb, reconPic->m_strideC, srcCb, fencPic->m_strideC);
+        primitives.chroma[csp].cu[size].copy_pp(dstCr, reconPic->m_strideC, srcCr, fencPic->m_strideC);
+    }
 }
 
 /* Original YUV restoration for CU in lossless coding */
diff -r f8b8ebdc5457 -r 5602b4bc1fec source/encoder/sao.cpp
--- a/source/encoder/sao.cpp	Mon Sep 28 14:34:41 2015 +0530
+++ b/source/encoder/sao.cpp	Mon Sep 21 11:40:18 2015 -0500
@@ -106,9 +106,15 @@
 bool SAO::create(x265_param* param)
 {
     m_param = param;
-    m_hChromaShift = CHROMA_H_SHIFT(param->internalCsp);
-    m_vChromaShift = CHROMA_V_SHIFT(param->internalCsp);
-
+    if (param->internalCsp != X265_CSP_I400)
+    {
+        m_hChromaShift = CHROMA_H_SHIFT(param->internalCsp);
+        m_vChromaShift = CHROMA_V_SHIFT(param->internalCsp);
+        m_numPlanes = 3;
+    }
+    else
+        m_numPlanes = 1;
+  
     m_numCuInWidth =  (m_param->sourceWidth + g_maxCUSize - 1) / g_maxCUSize;
     m_numCuInHeight = (m_param->sourceHeight + g_maxCUSize - 1) / g_maxCUSize;
 
@@ -224,7 +230,7 @@
     }
 
     saoParam->bSaoFlag[0] = true;
-    saoParam->bSaoFlag[1] = true;
+    saoParam->bSaoFlag[1] = (m_numPlanes > 1);
 
     m_numNoSao[0] = 0; // Luma
     m_numNoSao[1] = 0; // Chroma
@@ -1132,7 +1138,7 @@
             m_entropyCoder.codeSaoMerge(0);
         m_entropyCoder.store(m_rdContexts.temp);
         // reset stats Y, Cb, Cr
-        for (int plane = 0; plane < 3; plane++)
+        for (int plane = 0; plane < m_numPlanes; plane++)
         {
             for (int j = 0; j < MAX_NUM_SAO_TYPE; j++)
             {
@@ -1161,7 +1167,8 @@
 
         saoComponentParamDist(saoParam, addr, addrUp, addrLeft, &mergeSaoParam[0][0], mergeDist);
 
-        sao2ChromaParamDist(saoParam, addr, addrUp, addrLeft, mergeSaoParam, mergeDist);
+        if (m_numPlanes > 1)
+            sao2ChromaParamDist(saoParam, addr, addrUp, addrLeft, mergeSaoParam, mergeDist);
 
         if (saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1])
         {
@@ -1172,7 +1179,7 @@
                 m_entropyCoder.codeSaoMerge(0);
             if (allowMerge[1])
                 m_entropyCoder.codeSaoMerge(0);
-            for (int plane = 0; plane < 3; plane++)
+            for (int plane = 0; plane < m_numPlanes; plane++)
             {
                 if (saoParam->bSaoFlag[plane > 0])
                     m_entropyCoder.codeSaoOffset(saoParam->ctuParam[plane][addr], plane);
@@ -1202,7 +1209,7 @@
                     SaoMergeMode mergeMode = mergeIdx ? SAO_MERGE_UP : SAO_MERGE_LEFT;
                     bestCost = mergeCost;
                     m_entropyCoder.store(m_rdContexts.temp);
-                    for (int plane = 0; plane < 3; plane++)
+                    for (int plane = 0; plane < m_numPlanes; plane++)
                     {
                         mergeSaoParam[plane][mergeIdx].mergeMode = mergeMode;
                         if (saoParam->bSaoFlag[plane > 0])
diff -r f8b8ebdc5457 -r 5602b4bc1fec source/encoder/sao.h
--- a/source/encoder/sao.h	Mon Sep 28 14:34:41 2015 +0530
+++ b/source/encoder/sao.h	Mon Sep 21 11:40:18 2015 -0500
@@ -85,6 +85,7 @@
 
     int         m_numCuInWidth;
     int         m_numCuInHeight;
+    int         m_numPlanes;
     int         m_hChromaShift;
     int         m_vChromaShift;
 
diff -r f8b8ebdc5457 -r 5602b4bc1fec source/encoder/search.cpp
--- a/source/encoder/search.cpp	Mon Sep 28 14:34:41 2015 +0530
+++ b/source/encoder/search.cpp	Mon Sep 21 11:40:18 2015 -0500
@@ -1169,7 +1169,8 @@
 
     intraMode.initCosts();
     intraMode.lumaDistortion += estIntraPredQT(intraMode, cuGeom, tuDepthRange, sharedModes);
-    intraMode.chromaDistortion += estIntraPredChromaQT(intraMode, cuGeom, sharedChromaModes);
+    if (m_csp != X265_CSP_I400)
+        intraMode.chromaDistortion += estIntraPredChromaQT(intraMode, cuGeom, sharedChromaModes);
     intraMode.distortion += intraMode.lumaDistortion + intraMode.chromaDistortion;
 
     m_entropyCoder.resetBits();
@@ -2499,9 +2500,14 @@
     // Luma
     int part = partitionFromLog2Size(cu.m_log2CUSize[0]);
     interMode.lumaDistortion = primitives.cu[part].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
-    // Chroma
-    interMode.chromaDistortion = m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[part].sse_pp(fencYuv->m_buf[1], fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
-    interMode.chromaDistortion += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[part].sse_pp(fencYuv->m_buf[2], fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
+    if (m_csp != X265_CSP_I400)
+    {
+        // Chroma
+        interMode.chromaDistortion = m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[part].sse_pp(fencYuv->m_buf[1], fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
+        interMode.chromaDistortion += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[part].sse_pp(fencYuv->m_buf[2], fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
+    }
+    else
+        interMode.chromaDistortion = 0;
     interMode.distortion = interMode.lumaDistortion + interMode.chromaDistortion;
 
     m_entropyCoder.load(m_rqt[depth].cur);
@@ -2553,9 +2559,12 @@
     if (!tqBypass)
     {
         sse_ret_t cbf0Dist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
-        cbf0Dist += m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], predYuv->m_csize, predYuv->m_buf[1], predYuv->m_csize));
-        cbf0Dist += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2], predYuv->m_csize, predYuv->m_buf[2], predYuv->m_csize));
-
+        if (m_csp != X265_CSP_I400)
+        {
+            cbf0Dist += m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], predYuv->m_csize, predYuv->m_buf[1], predYuv->m_csize));
+            cbf0Dist += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2], predYuv->m_csize, predYuv->m_buf[2], predYuv->m_csize));
+        }
+        
         /* Consider the RD cost of not signaling any residual */
         m_entropyCoder.load(m_rqt[depth].cur);
         m_entropyCoder.resetBits();
@@ -2624,8 +2633,14 @@
 
     // update with clipped distortion and cost (qp estimation loop uses unclipped values)
     sse_ret_t bestLumaDist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
-    sse_ret_t bestChromaDist = m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
-    bestChromaDist += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2], fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
+    sse_ret_t bestChromaDist;
+    if (m_csp != X265_CSP_I400)
+    {
+        bestChromaDist = m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
+        bestChromaDist += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2], fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
+    }
+    else
+        bestChromaDist = 0;
     if (m_rdCost.m_psyRd)
         interMode.psyEnergy = m_rdCost.psyCost(sizeIdx, fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
     interMode.resEnergy = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
@@ -2798,15 +2813,22 @@
     X265_CHECK(bCheckFull || bCheckSplit, "check-full or check-split must be set\n");
 
     uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
-    bool bCodeChroma = true;
+    bool bCodeChroma;
     uint32_t tuDepthC = tuDepth;
-    if (log2TrSizeC < 2)
+    
+    if (m_csp != X265_CSP_I400)
     {
-        X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && tuDepth, "invalid tuDepth\n");
-        log2TrSizeC = 2;
-        tuDepthC--;
-        bCodeChroma = !(absPartIdx & 3);
+        bCodeChroma = true;
+        if (log2TrSizeC < 2)
+        {
+            X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && tuDepth, "invalid tuDepth\n");
+            log2TrSizeC = 2;
+            tuDepthC--;
+            bCodeChroma = !(absPartIdx & 3);
+        }
     }
+    else
+        bCodeChroma = false;
 
     // code full block
     Cost fullCost;
@@ -3383,15 +3405,22 @@
     const uint32_t qtLayer = log2TrSize - 2;
 
     uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
-    bool bCodeChroma = true;
+    bool bCodeChroma;
     uint32_t tuDepthC = tuDepth;
-    if (log2TrSizeC < 2)
+
+    if (m_csp != X265_CSP_I400)
     {
-        X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && tuDepth, "invalid tuDepth\n");
-        log2TrSizeC = 2;
-        tuDepthC--;
-        bCodeChroma = !(absPartIdx & 3);
+        bCodeChroma = true;
+        if (log2TrSizeC < 2)
+        {
+            X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && tuDepth, "invalid tuDepth\n");
+            log2TrSizeC = 2;
+            tuDepthC--;
+            bCodeChroma = !(absPartIdx & 3);
+        }
     }
+    else
+        bCodeChroma = false;
 
     m_rqt[qtLayer].resiQtYuv.copyPartToPartLuma(resiYuv, absPartIdx, log2TrSize);
 
diff -r f8b8ebdc5457 -r 5602b4bc1fec source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp	Mon Sep 28 14:34:41 2015 +0530
+++ b/source/encoder/slicetype.cpp	Mon Sep 21 11:40:18 2015 -0500
@@ -74,17 +74,18 @@
 uint32_t LookaheadTLD::acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, int csp)
 {
     intptr_t stride = curFrame->m_fencPic->m_stride;
-    intptr_t cStride = curFrame->m_fencPic->m_strideC;
     intptr_t blockOffsetLuma = blockX + (blockY * stride);
-    int hShift = CHROMA_H_SHIFT(csp);
-    int vShift = CHROMA_V_SHIFT(csp);
-    intptr_t blockOffsetChroma = (blockX >> hShift) + ((blockY >> vShift) * cStride);
 
-    uint32_t var;
-
-    var  = acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[0] + blockOffsetLuma, stride, 0, csp);
-    var += acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[1] + blockOffsetChroma, cStride, 1, csp);
-    var += acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[2] + blockOffsetChroma, cStride, 2, csp);
+    uint32_t var = acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[0] + blockOffsetLuma, stride, 0, csp);
+    if (csp != X265_CSP_I400)
+    {
+        intptr_t cStride = curFrame->m_fencPic->m_strideC;
+        int hShift = CHROMA_H_SHIFT(csp);
+        int vShift = CHROMA_V_SHIFT(csp);
+        intptr_t blockOffsetChroma = (blockX >> hShift) + ((blockY >> vShift) * cStride);
+        var += acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[1] + blockOffsetChroma, cStride, 1, csp);
+        var += acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[2] + blockOffsetChroma, cStride, 2, csp);
+    }
     x265_emms();
     return var;
 }



More information about the x265-devel mailing list