[x265] [PATCH] Allow different Csp for picture

santhoshini at multicorewareinc.com santhoshini at multicorewareinc.com
Tue May 31 07:55:06 CEST 2016


# HG changeset patch
# User Santhoshini Sekar<santhoshini at multicorewareinc.com>
# Date 1464152658 -19800
#      Wed May 25 10:34:18 2016 +0530
# Node ID 028ccaa6486047ffafef38d78f9b00880908c0c3
# Parent  6d3849d648f0be5a8e334f1d75a2f7cf93c86cb3
Allow different Csp for picture

diff -r 6d3849d648f0 -r 028ccaa64860 source/common/cudata.cpp
--- a/source/common/cudata.cpp	Sun May 29 21:50:25 2016 +0800
+++ b/source/common/cudata.cpp	Wed May 25 10:34:18 2016 +0530
@@ -527,7 +527,7 @@
 }
 
 /* Only called by encodeResidue, these fields can be modified during inter/intra coding */
-void CUData::updatePic(uint32_t depth) const
+void CUData::updatePic(uint32_t depth, int picCsp) const
 {
     CUData& ctu = *m_encData->getPicCTU(m_cuAddr);
 
@@ -541,7 +541,7 @@
     uint32_t tmpY2 = m_absIdxInCTU << (LOG2_UNIT_SIZE * 2);
     memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t)* tmpY);
 
-    if (ctu.m_chromaFormat != X265_CSP_I400)
+    if (ctu.m_chromaFormat != X265_CSP_I400 && picCsp != X265_CSP_I400)
     {
         m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]);
         m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]);
diff -r 6d3849d648f0 -r 028ccaa64860 source/common/cudata.h
--- a/source/common/cudata.h	Sun May 29 21:50:25 2016 +0800
+++ b/source/common/cudata.h	Wed May 25 10:34:18 2016 +0530
@@ -224,7 +224,7 @@
 
     /* RD-0 methods called only from encodeResidue */
     void     copyFromPic(const CUData& ctu, const CUGeom& cuGeom, int csp, bool copyQp = true);
-    void     updatePic(uint32_t depth) const;
+    void     updatePic(uint32_t depth, int picCsp) const;
 
     void     setPartSizeSubParts(PartSize size)    { m_partSet(m_partSize, (uint8_t)size); }
     void     setPredModeSubParts(PredMode mode)    { m_partSet(m_predMode, (uint8_t)mode); }
diff -r 6d3849d648f0 -r 028ccaa64860 source/common/frame.cpp
--- a/source/common/frame.cpp	Sun May 29 21:50:25 2016 +0800
+++ b/source/common/frame.cpp	Wed May 25 10:34:18 2016 +0530
@@ -72,7 +72,7 @@
     m_reconPic = new PicYuv;
     m_param = param;
     m_encData->m_reconPic = m_reconPic;
-    bool ok = m_encData->create(*param, sps) && m_reconPic->create(param->sourceWidth, param->sourceHeight, param->internalCsp);
+    bool ok = m_encData->create(*param, sps, m_fencPic->m_picCsp) && m_reconPic->create(param->sourceWidth, param->sourceHeight, param->internalCsp);
     if (ok)
     {
         /* initialize right border of m_reconpicYuv as SAO may read beyond the
diff -r 6d3849d648f0 -r 028ccaa64860 source/common/framedata.cpp
--- a/source/common/framedata.cpp	Sun May 29 21:50:25 2016 +0800
+++ b/source/common/framedata.cpp	Wed May 25 10:34:18 2016 +0530
@@ -31,11 +31,12 @@
     memset(this, 0, sizeof(*this));
 }
 
-bool FrameData::create(const x265_param& param, const SPS& sps)
+bool FrameData::create(const x265_param& param, const SPS& sps, int csp)
 {
     m_param = &param;
     m_slice  = new Slice;
     m_picCTU = new CUData[sps.numCUsInFrame];
+    m_picCsp = csp;
 
     m_cuMemPool.create(0, param.internalCsp, sps.numCUsInFrame);
     for (uint32_t ctuAddr = 0; ctuAddr < sps.numCUsInFrame; ctuAddr++)
diff -r 6d3849d648f0 -r 028ccaa64860 source/common/framedata.h
--- a/source/common/framedata.h	Sun May 29 21:50:25 2016 +0800
+++ b/source/common/framedata.h	Wed May 25 10:34:18 2016 +0530
@@ -146,10 +146,11 @@
     double         m_avgQpRc;    /* avg QP as decided by rate-control */
     double         m_avgQpAq;    /* avg QP as decided by AQ in addition to rate-control */
     double         m_rateFactor; /* calculated based on the Frame QP */
+    int            m_picCsp;
 
     FrameData();
 
-    bool create(const x265_param& param, const SPS& sps);
+    bool create(const x265_param& param, const SPS& sps, int csp);
     void reinit(const SPS& sps);
     void destroy();
     inline CUData* getPicCTU(uint32_t ctuAddr) { return &m_picCTU[ctuAddr]; }
diff -r 6d3849d648f0 -r 028ccaa64860 source/common/picyuv.cpp
--- a/source/common/picyuv.cpp	Sun May 29 21:50:25 2016 +0800
+++ b/source/common/picyuv.cpp	Wed May 25 10:34:18 2016 +0530
@@ -180,6 +180,7 @@
      * warnings from valgrind about using uninitialized pixels */
     padx++;
     pady++;
+    m_picCsp = pic.colorSpace;
 
     X265_CHECK(pic.bitDepth >= 8, "pic.bitDepth check failure");
 
@@ -194,7 +195,7 @@
 
             primitives.planecopy_cp(yChar, pic.stride[0] / sizeof(*yChar), yPixel, m_stride, width, height, shift);
 
-            if (pic.colorSpace != X265_CSP_I400)
+            if (param.internalCsp != X265_CSP_I400)
             {
                 pixel *uPixel = m_picOrg[1];
                 pixel *vPixel = m_picOrg[2];
@@ -220,7 +221,7 @@
                 yChar += pic.stride[0] / sizeof(*yChar);
             }
 
-            if (pic.colorSpace != X265_CSP_I400)
+            if (param.internalCsp != X265_CSP_I400)
             {
                 pixel *uPixel = m_picOrg[1];
                 pixel *vPixel = m_picOrg[2];
@@ -262,7 +263,7 @@
             primitives.planecopy_sp_shl(yShort, pic.stride[0] / sizeof(*yShort), yPixel, m_stride, width, height, shift, mask);
         }
 
-        if (pic.colorSpace != X265_CSP_I400)
+        if (param.internalCsp != X265_CSP_I400)
         {
             pixel *uPixel = m_picOrg[1];
             pixel *vPixel = m_picOrg[2];
@@ -312,7 +313,7 @@
     for (int i = 1; i <= pady; i++)
         memcpy(Y + i * m_stride, Y, (width + padx) * sizeof(pixel));
 
-    if (pic.colorSpace != X265_CSP_I400)
+    if (param.internalCsp != X265_CSP_I400)
     {
         for (int r = 0; r < height >> m_vChromaShift; r++)
         {
diff -r 6d3849d648f0 -r 028ccaa64860 source/common/shortyuv.cpp
--- a/source/common/shortyuv.cpp	Sun May 29 21:50:25 2016 +0800
+++ b/source/common/shortyuv.cpp	Wed May 25 10:34:18 2016 +0530
@@ -78,11 +78,11 @@
     memset(m_buf[2], 0, (m_csize * m_csize) * sizeof(int16_t));
 }
 
-void ShortYuv::subtract(const Yuv& srcYuv0, const Yuv& srcYuv1, uint32_t log2Size)
+void ShortYuv::subtract(const Yuv& srcYuv0, const Yuv& srcYuv1, uint32_t log2Size, int picCsp)
 {
     const int sizeIdx = log2Size - 2;
     primitives.cu[sizeIdx].sub_ps(m_buf[0], m_size, srcYuv0.m_buf[0], srcYuv1.m_buf[0], srcYuv0.m_size, srcYuv1.m_size);
-    if (m_csp != X265_CSP_I400)
+    if (m_csp != X265_CSP_I400 && picCsp != X265_CSP_I400)
     {
         primitives.chroma[m_csp].cu[sizeIdx].sub_ps(m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize);
         primitives.chroma[m_csp].cu[sizeIdx].sub_ps(m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize);
diff -r 6d3849d648f0 -r 028ccaa64860 source/common/shortyuv.h
--- a/source/common/shortyuv.h	Sun May 29 21:50:25 2016 +0800
+++ b/source/common/shortyuv.h	Wed May 25 10:34:18 2016 +0530
@@ -64,7 +64,7 @@
     const int16_t* getCrAddr(uint32_t absPartIdx) const                         { return m_buf[2] + getChromaAddrOffset(absPartIdx); }
     const int16_t* getChromaAddr(uint32_t chromaId, uint32_t partUnitIdx) const { return m_buf[chromaId] + getChromaAddrOffset(partUnitIdx); }
 
-    void subtract(const Yuv& srcYuv0, const Yuv& srcYuv1, uint32_t log2Size);
+    void subtract(const Yuv& srcYuv0, const Yuv& srcYuv1, uint32_t log2Size, int picCsp);
 
     void copyPartToPartLuma(ShortYuv& dstYuv, uint32_t absPartIdx, uint32_t log2Size) const;
     void copyPartToPartChroma(ShortYuv& dstYuv, uint32_t absPartIdx, uint32_t log2SizeL) const;
diff -r 6d3849d648f0 -r 028ccaa64860 source/common/yuv.cpp
--- a/source/common/yuv.cpp	Sun May 29 21:50:25 2016 +0800
+++ b/source/common/yuv.cpp	Wed May 25 10:34:18 2016 +0530
@@ -163,14 +163,19 @@
     }
 }
 
-void Yuv::addClip(const Yuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t log2SizeL)
+void Yuv::addClip(const Yuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t log2SizeL, int picCsp)
 {
     primitives.cu[log2SizeL - 2].add_ps(m_buf[0], m_size, srcYuv0.m_buf[0], srcYuv1.m_buf[0], srcYuv0.m_size, srcYuv1.m_size);
-    if (m_csp != X265_CSP_I400)
+    if (m_csp != X265_CSP_I400 && picCsp != X265_CSP_I400)
     {
         primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps(m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize);
         primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps(m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize);
     }
+    if (picCsp == X265_CSP_I400 && m_csp != X265_CSP_I400)
+    {
+        primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv0.m_csize);
+        primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv0.m_csize);
+    }
 }
 
 void Yuv::addAvg(const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t absPartIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma)
diff -r 6d3849d648f0 -r 028ccaa64860 source/common/yuv.h
--- a/source/common/yuv.h	Sun May 29 21:50:25 2016 +0800
+++ b/source/common/yuv.h	Wed May 25 10:34:18 2016 +0530
@@ -73,7 +73,7 @@
     void   copyPartToYuv(Yuv& dstYuv, uint32_t absPartIdx) const;
 
     // Clip(srcYuv0 + srcYuv1) -> m_buf .. aka recon = clip(pred + residual)
-    void   addClip(const Yuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t log2SizeL);
+    void   addClip(const Yuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t log2SizeL, int picCsp);
 
     // (srcYuv0 + srcYuv1)/2 for YUV partition (bidir averaging)
     void   addAvg(const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t absPartIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma);
diff -r 6d3849d648f0 -r 028ccaa64860 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Sun May 29 21:50:25 2016 +0800
+++ b/source/encoder/analysis.cpp	Wed May 25 10:34:18 2016 +0530
@@ -1194,7 +1194,7 @@
             if (m_param->rdLevel >= 3)
             {
                 /* Calculate RD cost of best inter option */
-                if (!m_bChromaSa8d && (m_csp != X265_CSP_I400)) /* When m_bChromaSa8d is enabled, chroma MC has already been done */
+                if ((!m_bChromaSa8d && (m_csp != X265_CSP_I400)) || (m_frame->m_fencPic->m_picCsp == X265_CSP_I400 && m_csp != X265_CSP_I400)) /* When m_bChromaSa8d is enabled, chroma MC has already been done */
                 {
                     uint32_t numPU = bestInter->cu.getNumPartInter(0);
                     for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
@@ -1213,6 +1213,13 @@
                     if (m_slice->m_sliceType == B_SLICE && md.pred[PRED_BIDIR].sa8dCost != MAX_INT64 &&
                         md.pred[PRED_BIDIR].sa8dCost * 16 <= bestInter->sa8dCost * 17)
                     {
+                        uint32_t numPU = md.pred[PRED_BIDIR].cu.getNumPartInter(0);
+                        if (m_frame->m_fencPic->m_picCsp == X265_CSP_I400 && m_csp != X265_CSP_I400)
+                            for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
+                            {
+                                PredictionUnit pu(md.pred[PRED_BIDIR].cu, cuGeom, puIdx);
+                                motionCompensation(md.pred[PRED_BIDIR].cu, pu, md.pred[PRED_BIDIR].predYuv, true, true);
+                            }
                         encodeResAndCalcRdInterCU(md.pred[PRED_BIDIR], cuGeom);
                         checkBestMode(md.pred[PRED_BIDIR], depth);
                     }
@@ -1290,10 +1297,10 @@
 
                         uint32_t tuDepthRange[2];
                         cu.getInterTUQtDepthRange(tuDepthRange, 0);
-                        m_rqt[cuGeom.depth].tmpResiYuv.subtract(*md.bestMode->fencYuv, md.bestMode->predYuv, cuGeom.log2CUSize);
+                        m_rqt[cuGeom.depth].tmpResiYuv.subtract(*md.bestMode->fencYuv, md.bestMode->predYuv, cuGeom.log2CUSize, m_frame->m_fencPic->m_picCsp);
                         residualTransformQuantInter(*md.bestMode, cuGeom, 0, 0, tuDepthRange);
                         if (cu.getQtRootCbf(0))
-                            md.bestMode->reconYuv.addClip(md.bestMode->predYuv, m_rqt[cuGeom.depth].tmpResiYuv, cu.m_log2CUSize[0]);
+                            md.bestMode->reconYuv.addClip(md.bestMode->predYuv, m_rqt[cuGeom.depth].tmpResiYuv, cu.m_log2CUSize[0], m_frame->m_fencPic->m_picCsp);
                         else
                         {
                             md.bestMode->reconYuv.copyFromYuv(md.bestMode->predYuv);
@@ -1524,6 +1531,13 @@
                 checkBidir2Nx2N(md.pred[PRED_2Nx2N], md.pred[PRED_BIDIR], cuGeom);
                 if (md.pred[PRED_BIDIR].sa8dCost < MAX_INT64)
                 {
+                    uint32_t numPU = md.pred[PRED_BIDIR].cu.getNumPartInter(0);
+                    if (m_frame->m_fencPic->m_picCsp == X265_CSP_I400 && m_csp != X265_CSP_I400)
+                        for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
+                        {
+                            PredictionUnit pu(md.pred[PRED_BIDIR].cu, cuGeom, puIdx);
+                            motionCompensation(md.pred[PRED_BIDIR].cu, pu, md.pred[PRED_BIDIR].predYuv, true, true);
+                        }
                     encodeResAndCalcRdInterCU(md.pred[PRED_BIDIR], cuGeom);
                     checkBestMode(md.pred[PRED_BIDIR], cuGeom.depth);
                 }
@@ -1701,6 +1715,17 @@
                     ProfileCounter(parentCTU, skippedIntraCU[cuGeom.depth]);
                 }
             }
+            if ((md.bestMode->cu.isInter(0) && !(md.bestMode->cu.m_mergeFlag[0] && md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N)) && (m_frame->m_fencPic->m_picCsp == X265_CSP_I400 && m_csp != X265_CSP_I400))
+            {
+                uint32_t numPU = md.bestMode->cu.getNumPartInter(0);
+
+                for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
+                {
+                    PredictionUnit pu(md.bestMode->cu, cuGeom, puIdx);
+                    motionCompensation(md.bestMode->cu, pu, md.bestMode->predYuv, false, m_csp != X265_CSP_I400);
+                }
+                encodeResAndCalcRdInterCU(*md.bestMode, cuGeom);
+            }
         }
 
         if (m_bTryLossless)
@@ -1912,11 +1937,11 @@
         tempPred->cu.m_mv[1][0] = candMvField[i][1].mv;
         tempPred->cu.m_refIdx[0][0] = (int8_t)candMvField[i][0].refIdx;
         tempPred->cu.m_refIdx[1][0] = (int8_t)candMvField[i][1].refIdx;
-        motionCompensation(tempPred->cu, pu, tempPred->predYuv, true, m_bChromaSa8d && (m_csp != X265_CSP_I400));
+        motionCompensation(tempPred->cu, pu, tempPred->predYuv, true, m_bChromaSa8d && (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400));
 
         tempPred->sa8dBits = getTUBits(i, numMergeCand);
         tempPred->distortion = primitives.cu[sizeIdx].sa8d(fencYuv->m_buf[0], fencYuv->m_size, tempPred->predYuv.m_buf[0], tempPred->predYuv.m_size);
-        if (m_bChromaSa8d && (m_csp != X265_CSP_I400))
+        if (m_bChromaSa8d && (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400))
         {
             tempPred->distortion += primitives.chroma[m_csp].cu[sizeIdx].sa8d(fencYuv->m_buf[1], fencYuv->m_csize, tempPred->predYuv.m_buf[1], tempPred->predYuv.m_csize);
             tempPred->distortion += primitives.chroma[m_csp].cu[sizeIdx].sa8d(fencYuv->m_buf[2], fencYuv->m_csize, tempPred->predYuv.m_buf[2], tempPred->predYuv.m_csize);
@@ -1935,7 +1960,7 @@
         return;
 
     /* calculate the motion compensation for chroma for the best mode selected */
-    if (!m_bChromaSa8d && (m_csp != X265_CSP_I400)) /* Chroma MC was done above */
+    if ((!m_bChromaSa8d && (m_csp != X265_CSP_I400)) || (m_frame->m_fencPic->m_picCsp == X265_CSP_I400 && m_csp != X265_CSP_I400)) /* Chroma MC was done above */
         motionCompensation(bestPred->cu, pu, bestPred->predYuv, false, true);
 
     if (m_param->rdLevel)
@@ -2118,14 +2143,14 @@
                 bestME[i].ref = m_reuseRef[refOffset + index++];
         }
     }
-    predInterSearch(interMode, cuGeom, m_bChromaSa8d && (m_csp != X265_CSP_I400), refMask);
+    predInterSearch(interMode, cuGeom, m_bChromaSa8d && (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400), refMask);
 
     /* predInterSearch sets interMode.sa8dBits */
     const Yuv& fencYuv = *interMode.fencYuv;
     Yuv& predYuv = interMode.predYuv;
     int part = partitionFromLog2Size(cuGeom.log2CUSize);
     interMode.distortion = primitives.cu[part].sa8d(fencYuv.m_buf[0], fencYuv.m_size, predYuv.m_buf[0], predYuv.m_size);
-    if (m_bChromaSa8d && (m_csp != X265_CSP_I400))
+    if (m_bChromaSa8d && (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400))
     {
         interMode.distortion += primitives.chroma[m_csp].cu[part].sa8d(fencYuv.m_buf[1], fencYuv.m_csize, predYuv.m_buf[1], predYuv.m_csize);
         interMode.distortion += primitives.chroma[m_csp].cu[part].sa8d(fencYuv.m_buf[2], fencYuv.m_csize, predYuv.m_buf[2], predYuv.m_csize);
@@ -2167,7 +2192,7 @@
                 bestME[i].ref = m_reuseRef[refOffset + index++];
         }
     }
-    predInterSearch(interMode, cuGeom, m_csp != X265_CSP_I400, refMask);
+    predInterSearch(interMode, cuGeom, m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400, refMask);
 
     /* predInterSearch sets interMode.sa8dBits, but this is ignored */
     encodeResAndCalcRdInterCU(interMode, cuGeom);
@@ -2230,10 +2255,10 @@
     cu.m_mvd[1][0] = bestME[1].mv - mvp1;
 
     PredictionUnit pu(cu, cuGeom, 0);
-    motionCompensation(cu, pu, bidir2Nx2N.predYuv, true, m_bChromaSa8d && (m_csp != X265_CSP_I400));
+    motionCompensation(cu, pu, bidir2Nx2N.predYuv, true, m_bChromaSa8d && (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400));
 
     int sa8d = primitives.cu[partEnum].sa8d(fencYuv.m_buf[0], fencYuv.m_size, bidir2Nx2N.predYuv.m_buf[0], bidir2Nx2N.predYuv.m_size);
-    if (m_bChromaSa8d && (m_csp != X265_CSP_I400))
+    if (m_bChromaSa8d && (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400))
     {
         /* Add in chroma distortion */
         sa8d += primitives.chroma[m_csp].cu[partEnum].sa8d(fencYuv.m_buf[1], fencYuv.m_csize, bidir2Nx2N.predYuv.m_buf[1], bidir2Nx2N.predYuv.m_csize);
@@ -2264,7 +2289,7 @@
 
         int zsa8d;
 
-        if (m_bChromaSa8d && (m_csp != X265_CSP_I400))
+        if (m_bChromaSa8d && (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400))
         {
             cu.m_mv[0][0] = mvzero;
             cu.m_mv[1][0] = mvzero;
@@ -2312,9 +2337,9 @@
             if (m_bChromaSa8d) /* real MC was already performed */
                 bidir2Nx2N.predYuv.copyFromYuv(tmpPredYuv);
             else
-                motionCompensation(cu, pu, bidir2Nx2N.predYuv, true, m_csp != X265_CSP_I400);
+                motionCompensation(cu, pu, bidir2Nx2N.predYuv, true, m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400);
         }
-        else if (m_bChromaSa8d && (m_csp != X265_CSP_I400))
+        else if (m_bChromaSa8d && (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400))
         {
             /* recover overwritten motion vectors */
             cu.m_mv[0][0] = bestME[0].mv;
@@ -2360,7 +2385,7 @@
         cu.getIntraTUQtDepthRange(tuDepthRange, 0);
 
         residualTransformQuantIntra(*bestMode, cuGeom, 0, 0, tuDepthRange);
-        if (m_csp != X265_CSP_I400)
+        if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400)
         {
             getBestIntraModeChroma(*bestMode, cuGeom);
             residualQTIntraChroma(*bestMode, cuGeom, 0, 0);
@@ -2384,7 +2409,7 @@
                                       fencYuv.m_buf[0], predY,
                                       fencYuv.m_size, predYuv.m_size);
 
-        if (m_csp != X265_CSP_I400)
+        if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400)
         {
             pixel* predU = predYuv.getCbAddr(absPartIdx);
             pixel* predV = predYuv.getCrAddr(absPartIdx);
@@ -2414,7 +2439,7 @@
         else
             primitives.cu[sizeIdx].copy_pp(reconPic.getLumaAddr(cu.m_cuAddr, absPartIdx), reconPic.m_stride,
                                            predY, predYuv.m_size);
-        if (m_csp != X265_CSP_I400)
+        if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400)
         {
              pixel* predU = predYuv.getCbAddr(absPartIdx);
              pixel* predV = predYuv.getCrAddr(absPartIdx);
@@ -2434,7 +2459,7 @@
         }
     }
 
-    cu.updatePic(cuGeom.depth);
+    cu.updatePic(cuGeom.depth, m_frame->m_fencPic->m_picCsp);
 }
 
 void Analysis::addSplitFlagCost(Mode& mode, uint32_t depth)
diff -r 6d3849d648f0 -r 028ccaa64860 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Sun May 29 21:50:25 2016 +0800
+++ b/source/encoder/encoder.cpp	Wed May 25 10:34:18 2016 +0530
@@ -511,12 +511,6 @@
 
     if (pic_in)
     {
-        if (pic_in->colorSpace != m_param->internalCsp)
-        {
-            x265_log(m_param, X265_LOG_ERROR, "Unsupported chroma subsampling (%d) on input\n",
-                     pic_in->colorSpace);
-            return -1;
-        }
         if (pic_in->bitDepth < 8 || pic_in->bitDepth > 16)
         {
             x265_log(m_param, X265_LOG_ERROR, "Input bit depth (%d) must be between 8 and 16\n",
@@ -538,7 +532,7 @@
                 {
                     inFrame->m_fencPic->m_cuOffsetY = m_sps.cuOffsetY;
                     inFrame->m_fencPic->m_buOffsetY = m_sps.buOffsetY;
-                    if (pic_in->colorSpace != X265_CSP_I400)
+                    if (m_param->internalCsp != X265_CSP_I400)
                     {
                         inFrame->m_fencPic->m_cuOffsetC = m_sps.cuOffsetC;
                         inFrame->m_fencPic->m_buOffsetC = m_sps.buOffsetC;
@@ -558,7 +552,7 @@
                     {
                         m_sps.cuOffsetY = inFrame->m_fencPic->m_cuOffsetY;
                         m_sps.buOffsetY = inFrame->m_fencPic->m_buOffsetY;
-                        if (pic_in->colorSpace != X265_CSP_I400)
+                        if (m_param->internalCsp != X265_CSP_I400)
                         {
                             m_sps.cuOffsetC = inFrame->m_fencPic->m_cuOffsetC;
                             m_sps.cuOffsetY = inFrame->m_fencPic->m_cuOffsetY;
diff -r 6d3849d648f0 -r 028ccaa64860 source/encoder/motion.cpp
--- a/source/encoder/motion.cpp	Sun May 29 21:50:25 2016 +0800
+++ b/source/encoder/motion.cpp	Wed May 25 10:34:18 2016 +0530
@@ -183,7 +183,7 @@
 }
 
 /* Called by Search::predInterSearch() or --pme equivalent, chroma residual might be considered */
-void MotionEstimate::setSourcePU(const Yuv& srcFencYuv, int _ctuAddr, int cuPartIdx, int puPartIdx, int pwidth, int pheight, const int method, const int refine)
+void MotionEstimate::setSourcePU(const Yuv& srcFencYuv, int _ctuAddr, int cuPartIdx, int puPartIdx, int pwidth, int pheight, const int method, const int refine, bool bChroma)
 {
     partEnum = partitionFromSizes(pwidth, pheight);
     X265_CHECK(LUMA_4x4 != partEnum, "4x4 inter partition detected!\n");
@@ -200,7 +200,7 @@
 
     /* Enable chroma residual cost if subpelRefine level is greater than 2 and chroma block size
      * is an even multiple of 4x4 pixels (indicated by non-null chromaSatd pointer) */
-    bChromaSATD = subpelRefine > 2 && chromaSatd && (srcFencYuv.m_csp != X265_CSP_I400);
+    bChromaSATD = subpelRefine > 2 && chromaSatd && (srcFencYuv.m_csp != X265_CSP_I400 && bChroma);
     X265_CHECK(!(bChromaSATD && !workload[subpelRefine].hpel_satd), "Chroma SATD cannot be used with SAD hpel\n");
 
     ctuAddr = _ctuAddr;
diff -r 6d3849d648f0 -r 028ccaa64860 source/encoder/motion.h
--- a/source/encoder/motion.h	Sun May 29 21:50:25 2016 +0800
+++ b/source/encoder/motion.h	Wed May 25 10:34:18 2016 +0530
@@ -75,7 +75,7 @@
     /* Methods called at slice setup */
 
     void setSourcePU(pixel *fencY, intptr_t stride, intptr_t offset, int pwidth, int pheight, const int searchMethod, const int subpelRefine);
-    void setSourcePU(const Yuv& srcFencYuv, int ctuAddr, int cuPartIdx, int puPartIdx, int pwidth, int pheight, const int searchMethod, const int subpelRefine);
+    void setSourcePU(const Yuv& srcFencYuv, int ctuAddr, int cuPartIdx, int puPartIdx, int pwidth, int pheight, const int searchMethod, const int subpelRefine, bool bChroma);
 
     /* buf*() and motionEstimate() methods all use cached fenc pixels and thus
      * require setSourcePU() to be called prior. */
diff -r 6d3849d648f0 -r 028ccaa64860 source/encoder/reference.cpp
--- a/source/encoder/reference.cpp	Sun May 29 21:50:25 2016 +0800
+++ b/source/encoder/reference.cpp	Wed May 25 10:34:18 2016 +0530
@@ -68,7 +68,7 @@
         intptr_t stride = reconPic->m_stride;
         int cuHeight = g_maxCUSize;
 
-        for (int c = 0; c < (p.internalCsp != X265_CSP_I400 ? numInterpPlanes : 1); c++)
+        for (int c = 0; c < (p.internalCsp != X265_CSP_I400 && recPic->m_picCsp != X265_CSP_I400 ? numInterpPlanes : 1); c++)
         {
             if (c == 1)
             {
diff -r 6d3849d648f0 -r 028ccaa64860 source/encoder/sao.cpp
--- a/source/encoder/sao.cpp	Sun May 29 21:50:25 2016 +0800
+++ b/source/encoder/sao.cpp	Wed May 25 10:34:18 2016 +0530
@@ -255,7 +255,7 @@
     }
 
     saoParam->bSaoFlag[0] = true;
-    saoParam->bSaoFlag[1] = m_param->internalCsp != X265_CSP_I400;
+    saoParam->bSaoFlag[1] = m_param->internalCsp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400;
 
     m_numNoSao[0] = 0; // Luma
     m_numNoSao[1] = 0; // Chroma
@@ -935,7 +935,7 @@
     memset(m_offsetOrgPreDblk[addr], 0, sizeof(PerPlane));
 
     int plane_offset = 0;
-    for (int plane = 0; plane < (frame->m_param->internalCsp != X265_CSP_I400 ? NUM_PLANE : 1); plane++)
+    for (int plane = 0; plane < (frame->m_param->internalCsp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400? NUM_PLANE : 1); plane++)
     {
         if (plane == 1)
         {
@@ -1208,7 +1208,7 @@
 
     const int addrMerge[2] = {(idxX ? addr - 1 : -1), (rowBaseAddr ? addr - m_numCuInWidth : -1)};// left, up
 
-    bool chroma = m_param->internalCsp != X265_CSP_I400;
+    bool chroma = m_param->internalCsp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400;
     int planes = chroma ? 3 : 1;
 
     // reset stats Y, Cb, Cr
diff -r 6d3849d648f0 -r 028ccaa64860 source/encoder/search.cpp
--- a/source/encoder/search.cpp	Sun May 29 21:50:25 2016 +0800
+++ b/source/encoder/search.cpp	Wed May 25 10:34:18 2016 +0530
@@ -1727,6 +1727,12 @@
         else
             cu.getAllowedChromaDir(absPartIdxC, modeList);
 
+        if (m_frame->m_fencPic->m_picCsp  == X265_CSP_I400 && m_csp != X265_CSP_I400)
+        {
+            for (uint32_t l = 1; l < NUM_CHROMA_MODE; l++)
+                modeList[l] = modeList[0];
+            maxMode = 1;
+        }
         // check chroma modes
         for (uint32_t mode = minMode; mode < maxMode; mode++)
         {
@@ -1966,7 +1972,8 @@
         slave.m_frame = m_frame;
         slave.m_param = m_param;
         slave.setLambdaFromQP(pme.mode.cu, m_rdCost.m_qp);
-        slave.m_me.setSourcePU(*pme.mode.fencYuv, pme.pu.ctuAddr, pme.pu.cuAbsPartIdx, pme.pu.puAbsPartIdx, pme.pu.width, pme.pu.height, m_param->searchMethod, m_param->subpelRefine);
+        bool bChroma = slave.m_frame->m_fencPic->m_picCsp != X265_CSP_I400;
+        slave.m_me.setSourcePU(*pme.mode.fencYuv, pme.pu.ctuAddr, pme.pu.cuAbsPartIdx, pme.pu.puAbsPartIdx, pme.pu.width, pme.pu.height, m_param->searchMethod, m_param->subpelRefine, bChroma);
     }
 
     /* Perform ME, repeat until no more work is available */
@@ -2069,7 +2076,7 @@
         MotionData* bestME = interMode.bestME[puIdx];
         PredictionUnit pu(cu, cuGeom, puIdx);
 
-        m_me.setSourcePU(*interMode.fencYuv, pu.ctuAddr, pu.cuAbsPartIdx, pu.puAbsPartIdx, pu.width, pu.height, m_param->searchMethod, m_param->subpelRefine);
+        m_me.setSourcePU(*interMode.fencYuv, pu.ctuAddr, pu.cuAbsPartIdx, pu.puAbsPartIdx, pu.width, pu.height, m_param->searchMethod, m_param->subpelRefine, bChromaMC);
 
         /* find best cost merge candidate. note: 2Nx2N merge and bidir are handled as separate modes */
         uint32_t mrgCost = numPart == 1 ? MAX_UINT : mergeEstimation(cu, cuGeom, pu, puIdx, merge);
@@ -2529,7 +2536,7 @@
     interMode.lumaDistortion = primitives.cu[part].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
     interMode.distortion = interMode.lumaDistortion;
     // Chroma
-    if (m_csp != X265_CSP_I400)
+    if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400)
     {
         interMode.chromaDistortion = m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[part].sse_pp(fencYuv->m_buf[1], fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
         interMode.chromaDistortion += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[part].sse_pp(fencYuv->m_buf[2], fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
@@ -2570,7 +2577,7 @@
     uint32_t log2CUSize = cuGeom.log2CUSize;
     int sizeIdx = log2CUSize - 2;
 
-    resiYuv->subtract(*fencYuv, *predYuv, log2CUSize);
+    resiYuv->subtract(*fencYuv, *predYuv, log2CUSize, m_frame->m_fencPic->m_picCsp);
 
     uint32_t tuDepthRange[2];
     cu.getInterTUQtDepthRange(tuDepthRange, 0);
@@ -2584,7 +2591,7 @@
     if (!tqBypass)
     {
         sse_t cbf0Dist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
-        if (m_csp != X265_CSP_I400)
+        if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400)
         {
             cbf0Dist += m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], predYuv->m_csize, predYuv->m_buf[1], predYuv->m_csize));
             cbf0Dist += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2], predYuv->m_csize, predYuv->m_buf[2], predYuv->m_csize));
@@ -2655,14 +2662,14 @@
     m_entropyCoder.store(interMode.contexts);
 
     if (cu.getQtRootCbf(0))
-        reconYuv->addClip(*predYuv, *resiYuv, log2CUSize);
+        reconYuv->addClip(*predYuv, *resiYuv, log2CUSize, m_frame->m_fencPic->m_picCsp);
     else
         reconYuv->copyFromYuv(*predYuv);
 
     // update with clipped distortion and cost (qp estimation loop uses unclipped values)
     sse_t bestLumaDist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
     interMode.distortion = bestLumaDist;
-    if (m_csp != X265_CSP_I400)
+    if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400)
     {
         sse_t bestChromaDist = m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
         bestChromaDist += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2], fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
@@ -2694,7 +2701,7 @@
     {
         // code full block
         uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
-        uint32_t codeChroma = (m_csp != X265_CSP_I400) ? 1 : 0;
+        uint32_t codeChroma = (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400) ? 1 : 0;
 
         uint32_t tuDepthC = tuDepth;
         if (log2TrSizeC < 2)
@@ -2802,14 +2809,14 @@
         {
             residualTransformQuantInter(mode, cuGeom, qPartIdx, tuDepth + 1, depthRange);
             ycbf |= cu.getCbf(qPartIdx, TEXT_LUMA,     tuDepth + 1);
-            if (m_csp != X265_CSP_I400)
+            if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400)
             {
                 ucbf |= cu.getCbf(qPartIdx, TEXT_CHROMA_U, tuDepth + 1);
                 vcbf |= cu.getCbf(qPartIdx, TEXT_CHROMA_V, tuDepth + 1);
             }
         }
         cu.m_cbf[0][absPartIdx] |= ycbf << tuDepth;
-        if (m_csp != X265_CSP_I400)
+        if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400)
         {
             cu.m_cbf[1][absPartIdx] |= ucbf << tuDepth;
             cu.m_cbf[2][absPartIdx] |= vcbf << tuDepth;
@@ -2844,7 +2851,7 @@
     X265_CHECK(bCheckFull || bCheckSplit, "check-full or check-split must be set\n");
 
     uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
-    uint32_t codeChroma = (m_csp != X265_CSP_I400) ? 1 : 0;
+    uint32_t codeChroma = (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400) ? 1 : 0;
     uint32_t tuDepthC = tuDepth;
     if (log2TrSizeC < 2)
     {
@@ -3095,6 +3102,19 @@
             }
         }
 
+        if (m_frame->m_fencPic->m_picCsp == X265_CSP_I400 && m_csp != X265_CSP_I400)
+        {
+            for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
+            {
+                TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, absPartIdxStep, absPartIdx);
+                do
+                {
+                    uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU;
+                    cu.setCbfPartRange(0, (TextType)chromaId, absPartIdxC, tuIterator.absPartIdxStep);
+                }
+                while(tuIterator.isNextSection());
+            }
+        }
         if (checkTransformSkipY)
         {
             sse_t nonZeroDistY = 0;
@@ -3304,14 +3324,14 @@
         {
             estimateResidualQT(mode, cuGeom, qPartIdx, tuDepth + 1, resiYuv, splitCost, depthRange);
             ycbf |= cu.getCbf(qPartIdx, TEXT_LUMA,     tuDepth + 1);
-            if (m_csp != X265_CSP_I400)
+            if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400)
             {
                 ucbf |= cu.getCbf(qPartIdx, TEXT_CHROMA_U, tuDepth + 1);
                 vcbf |= cu.getCbf(qPartIdx, TEXT_CHROMA_V, tuDepth + 1);
             }
         }
         cu.m_cbf[0][absPartIdx] |= ycbf << tuDepth;
-        if (m_csp != X265_CSP_I400)
+        if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400)
         {
             cu.m_cbf[1][absPartIdx] |= ucbf << tuDepth;
             cu.m_cbf[2][absPartIdx] |= vcbf << tuDepth;
@@ -3403,7 +3423,7 @@
 
     const bool bSubdiv  = tuDepth < cu.m_tuDepth[absPartIdx];
     uint32_t log2TrSize = cu.m_log2CUSize[0] - tuDepth;
-    if (m_csp != X265_CSP_I400)
+    if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400)
     {
         if (!(log2TrSize - m_hChromaShift < 2))
         {
@@ -3442,7 +3462,7 @@
     const uint32_t qtLayer = log2TrSize - 2;
 
     uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
-    uint32_t codeChroma = (m_csp != X265_CSP_I400) ? 1 : 0;
+    uint32_t codeChroma = (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400) ? 1 : 0;
     uint32_t tuDepthC = tuDepth;
     if (log2TrSizeC < 2)
     {
diff -r 6d3849d648f0 -r 028ccaa64860 source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp	Sun May 29 21:50:25 2016 +0800
+++ b/source/encoder/slicetype.cpp	Wed May 25 10:34:18 2016 +0530
@@ -83,7 +83,7 @@
     uint32_t var;
 
     var  = acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[0] + blockOffsetLuma, stride, 0, csp);
-    if (csp != X265_CSP_I400)
+    if (csp != X265_CSP_I400 && curFrame->m_fencPic->m_picCsp != X265_CSP_I400)
     {
         var += acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[1] + blockOffsetChroma, cStride, 1, csp);
         var += acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[2] + blockOffsetChroma, cStride, 2, csp);
diff -r 6d3849d648f0 -r 028ccaa64860 source/encoder/weightPrediction.cpp
--- a/source/encoder/weightPrediction.cpp	Sun May 29 21:50:25 2016 +0800
+++ b/source/encoder/weightPrediction.cpp	Wed May 25 10:34:18 2016 +0530
@@ -233,7 +233,7 @@
     cache.numPredDir = slice.isInterP() ? 1 : 2;
     cache.lowresWidthInCU = fenc.width >> 3;
     cache.lowresHeightInCU = fenc.lines >> 3;
-    cache.csp = fencPic->m_picCsp;
+    cache.csp = param.internalCsp;
     cache.hshift = CHROMA_H_SHIFT(cache.csp);
     cache.vshift = CHROMA_V_SHIFT(cache.csp);
 
@@ -330,7 +330,7 @@
                 {
                     /* reference chroma planes must be extended prior to being
                      * used as motion compensation sources */
-                    if (!refFrame->m_bChromaExtended && param.internalCsp != X265_CSP_I400)
+                    if (!refFrame->m_bChromaExtended && param.internalCsp != X265_CSP_I400 && frame.m_fencPic->m_picCsp != X265_CSP_I400)
                     {
                         refFrame->m_bChromaExtended = true;
                         PicYuv *refPic = refFrame->m_fencPic;



More information about the x265-devel mailing list