[x265] fix rd=0,1

Wed Oct 29 17:38:04 CET 2014

# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1414600429 -32400
#      Thu Oct 30 01:33:49 2014 +0900
# Branch stable
# Node ID 2a5e13c6ee9351095e9a7aade3c52e8b4092b7f8
# Parent  da5ba239bf59a48d9b586c54bf2d0a5320043044
fix rd=0,1

diff -r da5ba239bf59 -r 2a5e13c6ee93 source/common/predict.cpp
--- a/source/common/predict.cpp	Wed Oct 29 09:13:25 2014 +0530
+++ b/source/common/predict.cpp	Thu Oct 30 01:33:49 2014 +0900
@@ -144,12 +144,17 @@
     primitives.intra_pred[dirMode][sizeIdx](dst, stride, left, above, dirMode, 0);
 }
 
-void Predict::prepMotionCompensation(const CUData& cu, const CUGeom& cuGeom, int partIdx)
+void Predict::initMotionCompensation(const CUData& cu, const CUGeom& cuGeom, int partIdx)
 {
     m_predSlice = cu.m_slice;
     cu.getPartIndexAndSize(partIdx, m_puAbsPartIdx, m_puWidth, m_puHeight);
     m_ctuAddr = cu.m_cuAddr;
     m_cuAbsPartIdx = cuGeom.encodeIdx;
+}
+
+void Predict::prepMotionCompensation(const CUData& cu, const CUGeom& cuGeom, int partIdx)
+{
+    initMotionCompensation(cu, cuGeom, partIdx);
 
     m_refIdx0      = cu.m_refIdx[0][m_puAbsPartIdx];
     m_clippedMv[0] = cu.m_mv[0][m_puAbsPartIdx];
diff -r da5ba239bf59 -r 2a5e13c6ee93 source/common/predict.h
--- a/source/common/predict.h	Wed Oct 29 09:13:25 2014 +0530
+++ b/source/common/predict.h	Thu Oct 30 01:33:49 2014 +0900
@@ -117,6 +117,7 @@
 public:
 
     /* prepMotionCompensation needs to be called to prepare MC with CU-relevant data */
+    void initMotionCompensation(const CUData& cu, const CUGeom& cuGeom, int partIdx);
     void prepMotionCompensation(const CUData& cu, const CUGeom& cuGeom, int partIdx);
     void motionCompensation(Yuv& predYuv, bool bLuma, bool bChroma);
 
diff -r da5ba239bf59 -r 2a5e13c6ee93 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Wed Oct 29 09:13:25 2014 +0530
+++ b/source/encoder/analysis.cpp	Thu Oct 30 01:33:49 2014 +0900
@@ -882,7 +882,7 @@
         if (m_bTryLossless)
             tryLossless(cuGeom);
 
-        if (mightSplit && m_param->rdLevel > 1)
+        if (mightSplit)
             addSplitFlagCost(*md.bestMode, cuGeom.depth);
     }
 
@@ -934,7 +934,7 @@
         if (mightNotSplit)
             addSplitFlagCost(*splitPred, cuGeom.depth);
         else if (m_param->rdLevel <= 1)
-            splitPred->sa8dCost = m_rdCost.calcRdSADCost(splitPred->distortion, splitPred->totalBits);
+            splitPred->sa8dCost = m_rdCost.calcRdSADCost(splitPred->distortion, splitPred->sa8dBits);
         else
             updateModeCost(*splitPred);
 
@@ -1539,6 +1539,7 @@
     intraMode.totalBits = bbits;
     intraMode.distortion = bsad;
     intraMode.sa8dCost = bcost;
+    intraMode.sa8dBits = bbits;
 }
 
 void Analysis::encodeIntraInInter(Mode& intraMode, const CUGeom& cuGeom)
@@ -1601,8 +1602,6 @@
     uint32_t absPartIdx = cuGeom.encodeIdx;
     int sizeIdx = cuGeom.log2CUSize - 2;
 
-    /* at RD 0, the prediction pixels are accumulated into the top depth predYuv */
-    Yuv& predYuv = m_modeDepth[0].bestMode->predYuv;
     Yuv& fencYuv = m_modeDepth[0].fencYuv;
 
     /* reuse the bestMode data structures at the current depth */
@@ -1615,18 +1614,13 @@
 
     if (cu.m_predMode[0] == MODE_INTRA)
     {
-        uint32_t initTrDepth = cu.m_partSize[0] == SIZE_2Nx2N ? 0 : 1;
-
         uint32_t tuDepthRange[2];
         cu.getIntraTUQtDepthRange(tuDepthRange, 0);
 
+        uint32_t initTrDepth = cu.m_partSize[0] == SIZE_NxN;
         residualTransformQuantIntra(*bestMode, cuGeom, initTrDepth, 0, tuDepthRange);
         getBestIntraModeChroma(*bestMode, cuGeom);
         residualQTIntraChroma(*bestMode, cuGeom, 0, 0);
-
-        /* copy the reconstructed part to the recon pic for later intra
-         * predictions */
-        reconYuv.copyToPicYuv(*m_frame->m_reconPicYuv, cu.m_cuAddr, absPartIdx);
     }
     else
     {
@@ -1636,16 +1630,22 @@
 
         ShortYuv& resiYuv = m_rqt[cuGeom.depth].tmpResiYuv;
 
+        /* at RD 0, the prediction pixels are accumulated into the top depth predYuv */
+        Yuv& predYuv = m_modeDepth[0].bestMode->predYuv;
+        pixel* predY = predYuv.getLumaAddr(absPartIdx);
+        pixel* predU = predYuv.getCbAddr(absPartIdx);
+        pixel* predV = predYuv.getCrAddr(absPartIdx);
+
         primitives.luma_sub_ps[sizeIdx](resiYuv.m_buf[0], resiYuv.m_size,
-                                        fencYuv.getLumaAddr(absPartIdx), predYuv.getLumaAddr(absPartIdx),
+                                        fencYuv.getLumaAddr(absPartIdx), predY,
                                         fencYuv.m_size, predYuv.m_size);
 
         primitives.chroma[m_csp].sub_ps[sizeIdx](resiYuv.m_buf[1], resiYuv.m_csize,
-                                        fencYuv.getCbAddr(absPartIdx), predYuv.getCbAddr(absPartIdx),
+                                        fencYuv.getCbAddr(absPartIdx), predU,
                                         fencYuv.m_csize, predYuv.m_csize);
 
         primitives.chroma[m_csp].sub_ps[sizeIdx](resiYuv.m_buf[2], resiYuv.m_csize,
-                                        fencYuv.getCrAddr(absPartIdx), predYuv.getCrAddr(absPartIdx),
+                                        fencYuv.getCrAddr(absPartIdx), predV,
                                         fencYuv.m_csize, predYuv.m_csize);
 
         uint32_t tuDepthRange[2];
@@ -1655,23 +1655,36 @@
 
         if (cu.m_mergeFlag[0] && cu.m_partSize[0] == SIZE_2Nx2N && !cu.getQtRootCbf(0))
             cu.setSkipFlagSubParts(true);
-        else if (cu.getQtRootCbf(0))
+
+        PicYuv& reconPicYuv = *m_frame->m_reconPicYuv;
+        if (cu.getQtRootCbf(0)) // TODO: split to each component
         {
             /* residualTransformQuantInter() wrote transformed residual back into
              * resiYuv. Generate the recon pixels by adding it to the prediction */
 
             primitives.luma_add_ps[sizeIdx](reconYuv.m_buf[0], reconYuv.m_size,
-                                            predYuv.getLumaAddr(absPartIdx), resiYuv.m_buf[0], predYuv.m_size, resiYuv.m_size);
-
+                                            predY, resiYuv.m_buf[0], predYuv.m_size, resiYuv.m_size);
             primitives.chroma[m_csp].add_ps[sizeIdx](reconYuv.m_buf[1], reconYuv.m_csize,
-                                            predYuv.getCbAddr(absPartIdx), resiYuv.m_buf[1], predYuv.m_csize, resiYuv.m_csize);
+                                            predU, resiYuv.m_buf[1], predYuv.m_csize, resiYuv.m_csize);
             primitives.chroma[m_csp].add_ps[sizeIdx](reconYuv.m_buf[2], reconYuv.m_csize,
-                                            predYuv.getCrAddr(absPartIdx), resiYuv.m_buf[2], predYuv.m_csize, resiYuv.m_csize);
+                                            predV, resiYuv.m_buf[2], predYuv.m_csize, resiYuv.m_csize);
 
             /* copy the reconstructed part to the recon pic for later intra
              * predictions */
             reconYuv.copyToPicYuv(*m_frame->m_reconPicYuv, cu.m_cuAddr, absPartIdx);
         }
+        else
+        {
+            /* copy the prediction pixels to the recon pic for later intra
+             * predictions */
+
+            primitives.luma_copy_pp[sizeIdx](reconPicYuv.getLumaAddr(cu.m_cuAddr, absPartIdx), reconPicYuv.m_stride,
+                                             predY, predYuv.m_size);
+            primitives.chroma[m_csp].copy_pp[sizeIdx](reconPicYuv.getCbAddr(cu.m_cuAddr, absPartIdx), reconPicYuv.m_strideC,
+                                                      predU, predYuv.m_csize);
+            primitives.chroma[m_csp].copy_pp[sizeIdx](reconPicYuv.getCrAddr(cu.m_cuAddr, absPartIdx), reconPicYuv.m_strideC,
+                                                      predV, predYuv.m_csize);
+        }
     }
 
     checkDQP(cu, cuGeom);
diff -r da5ba239bf59 -r 2a5e13c6ee93 source/encoder/search.cpp
--- a/source/encoder/search.cpp	Wed Oct 29 09:13:25 2014 +0530
+++ b/source/encoder/search.cpp	Thu Oct 30 01:33:49 2014 +0900
@@ -643,7 +643,7 @@
         initAdiPattern(cu, cuGeom, absPartIdx, trDepth, lumaPredMode);
         predIntraLumaAng(lumaPredMode, pred, stride, log2TrSize);
 
-        X265_CHECK(!cu.m_transformSkip[TEXT_LUMA][0], "unexpected tskip flag in residualTransformQuantIntra\n");
+        X265_CHECK(!cu.m_transformSkip[TEXT_LUMA][absPartIdx], "unexpected tskip flag in residualTransformQuantIntra\n");
         cu.setTUDepthSubParts(trDepth, absPartIdx, fullDepth);
 
         primitives.calcresidual[sizeIdx](fenc, pred, residual, stride);
@@ -1101,7 +1101,7 @@
                 pixel*   fenc         = const_cast<pixel*>(mode.fencYuv->getChromaAddr(chromaId, absPartIdxC));
                 pixel*   pred         = mode.predYuv.getChromaAddr(chromaId, absPartIdxC);
                 int16_t* residual     = resiYuv.getChromaAddr(chromaId, absPartIdxC);
-                pixel*   recon        = mode.reconYuv.getChromaAddr(chromaId, absPartIdxC);
+                pixel*   recon        = mode.reconYuv.getChromaAddr(chromaId, absPartIdxC); // TODO: needed?
                 uint32_t coeffOffsetC = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (m_hChromaShift + m_vChromaShift));
                 coeff_t* coeff        = cu.m_trCoeff[ttype] + coeffOffsetC;
                 pixel*   picReconC    = m_frame->m_reconPicYuv->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.encodeIdx + absPartIdxC);
@@ -1728,7 +1728,7 @@
     for (int puIdx = 0; puIdx < numPart; puIdx++)
     {
         /* sets m_puAbsPartIdx, m_puWidth, m_puHeight */
-        prepMotionCompensation(cu, cuGeom, puIdx);
+        initMotionCompensation(cu, cuGeom, puIdx);
 
         pixel* pu = fencPic->getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx + m_puAbsPartIdx);
         m_me.setSourcePU(pu - fencPic->m_picOrg[0], m_puWidth, m_puHeight);
@@ -2327,6 +2327,7 @@
         residualTransformQuantIntra(mode, cuGeom, initTrDepth, 0, tuDepthRange);
         getBestIntraModeChroma(mode, cuGeom);
         residualQTIntraChroma(mode, cuGeom, 0, 0);
+        mode.reconYuv.copyFromPicYuv(*m_frame->m_reconPicYuv, cu.m_cuAddr, cuGeom.encodeIdx); // TODO: 
     }
 }