[x265] [PATCH] fix: support for Transquant Bypass mode

ashok at multicorewareinc.com ashok at multicorewareinc.com
Tue May 13 14:26:43 CEST 2014


# HG changeset patch
# User Ashok Kumar Mishra<ashok at multicorewareinc.com>
# Date 1399982012 -19800
#      Tue May 13 17:23:32 2014 +0530
# Node ID 8ce774039d126d484efe8deffc05c91663497cda
# Parent  a4d0d5679c28d6523e6b01a55fe548c3140118a6
fix: support for Transquant Bypass mode

diff -r a4d0d5679c28 -r 8ce774039d12 source/Lib/TLibCommon/TComDataCU.cpp
--- a/source/Lib/TLibCommon/TComDataCU.cpp	Sun May 11 17:32:37 2014 +0900
+++ b/source/Lib/TLibCommon/TComDataCU.cpp	Tue May 13 17:23:32 2014 +0530
@@ -339,7 +339,7 @@
     m_cuMvField[1].clearMvField();
 }
 
-void TComDataCU::initEstData(uint32_t depth)
+void TComDataCU::initEstData(uint32_t depth, bool bIsLosslessMode)
 {
     m_totalCost        = MAX_INT64;
     m_sa8dCost         = MAX_INT64;
@@ -359,7 +359,7 @@
         m_skipFlag[i]   = false;
         m_partSizes[i] = SIZE_NONE;
         m_predModes[i] = MODE_NONE;
-        m_cuTransquantBypass[i] = false;
+        m_cuTransquantBypass[i] = bIsLosslessMode;
         m_iPCMFlags[i] = 0;
         m_bMergeFlags[i] = 0;
         m_lumaIntraDir[i] = DC_IDX;
diff -r a4d0d5679c28 -r 8ce774039d12 source/Lib/TLibCommon/TComDataCU.h
--- a/source/Lib/TLibCommon/TComDataCU.h	Sun May 11 17:32:37 2014 +0900
+++ b/source/Lib/TLibCommon/TComDataCU.h	Tue May 13 17:23:32 2014 +0530
@@ -181,7 +181,7 @@
     void          destroy();
 
     void          initCU(TComPic* pic, uint32_t cuAddr);
-    void          initEstData(uint32_t depth);
+    void          initEstData(uint32_t depth, bool bIsLosslessMode);
     void          initEstData(uint32_t depth, int qp);
     void          initSubCU(TComDataCU* cu, uint32_t partUnitIdx, uint32_t depth);
     void          initSubCU(TComDataCU* cu, uint32_t partUnitIdx, uint32_t depth, int qp);
diff -r a4d0d5679c28 -r 8ce774039d12 source/Lib/TLibCommon/TComSampleAdaptiveOffset.cpp
--- a/source/Lib/TLibCommon/TComSampleAdaptiveOffset.cpp	Sun May 11 17:32:37 2014 +0900
+++ b/source/Lib/TLibCommon/TComSampleAdaptiveOffset.cpp	Tue May 13 17:23:32 2014 +0530
@@ -1368,10 +1368,10 @@
     uint32_t height;
     uint32_t pcmLeftShiftBit;
     uint32_t x, y;
+    int hChromaShift = cu->getHorzChromaShift();
+    int vChromaShift = cu->getVertChromaShift();
     uint32_t lumaOffset   = absZOrderIdx << cu->getPic()->getLog2UnitSize() * 2;
-    uint32_t chromaOffset = lumaOffset >> 2;
-
-    //uint32_t chromaOffset = lumaOffset >> (m_hChromaShift + m_vChromaShift);
+    uint32_t chromaOffset = lumaOffset >> (hChromaShift + vChromaShift);
 
     if (ttText == TEXT_LUMA)
     {
@@ -1403,10 +1403,9 @@
         }
 
         stride = pcPicYuvRec->getCStride();
-        //width  = ((g_maxCUSize >> depth) >> m_hChromaShift);
-        //height = ((g_maxCUSize >> depth) >> m_vhChromaShift);
-        width  = ((g_maxCUSize >> depth) >> 1);
-        height = ((g_maxCUSize >> depth) >> 1);
+        width  = ((g_maxCUSize >> depth) >> hChromaShift);
+        height = ((g_maxCUSize >> depth) >> vChromaShift);
+
         if (cu->isLosslessCoded(absZOrderIdx) && !cu->getIPCMFlag(absZOrderIdx))
         {
             pcmLeftShiftBit = 0;
diff -r a4d0d5679c28 -r 8ce774039d12 source/Lib/TLibEncoder/TEncCu.cpp
--- a/source/Lib/TLibEncoder/TEncCu.cpp	Sun May 11 17:32:37 2014 +0900
+++ b/source/Lib/TLibEncoder/TEncCu.cpp	Tue May 13 17:23:32 2014 +0530
@@ -557,6 +557,9 @@
     //PPAScopeEvent(TEncCu_xCompressIntraCU + depth);
 
     TComPic* pic = outBestCU->getPic();
+    int minTQ, maxTQ;
+    bool bIsTQBypassEnable = false;
+    bool bIsLosslessMode   = false;
 
     if (depth == 0)
     {
@@ -580,20 +583,38 @@
                           bpely <= slice->getSPS()->getPicHeightInLumaSamples());
     }
 
+    minTQ = maxTQ = 1;
+    if ((outTempCU->getSlice()->getPPS()->getTransquantBypassEnableFlag()))
+    {
+        bIsTQBypassEnable = true; // mark that the first iteration is to cost TQB mode.
+        minTQ = minTQ - 1;        // increase loop variable range by 1, to allow testing of TQB mode.
+        if (m_param->bEnableCUTransquantBypass)
+        {
+            maxTQ = minTQ;
+        }
+    }
+
     // We need to split, so don't try these modes.
     if (bInsidePicture)
     {
-        outTempCU->initEstData(depth);
+        for (int iTQ = minTQ; iTQ <= maxTQ; iTQ++)
+        {
+            bIsLosslessMode = bIsTQBypassEnable && (iTQ == minTQ);
 
-        xCheckRDCostIntra(outBestCU, outTempCU, SIZE_2Nx2N);
-        outTempCU->initEstData(depth);
+            outTempCU->initEstData(depth, bIsLosslessMode);
 
-        if (depth == g_maxCUDepth - g_addCUDepth)
-        {
-            if (outTempCU->getCUSize(0) > (1 << slice->getSPS()->getQuadtreeTULog2MinSize()))
+            xCheckRDCostIntra(outBestCU, outTempCU, SIZE_2Nx2N);
+
+            outTempCU->initEstData(depth, bIsLosslessMode);
+
+            if (depth == g_maxCUDepth - g_addCUDepth)
             {
-                xCheckRDCostIntra(outBestCU, outTempCU, SIZE_NxN);
+                if (outTempCU->getCUSize(0) > (1 << slice->getSPS()->getQuadtreeTULog2MinSize()))
+                {
+                    xCheckRDCostIntra(outBestCU, outTempCU, SIZE_NxN);
+                }
             }
+
         }
 
         m_entropyCoder->resetBits();
@@ -602,7 +623,13 @@
         outBestCU->m_totalCost  = m_rdCost->calcRdCost(outBestCU->m_totalDistortion, outBestCU->m_totalBits);
     }
 
-    outTempCU->initEstData(depth);
+    // copy original YUV samples to PCM buffer
+    if (outBestCU->isLosslessCoded(0) && (outBestCU->getIPCMFlag(0) == false))
+    {
+        xFillPCMBuffer(outBestCU, m_origYuv[depth]);
+    }
+
+    outTempCU->initEstData(depth, bIsLosslessMode);
 
     // further split
     if (depth < g_maxCUDepth - g_addCUDepth)
@@ -696,6 +723,8 @@
     //PPAScopeEvent(TEncCu_xCompressCU + depth);
 
     TComPic* pic = outBestCU->getPic();
+    int minTQ, maxTQ;
+    bool bIsTQBypassEnable = false;
 
     if (depth == 0)
     {
@@ -726,196 +755,218 @@
                           bpely <= slice->getSPS()->getPicHeightInLumaSamples());
     }
 
+    minTQ = maxTQ = 1;
+    if ((outTempCU->getSlice()->getPPS()->getTransquantBypassEnableFlag()))
+    {
+        bIsTQBypassEnable = true; // mark that the first iteration is to cost TQB mode.
+        minTQ = minTQ - 1;        // increase loop variable range by 1, to allow testing of TQB mode.
+        if (m_param->bEnableCUTransquantBypass)
+        {
+            maxTQ = minTQ;
+        }
+    }
+
     // We need to split, so don't try these modes.
     if (bInsidePicture)
     {
-        outTempCU->initEstData(depth);
+        for (int iTQ = minTQ; iTQ <= maxTQ; iTQ++)
+        {
+            bool bIsLosslessMode = bIsTQBypassEnable && (iTQ == minTQ);
 
-        // do inter modes, SKIP and 2Nx2N
-        if (slice->getSliceType() != I_SLICE)
-        {
-            // 2Nx2N
-            if (m_param->bEnableEarlySkip)
+            outTempCU->initEstData(depth, bIsLosslessMode);
+
+            // do inter modes, SKIP and 2Nx2N
+            if (slice->getSliceType() != I_SLICE)
             {
-                xCheckRDCostInter(outBestCU, outTempCU, SIZE_2Nx2N);
-                outTempCU->initEstData(depth); // by competition for inter_2Nx2N
-            }
-            // by Merge for inter_2Nx2N
-            xCheckRDCostMerge2Nx2N(outBestCU, outTempCU, &earlyDetectionSkipMode, m_bestPredYuv[depth], m_bestRecoYuv[depth]);
+                // 2Nx2N
+                if (m_param->bEnableEarlySkip)
+                {
+                    xCheckRDCostInter(outBestCU, outTempCU, SIZE_2Nx2N);
+                    outTempCU->initEstData(depth, bIsLosslessMode); // by competition for inter_2Nx2N
+                }
+                // by Merge for inter_2Nx2N
+                xCheckRDCostMerge2Nx2N(outBestCU, outTempCU, &earlyDetectionSkipMode, m_bestPredYuv[depth], m_bestRecoYuv[depth]);
 
-            outTempCU->initEstData(depth);
+                outTempCU->initEstData(depth, bIsLosslessMode);
 
-            if (!m_param->bEnableEarlySkip)
-            {
-                // 2Nx2N, NxN
-                xCheckRDCostInter(outBestCU, outTempCU, SIZE_2Nx2N);
-                outTempCU->initEstData(depth);
-                if (m_param->bEnableCbfFastMode)
+                if (!m_param->bEnableEarlySkip)
                 {
-                    doNotBlockPu = outBestCU->getQtRootCbf(0) != 0;
+                    // 2Nx2N, NxN
+                    xCheckRDCostInter(outBestCU, outTempCU, SIZE_2Nx2N);
+                    outTempCU->initEstData(depth, bIsLosslessMode);
+                    if (m_param->bEnableCbfFastMode)
+                    {
+                        doNotBlockPu = outBestCU->getQtRootCbf(0) != 0;
+                    }
                 }
             }
         }
 
         if (!earlyDetectionSkipMode)
         {
-            outTempCU->initEstData(depth);
+            for (int iTQ = minTQ; iTQ <= maxTQ; iTQ++)
+            {
+                bool bIsLosslessMode = bIsTQBypassEnable && (iTQ == minTQ); // If lossless, then iQP is irrelevant for subsequent modules.
 
-            // do inter modes, NxN, 2NxN, and Nx2N
-            if (slice->getSliceType() != I_SLICE)
-            {
-                // 2Nx2N, NxN
-                if (!(outBestCU->getCUSize(0) == 8))
+                outTempCU->initEstData(depth, bIsLosslessMode);
+
+                // do inter modes, NxN, 2NxN, and Nx2N
+                if (slice->getSliceType() != I_SLICE)
                 {
-                    if (depth == g_maxCUDepth - g_addCUDepth && doNotBlockPu)
+                    // 2Nx2N, NxN
+                    if (!(outBestCU->getCUSize(0) == 8))
                     {
-                        xCheckRDCostInter(outBestCU, outTempCU, SIZE_NxN);
-                        outTempCU->initEstData(depth);
-                    }
-                }
-
-                if (m_param->bEnableRectInter)
-                {
-                    // 2NxN, Nx2N
-                    if (doNotBlockPu)
-                    {
-                        xCheckRDCostInter(outBestCU, outTempCU, SIZE_Nx2N);
-                        outTempCU->initEstData(depth);
-                        if (m_param->bEnableCbfFastMode && outBestCU->getPartitionSize(0) == SIZE_Nx2N)
+                        if (depth == g_maxCUDepth - g_addCUDepth && doNotBlockPu)
                         {
-                            doNotBlockPu = outBestCU->getQtRootCbf(0) != 0;
+                            xCheckRDCostInter(outBestCU, outTempCU, SIZE_NxN);
+                            outTempCU->initEstData(depth, bIsLosslessMode);
                         }
                     }
-                    if (doNotBlockPu)
+
+                    if (m_param->bEnableRectInter)
                     {
-                        xCheckRDCostInter(outBestCU, outTempCU, SIZE_2NxN);
-                        outTempCU->initEstData(depth);
-                        if (m_param->bEnableCbfFastMode && outBestCU->getPartitionSize(0) == SIZE_2NxN)
-                        {
-                            doNotBlockPu = outBestCU->getQtRootCbf(0) != 0;
-                        }
-                    }
-                }
-
-                // Try AMP (SIZE_2NxnU, SIZE_2NxnD, SIZE_nLx2N, SIZE_nRx2N)
-                if (slice->getSPS()->getAMPAcc(depth))
-                {
-                    bool bTestAMP_Hor = false, bTestAMP_Ver = false;
-                    bool bTestMergeAMP_Hor = false, bTestMergeAMP_Ver = false;
-
-                    deriveTestModeAMP(outBestCU, parentSize, bTestAMP_Hor, bTestAMP_Ver, bTestMergeAMP_Hor, bTestMergeAMP_Ver);
-
-                    // Do horizontal AMP
-                    if (bTestAMP_Hor)
-                    {
+                        // 2NxN, Nx2N
                         if (doNotBlockPu)
                         {
-                            xCheckRDCostInter(outBestCU, outTempCU, SIZE_2NxnU);
-                            outTempCU->initEstData(depth);
-                            if (m_param->bEnableCbfFastMode && outBestCU->getPartitionSize(0) == SIZE_2NxnU)
+                            xCheckRDCostInter(outBestCU, outTempCU, SIZE_Nx2N);
+                            outTempCU->initEstData(depth, bIsLosslessMode);
+                            if (m_param->bEnableCbfFastMode && outBestCU->getPartitionSize(0) == SIZE_Nx2N)
                             {
                                 doNotBlockPu = outBestCU->getQtRootCbf(0) != 0;
                             }
                         }
                         if (doNotBlockPu)
                         {
-                            xCheckRDCostInter(outBestCU, outTempCU, SIZE_2NxnD);
-                            outTempCU->initEstData(depth);
-                            if (m_param->bEnableCbfFastMode && outBestCU->getPartitionSize(0) == SIZE_2NxnD)
-                            {
-                                doNotBlockPu = outBestCU->getQtRootCbf(0) != 0;
-                            }
-                        }
-                    }
-                    else if (bTestMergeAMP_Hor)
-                    {
-                        if (doNotBlockPu)
-                        {
-                            xCheckRDCostInter(outBestCU, outTempCU, SIZE_2NxnU, true);
-                            outTempCU->initEstData(depth);
-                            if (m_param->bEnableCbfFastMode && outBestCU->getPartitionSize(0) == SIZE_2NxnU)
-                            {
-                                doNotBlockPu = outBestCU->getQtRootCbf(0) != 0;
-                            }
-                        }
-                        if (doNotBlockPu)
-                        {
-                            xCheckRDCostInter(outBestCU, outTempCU, SIZE_2NxnD, true);
-                            outTempCU->initEstData(depth);
-                            if (m_param->bEnableCbfFastMode && outBestCU->getPartitionSize(0) == SIZE_2NxnD)
+                            xCheckRDCostInter(outBestCU, outTempCU, SIZE_2NxN);
+                            outTempCU->initEstData(depth, bIsLosslessMode);
+                            if (m_param->bEnableCbfFastMode && outBestCU->getPartitionSize(0) == SIZE_2NxN)
                             {
                                 doNotBlockPu = outBestCU->getQtRootCbf(0) != 0;
                             }
                         }
                     }
 
-                    // Do horizontal AMP
-                    if (bTestAMP_Ver)
+                    // Try AMP (SIZE_2NxnU, SIZE_2NxnD, SIZE_nLx2N, SIZE_nRx2N)
+                    if (slice->getSPS()->getAMPAcc(depth))
                     {
-                        if (doNotBlockPu)
+                        bool bTestAMP_Hor = false, bTestAMP_Ver = false;
+                        bool bTestMergeAMP_Hor = false, bTestMergeAMP_Ver = false;
+
+                        deriveTestModeAMP(outBestCU, parentSize, bTestAMP_Hor, bTestAMP_Ver, bTestMergeAMP_Hor, bTestMergeAMP_Ver);
+
+                        // Do horizontal AMP
+                        if (bTestAMP_Hor)
                         {
-                            xCheckRDCostInter(outBestCU, outTempCU, SIZE_nLx2N);
-                            outTempCU->initEstData(depth);
-                            if (m_param->bEnableCbfFastMode && outBestCU->getPartitionSize(0) == SIZE_nLx2N)
+                            if (doNotBlockPu)
                             {
-                                doNotBlockPu = outBestCU->getQtRootCbf(0) != 0;
+                                xCheckRDCostInter(outBestCU, outTempCU, SIZE_2NxnU);
+                                outTempCU->initEstData(depth, bIsLosslessMode);
+                                if (m_param->bEnableCbfFastMode && outBestCU->getPartitionSize(0) == SIZE_2NxnU)
+                                {
+                                    doNotBlockPu = outBestCU->getQtRootCbf(0) != 0;
+                                }
+                            }
+                            if (doNotBlockPu)
+                            {
+                                xCheckRDCostInter(outBestCU, outTempCU, SIZE_2NxnD);
+                                outTempCU->initEstData(depth, bIsLosslessMode);
+                                if (m_param->bEnableCbfFastMode && outBestCU->getPartitionSize(0) == SIZE_2NxnD)
+                                {
+                                    doNotBlockPu = outBestCU->getQtRootCbf(0) != 0;
+                                }
                             }
                         }
-                        if (doNotBlockPu)
+                        else if (bTestMergeAMP_Hor)
                         {
-                            xCheckRDCostInter(outBestCU, outTempCU, SIZE_nRx2N);
-                            outTempCU->initEstData(depth);
-                        }
-                    }
-                    else if (bTestMergeAMP_Ver)
-                    {
-                        if (doNotBlockPu)
-                        {
-                            xCheckRDCostInter(outBestCU, outTempCU, SIZE_nLx2N, true);
-                            outTempCU->initEstData(depth);
-                            if (m_param->bEnableCbfFastMode && outBestCU->getPartitionSize(0) == SIZE_nLx2N)
+                            if (doNotBlockPu)
                             {
-                                doNotBlockPu = outBestCU->getQtRootCbf(0) != 0;
+                                xCheckRDCostInter(outBestCU, outTempCU, SIZE_2NxnU, true);
+                                outTempCU->initEstData(depth, bIsLosslessMode);
+                                if (m_param->bEnableCbfFastMode && outBestCU->getPartitionSize(0) == SIZE_2NxnU)
+                                {
+                                    doNotBlockPu = outBestCU->getQtRootCbf(0) != 0;
+                                }
+                            }
+                            if (doNotBlockPu)
+                            {
+                                xCheckRDCostInter(outBestCU, outTempCU, SIZE_2NxnD, true);
+                                outTempCU->initEstData(depth, bIsLosslessMode);
+                                if (m_param->bEnableCbfFastMode && outBestCU->getPartitionSize(0) == SIZE_2NxnD)
+                                {
+                                    doNotBlockPu = outBestCU->getQtRootCbf(0) != 0;
+                                }
                             }
                         }
-                        if (doNotBlockPu)
+
+                        // Do vertical AMP
+                        if (bTestAMP_Ver)
                         {
-                            xCheckRDCostInter(outBestCU, outTempCU, SIZE_nRx2N, true);
-                            outTempCU->initEstData(depth);
+                            if (doNotBlockPu)
+                            {
+                                xCheckRDCostInter(outBestCU, outTempCU, SIZE_nLx2N);
+                                outTempCU->initEstData(depth, bIsLosslessMode);
+                                if (m_param->bEnableCbfFastMode && outBestCU->getPartitionSize(0) == SIZE_nLx2N)
+                                {
+                                    doNotBlockPu = outBestCU->getQtRootCbf(0) != 0;
+                                }
+                            }
+                            if (doNotBlockPu)
+                            {
+                                xCheckRDCostInter(outBestCU, outTempCU, SIZE_nRx2N);
+                                outTempCU->initEstData(depth, bIsLosslessMode);
+                            }
+                        }
+                        else if (bTestMergeAMP_Ver)
+                        {
+                            if (doNotBlockPu)
+                            {
+                                xCheckRDCostInter(outBestCU, outTempCU, SIZE_nLx2N, true);
+                                outTempCU->initEstData(depth, bIsLosslessMode);
+                                if (m_param->bEnableCbfFastMode && outBestCU->getPartitionSize(0) == SIZE_nLx2N)
+                                {
+                                    doNotBlockPu = outBestCU->getQtRootCbf(0) != 0;
+                                }
+                            }
+                            if (doNotBlockPu)
+                            {
+                                xCheckRDCostInter(outBestCU, outTempCU, SIZE_nRx2N, true);
+                                outTempCU->initEstData(depth, bIsLosslessMode);
+                            }
                         }
                     }
                 }
-            }
 
-            // do normal intra modes
-            // speedup for inter frames
-            if (slice->getSliceType() == I_SLICE ||
-                outBestCU->getCbf(0, TEXT_LUMA) != 0   ||
-                outBestCU->getCbf(0, TEXT_CHROMA_U) != 0   ||
-                outBestCU->getCbf(0, TEXT_CHROMA_V) != 0) // avoid very complex intra if it is unlikely
-            {
-                xCheckRDCostIntraInInter(outBestCU, outTempCU, SIZE_2Nx2N);
-                outTempCU->initEstData(depth);
+                // do normal intra modes
+                // speedup for inter frames
+                if (slice->getSliceType() == I_SLICE ||
+                    outBestCU->getCbf(0, TEXT_LUMA) != 0   ||
+                    outBestCU->getCbf(0, TEXT_CHROMA_U) != 0   ||
+                    outBestCU->getCbf(0, TEXT_CHROMA_V) != 0) // avoid very complex intra if it is unlikely
+                {
+                    xCheckRDCostIntraInInter(outBestCU, outTempCU, SIZE_2Nx2N);
+                    outTempCU->initEstData(depth, bIsLosslessMode);
 
-                if (depth == g_maxCUDepth - g_addCUDepth)
-                {
-                    if (outTempCU->getCUSize(0) > (1 << slice->getSPS()->getQuadtreeTULog2MinSize()))
+                    if (depth == g_maxCUDepth - g_addCUDepth)
                     {
-                        xCheckRDCostIntraInInter(outBestCU, outTempCU, SIZE_NxN);
-                        outTempCU->initEstData(depth);
+                        if (outTempCU->getCUSize(0) > (1 << slice->getSPS()->getQuadtreeTULog2MinSize()))
+                        {
+                            xCheckRDCostIntraInInter(outBestCU, outTempCU, SIZE_NxN);
+                            outTempCU->initEstData(depth, bIsLosslessMode);
+                        }
                     }
                 }
-            }
-            // test PCM
-            if (slice->getSPS()->getUsePCM()
-                && outTempCU->getCUSize(0) <= (1 << slice->getSPS()->getPCMLog2MaxSize())
-                && outTempCU->getCUSize(0) >= (1 << slice->getSPS()->getPCMLog2MinSize()))
-            {
-                uint32_t rawbits = (2 * X265_DEPTH + X265_DEPTH) * outBestCU->getCUSize(0) * outBestCU->getCUSize(0) / 2;
-                uint32_t bestbits = outBestCU->m_totalBits;
-                if ((bestbits > rawbits) || (outBestCU->m_totalCost > m_rdCost->calcRdCost(0, rawbits)))
+                // test PCM
+                if (slice->getSPS()->getUsePCM()
+                    && outTempCU->getCUSize(0) <= (1 << slice->getSPS()->getPCMLog2MaxSize())
+                    && outTempCU->getCUSize(0) >= (1 << slice->getSPS()->getPCMLog2MinSize()))
                 {
-                    xCheckIntraPCM(outBestCU, outTempCU);
+                    uint32_t rawbits = (2 * X265_DEPTH + X265_DEPTH) * outBestCU->getCUSize(0) * outBestCU->getCUSize(0) / 2;
+                    uint32_t bestbits = outBestCU->m_totalBits;
+                    if ((bestbits > rawbits) || (outBestCU->m_totalCost > m_rdCost->calcRdCost(0, rawbits)))
+                    {
+                        xCheckIntraPCM(outBestCU, outTempCU);
+                        outTempCU->initEstData(depth, bIsLosslessMode);
+                    }
                 }
             }
         }
@@ -938,80 +989,90 @@
         xFillPCMBuffer(outBestCU, m_origYuv[depth]);
     }
 
-    outTempCU->initEstData(depth);
+    minTQ = maxTQ = 1;
+    if (m_param->bEnableCUTransquantBypass)
+    {
+        maxTQ = minTQ;
+    }
 
-    // further split
-    if (bSubBranch && depth < g_maxCUDepth - g_addCUDepth)
+    for (int iTQ = minTQ; iTQ <= maxTQ; iTQ++)
     {
-        uint8_t     nextDepth     = depth + 1;
-        TComDataCU* subBestPartCU = m_bestCU[nextDepth];
-        TComDataCU* subTempPartCU = m_tempCU[nextDepth];
-        uint32_t partUnitIdx = 0;
-        for (; partUnitIdx < 4; partUnitIdx++)
+        bool bIsLosslessMode = false;
+        outTempCU->initEstData(depth, bIsLosslessMode);
+
+        // further split
+        if (bSubBranch && depth < g_maxCUDepth - g_addCUDepth)
         {
-            subBestPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth); // clear sub partition datas or init.
+            uint8_t     nextDepth     = depth + 1;
+            TComDataCU* subBestPartCU = m_bestCU[nextDepth];
+            TComDataCU* subTempPartCU = m_tempCU[nextDepth];
+            uint32_t partUnitIdx = 0;
+            for (; partUnitIdx < 4; partUnitIdx++)
+            {
+                subBestPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth); // clear sub partition datas or init.
 
-            if (bInsidePicture ||
-                ((subBestPartCU->getCUPelX() < slice->getSPS()->getPicWidthInLumaSamples()) &&
-                 (subBestPartCU->getCUPelY() < slice->getSPS()->getPicHeightInLumaSamples())))
-            {
-                subTempPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth); // clear sub partition datas or init.
-                if (0 == partUnitIdx) //initialize RD with previous depth buffer
+                if (bInsidePicture ||
+                    ((subBestPartCU->getCUPelX() < slice->getSPS()->getPicWidthInLumaSamples()) &&
+                    (subBestPartCU->getCUPelY() < slice->getSPS()->getPicHeightInLumaSamples())))
                 {
-                    m_rdSbacCoders[nextDepth][CI_CURR_BEST]->load(m_rdSbacCoders[depth][CI_CURR_BEST]);
+                    subTempPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth); // clear sub partition datas or init.
+                    if (0 == partUnitIdx) //initialize RD with previous depth buffer
+                    {
+                        m_rdSbacCoders[nextDepth][CI_CURR_BEST]->load(m_rdSbacCoders[depth][CI_CURR_BEST]);
+                    }
+                    else
+                    {
+                        m_rdSbacCoders[nextDepth][CI_CURR_BEST]->load(m_rdSbacCoders[nextDepth][CI_NEXT_BEST]);
+                    }
+
+                    xCompressCU(subBestPartCU, subTempPartCU, nextDepth, bInsidePicture);
+                    outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth); // Keep best part data to current temporary data.
+                    xCopyYuv2Tmp(subBestPartCU->getTotalNumPart() * partUnitIdx, nextDepth);
                 }
                 else
                 {
-                    m_rdSbacCoders[nextDepth][CI_CURR_BEST]->load(m_rdSbacCoders[nextDepth][CI_NEXT_BEST]);
-                }
-
-                xCompressCU(subBestPartCU, subTempPartCU, nextDepth, bInsidePicture);
-                outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth); // Keep best part data to current temporary data.
-                xCopyYuv2Tmp(subBestPartCU->getTotalNumPart() * partUnitIdx, nextDepth);
-            }
-            else
-            {
-                subBestPartCU->copyToPic(nextDepth);
-                outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth);
-            }
-        }
-
-        if (bInsidePicture)
-        {
-            m_entropyCoder->resetBits();
-            m_entropyCoder->encodeSplitFlag(outTempCU, 0, depth);
-            outTempCU->m_totalBits += m_entropyCoder->getNumberOfWrittenBits(); // split bits
-        }
-        outTempCU->m_totalCost = m_rdCost->calcRdCost(outTempCU->m_totalDistortion, outTempCU->m_totalBits);
-
-        if ((g_maxCUSize >> depth) == slice->getPPS()->getMinCuDQPSize() && slice->getPPS()->getUseDQP())
-        {
-            bool hasResidual = false;
-            for (uint32_t blkIdx = 0; blkIdx < outTempCU->getTotalNumPart(); blkIdx++)
-            {
-                if (outTempCU->getCbf(blkIdx, TEXT_LUMA) || outTempCU->getCbf(blkIdx, TEXT_CHROMA_U) ||
-                    outTempCU->getCbf(blkIdx, TEXT_CHROMA_V))
-                {
-                    hasResidual = true;
-                    break;
+                    subBestPartCU->copyToPic(nextDepth);
+                    outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth);
                 }
             }
 
-            uint32_t targetPartIdx = 0;
-            if (hasResidual)
+            if (bInsidePicture)
             {
-                bool foundNonZeroCbf = false;
-                outTempCU->setQPSubCUs(outTempCU->getRefQP(targetPartIdx), outTempCU, 0, depth, foundNonZeroCbf);
-                assert(foundNonZeroCbf);
+                m_entropyCoder->resetBits();
+                m_entropyCoder->encodeSplitFlag(outTempCU, 0, depth);
+                outTempCU->m_totalBits += m_entropyCoder->getNumberOfWrittenBits(); // split bits
             }
-            else
+            outTempCU->m_totalCost = m_rdCost->calcRdCost(outTempCU->m_totalDistortion, outTempCU->m_totalBits);
+
+            if ((g_maxCUSize >> depth) == slice->getPPS()->getMinCuDQPSize() && slice->getPPS()->getUseDQP())
             {
-                outTempCU->setQPSubParts(outTempCU->getRefQP(targetPartIdx), 0, depth); // set QP to default QP
+                bool hasResidual = false;
+                for (uint32_t blkIdx = 0; blkIdx < outTempCU->getTotalNumPart(); blkIdx++)
+                {
+                    if (outTempCU->getCbf(blkIdx, TEXT_LUMA) || outTempCU->getCbf(blkIdx, TEXT_CHROMA_U) ||
+                        outTempCU->getCbf(blkIdx, TEXT_CHROMA_V))
+                    {
+                        hasResidual = true;
+                        break;
+                    }
+                }
+
+                uint32_t targetPartIdx = 0;
+                if (hasResidual)
+                {
+                    bool foundNonZeroCbf = false;
+                    outTempCU->setQPSubCUs(outTempCU->getRefQP(targetPartIdx), outTempCU, 0, depth, foundNonZeroCbf);
+                    assert(foundNonZeroCbf);
+                }
+                else
+                {
+                    outTempCU->setQPSubParts(outTempCU->getRefQP(targetPartIdx), 0, depth); // set QP to default QP
+                }
             }
+
+            m_rdSbacCoders[nextDepth][CI_NEXT_BEST]->store(m_rdSbacCoders[depth][CI_TEMP_BEST]);
+            xCheckBestMode(outBestCU, outTempCU, depth); // RD compare current CU against split
         }
-
-        m_rdSbacCoders[nextDepth][CI_NEXT_BEST]->store(m_rdSbacCoders[depth][CI_TEMP_BEST]);
-        xCheckBestMode(outBestCU, outTempCU, depth); // RD compare current CU against split
     }
     outBestCU->copyToPic(depth); // Copy Best data to Picture for next partition prediction.
 
@@ -1215,6 +1276,7 @@
     TComMvField mvFieldNeighbours[MRG_MAX_NUM_CANDS << 1]; // double length for mv of both lists
     uint8_t interDirNeighbours[MRG_MAX_NUM_CANDS];
     int numValidMergeCand = 0;
+    bool bTransquantBypassFlag = outTempCU->getCUTransquantBypass(0);
 
     for (uint32_t i = 0; i < outTempCU->getSlice()->getMaxNumMergeCand(); ++i)
     {
@@ -1223,7 +1285,7 @@
 
     uint8_t depth = outTempCU->getDepth(0);
     outTempCU->setPartSizeSubParts(SIZE_2Nx2N, 0, depth); // interprets depth relative to LCU level
-    outTempCU->setCUTransquantBypassSubParts(m_CUTransquantBypassFlagValue, 0, depth);
+    
     outTempCU->getInterMergeCandidates(0, 0, mvFieldNeighbours, interDirNeighbours, numValidMergeCand);
 
     int mergeCandBuffer[MRG_MAX_NUM_CANDS];
@@ -1260,7 +1322,7 @@
                 {
                     // set MC parameters
                     outTempCU->setPredModeSubParts(MODE_INTER, 0, depth); // interprets depth relative to LCU level
-                    outTempCU->setCUTransquantBypassSubParts(m_CUTransquantBypassFlagValue, 0, depth);
+                    outTempCU->setCUTransquantBypassSubParts(bTransquantBypassFlag, 0, depth);
                     outTempCU->setPartSizeSubParts(SIZE_2Nx2N, 0, depth); // interprets depth relative to LCU level
                     outTempCU->setMergeFlag(0, true);
                     outTempCU->setMergeIndex(0, mergeCand);
@@ -1352,7 +1414,7 @@
     outTempCU->setSkipFlagSubParts(false, 0, depth);
     outTempCU->setPartSizeSubParts(partSize, 0, depth);
     outTempCU->setPredModeSubParts(MODE_INTER, 0, depth);
-    outTempCU->setCUTransquantBypassSubParts(m_CUTransquantBypassFlagValue, 0, depth);
+    
 
     m_tmpRecoYuv[depth]->clear(); // TODO: Are either of these clears necessary?
     m_tmpResiYuv[depth]->clear();
@@ -1372,7 +1434,7 @@
     outTempCU->setSkipFlagSubParts(false, 0, depth);
     outTempCU->setPartSizeSubParts(partSize, 0, depth);
     outTempCU->setPredModeSubParts(MODE_INTRA, 0, depth);
-    outTempCU->setCUTransquantBypassSubParts(m_CUTransquantBypassFlagValue, 0, depth);
+    
 
     m_search->estIntraPredQT(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth]);
 
@@ -1411,7 +1473,7 @@
     outTempCU->setSkipFlagSubParts(false, 0, depth);
     outTempCU->setPartSizeSubParts(partSize, 0, depth);
     outTempCU->setPredModeSubParts(MODE_INTRA, 0, depth);
-    outTempCU->setCUTransquantBypassSubParts(m_CUTransquantBypassFlagValue, 0, depth);
+    
 
     m_search->estIntraPredQT(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth]);
 
@@ -1460,7 +1522,7 @@
     outTempCU->setPartSizeSubParts(SIZE_2Nx2N, 0, depth);
     outTempCU->setPredModeSubParts(MODE_INTRA, 0, depth);
     outTempCU->setTrIdxSubParts(0, 0, depth);
-    outTempCU->setCUTransquantBypassSubParts(m_CUTransquantBypassFlagValue, 0, depth);
+    
 
     m_search->IPCMSearch(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth]);
 
@@ -1567,8 +1629,8 @@
     pixel* dstCr = cu->getPCMSampleCr();
 
     uint32_t srcStrideC = fencYuv->getCStride();
-    uint32_t heightC = height >> 1;
-    uint32_t widthC = width >> 1;
+    uint32_t widthC  = width  >> cu->getHorzChromaShift();
+    uint32_t heightC = height >> cu->getVertChromaShift();
 
     for (int y = 0; y < heightC; y++)
     {
diff -r a4d0d5679c28 -r 8ce774039d12 source/common/param.cpp
--- a/source/common/param.cpp	Sun May 11 17:32:37 2014 +0900
+++ b/source/common/param.cpp	Tue May 13 17:23:32 2014 +0530
@@ -127,6 +127,10 @@
     param->bEnableConstrainedIntra = 0;
     param->bEnableStrongIntraSmoothing = 1;
 
+    /* Transquant Bypass */
+    param->bEnableTransquantBypass = 0;
+    param->bEnableCUTransquantBypass = 0;
+
     /* Inter Coding tools */
     param->searchMethod = X265_HEX_SEARCH;
     param->subpelRefine = 2;
@@ -548,6 +552,8 @@
     OPT("no-tskip-fast") p->bEnableTSkipFast = atobool(value);
     OPT("tskip-fast") p->bEnableTSkipFast = atobool(value);
     OPT("strong-intra-smoothing") p->bEnableStrongIntraSmoothing = atobool(value);
+    OPT("transquant-bypass") p->bEnableTransquantBypass = atobool(value);
+    OPT("cu-transquant-bypass") p->bEnableCUTransquantBypass = atobool(value);
     OPT("constrained-intra") p->bEnableConstrainedIntra = atobool(value);
     OPT("open-gop") p->bOpenGOP = atobool(value);
     OPT("scenecut")
@@ -890,6 +896,9 @@
     CHECK(param->maxNumReferences < 1, "maxNumReferences must be 1 or greater.");
     CHECK(param->maxNumReferences > MAX_NUM_REF, "maxNumReferences must be 16 or smaller.");
 
+    CHECK(param->bEnableCUTransquantBypass && !param->bEnableTransquantBypass, "cu-transquant-bypass requires transquant-bypass to be enabled."); // the CU-level flag is only accepted together with the enable flag
+    CHECK(param->bEnableTransquantBypass && param->rdLevel < 5, "transquant-bypass requires RD level 5 or greater."); // constrain rdLevel only when bypass is requested, so lower RD levels stay valid otherwise
+
     CHECK(param->sourceWidth < (int)param->maxCUSize || param->sourceWidth < (int)param->maxCUSize,
           "Picture size must be at least one CTU");
     CHECK(param->internalCsp < X265_CSP_I420 || X265_CSP_I444 < param->internalCsp,
@@ -1060,7 +1069,10 @@
     }
     x265_log(param, X265_LOG_INFO, "Lookahead / bframes / badapt        : %d / %d / %d\n", param->lookaheadDepth, param->bframes, param->bFrameAdaptive);
     x265_log(param, X265_LOG_INFO, "b-pyramid / weightp / weightb / refs: %d / %d / %d / %d\n",
-             param->bBPyramid, param->bEnableWeightedPred, param->bEnableWeightedBiPred, param->maxNumReferences);
+        param->bBPyramid, param->bEnableWeightedPred, param->bEnableWeightedBiPred, param->maxNumReferences);
+
+    x265_log(param, X265_LOG_INFO, "transquant-bypass / cu-transquant-bypass: %d / %d\n", param->bEnableTransquantBypass, param->bEnableCUTransquantBypass);
+
     switch (param->rc.rateControlMode)
     {
     case X265_RC_ABR:
@@ -1140,6 +1152,8 @@
     BOOL(p->bEnableTransformSkip, "tskip");
     BOOL(p->bEnableTSkipFast, "tskip-fast");
     BOOL(p->bEnableStrongIntraSmoothing, "strong-intra-smoothing");
+    BOOL(p->bEnableTransquantBypass, "transquant-bypass");
+    BOOL(p->bEnableCUTransquantBypass, "cu-transquant-bypass");
     BOOL(p->bEnableConstrainedIntra, "constrained-intra");
     BOOL(p->bOpenGOP, "open-gop");
     s += sprintf(s, " interlace=%d", p->interlaceMode);
diff -r a4d0d5679c28 -r 8ce774039d12 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Sun May 11 17:32:37 2014 +0900
+++ b/source/encoder/encoder.cpp	Tue May 13 17:23:32 2014 +0530
@@ -62,6 +62,8 @@
     m_numChromaWPFrames = 0;
     m_numLumaWPBiFrames = 0;
     m_numChromaWPBiFrames = 0;
+    m_TransquantBypassEnableFlag = false;
+    m_CUTransquantBypassFlagValue = false;
     m_lookahead = NULL;
     m_frameEncoder = NULL;
     m_rateControl = NULL;
@@ -1442,8 +1444,14 @@
     m_bPCMFilterDisableFlag = false;
 
     m_useLossless = false;  // x264 configures this via --qp=0
-    m_TransquantBypassEnableFlag = false;
-    m_CUTransquantBypassFlagValue = false;
+    if (p->bEnableTransquantBypass)
+    {
+        m_TransquantBypassEnableFlag  = true;
+    }
+    if (p->bEnableCUTransquantBypass)
+    {
+        m_CUTransquantBypassFlagValue = true;
+    }
 }
 
 int Encoder::extractNalData(NALUnitEBSP **nalunits, int& memsize)
diff -r a4d0d5679c28 -r 8ce774039d12 source/x265.cpp
--- a/source/x265.cpp	Sun May 11 17:32:37 2014 +0900
+++ b/source/x265.cpp	Tue May 13 17:23:32 2014 +0530
@@ -157,6 +157,10 @@
     { "strong-intra-smoothing",    no_argument, NULL, 0 },
     { "no-cutree",                 no_argument, NULL, 0 },
     { "cutree",                    no_argument, NULL, 0 },
+    { "transquant-bypass",       no_argument, NULL, 0 },
+    { "no-transquant-bypass",    no_argument, NULL, 0 },
+    { "cu-transquant-bypass",    no_argument, NULL, 0 },
+    { "no-cu-transquant-bypass", no_argument, NULL, 0 },
     { "sar",            required_argument, NULL, 0 },
     { "overscan",       required_argument, NULL, 0 },
     { "videoformat",    required_argument, NULL, 0 },
@@ -362,6 +366,8 @@
     H0("   --ref <integer>               max number of L0 references to be allowed (1 .. 16) Default %d\n", param->maxNumReferences);
     H0("-w/--[no-]weightp                Enable weighted prediction in P slices. Default %s\n", OPT(param->bEnableWeightedPred));
     H0("   --[no-]weightb                Enable weighted prediction in B slices. Default %s\n", OPT(param->bEnableWeightedBiPred));
+    H0("   --[no-]transquant-bypass      Enable transquant bypass flag. Default %s\n", OPT(param->bEnableTransquantBypass));
+    H0("   --[no-]cu-transquant-bypass   Scaling, transform and in-loop filter process are bypassed. Default %s\n", OPT(param->bEnableCUTransquantBypass));
     H0("\nRate control and rate distortion options:\n");
     H0("   --bitrate <integer>           Target bitrate (kbps), implies ABR. Default %d\n", param->rc.bitrate);
     H0("   --crf <float>                 Quality-based VBR (0-51). Default %f\n", param->rc.rfConstant);
diff -r a4d0d5679c28 -r 8ce774039d12 source/x265.h
--- a/source/x265.h	Sun May 11 17:32:37 2014 +0900
+++ b/source/x265.h	Tue May 13 17:23:32 2014 +0530
@@ -635,6 +635,9 @@
      * Default is 0, which is recommended */
     int       crQpOffset;
 
+    int bEnableTransquantBypass;    /* transquant_bypass_enable_flag setting; must be non-zero for bEnableCUTransquantBypass to pass parameter validation. Default 0 */
+    int bEnableCUTransquantBypass;  /* when non-zero, all CU transquant bypass flags are set to true; only valid together with bEnableTransquantBypass. Default 0 */
+
     /*== Rate Control ==*/
 
     struct



More information about the x265-devel mailing list