[x265] [PATCH 3 of 6] idct: improve performance by detecting DC-only blocks

Fri Aug 16 12:52:33 CEST 2013

# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1376650198 -28800
# Node ID 0b225ee24b5d5271e0bbdc366e2bcb2c320bffe4
# Parent  4be95d676094ee2ec88c63fa2620ab6f037234b5
idct: improve performance by detecting DC-only blocks

diff -r 4be95d676094 -r 0b225ee24b5d source/Lib/TLibCommon/TComTrQuant.cpp

--- a/source/Lib/TLibCommon/TComTrQuant.cpp	Fri Aug 16 18:49:44 2013 +0800
+++ b/source/Lib/TLibCommon/TComTrQuant.cpp	Fri Aug 16 18:49:58 2013 +0800
@@ -445,7 +445,7 @@
     return xQuant(cu, m_tmpCoeff, coeff, width, height, ttype, absPartIdx, lastPos);
 }
 
-Void TComTrQuant::invtransformNxN( Bool transQuantBypass, UInt mode, Short* residual, UInt stride, TCoeff* coeff, UInt width, UInt height, Int scalingListType, Bool useTransformSkip /*= false*/ )
+Void TComTrQuant::invtransformNxN( Bool transQuantBypass, UInt mode, Short* residual, UInt stride, TCoeff* coeff, UInt width, UInt height, Int scalingListType, Bool useTransformSkip /*= false*/, int lastPos )
 {
     if (transQuantBypass)
     {
@@ -474,6 +474,27 @@
     }
     else
     {
+        // CHECK_ME: we should never get here when there are no coefficients at all
+        assert(lastPos >= 0);
+
+#if !HIGH_BIT_DEPTH
+        // DC only
+        if (lastPos == 0 && !((width == 4) && (mode != REG_DCT)))
+        {
+            int dc = (((m_tmpCoeff[0] * 64 + 64) >> 7) * 64 + 2048) >> 12;
+
+            for(int i = 0; i < width; i++)
+            {
+                for(int j = 0; j < width; j++)
+                {
+                    residual[i * stride + j] = dc;
+                }
+            }
+
+            return;
+        }
+#endif
+
         // TODO: this may need larger data types for X265_DEPTH > 8
         const UInt log2BlockSize = g_convertToBit[width];
         x265::primitives.idct[x265::IDCT_4x4 + log2BlockSize - ((width == 4) && (mode != REG_DCT))](m_tmpCoeff, residual, stride);
diff -r 4be95d676094 -r 0b225ee24b5d source/Lib/TLibCommon/TComTrQuant.h
--- a/source/Lib/TLibCommon/TComTrQuant.h	Fri Aug 16 18:49:44 2013 +0800
+++ b/source/Lib/TLibCommon/TComTrQuant.h	Fri Aug 16 18:49:58 2013 +0800
@@ -127,7 +127,7 @@
     UInt transformNxN(TComDataCU* cu, Short* residual, UInt stride, TCoeff* coeff, UInt width, UInt height,
                       TextType ttype, UInt absPartIdx, int* lastPos, Bool useTransformSkip = false);
 
-    Void invtransformNxN(Bool transQuantBypass, UInt mode, Short* residual, UInt stride, TCoeff* coeff, UInt width, UInt height, Int scalingListType, Bool useTransformSkip = false);
+    Void invtransformNxN(Bool transQuantBypass, UInt mode, Short* residual, UInt stride, TCoeff* coeff, UInt width, UInt height, Int scalingListType, Bool useTransformSkip = false, int lastPos = MAX_INT);
 
     Void invRecurTransformNxN(TComDataCU* cu, UInt absPartIdx, TextType ttype, Short* residual, UInt addr, UInt stride,
                               UInt width, UInt height, UInt maxTrMode, UInt trMode, TCoeff* coeff);
diff -r 4be95d676094 -r 0b225ee24b5d source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp	Fri Aug 16 18:49:44 2013 +0800
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp	Fri Aug 16 18:49:58 2013 +0800
@@ -555,7 +555,7 @@
     {
         Int scalingListType = 0 + g_eTTable[(Int)TEXT_LUMA];
         assert(scalingListType < 6);
-        m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), cu->getLumaIntraDir(absPartIdx), residual, stride, coeff, width, height, scalingListType, useTransformSkip);
+        m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), cu->getLumaIntraDir(absPartIdx), residual, stride, coeff, width, height, scalingListType, useTransformSkip, lastPos);
     }
     else
     {