[x265] [PATCH] Separated the logic block of code as a standalone function for vectorization

Tue Jun 25 14:41:28 CEST 2013

# HG changeset patch
# User praveentiwari
# Date 1372164077 -19800
# Node ID 08a1bf9ba9bcd79e62059615f99aa86fee0d4f79
# Parent  4e7a3fc1c49854ab89de035b6bc9690627463d69
Separated the logic block of code as a standalone function for vectorization

diff -r 4e7a3fc1c498 -r 08a1bf9ba9bc source/Lib/TLibCommon/TComTrQuant.cpp

--- a/source/Lib/TLibCommon/TComTrQuant.cpp	Tue Jun 25 17:38:30 2013 +0530
+++ b/source/Lib/TLibCommon/TComTrQuant.cpp	Tue Jun 25 18:11:17 2013 +0530
@@ -739,6 +739,43 @@
     } // TU loop
 }
 
+void xCalQuantCoef(Int *  piCoef,               // in:  coefficients to quantize, one per position
+                   Int *  piQuantCoeff,         // in:  per-position multiplier applied to |coefficient|
+                   Int *  deltaU,               // out: per-position rounding remainder, shifted right by (iQBits - 8)
+                   Int *  piQCoef,              // out: signed levels, limited via Clip3(-32768, 32767, .)
+                   Int *  piArlCCoef,           // out: levels rounded at iQBitsC; written only if m_bUseAdaptQpSelect
+                   UInt & uiAcSum,              // in/out: incremented by every unsigned (pre-sign) level
+                   Int    iQBitsC,              // right shift used for the piArlCCoef output
+                   Int    iQBits,               // right shift used for the main quantized level
+                   Int    iAdd,                 // rounding offset added before the iQBits shift
+                   Int    iWidth,               // with iHeight: iWidth * iHeight positions are processed
+                   Int    iHeight,              // see iWidth
+                   Bool   m_bUseAdaptQpSelect)  // when true, piArlCCoef is filled as well
+{   // Quantizes a block of coefficients; kept as a standalone function so it can be vectorized.
+    Int iAddC   = 1 << (iQBitsC - 1);   // rounding offset (half a step) for the iQBitsC shift
+    Int qBits8 = iQBits - 8;            // shift that leaves 8 fractional bits in deltaU
+    // Positions are independent of each other apart from the running sum uiAcSum.
+    for (Int n = 0; n < iWidth * iHeight; n++)
+    {
+        Int iLevel;
+        Int  iSign;
+        UInt uiBlockPos = n;
+        iLevel  = piCoef[uiBlockPos];
+        iSign   = (iLevel < 0 ? -1 : 1);   // remember the sign; the arithmetic below works on the magnitude
+        // The scaled magnitude is held in 64 bits because the product may not fit in an Int.
+        Int64 tmpLevel = (Int64)abs(iLevel) * piQuantCoeff[uiBlockPos];
+        if (m_bUseAdaptQpSelect)
+        {
+            piArlCCoef[uiBlockPos] = (Int)((tmpLevel + iAddC) >> iQBitsC);
+        }
+        iLevel = (Int)((tmpLevel + iAdd) >> iQBits);
+        deltaU[uiBlockPos] = (Int)((tmpLevel - ((Int64)iLevel << iQBits)) >> qBits8);   // widen first: a 32-bit shift can overflow
+        uiAcSum += iLevel;                 // accumulated before the sign is restored
+        iLevel *= iSign;
+        piQCoef[uiBlockPos] = Clip3(-32768, 32767, iLevel);
+    }
+}
+
 Void TComTrQuant::xQuant(TComDataCU* pcCU,
                          Int*        pSrc,
                          TCoeff*     pDes,
@@ -818,28 +855,8 @@
         Int iQBits = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift;
         iAdd = (pcCU->getSlice()->getSliceType() == I_SLICE ? 171 : 85) << (iQBits - 9);
         Int iQBitsC = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift - ARL_C_PRECISION;
-        Int iAddC   = 1 << (iQBitsC - 1);
 
-        Int qBits8 = iQBits - 8;
-        for (Int n = 0; n < iWidth * iHeight; n++)
-        {
-            Int iLevel;
-            Int  iSign;
-            UInt uiBlockPos = n;
-            iLevel  = piCoef[uiBlockPos];
-            iSign   = (iLevel < 0 ? -1 : 1);
-
-            Int64 tmpLevel = (Int64)abs(iLevel) * piQuantCoeff[uiBlockPos];
-            if (m_bUseAdaptQpSelect)
-            {
-                piArlCCoef[uiBlockPos] = (Int)((tmpLevel + iAddC) >> iQBitsC);
-            }
-            iLevel = (Int)((tmpLevel + iAdd) >> iQBits);
-            deltaU[uiBlockPos] = (Int)((tmpLevel - (iLevel << iQBits)) >> qBits8);
-            uiAcSum += iLevel;
-            iLevel *= iSign;
-            piQCoef[uiBlockPos] = Clip3(-32768, 32767, iLevel);
-        } // for n
+        xCalQuantCoef(piCoef, piQuantCoeff, deltaU, piQCoef, piArlCCoef, uiAcSum, iQBitsC, iQBits, iAdd, iWidth, iHeight, m_bUseAdaptQpSelect);
 
         if (pcCU->getSlice()->getPPS()->getSignHideFlag())
         {
@@ -976,7 +993,7 @@
         assert(bitDepth == 8);
 
         const UInt uiLog2BlockSize = g_aucConvertToBit[uiWidth];
-        x265::primitives.dct[x265::DCT_4x4 + uiLog2BlockSize - ((uiWidth==4) && (uiMode != REG_DCT))](pcResidual, m_plTempCoeff, uiStride);
+        x265::primitives.dct[x265::DCT_4x4 + uiLog2BlockSize - ((uiWidth == 4) && (uiMode != REG_DCT))](pcResidual, m_plTempCoeff, uiStride);
 
         assert(uiWidth == uiHeight);
     }
@@ -1021,10 +1038,10 @@
     else
     {
         // ChECK_ME: I assume we don't use HIGH_BIT_DEPTH here
-        assert( bitDepth == 8 );
+        assert(bitDepth == 8);
 
         const UInt uiLog2BlockSize = g_aucConvertToBit[uiWidth];
-        x265::primitives.idct[x265::IDCT_4x4 + uiLog2BlockSize - ((uiWidth==4) && (uiMode != REG_DCT))](m_plTempCoeff, rpcResidual, uiStride);
+        x265::primitives.idct[x265::IDCT_4x4 + uiLog2BlockSize - ((uiWidth == 4) && (uiMode != REG_DCT))](m_plTempCoeff, rpcResidual, uiStride);
     }
 }
 
@@ -1093,11 +1110,11 @@
 Void TComTrQuant::xIT(Int bitDepth, UInt uiMode, Int* plCoef, Short* pResidual, UInt uiStride, Int iWidth, Int iHeight)
 {
     // ChECK_ME: I assume we don't use HIGH_BIT_DEPTH here
-    assert( bitDepth == 8 );
+    assert(bitDepth == 8);
 
     //xITrMxN(bitDepth, coeff, block, iWidth, iHeight, uiMode);
     const UInt uiLog2BlockSize = g_aucConvertToBit[iWidth];
-    x265::primitives.idct[x265::IDCT_4x4 + uiLog2BlockSize - ((iWidth==4) && (uiMode != REG_DCT))](plCoef, pResidual, uiStride);
+    x265::primitives.idct[x265::IDCT_4x4 + uiLog2BlockSize - ((iWidth == 4) && (uiMode != REG_DCT))](plCoef, pResidual, uiStride);
 }
 
 /** Wrapper function between HM interface and core 4x4 transform skipping
@@ -1850,12 +1867,12 @@
  * \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3
  * \returns cost of given absolute transform level
  */
-__inline Double TComTrQuant::xGetICRateCost(UInt uiAbsLevel,
+__inline Double TComTrQuant::xGetICRateCost(UInt   uiAbsLevel,
                                             UShort ui16CtxNumOne,
                                             UShort ui16CtxNumAbs,
                                             UShort ui16AbsGoRice,
-                                            UInt c1Idx,
-                                            UInt c2Idx) const
+                                            UInt   c1Idx,
+                                            UInt   c2Idx) const
 {
     Double iRate = xGetIEPRate();
     UInt baseLevel  =  (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;