<div dir="ltr"><br><div class="gmail_extra"><br><br><div class="gmail_quote">On Tue, Jun 25, 2013 at 7:41 AM,  <span dir="ltr"><<a href="mailto:praveen@multicorewareinc.com" target="_blank">praveen@multicorewareinc.com</a>></span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex"># HG changeset patch<br>
# User praveentiwari<br>
# Date 1372164077 -19800<br>
# Node ID 08a1bf9ba9bcd79e62059615f99aa86fee0d4f79<br>
# Parent  4e7a3fc1c49854ab89de035b6bc9690627463d69<br>
Separated the logic block of code as standalone function for vectorization<br>
<br>
diff -r 4e7a3fc1c498 -r 08a1bf9ba9bc source/Lib/TLibCommon/TComTrQuant.cpp<br>
--- a/source/Lib/TLibCommon/TComTrQuant.cpp     Tue Jun 25 17:38:30 2013 +0530<br>
+++ b/source/Lib/TLibCommon/TComTrQuant.cpp     Tue Jun 25 18:11:17 2013 +0530<br>
@@ -739,6 +739,43 @@<br>
     } // TU loop<br>
 }<br></blockquote><div><br></div><div style>This should be a C-ref primitive in a file in common/, either dct.cpp or in a new quant.cpp.</div><div style><br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">
+void xCalQuantCoef(Int *  piCoef,<br>
+                   Int *  piQuantCoeff,<br>
+                   Int *  deltaU,<br>
+                   Int *  piQCoef,<br>
+                   Int *  piArlCCoef,<br>
+                   UInt & uiAcSum,<br>
+                   Int    iQBitsC,<br>
+                   Int    iQBits,<br>
+                   Int    iAdd,<br>
+                   Int    iWidth,<br>
+                   Int    iHeight,<br>
+                   Bool   m_bUseAdaptQpSelect)<br>
</blockquote><div><br></div><div><div>Drop the m_ prefix; that function argument is not a member variable. Also, our performance primitives need to be "cleaned" of the HM style. This means dropping all the Hungarian notation prefixes ("pi" for pointer to integer, "ui" for unsigned integer, "i" for signed integer). For example, iQBitsC -> qbitsc.</div>
<div> </div></div><div>The accumulated sum should be the return value from this primitive, not passed as a reference.</div><div><br></div><div style>It doesn't seem necessary to pass width and height; the primitive only needs the total block size (width * height).</div>
<div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">+{<br>
+    Int iAddC   = 1 << (iQBitsC - 1);<br>
+    Int qBits8 = iQBits - 8;<br>
+<br>
+    for (Int n = 0; n < iWidth * iHeight; n++)<br>
+    {<br>
+        Int iLevel;<br>
+        Int  iSign;<br>
+        UInt uiBlockPos = n;<br></blockquote><div><br></div><div style>just use n.  declaring uiBlockPos here is just silly (and yes I know this came directly from the HM routine).</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">

+        iLevel  = piCoef[uiBlockPos];<br>
+        iSign   = (iLevel < 0 ? -1 : 1);<br>
+<br>
+        Int64 tmpLevel = (Int64)abs(iLevel) * piQuantCoeff[uiBlockPos];<br>
+        if (m_bUseAdaptQpSelect)<br>
+        {<br>
+            piArlCCoef[uiBlockPos] = (Int)((tmpLevel + iAddC) >> iQBitsC);<br>
+        }<br>
+        iLevel = (Int)((tmpLevel + iAdd) >> iQBits);<br>
+        deltaU[uiBlockPos] = (Int)((tmpLevel - (iLevel << iQBits)) >> qBits8);<br>
+        uiAcSum += iLevel;<br>
+        iLevel *= iSign;<br>
+        piQCoef[uiBlockPos] = Clip3(-32768, 32767, iLevel);<br>
+    }<br>
+}<br><br></blockquote><div><br></div><div style>I think there should be two primitives for Quant, one with adaptive QP select, and one without.  The one without should have a lot fewer arguments.</div><div><br></div><div>
 </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">
 Void TComTrQuant::xQuant(TComDataCU* pcCU,<br>
                          Int*        pSrc,<br>
                          TCoeff*     pDes,<br>
@@ -818,28 +855,8 @@<br>
         Int iQBits = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift;<br>
         iAdd = (pcCU->getSlice()->getSliceType() == I_SLICE ? 171 : 85) << (iQBits - 9);<br>
         Int iQBitsC = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift - ARL_C_PRECISION;<br>
-        Int iAddC   = 1 << (iQBitsC - 1);<br>
<br>
-        Int qBits8 = iQBits - 8;<br>
-        for (Int n = 0; n < iWidth * iHeight; n++)<br>
-        {<br>
-            Int iLevel;<br>
-            Int  iSign;<br>
-            UInt uiBlockPos = n;<br>
-            iLevel  = piCoef[uiBlockPos];<br>
-            iSign   = (iLevel < 0 ? -1 : 1);<br>
-<br>
-            Int64 tmpLevel = (Int64)abs(iLevel) * piQuantCoeff[uiBlockPos];<br>
-            if (m_bUseAdaptQpSelect)<br>
-            {<br>
-                piArlCCoef[uiBlockPos] = (Int)((tmpLevel + iAddC) >> iQBitsC);<br>
-            }<br>
-            iLevel = (Int)((tmpLevel + iAdd) >> iQBits);<br>
-            deltaU[uiBlockPos] = (Int)((tmpLevel - (iLevel << iQBits)) >> qBits8);<br>
-            uiAcSum += iLevel;<br>
-            iLevel *= iSign;<br>
-            piQCoef[uiBlockPos] = Clip3(-32768, 32767, iLevel);<br>
-        } // for n<br>
+        xCalQuantCoef(piCoef, piQuantCoeff, deltaU, piQCoef, piArlCCoef, uiAcSum, iQBitsC, iQBits, iAdd, iWidth, iHeight, m_bUseAdaptQpSelect);<br>
<br>
         if (pcCU->getSlice()->getPPS()->getSignHideFlag())<br>
         {<br>
@@ -976,7 +993,7 @@<br>
         assert(bitDepth == 8);<br>
<br>
         const UInt uiLog2BlockSize = g_aucConvertToBit[uiWidth];<br></blockquote><div><br></div><div style>This patch has a number of white-space fixes; they need to be separated into their own patch.</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">

-        x265::primitives.dct[x265::DCT_4x4 + uiLog2BlockSize - ((uiWidth==4) && (uiMode != REG_DCT))](pcResidual, m_plTempCoeff, uiStride);<br>
+        x265::primitives.dct[x265::DCT_4x4 + uiLog2BlockSize - ((uiWidth == 4) && (uiMode != REG_DCT))](pcResidual, m_plTempCoeff, uiStride);<br>
<br>
         assert(uiWidth == uiHeight);<br>
     }<br>
@@ -1021,10 +1038,10 @@<br>
     else<br>
     {<br>
         // ChECK_ME: I assume we don't use HIGH_BIT_DEPTH here<br>
-        assert( bitDepth == 8 );<br>
+        assert(bitDepth == 8);<br>
<br>
         const UInt uiLog2BlockSize = g_aucConvertToBit[uiWidth];<br>
-        x265::primitives.idct[x265::IDCT_4x4 + uiLog2BlockSize - ((uiWidth==4) && (uiMode != REG_DCT))](m_plTempCoeff, rpcResidual, uiStride);<br>
+        x265::primitives.idct[x265::IDCT_4x4 + uiLog2BlockSize - ((uiWidth == 4) && (uiMode != REG_DCT))](m_plTempCoeff, rpcResidual, uiStride);<br>
     }<br>
 }<br>
<br>
@@ -1093,11 +1110,11 @@<br>
 Void TComTrQuant::xIT(Int bitDepth, UInt uiMode, Int* plCoef, Short* pResidual, UInt uiStride, Int iWidth, Int iHeight)<br>
 {<br>
     // ChECK_ME: I assume we don't use HIGH_BIT_DEPTH here<br>
-    assert( bitDepth == 8 );<br>
+    assert(bitDepth == 8);<br>
<br>
     //xITrMxN(bitDepth, coeff, block, iWidth, iHeight, uiMode);<br>
     const UInt uiLog2BlockSize = g_aucConvertToBit[iWidth];<br>
-    x265::primitives.idct[x265::IDCT_4x4 + uiLog2BlockSize - ((iWidth==4) && (uiMode != REG_DCT))](plCoef, pResidual, uiStride);<br>
+    x265::primitives.idct[x265::IDCT_4x4 + uiLog2BlockSize - ((iWidth == 4) && (uiMode != REG_DCT))](plCoef, pResidual, uiStride);<br>
 }<br>
<br>
 /** Wrapper function between HM interface and core 4x4 transform skipping<br>
@@ -1850,12 +1867,12 @@<br>
  * \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3<br>
  * \returns cost of given absolute transform level<br>
  */<br>
-__inline Double TComTrQuant::xGetICRateCost(UInt uiAbsLevel,<br>
+__inline Double TComTrQuant::xGetICRateCost(UInt   uiAbsLevel,<br>
                                             UShort ui16CtxNumOne,<br>
                                             UShort ui16CtxNumAbs,<br>
                                             UShort ui16AbsGoRice,<br>
-                                            UInt c1Idx,<br>
-                                            UInt c2Idx) const<br>
+                                            UInt   c1Idx,<br>
+                                            UInt   c2Idx) const<br>
 {<br>
     Double iRate = xGetIEPRate();<br>
     UInt baseLevel  =  (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="http://mailman.videolan.org/listinfo/x265-devel" target="_blank">http://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div><br><br clear="all"><div><br></div>-- <br>Steve Borho
</div></div>