[x265] [PATCH] quant.cpp: use 'rdoQuant_c' primitive to optimize rdoQuant path

praveen at multicorewareinc.com praveen at multicorewareinc.com
Tue Nov 28 11:06:33 CET 2017


# HG changeset patch
# User Praveen Tiwari <praveen at multicorewareinc.com>
# Date 1511851222 -19800
#      Tue Nov 28 12:10:22 2017 +0530
# Node ID d732ca2095defdbf42748327006083befb30a89e
# Parent  4d242c555d14ca8214d9da89cef41c4418af4dca
quant.cpp: use 'rdoQuant_c' primitive to optimize rdoQuant path

diff -r 4d242c555d14 -r d732ca2095de source/common/quant.cpp
--- a/source/common/quant.cpp	Tue Nov 28 11:43:00 2017 +0530
+++ b/source/common/quant.cpp	Tue Nov 28 12:10:22 2017 +0530
@@ -803,20 +803,14 @@
 
             if (usePsyMask)
             {
-                // TODO: we can't SIMD optimize because PSYVALUE need 64-bits multiplication, convert to Double can work faster by FMA
+                // Expected to run faster with an FMA-based SIMD implementation
+                primitives.rdoQuant(m_resiDctCoeff, m_fencDctCoeff, costUncoded, &totalUncodedCost, &totalRdCost, psyScale, blkPos, log2TrSize);
+                blkPos = codeParams.scan[scanPosBase];
+
                 for (int y = 0; y < MLS_CG_SIZE; y++)
                 {
                     for (int x = 0; x < MLS_CG_SIZE; x++)
                     {
-                        int signCoef         = m_resiDctCoeff[blkPos + x];            /* pre-quantization DCT coeff */
-                        int predictedCoef    = m_fencDctCoeff[blkPos + x] - signCoef; /* predicted DCT = source DCT - residual DCT*/
-                        costUncoded[blkPos + x] = static_cast<double>(((int64_t)signCoef * signCoef) << scaleBits);
-                        /* when no residual coefficient is coded, predicted coef == recon coef */
-                        costUncoded[blkPos + x] -= PSYVALUE(predictedCoef);
-
-                        totalUncodedCost += costUncoded[blkPos + x];
-                        totalRdCost += costUncoded[blkPos + x];
-
                         const uint32_t scanPosOffset =  y * MLS_CG_SIZE + x;
                         const uint32_t ctxSig = table_cnt[patternSigCtx][g_scan4x4[codeParams.scanType][scanPosOffset]] + ctxSigOffset;
                         X265_CHECK(trSize > 4, "trSize check failure\n");


More information about the x265-devel mailing list