[x265] [PATCH] quant.cpp: use 'nonPsyRdoQuant_c' primitive to optimize rdoQuant path

praveen at multicorewareinc.com praveen at multicorewareinc.com
Tue Nov 28 11:07:18 CET 2017


# HG changeset patch
# User Praveen Tiwari <praveen at multicorewareinc.com>
# Date 1511855234 -19800
#      Tue Nov 28 13:17:14 2017 +0530
# Node ID 85970193df47aa5da685efc27aaef04d9f7f21a0
# Parent  d732ca2095defdbf42748327006083befb30a89e
quant.cpp: use 'nonPsyRdoQuant_c' primitive to optimize rdoQuant path

diff -r d732ca2095de -r 85970193df47 source/common/quant.cpp
--- a/source/common/quant.cpp	Tue Nov 28 12:10:22 2017 +0530
+++ b/source/common/quant.cpp	Tue Nov 28 13:17:14 2017 +0530
@@ -824,16 +824,14 @@
             }
             else
             {
-                // non-psy path
+                // non-psy path - expected to run faster using FMA SIMD
+                primitives.nonPsyRdoQuant(m_resiDctCoeff, costUncoded, &totalUncodedCost, &totalRdCost, blkPos, log2TrSize);
+                blkPos = codeParams.scan[scanPosBase];
+
                 for (int y = 0; y < MLS_CG_SIZE; y++)
                 {
                     for (int x = 0; x < MLS_CG_SIZE; x++)
                     {
-                        int signCoef = m_resiDctCoeff[blkPos + x];            /* pre-quantization DCT coeff */
-                        costUncoded[blkPos + x] = static_cast<double>(((int64_t)signCoef * signCoef) << scaleBits);
-                        totalUncodedCost += costUncoded[blkPos + x];
-                        totalRdCost += costUncoded[blkPos + x];
-
                         const uint32_t scanPosOffset =  y * MLS_CG_SIZE + x;
                         const uint32_t ctxSig = table_cnt[patternSigCtx][g_scan4x4[codeParams.scanType][scanPosOffset]] + ctxSigOffset;
                         X265_CHECK(trSize > 4, "trSize check failure\n");


More information about the x265-devel mailing list