[x265] [PATCH] quant.cpp: use 'rdoQuant_c' primitive to optimize rdoQuant path
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Tue Nov 28 11:06:33 CET 2017
# HG changeset patch
# User Praveen Tiwari <praveen at multicorewareinc.com>
# Date 1511851222 -19800
# Tue Nov 28 12:10:22 2017 +0530
# Node ID d732ca2095defdbf42748327006083befb30a89e
# Parent 4d242c555d14ca8214d9da89cef41c4418af4dca
quant.cpp: use 'rdoQuant_c' primitive to optimize rdoQuant path
diff -r 4d242c555d14 -r d732ca2095de source/common/quant.cpp
--- a/source/common/quant.cpp Tue Nov 28 11:43:00 2017 +0530
+++ b/source/common/quant.cpp Tue Nov 28 12:10:22 2017 +0530
@@ -803,20 +803,14 @@
if (usePsyMask)
{
- // TODO: we can't SIMD optimize because PSYVALUE need 64-bits multiplication, convert to Double can work faster by FMA
+ // Expected to work faster by FMA SIMD
+ primitives.rdoQuant(m_resiDctCoeff, m_fencDctCoeff, costUncoded, &totalUncodedCost, &totalRdCost, psyScale, blkPos, log2TrSize);
+ blkPos = codeParams.scan[scanPosBase];
+
for (int y = 0; y < MLS_CG_SIZE; y++)
{
for (int x = 0; x < MLS_CG_SIZE; x++)
{
- int signCoef = m_resiDctCoeff[blkPos + x]; /* pre-quantization DCT coeff */
- int predictedCoef = m_fencDctCoeff[blkPos + x] - signCoef; /* predicted DCT = source DCT - residual DCT*/
- costUncoded[blkPos + x] = static_cast<double>(((int64_t)signCoef * signCoef) << scaleBits);
- /* when no residual coefficient is coded, predicted coef == recon coef */
- costUncoded[blkPos + x] -= PSYVALUE(predictedCoef);
-
- totalUncodedCost += costUncoded[blkPos + x];
- totalRdCost += costUncoded[blkPos + x];
-
const uint32_t scanPosOffset = y * MLS_CG_SIZE + x;
const uint32_t ctxSig = table_cnt[patternSigCtx][g_scan4x4[codeParams.scanType][scanPosOffset]] + ctxSigOffset;
X265_CHECK(trSize > 4, "trSize check failure\n");
More information about the x265-devel mailing list