[x265] [PATCH] quant.cpp: use 'nonPsyRdoQuant_c' primitive to optimize rdoQuant path
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Tue Nov 28 11:07:18 CET 2017
# HG changeset patch
# User Praveen Tiwari <praveen at multicorewareinc.com>
# Date 1511855234 -19800
# Tue Nov 28 13:17:14 2017 +0530
# Node ID 85970193df47aa5da685efc27aaef04d9f7f21a0
# Parent d732ca2095defdbf42748327006083befb30a89e
quant.cpp: use 'nonPsyRdoQuant_c' primitive to optimize rdoQuant path
diff -r d732ca2095de -r 85970193df47 source/common/quant.cpp
--- a/source/common/quant.cpp Tue Nov 28 12:10:22 2017 +0530
+++ b/source/common/quant.cpp Tue Nov 28 13:17:14 2017 +0530
@@ -824,16 +824,14 @@
}
else
{
- // non-psy path
+ // non-psy path - expected to work faster by FMA SIMD
+ primitives.nonPsyRdoQuant(m_resiDctCoeff, costUncoded, &totalUncodedCost, &totalRdCost, blkPos, log2TrSize);
+ blkPos = codeParams.scan[scanPosBase];
+
for (int y = 0; y < MLS_CG_SIZE; y++)
{
for (int x = 0; x < MLS_CG_SIZE; x++)
{
- int signCoef = m_resiDctCoeff[blkPos + x]; /* pre-quantization DCT coeff */
- costUncoded[blkPos + x] = static_cast<double>(((int64_t)signCoef * signCoef) << scaleBits);
- totalUncodedCost += costUncoded[blkPos + x];
- totalRdCost += costUncoded[blkPos + x];
-
const uint32_t scanPosOffset = y * MLS_CG_SIZE + x;
const uint32_t ctxSig = table_cnt[patternSigCtx][g_scan4x4[codeParams.scanType][scanPosOffset]] + ctxSigOffset;
X265_CHECK(trSize > 4, "trSize check failure\n");
More information about the x265-devel mailing list