[x265] [PATCH 8 of 8] modify findPosFirstLast API to output absSumSign
Min Chen
chenm003 at 163.com
Thu Oct 1 02:48:48 CEST 2015
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1443656120 18000
# Node ID a6a56cc966f1dc65cdf2c36660e341807f5bf21c
# Parent aa62ce092d1b34cb7037b3d884136d4d647d13a9
modify findPosFirstLast API to output absSumSign
---
source/common/dct.cpp | 14 ++++++++++++--
source/common/quant.cpp | 36 +++++++++++++++++++++---------------
source/common/x86/pixel-util8.asm | 23 ++++++++++++++++++-----
source/test/pixelharness.cpp | 4 ++--
4 files changed, 53 insertions(+), 24 deletions(-)
diff -r aa62ce092d1b -r a6a56cc966f1 source/common/dct.cpp
--- a/source/common/dct.cpp Wed Sep 30 17:32:31 2015 -0500
+++ b/source/common/dct.cpp Wed Sep 30 18:35:20 2015 -0500
@@ -787,11 +787,12 @@
return scanPosLast - 1;
}
+// NOTE: lastNZPosInCG and absSumSign are undefined when the input block is all zeros
static uint32_t findPosFirstLast_c(const int16_t *dstCoeff, const intptr_t trSize, const uint16_t scanTbl[16])
{
int n;
- for (n = SCAN_SET_SIZE - 1; n >= 0; --n)
+ for (n = SCAN_SET_SIZE - 1; n >= 0; n--)
{
const uint32_t idx = scanTbl[n];
const uint32_t idxY = idx / MLS_CG_SIZE;
@@ -815,8 +816,17 @@
uint32_t firstNZPosInCG = (uint32_t)n;
+ uint32_t absSumSign = 0;
+ for (n = firstNZPosInCG; n <= (int)lastNZPosInCG; n++)
+ {
+ const uint32_t idx = scanTbl[n];
+ const uint32_t idxY = idx / MLS_CG_SIZE;
+ const uint32_t idxX = idx % MLS_CG_SIZE;
+ absSumSign += dstCoeff[idxY * trSize + idxX];
+ }
+
// NOTE: when coeff block all ZERO, the lastNZPosInCG is undefined and firstNZPosInCG is 16
- return ((lastNZPosInCG << 16) | firstNZPosInCG);
+ return ((absSumSign << 31) | (lastNZPosInCG << 8) | firstNZPosInCG);
}
diff -r aa62ce092d1b -r a6a56cc966f1 source/common/quant.cpp
--- a/source/common/quant.cpp Wed Sep 30 17:32:31 2015 -0500
+++ b/source/common/quant.cpp Wed Sep 30 18:35:20 2015 -0500
@@ -1232,7 +1232,8 @@
// Average 49.62 pixels
/* clean uncoded coefficients */
- for (int pos = bestLastIdx; pos <= fastMin(lastScanPos, (bestLastIdx | (SCAN_SET_SIZE - 1))); pos++)
+ X265_CHECK((uint32_t)(fastMin(lastScanPos, bestLastIdx) | (SCAN_SET_SIZE - 1)) < trSize * trSize, "array beyond bound\n");
+ for (int pos = bestLastIdx; pos <= (fastMin(lastScanPos, bestLastIdx) | (SCAN_SET_SIZE - 1)); pos++)
{
dstCoeff[codeParams.scan[pos]] = 0;
}
@@ -1262,19 +1263,23 @@
/* measure distance between first and last non-zero coef in this
* coding group */
const uint32_t posFirstLast = primitives.findPosFirstLast(&dstCoeff[codeParams.scan[subPos]], trSize, g_scan4x4[codeParams.scanType]);
- int firstNZPosInCG = (uint16_t)posFirstLast;
- int lastNZPosInCG = posFirstLast >> 16;
-
+ const int firstNZPosInCG = (uint8_t)posFirstLast;
+ const int lastNZPosInCG = (int8_t)(posFirstLast >> 8);
+ const uint32_t absSumSign = posFirstLast;
if (lastNZPosInCG - firstNZPosInCG >= SBH_THRESHOLD)
{
- int absSum = dstCoeff[codeParams.scan[subPos + firstNZPosInCG]];
- const uint32_t signbit = ((uint32_t)absSum >> 31);
+ const int32_t signbit = ((int32_t)dstCoeff[codeParams.scan[subPos + firstNZPosInCG]]);
- for (n = firstNZPosInCG + 1; n <= lastNZPosInCG; n++)
- absSum += dstCoeff[codeParams.scan[n + subPos]];
+#if CHECKED_BUILD || _DEBUG
+ int32_t absSum_dummy = 0;
+ for (n = firstNZPosInCG; n <= lastNZPosInCG; n++)
+ absSum_dummy += dstCoeff[codeParams.scan[n + subPos]];
+ X265_CHECK(((uint32_t)absSum_dummy & 1) == (absSumSign >> 31), "absSumSign check failure\n");
+#endif
- if (signbit != (absSum & 1U))
+ //if (signbit != absSumSign)
+ if (((int32_t)(signbit ^ absSumSign)) < 0)
{
/* We must find a coeff to toggle up or down so the sign bit of the first non-zero coeff
* is properly implied. Note dstCoeff[] are signed by this point but curChange and
@@ -1284,13 +1289,13 @@
uint32_t minPos = 0;
int8_t finalChange = 0;
int curChange = 0;
- uint32_t lastCoeff = lastCG;
+ uint32_t lastCoeffAdjust = (lastCG & (abs(dstCoeff[codeParams.scan[lastNZPosInCG + subPos]]) == 1)) * 4 * IEP_RATE;
for (n = (lastCG ? lastNZPosInCG : SCAN_SET_SIZE - 1); n >= 0; --n)
{
const uint32_t blkPos = codeParams.scan[n + subPos];
- const int signCoef = m_resiDctCoeff[blkPos]; /* pre-quantization DCT coeff */
- const int absLevel = abs(dstCoeff[blkPos]);
+ const int32_t signCoef = m_resiDctCoeff[blkPos]; /* pre-quantization DCT coeff */
+ const int absLevel = abs(dstCoeff[blkPos]);
// TODO: this is constant in non-scaling mode
const uint32_t preDQuantLevelDiff = (unquantScale[blkPos] << per);
const uint32_t unQuantLevel = (absLevel * (unquantScale[blkPos] << per) + unquantRound);
@@ -1316,13 +1321,14 @@
int downBits = rateIncDown[blkPos] - (isOne ? (IEP_RATE + sigRateDelta[blkPos]) : 0);
int64_t costDown = DELTARDCOST(origDist, d, downBits);
- costDown -= (lastCoeff & isOne) * 4 * IEP_RATE;
+ costDown -= lastCoeffAdjust;
curCost = ((n == firstNZPosInCG) & isOne) ? MAX_INT64 : costDown;
curChange = 2 * (costUp < costDown) - 1;
curCost = (costUp < costDown) ? costUp : curCost;
}
- else if ((n < firstNZPosInCG) & (signbit != ((uint32_t)signCoef >> 31)))
+ //else if ((n < firstNZPosInCG) & (signbit != ((uint32_t)signCoef >> 31)))
+ else if ((n < firstNZPosInCG) & ((signbit ^ signCoef) < 0))
{
/* don't try to make a new coded coeff before the first coeff if its
* sign would be different than the first coeff, the inferred sign would
@@ -1344,7 +1350,7 @@
finalChange = (int8_t)curChange;
minPos = blkPos + (absLevel << 16);
}
- lastCoeff = 0;
+ lastCoeffAdjust = 0;
}
const int absInMinPos = (minPos >> 16);
diff -r aa62ce092d1b -r a6a56cc966f1 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm Wed Sep 30 17:32:31 2015 -0500
+++ b/source/common/x86/pixel-util8.asm Wed Sep 30 18:35:20 2015 -0500
@@ -6653,10 +6653,10 @@
;-----------------------------------------------------------------------------
-; uint32_t[last first] findPosFirstAndLast(const int16_t *dstCoeff, const intptr_t trSize, const uint16_t scanTbl[16])
+; uint32_t[sumSign last first] findPosFirstLast(const int16_t *dstCoeff, const intptr_t trSize, const uint16_t scanTbl[16])
;-----------------------------------------------------------------------------
INIT_XMM ssse3
-cglobal findPosFirstLast, 3,3,3
+cglobal findPosFirstLast, 3,3,4
; convert stride to int16_t
add r1d, r1d
@@ -6668,10 +6668,22 @@
movh m1, [r0]
movhps m1, [r0 + r1]
movh m2, [r0 + r1 * 2]
- lea r1, [r1 * 3]
+ lea r1d, [r1 * 3]
movhps m2, [r0 + r1]
+ pxor m3, m1, m2
packsswb m1, m2
+ ; get absSum
+ movhlps m2, m3
+ pxor m3, m2
+ pshufd m2, m3, q2301
+ pxor m3, m2
+ movd r0d, m3
+ mov r2d, r0d
+ shr r2d, 16
+ xor r2d, r0d
+ shl r2d, 31
+
; get non-zero mask
pxor m2, m2
pcmpeqb m1, m2
@@ -6684,8 +6696,9 @@
not r0d
bsr r1w, r0w
bsf eax, r0d ; side effect: clear AH to Zero
- shl r1d, 16
- or eax, r1d
+ shl r1d, 8
+ or eax, r2d ; merge absSumSign
+ or eax, r1d ; merge lastNZPosInCG
RET
diff -r aa62ce092d1b -r a6a56cc966f1 source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp Wed Sep 30 17:32:31 2015 -0500
+++ b/source/test/pixelharness.cpp Wed Sep 30 18:35:20 2015 -0500
@@ -1571,8 +1571,8 @@
// specially case: all coeff group are zero
if (j >= SCAN_SET_SIZE)
{
- // all zero block the high 16-bits undefined
- if ((uint16_t)ref_scanPos != (uint16_t)opt_scanPos)
+ // for an all-zero block the high 24 bits are undefined
+ if ((uint8_t)ref_scanPos != (uint8_t)opt_scanPos)
return false;
}
else if (ref_scanPos != opt_scanPos)
More information about the x265-devel
mailing list