[x265] quant: remove scaledCoeff from nquant()
Satoshi Nakagawa
nakagawa424 at oki.com
Sun Aug 10 10:24:06 CEST 2014
# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1407658928 -32400
# Sun Aug 10 17:22:08 2014 +0900
# Node ID d1dad09266327d40b6c2372f9916f7fcf288c2f0
# Parent 6e4eb854220350cf0c980fc02cc11109c506585f
quant: remove scaledCoeff from nquant()
diff -r 6e4eb8542203 -r d1dad0926632 source/common/dct.cpp
--- a/source/common/dct.cpp Sat Aug 09 19:43:23 2014 -0500
+++ b/source/common/dct.cpp Sun Aug 10 17:22:08 2014 +0900
@@ -795,7 +795,7 @@
return numSig;
}
-uint32_t nquant_c(int32_t* coef, int32_t* quantCoeff, int32_t* scaledCoeff, int32_t* qCoef, int qBits, int add, int numCoeff)
+uint32_t nquant_c(int32_t* coef, int32_t* quantCoeff, int32_t* qCoef, int qBits, int add, int numCoeff)
{
uint32_t numSig = 0;
@@ -805,7 +805,6 @@
int sign = (level < 0 ? -1 : 1);
int tmplevel = abs(level) * quantCoeff[blockpos];
- scaledCoeff[blockpos] = tmplevel;
level = ((tmplevel + add) >> qBits);
if (level)
++numSig;
diff -r 6e4eb8542203 -r d1dad0926632 source/common/primitives.h
--- a/source/common/primitives.h Sat Aug 09 19:43:23 2014 -0500
+++ b/source/common/primitives.h Sun Aug 10 17:22:08 2014 +0900
@@ -160,7 +160,7 @@
typedef void (*calcrecon_t)(pixel* pred, int16_t* residual, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred);
typedef void (*transpose_t)(pixel* dst, pixel* src, intptr_t stride);
typedef uint32_t (*quant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff);
-typedef uint32_t (*nquant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *scaledCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);
+typedef uint32_t (*nquant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);
typedef void (*dequant_scaling_t)(const int32_t* src, const int32_t *dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift);
typedef void (*dequant_normal_t)(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);
typedef int (*count_nonzero_t)(const int32_t *quantCoeff, int numCoeff);
diff -r 6e4eb8542203 -r d1dad0926632 source/common/quant.cpp
--- a/source/common/quant.cpp Sat Aug 09 19:43:23 2014 -0500
+++ b/source/common/quant.cpp Sun Aug 10 17:22:08 2014 +0900
@@ -487,7 +487,6 @@
* probability models like CABAC */
uint32_t Quant::rdoQuant(TComDataCU* cu, coeff_t* dstCoeff, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool usePsy)
{
- uint32_t trSize = 1 << log2TrSize;
int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; /* Represents scaling through forward transform */
int scalingListType = (cu->isIntra(absPartIdx) ? 0 : 3) + ttype;
@@ -500,14 +499,13 @@
int32_t *qCoef = m_scalingList->m_quantCoef[log2TrSize - 2][scalingListType][rem];
int numCoeff = 1 << log2TrSize * 2;
- int scaledCoeff[32 * 32];
- uint32_t numSig = primitives.nquant(m_resiDctCoeff, qCoef, scaledCoeff, dstCoeff, qbits, add, numCoeff);
+ uint32_t numSig = primitives.nquant(m_resiDctCoeff, qCoef, dstCoeff, qbits, add, numCoeff);
X265_CHECK((int)numSig == primitives.count_nonzero(dstCoeff, numCoeff), "numSig differ\n");
if (!numSig)
return 0;
- x265_emms();
+ uint32_t trSize = 1 << log2TrSize;
/* unquant constants for psy-rdoq. The dequant coefficients have a (1<<4) scale applied that
* must be removed during unquant. This may be larger than the QP upshift, which would turn
diff -r 6e4eb8542203 -r d1dad0926632 source/common/x86/pixel-util.h
--- a/source/common/x86/pixel-util.h Sat Aug 09 19:43:23 2014 -0500
+++ b/source/common/x86/pixel-util.h Sun Aug 10 17:22:08 2014 +0900
@@ -45,7 +45,7 @@
void x265_transpose64_sse2(pixel *dest, pixel *src, intptr_t stride);
uint32_t x265_quant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff);
-uint32_t x265_nquant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *scaledCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);
+uint32_t x265_nquant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);
void x265_dequant_normal_sse4(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);
int x265_count_nonzero_ssse3(const int32_t *quantCoeff, int numCoeff);
diff -r 6e4eb8542203 -r d1dad0926632 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm Sat Aug 09 19:43:23 2014 -0500
+++ b/source/common/x86/pixel-util8.asm Sun Aug 10 17:22:08 2014 +0900
@@ -938,72 +938,63 @@
;-----------------------------------------------------------------------------
-; uint32_t nquant(int32_t *coef, int32_t *quantCoeff, int32_t *scaledCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);
+; uint32_t nquant(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);
;-----------------------------------------------------------------------------
INIT_XMM sse4
-cglobal nquant, 5,6,8
-
- ; fill qbits
- movd m4, r4d ; m4 = qbits
-
- ; fill offset
- movd m5, r5m
- pshufd m5, m5, 0 ; m5 = add
-
- mov r4d, r6m
+cglobal nquant, 4,5,8
+ movd m6, r4m
+ mov r4d, r5m
+ pxor m7, m7 ; m7 = numZero
+ movd m5, r3d ; m5 = qbits
+ pshufd m6, m6, 0 ; m6 = add
+ mov r3d, r4d ; r3 = numCoeff
shr r4d, 3
- pxor m7, m7 ; m7 = numZero
.loop:
- ; 4 coeff
movu m0, [r0] ; m0 = level
- pxor m1, m1
- pcmpgtd m1, m0 ; m1 = sign
+ movu m1, [r0 + 16] ; m1 = level
movu m2, [r1] ; m2 = qcoeff
+ movu m3, [r1 + 16] ; m3 = qcoeff
+ add r0, 32
+ add r1, 32
+
+ pxor m4, m4
+ pcmpgtd m4, m0 ; m4 = sign
pabsd m0, m0
pmulld m0, m2 ; m0 = tmpLevel1
- movu [r2], m0 ; m0 = scaledCoeff
- paddd m2, m0, m5
- psrad m2, m4 ; m2 = level1
- pxor m0, m0
- pcmpeqd m0, m2 ; m0 = mask4
- psubd m7, m0
-
- pxor m2, m1
- psubd m2, m1
- packssdw m2, m2
- pmovsxwd m2, m2
- movu [r3], m2
- ; 4 coeff
- movu m0, [r0 + 16] ; m0 = level
- pxor m1, m1
- pcmpgtd m1, m0 ; m1 = sign
- movu m2, [r1 + 16] ; m2 = qcoeff
- pabsd m0, m0
- pmulld m0, m2 ; m0 = tmpLevel1
- movu [r2 + 16], m0 ; m0 = scaledCoeff
- paddd m2, m0, m5
- psrad m2, m4 ; m2 = level1
- pxor m0, m0
- pcmpeqd m0, m2 ; m0 = mask4
- psubd m7, m0
-
- pxor m2, m1
- psubd m2, m1
- packssdw m2, m2
- pmovsxwd m2, m2
- movu [r3 + 16], m2
-
- add r0, 32
- add r1, 32
+ paddd m0, m6
+ psrad m0, m5 ; m0 = level1
+ pxor m0, m4
+ psubd m0, m4
+
+ pxor m4, m4
+ pcmpgtd m4, m1 ; m4 = sign
+ pabsd m1, m1
+ pmulld m1, m3 ; m1 = tmpLevel1
+ paddd m1, m6
+ psrad m1, m5 ; m1 = level1
+ pxor m1, m4
+ psubd m1, m4
+
+ packssdw m0, m0
+ packssdw m1, m1
+ pmovsxwd m0, m0
+ pmovsxwd m1, m1
+
+ movu [r2], m0
+ movu [r2 + 16], m1
add r2, 32
- add r3, 32
-
dec r4d
- jnz .loop
-
- phaddd m7, m7
- phaddd m7, m7
- mov eax, r6m
+
+ packssdw m0, m1
+ pxor m4, m4
+ pcmpeqw m0, m4
+ psubw m7, m0
+
+ jnz .loop
+
+ packuswb m7, m7
+ psadbw m7, m4
+ mov eax, r3d
movd r4d, m7
sub eax, r4d ; numSig
diff -r 6e4eb8542203 -r d1dad0926632 source/test/mbdstharness.cpp
--- a/source/test/mbdstharness.cpp Sat Aug 09 19:43:23 2014 -0500
+++ b/source/test/mbdstharness.cpp Sun Aug 10 17:22:08 2014 +0900
@@ -348,11 +348,8 @@
int index1 = rand() % TEST_CASES;
int index2 = rand() % TEST_CASES;
- refReturnValue = ref(int_test_buff[index1] + j, int_test_buff[index2] + j, mintbuf5, mintbuf6, bits, valueToAdd, numCoeff);
- optReturnValue = (uint32_t)checked(opt, int_test_buff[index1] + j, int_test_buff[index2] + j, mintbuf3, mintbuf4, bits, valueToAdd, numCoeff);
-
- if (memcmp(mintbuf3, mintbuf5, cmp_size))
- return false;
+ refReturnValue = ref(int_test_buff[index1] + j, int_test_buff[index2] + j, mintbuf6, bits, valueToAdd, numCoeff);
+ optReturnValue = (uint32_t)checked(opt, int_test_buff[index1] + j, int_test_buff[index2] + j, mintbuf4, bits, valueToAdd, numCoeff);
if (memcmp(mintbuf4, mintbuf6, cmp_size))
return false;
@@ -511,7 +508,7 @@
if (opt.nquant)
{
printf("nquant\t\t");
- REPORT_SPEEDUP(opt.nquant, ref.nquant, mintbuf1, mintbuf2, mintbuf3, mintbuf4, 23, 23785, 32 * 32);
+ REPORT_SPEEDUP(opt.nquant, ref.nquant, mintbuf1, mintbuf2, mintbuf3, 23, 23785, 32 * 32);
}
if (opt.count_nonzero)
More information about the x265-devel mailing list