[x265] [PATCH] nquant optimization, downscaling qCoef from int32_t* to int16_t*
Praveen Tiwari
praveen at multicorewareinc.com
Tue Sep 2 16:10:25 CEST 2014
# HG changeset patch
# User Praveen Tiwari
# Date 1409639717 -19800
# Node ID 5b06f046b25ffa84d5ddafff3f0e9f618cd38625
# Parent 7589725c09db0ba0d130916304625f17ba48db00
nquant optimization, downscaling qCoef from int32_t* to int16_t*
diff -r 7589725c09db -r 5b06f046b25f source/common/dct.cpp
--- a/source/common/dct.cpp Mon Aug 25 15:03:23 2014 +0530
+++ b/source/common/dct.cpp Tue Sep 02 12:05:17 2014 +0530
@@ -791,7 +791,7 @@
return numSig;
}
-uint32_t nquant_c(int32_t* coef, int32_t* quantCoeff, int32_t* qCoef, int qBits, int add, int numCoeff)
+uint32_t nquant_c(int32_t* coef, int32_t* quantCoeff, int16_t* qCoef, int qBits, int add, int numCoeff)
{
uint32_t numSig = 0;
@@ -805,7 +805,7 @@
if (level)
++numSig;
level *= sign;
- qCoef[blockpos] = Clip3(-32768, 32767, level);
+ qCoef[blockpos] = (int16_t)Clip3(-32768, 32767, level);
}
return numSig;
diff -r 7589725c09db -r 5b06f046b25f source/common/primitives.h
--- a/source/common/primitives.h Mon Aug 25 15:03:23 2014 +0530
+++ b/source/common/primitives.h Tue Sep 02 12:05:17 2014 +0530
@@ -163,7 +163,7 @@
typedef void (*calcrecon_t)(pixel* pred, int16_t* residual, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred);
typedef void (*transpose_t)(pixel* dst, pixel* src, intptr_t stride);
typedef uint32_t (*quant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff);
-typedef uint32_t (*nquant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);
+typedef uint32_t (*nquant_t)(int32_t *coef, int32_t *quantCoeff, int16_t *qCoef, int qBits, int add, int numCoeff);
typedef void (*dequant_scaling_t)(const int16_t* src, const int32_t *dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift);
typedef void (*dequant_normal_t)(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift);
typedef int (*count_nonzero_t)(const int16_t *quantCoeff, int numCoeff);
diff -r 7589725c09db -r 5b06f046b25f source/common/quant.cpp
--- a/source/common/quant.cpp Mon Aug 25 15:03:23 2014 +0530
+++ b/source/common/quant.cpp Tue Sep 02 12:05:17 2014 +0530
@@ -486,7 +486,18 @@
int32_t *qCoef = m_scalingList->m_quantCoef[log2TrSize - 2][scalingListType][rem];
int numCoeff = 1 << log2TrSize * 2;
- uint32_t numSig = primitives.nquant(m_resiDctCoeff, qCoef, dstCoeff, qbits, add, numCoeff);
+
+ assert(numCoeff <= 1024);
+ ALIGN_VAR_16(int16_t, qCoeff1[1024]);
+ for (int i = 0; i < numCoeff; i++)
+ {
+ qCoeff1[i] = (int16_t)Clip3(-32768, 32767, dstCoeff[i]);
+ }
+ uint32_t numSig = primitives.nquant(m_resiDctCoeff, qCoef, qCoeff1, qbits, add, numCoeff);
+ for (int i = 0; i < numCoeff; i++)
+ {
+ dstCoeff[i] = qCoeff1[i];
+ }
assert(numCoeff <= 1024);
ALIGN_VAR_16(int16_t, qCoeff[1024]);
diff -r 7589725c09db -r 5b06f046b25f source/common/x86/pixel-util.h
--- a/source/common/x86/pixel-util.h Mon Aug 25 15:03:23 2014 +0530
+++ b/source/common/x86/pixel-util.h Tue Sep 02 12:05:17 2014 +0530
@@ -45,7 +45,7 @@
void x265_transpose64_sse2(pixel *dest, pixel *src, intptr_t stride);
uint32_t x265_quant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff);
-uint32_t x265_nquant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);
+uint32_t x265_nquant_sse4(int32_t *coef, int32_t *quantCoeff, int16_t *qCoef, int qBits, int add, int numCoeff);
void x265_dequant_normal_sse4(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift);
int x265_count_nonzero_ssse3(const int16_t *quantCoeff, int numCoeff);
diff -r 7589725c09db -r 5b06f046b25f source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm Mon Aug 25 15:03:23 2014 +0530
+++ b/source/common/x86/pixel-util8.asm Tue Sep 02 12:05:17 2014 +0530
@@ -938,7 +938,7 @@
;-----------------------------------------------------------------------------
-; uint32_t nquant(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);
+; uint32_t nquant(int32_t *coef, int32_t *quantCoeff, int16_t *qCoef, int qBits, int add, int numCoeff);
;-----------------------------------------------------------------------------
INIT_XMM sse4
cglobal nquant, 4,5,8
@@ -975,17 +975,12 @@
pxor m1, m4
psubd m1, m4
- packssdw m0, m0
- packssdw m1, m1
- pmovsxwd m0, m0
- pmovsxwd m1, m1
+ packssdw m0, m1
movu [r2], m0
- movu [r2 + 16], m1
- add r2, 32
+ add r2, 16
dec r4d
- packssdw m0, m1
pxor m4, m4
pcmpeqw m0, m4
psubw m7, m0
diff -r 7589725c09db -r 5b06f046b25f source/test/mbdstharness.cpp
--- a/source/test/mbdstharness.cpp Mon Aug 25 15:03:23 2014 +0530
+++ b/source/test/mbdstharness.cpp Tue Sep 02 12:05:17 2014 +0530
@@ -252,16 +252,16 @@
int bits = rand() % 32;
int valueToAdd = rand() % (32 * 1024);
- int cmp_size = sizeof(int) * height * width;
+ int cmp_size = sizeof(short) * height * width;
int numCoeff = height * width;
int index1 = rand() % TEST_CASES;
int index2 = rand() % TEST_CASES;
- refReturnValue = ref(int_test_buff[index1] + j, int_test_buff[index2] + j, mintbuf2, bits, valueToAdd, numCoeff);
- optReturnValue = (uint32_t)checked(opt, int_test_buff[index1] + j, int_test_buff[index2] + j, mintbuf4, bits, valueToAdd, numCoeff);
+ refReturnValue = ref(int_test_buff[index1] + j, int_test_buff[index2] + j, mshortbuf2, bits, valueToAdd, numCoeff);
+ optReturnValue = (uint32_t)checked(opt, int_test_buff[index1] + j, int_test_buff[index2] + j, mshortbuf3, bits, valueToAdd, numCoeff);
- if (memcmp(mintbuf4, mintbuf2, cmp_size))
+ if (memcmp(mshortbuf2, mshortbuf3, cmp_size))
return false;
if (optReturnValue != refReturnValue)
@@ -436,7 +436,7 @@
if (opt.nquant)
{
printf("nquant\t\t");
- REPORT_SPEEDUP(opt.nquant, ref.nquant, int_test_buff[0], int_test_buff[1], mintbuf3, 23, 23785, 32 * 32);
+ REPORT_SPEEDUP(opt.nquant, ref.nquant, int_test_buff[0], int_test_buff[1], mshortbuf2, 23, 23785, 32 * 32);
}
if (opt.count_nonzero)
More information about the x265-devel mailing list