[x265] [PATCH] quant_c optimization, downscaling qCoef from int32_t* to int16_t*
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Tue Sep 2 16:14:16 CEST 2014
# HG changeset patch
# User Praveen Tiwari
# Date 1409661936 -19800
# Node ID 40e242e316b962116d64fb43444029c5c6546484
# Parent e1b2ab942177bc9f67547a61c365c6167b5cee38
quant_c optimization, downscaling qCoef from int32_t* to int16_t*
diff -r e1b2ab942177 -r 40e242e316b9 source/common/dct.cpp
--- a/source/common/dct.cpp Tue Sep 02 17:52:33 2014 +0530
+++ b/source/common/dct.cpp Tue Sep 02 18:15:36 2014 +0530
@@ -769,7 +769,7 @@
}
}
-uint32_t quant_c(int32_t* coef, int32_t* quantCoeff, int32_t* deltaU, int32_t* qCoef, int qBits, int add, int numCoeff)
+uint32_t quant_c(int32_t* coef, int32_t* quantCoeff, int32_t* deltaU, int16_t* qCoef, int qBits, int add, int numCoeff)
{
int qBits8 = qBits - 8;
uint32_t numSig = 0;
@@ -785,7 +785,7 @@
if (level)
++numSig;
level *= sign;
- qCoef[blockpos] = Clip3(-32768, 32767, level);
+ qCoef[blockpos] = (int16_t)Clip3(-32768, 32767, level);
}
return numSig;
diff -r e1b2ab942177 -r 40e242e316b9 source/common/primitives.h
--- a/source/common/primitives.h Tue Sep 02 17:52:33 2014 +0530
+++ b/source/common/primitives.h Tue Sep 02 18:15:36 2014 +0530
@@ -164,7 +164,7 @@
typedef void (*calcresidual_t)(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
typedef void (*calcrecon_t)(pixel* pred, int16_t* residual, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred);
typedef void (*transpose_t)(pixel* dst, pixel* src, intptr_t stride);
-typedef uint32_t (*quant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff);
+typedef uint32_t (*quant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int16_t *qCoef, int qBits, int add, int numCoeff);
typedef uint32_t (*nquant_t)(int32_t *coef, int32_t *quantCoeff, int16_t *qCoef, int qBits, int add, int numCoeff);
typedef void (*dequant_scaling_t)(const int16_t* src, const int32_t *dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift);
typedef void (*dequant_normal_t)(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift);
diff -r e1b2ab942177 -r 40e242e316b9 source/common/quant.cpp
--- a/source/common/quant.cpp Tue Sep 02 17:52:33 2014 +0530
+++ b/source/common/quant.cpp Tue Sep 02 18:15:36 2014 +0530
@@ -409,7 +409,21 @@
int add = (cu->m_slice->m_sliceType == I_SLICE ? 171 : 85) << (qbits - 9);
int numCoeff = 1 << log2TrSize * 2;
- uint32_t numSig = primitives.quant(m_resiDctCoeff, quantCoeff, deltaU, coeff, qbits, add, numCoeff);
+ /* Temporary shim: clamp the int32_t coefficients to the int16_t range and narrow them into an int16_t
+ * buffer. It will be removed once the caller is updated to take coefficients as int16_t*. */
+ ALIGN_VAR_16(int16_t, qCoeff[32 * 32]);
+ for (int i = 0; i < numCoeff; i++)
+ {
+ qCoeff[i] = (int16_t)Clip3(-32768, 32767, coeff[i]);
+ }
+ uint32_t numSig = primitives.quant(m_resiDctCoeff, quantCoeff, deltaU, qCoeff, qbits, add, numCoeff);
+
+ /* Temporary shim: copy the int16_t quantized results back into the caller's int32_t coeff buffer.
+ * It will be removed once the caller is updated to take coefficients as int16_t*. */
+ for (int i = 0; i < numCoeff; i++)
+ {
+ coeff[i] = qCoeff[i];
+ }
if (numSig >= 2 && cu->m_slice->m_pps->bSignHideEnabled)
{
diff -r e1b2ab942177 -r 40e242e316b9 source/common/x86/pixel-util.h
--- a/source/common/x86/pixel-util.h Tue Sep 02 17:52:33 2014 +0530
+++ b/source/common/x86/pixel-util.h Tue Sep 02 18:15:36 2014 +0530
@@ -44,7 +44,7 @@
void x265_transpose32_sse2(pixel *dest, pixel *src, intptr_t stride);
void x265_transpose64_sse2(pixel *dest, pixel *src, intptr_t stride);
-uint32_t x265_quant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff);
+uint32_t x265_quant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int16_t *qCoef, int qBits, int add, int numCoeff);
uint32_t x265_nquant_sse4(int32_t *coef, int32_t *quantCoeff, int16_t *qCoef, int qBits, int add, int numCoeff);
void x265_dequant_normal_sse4(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift);
int x265_count_nonzero_ssse3(const int16_t *quantCoeff, int numCoeff);
diff -r e1b2ab942177 -r 40e242e316b9 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm Tue Sep 02 17:52:33 2014 +0530
+++ b/source/common/x86/pixel-util8.asm Tue Sep 02 18:15:36 2014 +0530
@@ -855,7 +855,7 @@
;-----------------------------------------------------------------------------
-; uint32_t quant(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff);
+; uint32_t quant(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int16_t *qCoef, int qBits, int add, int numCoeff);
;-----------------------------------------------------------------------------
INIT_XMM sse4
cglobal quant, 5,6,8
@@ -895,8 +895,7 @@
pxor m2, m1
psubd m2, m1
packssdw m2, m2
- pmovsxwd m2, m2
- movu [r3], m2
+ movh [r3], m2
; 4 coeff
movu m0, [r0 + 16] ; m0 = level
pxor m1, m1
@@ -917,13 +916,12 @@
pxor m2, m1
psubd m2, m1
packssdw m2, m2
- pmovsxwd m2, m2
- movu [r3 + 16], m2
+ movh [r3 + 8], m2
add r0, 32
add r1, 32
add r2, 32
- add r3, 32
+ add r3, 16
dec r4d
jnz .loop
diff -r e1b2ab942177 -r 40e242e316b9 source/test/mbdstharness.cpp
--- a/source/test/mbdstharness.cpp Tue Sep 02 17:52:33 2014 +0530
+++ b/source/test/mbdstharness.cpp Tue Sep 02 18:15:36 2014 +0530
@@ -214,18 +214,19 @@
int bits = rand() % 32;
int valueToAdd = rand() % (32 * 1024);
int cmp_size = sizeof(int) * height * width;
+ int cmp_size1 = sizeof(short) * height * width;
int numCoeff = height * width;
int index1 = rand() % TEST_CASES;
int index2 = rand() % TEST_CASES;
- refReturnValue = ref(int_test_buff[index1] + j, int_test_buff[index2] + j, mintbuf1, mintbuf2, bits, valueToAdd, numCoeff);
- optReturnValue = (uint32_t)checked(opt, int_test_buff[index1] + j, int_test_buff[index2] + j, mintbuf3, mintbuf4, bits, valueToAdd, numCoeff);
+ refReturnValue = ref(int_test_buff[index1] + j, int_test_buff[index2] + j, mintbuf1, mshortbuf2, bits, valueToAdd, numCoeff);
+ optReturnValue = (uint32_t)checked(opt, int_test_buff[index1] + j, int_test_buff[index2] + j, mintbuf3, mshortbuf3, bits, valueToAdd, numCoeff);
- if (memcmp(mintbuf3, mintbuf1, cmp_size))
+ if (memcmp(mintbuf1, mintbuf3, cmp_size))
return false;
- if (memcmp(mintbuf4, mintbuf2, cmp_size))
+ if (memcmp(mshortbuf2, mshortbuf3, cmp_size1))
return false;
if (optReturnValue != refReturnValue)
@@ -430,7 +431,7 @@
if (opt.quant)
{
printf("quant\t\t");
- REPORT_SPEEDUP(opt.quant, ref.quant, int_test_buff[0], int_test_buff[1], mintbuf3, mintbuf4, 23, 23785, 32 * 32);
+ REPORT_SPEEDUP(opt.quant, ref.quant, int_test_buff[0], int_test_buff[1], mintbuf3, mshortbuf2, 23, 23785, 32 * 32);
}
if (opt.nquant)
More information about the x265-devel
mailing list