[x265] [PATCH] nquant optimization, downscaling qCoef from int32_t* to int16_t*

Praveen Tiwari praveen at multicorewareinc.com
Tue Sep 2 16:10:25 CEST 2014


# HG changeset patch
# User Praveen Tiwari
# Date 1409639717 -19800
# Node ID 5b06f046b25ffa84d5ddafff3f0e9f618cd38625
# Parent  7589725c09db0ba0d130916304625f17ba48db00
nquant optimization, downscaling qCoef from int32_t* to int16_t*

diff -r 7589725c09db -r 5b06f046b25f source/common/dct.cpp
--- a/source/common/dct.cpp	Mon Aug 25 15:03:23 2014 +0530
+++ b/source/common/dct.cpp	Tue Sep 02 12:05:17 2014 +0530
@@ -791,7 +791,7 @@
     return numSig;
 }
 
-uint32_t nquant_c(int32_t* coef, int32_t* quantCoeff, int32_t* qCoef, int qBits, int add, int numCoeff)
+uint32_t nquant_c(int32_t* coef, int32_t* quantCoeff, int16_t* qCoef, int qBits, int add, int numCoeff)
 {
     uint32_t numSig = 0;
 
@@ -805,7 +805,7 @@
         if (level)
             ++numSig;
         level *= sign;
-        qCoef[blockpos] = Clip3(-32768, 32767, level);
+        qCoef[blockpos] = (int16_t)Clip3(-32768, 32767, level);
     }
 
     return numSig;
diff -r 7589725c09db -r 5b06f046b25f source/common/primitives.h
--- a/source/common/primitives.h	Mon Aug 25 15:03:23 2014 +0530
+++ b/source/common/primitives.h	Tue Sep 02 12:05:17 2014 +0530
@@ -163,7 +163,7 @@
 typedef void (*calcrecon_t)(pixel* pred, int16_t* residual, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred);
 typedef void (*transpose_t)(pixel* dst, pixel* src, intptr_t stride);
 typedef uint32_t (*quant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff);
-typedef uint32_t (*nquant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);
+typedef uint32_t (*nquant_t)(int32_t *coef, int32_t *quantCoeff, int16_t *qCoef, int qBits, int add, int numCoeff);
 typedef void (*dequant_scaling_t)(const int16_t* src, const int32_t *dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift);
 typedef void (*dequant_normal_t)(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift);
 typedef int  (*count_nonzero_t)(const int16_t *quantCoeff, int numCoeff);
diff -r 7589725c09db -r 5b06f046b25f source/common/quant.cpp
--- a/source/common/quant.cpp	Mon Aug 25 15:03:23 2014 +0530
+++ b/source/common/quant.cpp	Tue Sep 02 12:05:17 2014 +0530
@@ -486,7 +486,18 @@
     int32_t *qCoef = m_scalingList->m_quantCoef[log2TrSize - 2][scalingListType][rem];
 
     int numCoeff = 1 << log2TrSize * 2;
-    uint32_t numSig = primitives.nquant(m_resiDctCoeff, qCoef, dstCoeff, qbits, add, numCoeff);
+
+    assert(numCoeff <= 1024);
+    ALIGN_VAR_16(int16_t, qCoeff1[1024]);
+    for (int i = 0; i < numCoeff; i++)
+    {
+        qCoeff1[i] = (int16_t)Clip3(-32768, 32767, dstCoeff[i]);
+    }
+    uint32_t numSig = primitives.nquant(m_resiDctCoeff, qCoef, qCoeff1, qbits, add, numCoeff);
+    for (int i = 0; i < numCoeff; i++)
+    {
+        dstCoeff[i] = qCoeff1[i];
+    }
 
     assert(numCoeff <= 1024);
     ALIGN_VAR_16(int16_t, qCoeff[1024]);
diff -r 7589725c09db -r 5b06f046b25f source/common/x86/pixel-util.h
--- a/source/common/x86/pixel-util.h	Mon Aug 25 15:03:23 2014 +0530
+++ b/source/common/x86/pixel-util.h	Tue Sep 02 12:05:17 2014 +0530
@@ -45,7 +45,7 @@
 void x265_transpose64_sse2(pixel *dest, pixel *src, intptr_t stride);
 
 uint32_t x265_quant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff);
-uint32_t x265_nquant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);
+uint32_t x265_nquant_sse4(int32_t *coef, int32_t *quantCoeff, int16_t *qCoef, int qBits, int add, int numCoeff);
 void x265_dequant_normal_sse4(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift);
 int x265_count_nonzero_ssse3(const int16_t *quantCoeff, int numCoeff);
 
diff -r 7589725c09db -r 5b06f046b25f source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm	Mon Aug 25 15:03:23 2014 +0530
+++ b/source/common/x86/pixel-util8.asm	Tue Sep 02 12:05:17 2014 +0530
@@ -938,7 +938,7 @@
 
 
 ;-----------------------------------------------------------------------------
-; uint32_t nquant(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);
+; uint32_t nquant(int32_t *coef, int32_t *quantCoeff, int16_t *qCoef, int qBits, int add, int numCoeff);
 ;-----------------------------------------------------------------------------
 INIT_XMM sse4
 cglobal nquant, 4,5,8
@@ -975,17 +975,12 @@
     pxor        m1, m4
     psubd       m1, m4
 
-    packssdw    m0, m0
-    packssdw    m1, m1
-    pmovsxwd    m0, m0
-    pmovsxwd    m1, m1
+    packssdw    m0, m1
 
     movu        [r2], m0
-    movu        [r2 + 16], m1
-    add         r2, 32
+    add         r2,   16
     dec         r4d
 
-    packssdw    m0, m1
     pxor        m4, m4
     pcmpeqw     m0, m4
     psubw       m7, m0
diff -r 7589725c09db -r 5b06f046b25f source/test/mbdstharness.cpp
--- a/source/test/mbdstharness.cpp	Mon Aug 25 15:03:23 2014 +0530
+++ b/source/test/mbdstharness.cpp	Tue Sep 02 12:05:17 2014 +0530
@@ -252,16 +252,16 @@
 
         int bits = rand() % 32;
         int valueToAdd = rand() % (32 * 1024);
-        int cmp_size = sizeof(int) * height * width;
+        int cmp_size = sizeof(short) * height * width;
         int numCoeff = height * width;
 
         int index1 = rand() % TEST_CASES;
         int index2 = rand() % TEST_CASES;
 
-        refReturnValue = ref(int_test_buff[index1] + j, int_test_buff[index2] + j, mintbuf2, bits, valueToAdd, numCoeff);
-        optReturnValue = (uint32_t)checked(opt, int_test_buff[index1] + j, int_test_buff[index2] + j, mintbuf4, bits, valueToAdd, numCoeff);
+        refReturnValue = ref(int_test_buff[index1] + j, int_test_buff[index2] + j, mshortbuf2, bits, valueToAdd, numCoeff);
+        optReturnValue = (uint32_t)checked(opt, int_test_buff[index1] + j, int_test_buff[index2] + j, mshortbuf3, bits, valueToAdd, numCoeff);
 
-        if (memcmp(mintbuf4, mintbuf2, cmp_size))
+        if (memcmp(mshortbuf2, mshortbuf3, cmp_size))
             return false;
 
         if (optReturnValue != refReturnValue)
@@ -436,7 +436,7 @@
     if (opt.nquant)
     {
         printf("nquant\t\t");
-        REPORT_SPEEDUP(opt.nquant, ref.nquant, int_test_buff[0], int_test_buff[1], mintbuf3, 23, 23785, 32 * 32);
+        REPORT_SPEEDUP(opt.nquant, ref.nquant, int_test_buff[0], int_test_buff[1], mshortbuf2, 23, 23785, 32 * 32);
     }
 
     if (opt.count_nonzero)


More information about the x265-devel mailing list