[x265] [PATCH] dct/idct: interface simplification

Tue Oct 28 13:06:53 CET 2014

# HG changeset patch
# User Praveen Tiwari
# Date 1414476382 -19800
# Node ID 4bd21a6f2304eb6783c028546773362c9789885c
# Parent  3ccb20b6c0223e3e4ccf253faedd930c6bb98403
dct/idct: interface simplification

diff -r 3ccb20b6c022 -r 4bd21a6f2304 source/common/dct.cpp

--- a/source/common/dct.cpp	Mon Oct 27 21:59:30 2014 -0500
+++ b/source/common/dct.cpp	Tue Oct 28 11:36:22 2014 +0530
@@ -440,7 +440,7 @@
     }
 }
 
-void dst4_c(int16_t *src, int32_t *dst, intptr_t stride)
+void dst4_c(int16_t *src, int16_t *dst, intptr_t stride)
 {
     const int shift_1st = 1 + X265_DEPTH - 8;
     const int shift_2nd = 8;
@@ -468,7 +468,7 @@
 #undef N
 }
 
-void dct4_c(int16_t *src, int32_t *dst, intptr_t stride)
+void dct4_c(int16_t *src, int16_t *dst, intptr_t stride)
 {
     const int shift_1st = 1 + X265_DEPTH - 8;
     const int shift_2nd = 8;
@@ -495,7 +495,7 @@
 #undef N
 }
 
-void dct8_c(int16_t *src, int32_t *dst, intptr_t stride)
+void dct8_c(int16_t *src, int16_t *dst, intptr_t stride)
 {
     const int shift_1st = 2 + X265_DEPTH - 8;
     const int shift_2nd = 9;
@@ -523,7 +523,7 @@
 #undef N
 }
 
-void dct16_c(int16_t *src, int32_t *dst, intptr_t stride)
+void dct16_c(int16_t *src, int16_t *dst, intptr_t stride)
 {
     const int shift_1st = 3 + X265_DEPTH - 8;
     const int shift_2nd = 10;
@@ -551,7 +551,7 @@
 #undef N
 }
 
-void dct32_c(int16_t *src, int32_t *dst, intptr_t stride)
+void dct32_c(int16_t *src, int16_t *dst, intptr_t stride)
 {
     const int shift_1st = 4 + X265_DEPTH - 8;
     const int shift_2nd = 11;
@@ -579,7 +579,7 @@
 #undef N
 }
 
-void idst4_c(int32_t *src, int16_t *dst, intptr_t stride)
+void idst4_c(int16_t *src, int16_t *dst, intptr_t stride)
 {
     const int shift_1st = 7;
     const int shift_2nd = 12 - (X265_DEPTH - 8);
@@ -607,7 +607,7 @@
     }
 }
 
-void idct4_c(int32_t *src, int16_t *dst, intptr_t stride)
+void idct4_c(int16_t *src, int16_t *dst, intptr_t stride)
 {
     const int shift_1st = 7;
     const int shift_2nd = 12 - (X265_DEPTH - 8);
@@ -635,7 +635,7 @@
     }
 }
 
-void idct8_c(int32_t *src, int16_t *dst, intptr_t stride)
+void idct8_c(int16_t *src, int16_t *dst, intptr_t stride)
 {
     const int shift_1st = 7;
     const int shift_2nd = 12 - (X265_DEPTH - 8);
@@ -662,7 +662,7 @@
     }
 }
 
-void idct16_c(int32_t *src, int16_t *dst, intptr_t stride)
+void idct16_c(int16_t *src, int16_t *dst, intptr_t stride)
 {
     const int shift_1st = 7;
     const int shift_2nd = 12 - (X265_DEPTH - 8);
@@ -689,7 +689,7 @@
     }
 }
 
-void idct32_c(int32_t *src, int16_t *dst, intptr_t stride)
+void idct32_c(int16_t *src, int16_t *dst, intptr_t stride)
 {
     const int shift_1st = 7;
     const int shift_2nd = 12 - (X265_DEPTH - 8);
@@ -717,7 +717,7 @@
     }
 }
 
-void dequant_normal_c(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift)
+void dequant_normal_c(const int16_t* quantCoef, int16_t* coef, int num, int scale, int shift)
 {
 #if HIGH_BIT_DEPTH
     X265_CHECK(scale < 32768 || ((scale & 3) == 0 && shift > 2), "dequant invalid scale %d\n", scale);
@@ -737,11 +737,11 @@
     for (int n = 0; n < num; n++)
     {
         coeffQ = (quantCoef[n] * scale + add) >> shift;
-        coef[n] = Clip3(-32768, 32767, coeffQ);
+        coef[n] = (int16_t)Clip3(-32768, 32767, coeffQ);
     }
 }
 
-void dequant_scaling_c(const int16_t* quantCoef, const int32_t *deQuantCoef, int32_t* coef, int num, int per, int shift)
+void dequant_scaling_c(const int16_t* quantCoef, const int32_t *deQuantCoef, int16_t* coef, int num, int per, int shift)
 {
     X265_CHECK(num <= 32 * 32, "dequant num %d too large\n", num);
 
@@ -756,7 +756,7 @@
         for (int n = 0; n < num; n++)
         {
             coeffQ = ((quantCoef[n] * deQuantCoef[n]) + add) >> (shift - per);
-            coef[n] = Clip3(-32768, 32767, coeffQ);
+            coef[n] = (int16_t)Clip3(-32768, 32767, coeffQ);
         }
     }
     else
@@ -764,12 +764,12 @@
         for (int n = 0; n < num; n++)
         {
             coeffQ   = Clip3(-32768, 32767, quantCoef[n] * deQuantCoef[n]);
-            coef[n] = Clip3(-32768, 32767, coeffQ << (per - shift));
+            coef[n] = (int16_t)Clip3(-32768, 32767, coeffQ << (per - shift));
         }
     }
 }
 
-uint32_t quant_c(int32_t* coef, int32_t* quantCoeff, int32_t* deltaU, int16_t* qCoef, int qBits, int add, int numCoeff)
+uint32_t quant_c(int16_t* coef, int32_t* quantCoeff, int32_t* deltaU, int16_t* qCoef, int qBits, int add, int numCoeff)
 {
     X265_CHECK(qBits >= 8, "qBits less than 8\n");
     X265_CHECK((numCoeff % 16) == 0, "numCoeff must be multiple of 16\n");
@@ -793,7 +793,7 @@
     return numSig;
 }
 
-uint32_t nquant_c(int32_t* coef, int32_t* quantCoeff, int16_t* qCoef, int qBits, int add, int numCoeff)
+uint32_t nquant_c(int16_t* coef, int32_t* quantCoeff, int16_t* qCoef, int qBits, int add, int numCoeff)
 {
     X265_CHECK((numCoeff % 16) == 0, "number of quant coeff is not multiple of 4x4\n");
     X265_CHECK((uint32_t)add < ((uint32_t)1 << qBits), "2 ^ qBits less than add\n");
@@ -848,7 +848,7 @@
     return numSig;
 }
 
-void denoiseDct_c(int32_t* dctCoef, uint32_t* resSum, uint16_t* offset, int numCoeff)
+void denoiseDct_c(int16_t* dctCoef, uint32_t* resSum, uint16_t* offset, int numCoeff)
 {
     for (int i = 0; i < numCoeff; i++)
     {
@@ -857,7 +857,7 @@
         level = (level + sign) ^ sign;
         resSum[i] += level;
         level -= offset[i];
-        dctCoef[i] = level < 0 ? 0 : (level ^ sign) - sign;
+        dctCoef[i] = (int16_t)(level < 0 ? 0 : (level ^ sign) - sign);
     }
 }
 
diff -r 3ccb20b6c022 -r 4bd21a6f2304 source/common/pixel.cpp
--- a/source/common/pixel.cpp	Mon Oct 27 21:59:30 2014 -0500
+++ b/source/common/pixel.cpp	Tue Oct 28 11:36:22 2014 +0530
@@ -491,7 +491,7 @@
     }
 }
 
-void convert16to32_shl(int32_t *dst, int16_t *src, intptr_t stride, int shift, int size)
+void convert16to32_shl(int16_t *dst, int16_t *src, intptr_t stride, int shift, int size)
 {
     for (int i = 0; i < size; i++)
     {
@@ -514,7 +514,7 @@
     }
 }
 
-void convert32to16_shr(int16_t *dst, int32_t *src, intptr_t stride, int shift, int size)
+void convert32to16_shr(int16_t *dst, int16_t *src, intptr_t stride, int shift, int size)
 {
     int round = 1 << (shift - 1);
 
diff -r 3ccb20b6c022 -r 4bd21a6f2304 source/common/primitives.h
--- a/source/common/primitives.h	Mon Oct 27 21:59:30 2014 -0500
+++ b/source/common/primitives.h	Tue Oct 28 11:36:22 2014 +0530
@@ -147,25 +147,25 @@
 typedef void (*intra_pred_t)(pixel* dst, intptr_t dstStride, pixel *refLeft, pixel *refAbove, int dirMode, int bFilter);
 typedef void (*intra_allangs_t)(pixel *dst, pixel *above0, pixel *left0, pixel *above1, pixel *left1, int bLuma);
 
-typedef void (*cvt16to32_shl_t)(int32_t *dst, int16_t *src, intptr_t, int, int);
+typedef void (*cvt16to32_shl_t)(int16_t *dst, int16_t *src, intptr_t, int, int);
 typedef void (*cvt16to32_shr_t)(int32_t *dst, int16_t *src, intptr_t, int, int);
-typedef void (*cvt32to16_shr_t)(int16_t *dst, int32_t *src, intptr_t, int, int);
+typedef void (*cvt32to16_shr_t)(int16_t *dst, int16_t *src, intptr_t, int, int);
 typedef void (*cvt32to16_shl_t)(int16_t *dst, int32_t *src, intptr_t, int);
 typedef uint32_t (*copy_cnt_t)(int16_t* coeff, int16_t* residual, intptr_t stride);
 typedef void (*copy_shr_t)(int16_t *dst, int16_t *src, intptr_t stride, int shift, int size);
 typedef void (*copy_shl_t)(int16_t *dst, int16_t *src, intptr_t stride, int shift);
 
-typedef void (*dct_t)(int16_t *src, int32_t *dst, intptr_t stride);
-typedef void (*idct_t)(int32_t *src, int16_t *dst, intptr_t stride);
-typedef void (*denoiseDct_t)(int32_t* dctCoef, uint32_t* resSum, uint16_t* offset, int numCoeff);
+typedef void (*dct_t)(int16_t *src, int16_t *dst, intptr_t stride);
+typedef void (*idct_t)(int16_t *src, int16_t *dst, intptr_t stride);
+typedef void (*denoiseDct_t)(int16_t* dctCoef, uint32_t* resSum, uint16_t* offset, int numCoeff);
 
 typedef void (*calcresidual_t)(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
 typedef void (*calcrecon_t)(pixel* pred, int16_t* residual, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred);
 typedef void (*transpose_t)(pixel* dst, pixel* src, intptr_t stride);
-typedef uint32_t (*quant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int16_t *qCoef, int qBits, int add, int numCoeff);
-typedef uint32_t (*nquant_t)(int32_t *coef, int32_t *quantCoeff, int16_t *qCoef, int qBits, int add, int numCoeff);
-typedef void (*dequant_scaling_t)(const int16_t* src, const int32_t *dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift);
-typedef void (*dequant_normal_t)(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift);
+typedef uint32_t (*quant_t)(int16_t *coef, int32_t *quantCoeff, int32_t *deltaU, int16_t *qCoef, int qBits, int add, int numCoeff);
+typedef uint32_t (*nquant_t)(int16_t *coef, int32_t *quantCoeff, int16_t *qCoef, int qBits, int add, int numCoeff);
+typedef void (*dequant_scaling_t)(const int16_t* src, const int32_t *dequantCoef, int16_t* dst, int num, int mcqp_miper, int shift);
+typedef void (*dequant_normal_t)(const int16_t* quantCoef, int16_t* coef, int num, int scale, int shift);
 typedef int  (*count_nonzero_t)(const int16_t *quantCoeff, int numCoeff);
 
 typedef void (*weightp_pp_t)(pixel *src, pixel *dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset);
diff -r 3ccb20b6c022 -r 4bd21a6f2304 source/common/quant.cpp
--- a/source/common/quant.cpp	Mon Oct 27 21:59:30 2014 -0500
+++ b/source/common/quant.cpp	Tue Oct 28 11:36:22 2014 +0530
@@ -166,7 +166,7 @@
     m_useRDOQ = useRDOQ;
     m_psyRdoqScale = (int64_t)(psyScale * 256.0);
     m_scalingList = &scalingList;
-    m_resiDctCoeff = X265_MALLOC(int32_t, MAX_TR_SIZE * MAX_TR_SIZE * 2);
+    m_resiDctCoeff = X265_MALLOC(int16_t, MAX_TR_SIZE * MAX_TR_SIZE * 2);
     m_fencDctCoeff = m_resiDctCoeff + (MAX_TR_SIZE * MAX_TR_SIZE);
     m_fencShortBuf = X265_MALLOC(int16_t, MAX_TR_SIZE * MAX_TR_SIZE);
 
diff -r 3ccb20b6c022 -r 4bd21a6f2304 source/common/quant.h
--- a/source/common/quant.h	Mon Oct 27 21:59:30 2014 -0500
+++ b/source/common/quant.h	Tue Oct 28 11:36:22 2014 +0530
@@ -69,8 +69,8 @@
 
     bool               m_useRDOQ;
     int64_t            m_psyRdoqScale;
-    int32_t*           m_resiDctCoeff;
-    int32_t*           m_fencDctCoeff;
+    int16_t*           m_resiDctCoeff;
+    int16_t*           m_fencDctCoeff;
     int16_t*           m_fencShortBuf;
 
     enum { IEP_RATE = 32768 }; /* FIX15 cost of an equal probable bit */
diff -r 3ccb20b6c022 -r 4bd21a6f2304 source/common/vec/dct-sse41.cpp
--- a/source/common/vec/dct-sse41.cpp	Mon Oct 27 21:59:30 2014 -0500
+++ b/source/common/vec/dct-sse41.cpp	Tue Oct 28 11:36:22 2014 +0530
@@ -36,7 +36,7 @@
 using namespace x265;
 
 namespace {
-void dequant_scaling(const int16_t* quantCoef, const int32_t *deQuantCoef, int32_t* coef, int num, int per, int shift)
+void dequant_scaling(const int16_t* quantCoef, const int32_t *deQuantCoef, int16_t* coef, int num, int per, int shift)
 {
     X265_CHECK(num <= 32 * 32, "dequant num too large\n");