[x265] [PATCH] quant path cleanup

Tue Sep 2 16:14:34 CEST 2014

# HG changeset patch
# User Praveen Tiwari
# Date 1409663972 -19800
# Node ID b2890a7555ac555b547622d12c47d8627910f721
# Parent  40e242e316b962116d64fb43444029c5c6546484
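quant path cleanup

Narrow coeff_t from int32_t to int16_t so that coefficient buffers can be
handed directly to the int16_t primitives (copy_cnt, copy_shl, quant,
dequant_normal, dequant_scaling, count_nonzero), and remove the temporary
clip-and-copy loops in quant.cpp and entropy.cpp that converted between the
two widths. m_resiDctCoeff, m_fencDctCoeff and the denoiseDct primitive are
now declared as int32_t explicitly so they keep their previous width.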

diff -r 40e242e316b9 -r b2890a7555ac source/common/common.h
--- a/source/common/common.h	Tue Sep 02 18:15:36 2014 +0530
+++ b/source/common/common.h	Tue Sep 02 18:49:32 2014 +0530
@@ -124,7 +124,7 @@
     return std::min<T>(std::max<T>(minVal, a), maxVal);
 }
 
-typedef int32_t  coeff_t;      // transform coefficient
+typedef int16_t  coeff_t;      // transform coefficient
 
 #define X265_MIN(a, b) ((a) < (b) ? (a) : (b))
 #define X265_MAX(a, b) ((a) > (b) ? (a) : (b))
diff -r 40e242e316b9 -r b2890a7555ac source/common/dct.cpp
--- a/source/common/dct.cpp	Tue Sep 02 18:15:36 2014 +0530
+++ b/source/common/dct.cpp	Tue Sep 02 18:49:32 2014 +0530
@@ -842,7 +842,7 @@
     return numSig;
 }
 
-void denoiseDct_c(coeff_t* dctCoef, uint32_t* resSum, uint16_t* offset, int numCoeff)
+void denoiseDct_c(int32_t* dctCoef, uint32_t* resSum, uint16_t* offset, int numCoeff)
 {
     for (int i = 0; i < numCoeff; i++)
     {
diff -r 40e242e316b9 -r b2890a7555ac source/common/primitives.h
--- a/source/common/primitives.h	Tue Sep 02 18:15:36 2014 +0530
+++ b/source/common/primitives.h	Tue Sep 02 18:49:32 2014 +0530
@@ -159,7 +159,7 @@
 
 typedef void (*dct_t)(int16_t *src, int32_t *dst, intptr_t stride);
 typedef void (*idct_t)(int32_t *src, int16_t *dst, intptr_t stride);
-typedef void (*denoiseDct_t)(coeff_t* dctCoef, uint32_t* resSum, uint16_t* offset, int numCoeff);
+typedef void (*denoiseDct_t)(int32_t* dctCoef, uint32_t* resSum, uint16_t* offset, int numCoeff);
 
 typedef void (*calcresidual_t)(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
 typedef void (*calcrecon_t)(pixel* pred, int16_t* residual, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred);
diff -r 40e242e316b9 -r b2890a7555ac source/common/quant.cpp
--- a/source/common/quant.cpp	Tue Sep 02 18:15:36 2014 +0530
+++ b/source/common/quant.cpp	Tue Sep 02 18:49:32 2014 +0530
@@ -163,7 +163,7 @@
     m_useRDOQ = useRDOQ;
     m_psyRdoqScale = (int64_t)(psyScale * 256.0);
     m_scalingList = &scalingList;
-    m_resiDctCoeff = X265_MALLOC(coeff_t, MAX_TR_SIZE * MAX_TR_SIZE * 2);
+    m_resiDctCoeff = X265_MALLOC(int32_t, MAX_TR_SIZE * MAX_TR_SIZE * 2);
     m_fencDctCoeff = m_resiDctCoeff + (MAX_TR_SIZE * MAX_TR_SIZE);
     m_fencShortBuf = X265_MALLOC(int16_t, MAX_TR_SIZE * MAX_TR_SIZE);
     
@@ -315,23 +315,7 @@
     if (cu->getCUTransquantBypass(absPartIdx))
     {
         X265_CHECK(log2TrSize >= 2 && log2TrSize <= 5, "Block size mistake!\n");
-        /* This section of code is to safely convert int32_t coefficients to int16_t, once the caller function is
-         * optimize to take coefficients as int16_t*, it will be cleanse.*/
-        int numCoeff = 1 << log2TrSize * 2;
-        ALIGN_VAR_16(int16_t, qCoeff[32 * 32]);
-        for (int i = 0; i < numCoeff; i++)
-        {
-             qCoeff[i] = (int16_t)Clip3(-32768, 32767, coeff[i]);
-        }
-        int numSign = primitives.copy_cnt[log2TrSize - 2](qCoeff, residual, stride);
-
-        /* This section of code is to safely convert int16_t coefficients to int32_t, once the caller function is
-         * optimize to take coefficients as int16_t*, it will be cleanse.*/
-        for (int i = 0; i < numCoeff; i++)
-        {
-            coeff[i] = qCoeff[i];
-        }
-        return numSign;
+        return primitives.copy_cnt[log2TrSize - 2](coeff, residual, stride);
     }
 
     bool isLuma  = ttype == TEXT_LUMA;
@@ -385,16 +369,7 @@
 
     if (m_useRDOQ)
     {
-        /* This section of code is to safely convert int32_t coefficients to int16_t, once the caller function is
-         * optimize to take coefficients as int16_t*, it will be cleanse.*/
-        int numCoeff = 1 << log2TrSize * 2;
-        assert(numCoeff <= 1024);
-        ALIGN_VAR_16(int16_t, qCoeff[32 * 32]);
-        for (int i = 0; i < numCoeff; i++)
-        {
-            qCoeff[i] = (int16_t)Clip3(-32768, 32767, coeff[i]);
-        }
-        return rdoQuant(cu, qCoeff, log2TrSize, ttype, absPartIdx, usePsy);
+        return rdoQuant(cu, coeff, log2TrSize, ttype, absPartIdx, usePsy);
     }
     else
     {
@@ -409,42 +384,13 @@
         int add = (cu->m_slice->m_sliceType == I_SLICE ? 171 : 85) << (qbits - 9);
         int numCoeff = 1 << log2TrSize * 2;
 
-        /* This section of code is to safely convert int32_t coefficients to int16_t, once the caller function is
-         * optimize to take coefficients as int16_t*, it will be cleanse.*/
-        ALIGN_VAR_16(int16_t, qCoeff[32 * 32]);
-        for (int i = 0; i < numCoeff; i++)
-        {
-             qCoeff[i] = (int16_t)Clip3(-32768, 32767, coeff[i]);
-        }
-        uint32_t numSig = primitives.quant(m_resiDctCoeff, quantCoeff, deltaU, qCoeff, qbits, add, numCoeff);
-
-        /* This section of code is to safely convert int32_t coefficients to int16_t, once the caller function is
-         * optimize to take coefficients as int16_t*, it will be cleanse.*/
-        for (int i = 0; i < numCoeff; i++)
-        {
-             coeff[i] = qCoeff[i];
-        }
+        uint32_t numSig = primitives.quant(m_resiDctCoeff, quantCoeff, deltaU, coeff, qbits, add, numCoeff);
 
         if (numSig >= 2 && cu->m_slice->m_pps->bSignHideEnabled)
         {
-           /* This section of code is to safely convert int32_t coefficients to int16_t, once the caller function is
-            * optimize to take coefficients as int16_t*, it will be cleanse.*/
-           ALIGN_VAR_16(int16_t, qCoeff[32 * 32]);
-           for (int i = 0; i < numCoeff; i++)
-           {
-               qCoeff[i] = (int16_t)Clip3(-32768, 32767, coeff[i]);
-           }
             TUEntropyCodingParameters codeParams;
             cu->getTUEntropyCodingParameters(codeParams, absPartIdx, log2TrSize, isLuma);
-            uint32_t numSign = signBitHidingHDQ(qCoeff, deltaU, numSig, codeParams);
-
-            /* This section of code is to safely convert int32_t coefficients to int16_t, once the caller function is
-             * optimize to take coefficients as int16_t*, it will be cleanse.*/
-             for (int i = 0; i < numCoeff; i++)
-             {
-                  coeff[i] = qCoeff[i];
-             }
-             return numSign;
+            return signBitHidingHDQ(coeff, deltaU, numSig, codeParams);
         }
         else
             return numSig;
@@ -456,15 +402,7 @@
 {
     if (transQuantBypass)
     {
-        int numCoeff = (1 << (log2TrSize << 1));
-        assert(numCoeff <= 1024);
-        ALIGN_VAR_16(int16_t, qCoeff[1024]);
-        for (int i = 0; i < numCoeff; i++)
-        {
-            qCoeff[i] = (int16_t)Clip3(-32768, 32767, coeff[i]);
-        }
-
-        primitives.copy_shl[log2TrSize - 2](residual, qCoeff, stride, 0);
+        primitives.copy_shl[log2TrSize - 2](residual, coeff, stride, 0);
         return;
     }
 
@@ -475,25 +413,16 @@
     int shift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - transformShift;
     int numCoeff = 1 << log2TrSize * 2;
 
-    /* This section of code is to safely convert int32_t coefficients to int16_t, once the caller function is
-     * optimize to take coefficients as int16_t*, it will be cleanse.*/
-    assert(numCoeff <= 1024);
-    ALIGN_VAR_16(int16_t, qCoeff[32 * 32]);
-    for (int i = 0; i < numCoeff; i++)
-    {
-        qCoeff[i] = (int16_t)Clip3(-32768, 32767, coeff[i]);
-    }
-
     if (m_scalingList->m_bEnabled)
     {
         int scalingListType = (bIntra ? 0 : 3) + ttype;
         int32_t *dequantCoef = m_scalingList->m_dequantCoef[log2TrSize - 2][scalingListType][rem];
-        primitives.dequant_scaling(qCoeff, dequantCoef, m_resiDctCoeff, numCoeff, per, shift);
+        primitives.dequant_scaling(coeff, dequantCoef, m_resiDctCoeff, numCoeff, per, shift);
     }
     else
     {
         int scale = m_scalingList->s_invQuantScales[rem] << per;
-        primitives.dequant_normal(qCoeff, m_resiDctCoeff, numCoeff, scale, shift);
+        primitives.dequant_normal(coeff, m_resiDctCoeff, numCoeff, scale, shift);
     }
 
     if (useTransformSkip)
diff -r 40e242e316b9 -r b2890a7555ac source/common/quant.h
--- a/source/common/quant.h	Tue Sep 02 18:15:36 2014 +0530
+++ b/source/common/quant.h	Tue Sep 02 18:49:32 2014 +0530
@@ -74,8 +74,8 @@
 
     bool               m_useRDOQ;
     int64_t            m_psyRdoqScale;
-    coeff_t*           m_resiDctCoeff;
-    coeff_t*           m_fencDctCoeff;
+    int32_t*           m_resiDctCoeff;
+    int32_t*           m_fencDctCoeff;
     int16_t*           m_fencShortBuf;
 
     enum { IEP_RATE = 32768 }; /* FIX15 cost of an equal probable bit */
diff -r 40e242e316b9 -r b2890a7555ac source/encoder/entropy.cpp
--- a/source/encoder/entropy.cpp	Tue Sep 02 18:15:36 2014 +0530
+++ b/source/encoder/entropy.cpp	Tue Sep 02 18:49:32 2014 +0530
@@ -1598,16 +1598,8 @@
 {
     uint32_t trSize = 1 << log2TrSize;
 
-    int numCoeff = (1 << (log2TrSize << 1));
-    assert(numCoeff <= 1024);
-    ALIGN_VAR_16(int16_t, qCoeff[1024]);
-    for (int i = 0; i < numCoeff; i++)
-    {
-        qCoeff[i] = (int16_t)Clip3(-32768, 32767, coeff[i]);
-    }
-
     // compute number of significant coefficients
-    uint32_t numSig = primitives.count_nonzero(qCoeff, (1 << (log2TrSize << 1)));
+    uint32_t numSig = primitives.count_nonzero(coeff, (1 << (log2TrSize << 1)));
 
     X265_CHECK(numSig > 0, "cbf check fail\n");