[x265] [PATCH] adapt x264 style lambda tables [CHANGES OUTPUTS]

Tue Apr 15 22:22:05 CEST 2014

# HG changeset patch
# User Steve Borho <steve at borho.org>
# Date 1395899136 18000
#      Thu Mar 27 00:45:36 2014 -0500
# Node ID 38ff5a3ce176bbfe999e0b6a1eaf10b61d81965c
# Parent  0b696c7f46f261f66ecebbe3280b17c01165f3d0
adapt x264 style lambda tables [CHANGES OUTPUTS]

initLambda() in TComRom.cpp fills both tables: lambda = 2^(qp/6 - 2) and
lambda2 = scale * lambda^2. The scale factor passed in is currently 0.85, which
is close to x264's value (0.9) and seems to be optimal for HEVC based on our tests.

These lambda tables seem to help most at low QPs, allowing the encoder to get
nearly lossless quality at high bitrates.

diff -r 0b696c7f46f2 -r 38ff5a3ce176 source/Lib/TLibCommon/TComRdCost.h

--- a/source/Lib/TLibCommon/TComRdCost.h	Tue Apr 15 14:07:33 2014 -0500
+++ b/source/Lib/TLibCommon/TComRdCost.h	Thu Mar 27 00:45:36 2014 -0500
@@ -54,10 +54,6 @@
 {
 private:
 
-    double    m_lambda2;
-
-    double    m_lambda;
-
     uint64_t  m_lambdaMotionSSE;  // m_lambda2 w/ 16 bits of fraction
 
     uint64_t  m_lambdaMotionSAD;  // m_lambda w/ 16 bits of fraction
@@ -68,12 +64,10 @@
 
 public:
 
-    void setLambda(double lambda)
+    void setLambda(double lambda2, double lambda)
     {
-        m_lambda2         = lambda;
-        m_lambda          = sqrt(m_lambda2);
-        m_lambdaMotionSAD = (uint64_t)floor(65536.0 * m_lambda);
-        m_lambdaMotionSSE = (uint64_t)floor(65536.0 * m_lambda2);
+        m_lambdaMotionSSE = (uint64_t)floor(65536.0 * lambda2);
+        m_lambdaMotionSAD = (uint64_t)floor(65536.0 * lambda);
     }
 
     void setCbDistortionWeight(double cbDistortionWeight)
@@ -95,8 +89,6 @@
     inline uint32_t scaleChromaDistCb(uint32_t dist)           { return ((dist * m_cbDistortionWeight) + 128) >> 8; }
 
     inline uint32_t scaleChromaDistCr(uint32_t dist)           { return ((dist * m_crDistortionWeight) + 128) >> 8; }
-
-    inline double   getSADLambda() const                       { return m_lambda; }
 };
 }
 //! \}
diff -r 0b696c7f46f2 -r 38ff5a3ce176 source/Lib/TLibCommon/TComRom.cpp
--- a/source/Lib/TLibCommon/TComRom.cpp	Tue Apr 15 14:07:33 2014 -0500
+++ b/source/Lib/TLibCommon/TComRom.cpp	Thu Mar 27 00:45:36 2014 -0500
@@ -126,6 +126,20 @@
     }
 };
 
+double x265_lambda_tab[MAX_MAX_QP + 1];
+double x265_lambda2_tab[MAX_MAX_QP + 1];
+
+static void initLambda(double scale)
+{
+    for (int q = 0; q <= MAX_MAX_QP; q++)
+    {
+        double lambda = pow(2, (double)q / 6 - 2);
+
+        x265_lambda_tab[q]  = lambda;
+        x265_lambda2_tab[q] = pow(lambda, 2) * scale;
+    }
+}
+
 static int initialized /* = 0 */;
 
 // initialize ROM variables
@@ -134,6 +148,8 @@
     if (ATOMIC_CAS32(&initialized, 0, 1) == 1)
         return;
 
+    initLambda(0.85);
+
     int i, c;
 
     // g_aucConvertToBit[ x ]: log2(x/4), if x=4 -> 0, x=8 -> 1, x=16 -> 2, ...
@@ -477,32 +493,6 @@
 const int g_winUnitX[] = { 1, 2, 2, 1 };
 const int g_winUnitY[] = { 1, 2, 1, 1 };
 
-const double x265_lambda2_tab_I[MAX_MAX_QP + 1] =
-{
-    0.012265625, 0.022265625, 0.028052813, 0.035344375, 0.04453125, 0.056105938, 0.070689063, 0.0890625,
-    0.112211563, 0.141377813, 0.178125, 0.224423438, 0.282755938, 0.35625, 0.448846875,
-    0.565511563, 0.7125, 0.89769375, 1.131023125, 1.425, 1.7953875, 2.262046563, 2.85,
-    3.590775, 4.524093125, 5.7, 7.7800125, 10.55621688, 14.25, 19.1508, 25.63652688, 34.2,
-    45.48315, 60.32124, 79.8, 105.3293997, 138.7388519, 182.4, 229.8095994, 289.5419519, 364.8,
-    459.6191991, 579.0839038, 729.6, 919.2383981, 1158.167808, 1459.2, 1838.476796, 2316.335615,
-    2918.4, 3676.953592, 4632.67123, 5836.799769, 7353.906601, 9265.341359, 11673.59815, 14707.81146,
-    18530.68052, 23347.19353, 29415.61942, 37061.35663, 46694.38151, 58831.23184, 74122.70446,
-    93388.75192, 117662.4497, 148245.3913, 186777.4817, 235324.8715, 296490.7474
-};
-
-const double x265_lambda2_non_I[MAX_MAX_QP + 1] =
-{
-    0.05231, 0.060686, 0.07646, 0.096333333, 0.151715667, 0.15292, 0.192666667, 0.242745, 0.382299,
-    0.385333333, 0.485489333, 0.611678333, 0.963333333, 0.970979, 1.223356667, 1.541333333, 2.427447667,
-    2.446714, 3.082666667, 3.883916667, 6.116785, 6.165333333, 7.767833333, 9.786856667, 15.41333333,
-    16.57137733, 22.183542, 29.5936, 39.357022, 52.19656867, 69.05173333, 91.14257667, 150.0651347,
-    157.8325333, 207.1422197, 271.4221573, 443.904, 447.4271947, 563.722941, 710.2464, 1118.567987,
-    1127.445883, 1420.4928, 1789.70878, 2818.614706, 2840.9856, 3579.41756, 4509.78353, 7102.464,
-    7158.83512, 9019.56706, 11363.9424, 14317.66967, 18039.13269, 22727.8821, 28635.33594, 36078.2611,
-    45455.7588, 57270.66508, 72156.51362, 90911.5068, 114541.3166, 144313.0101, 181822.992, 229082.6059,
-    288625.9859, 363645.9408, 458165.1574, 577251.9032, 727291.7952
-};
-
 const uint8_t g_lpsTable[64][4] =
 {
     { 128, 176, 208, 240 },
diff -r 0b696c7f46f2 -r 38ff5a3ce176 source/Lib/TLibCommon/TComRom.h
--- a/source/Lib/TLibCommon/TComRom.h	Tue Apr 15 14:07:33 2014 -0500
+++ b/source/Lib/TLibCommon/TComRom.h	Thu Mar 27 00:45:36 2014 -0500
@@ -266,13 +266,14 @@
 extern const uint32_t g_scalingListSize[SCALING_LIST_SIZE_NUM];
 extern const uint32_t g_scalingListSizeX[SCALING_LIST_SIZE_NUM];
 extern const uint32_t g_scalingListNum[SCALING_LIST_SIZE_NUM];
-//! \}
 
 // Map Luma samples to chroma samples
 extern const int g_winUnitX[MAX_CHROMA_FORMAT_IDC + 1];
 extern const int g_winUnitY[MAX_CHROMA_FORMAT_IDC + 1];
-extern const double x265_lambda2_tab_I[MAX_MAX_QP + 1];
-extern const double x265_lambda2_non_I[MAX_MAX_QP + 1];
+
+extern double x265_lambda_tab[MAX_MAX_QP + 1];
+extern double x265_lambda2_tab[MAX_MAX_QP + 1];
+
 // CABAC tables
 extern const uint8_t g_lpsTable[64][4];
 extern const uint8_t x265_exp2_lut[64];
diff -r 0b696c7f46f2 -r 38ff5a3ce176 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp	Tue Apr 15 14:07:33 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp	Thu Mar 27 00:45:36 2014 -0500
@@ -150,10 +150,16 @@
     return false;
 }
 
-void TEncSearch::setQPLambda(int QP, double lambdaLuma, double lambdaChroma)
+void TEncSearch::setQP(int qp, double crWeight, double cbWeight)
 {
-    m_trQuant->setLambda(lambdaLuma, lambdaChroma);
-    m_me.setQP(QP);
+    double lambda2 = x265_lambda2_tab[qp];
+    double chromaLambda = lambda2 / crWeight;
+
+    m_me.setQP(qp);
+    m_trQuant->setLambda(lambda2, chromaLambda);
+    m_rdCost->setLambda(lambda2, x265_lambda_tab[qp]);
+    m_rdCost->setCbDistortionWeight(cbWeight);
+    m_rdCost->setCrDistortionWeight(crWeight);
 }
 
 void TEncSearch::xEncSubdivCbfQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t height, bool bLuma, bool bChroma)
diff -r 0b696c7f46f2 -r 38ff5a3ce176 source/Lib/TLibEncoder/TEncSearch.h
--- a/source/Lib/TLibEncoder/TEncSearch.h	Tue Apr 15 14:07:33 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.h	Thu Mar 27 00:45:36 2014 -0500
@@ -142,7 +142,7 @@
 
     void setRDGoOnSbacCoder(TEncSbac* rdGoOnSbacCoder) { m_rdGoOnSbacCoder = rdGoOnSbacCoder; }
 
-    void setQPLambda(int QP, double lambdaLuma, double lambdaChroma);
+    void setQP(int QP, double crWeight, double cbWeight);
 
     TEncSearch();
     virtual ~TEncSearch();
diff -r 0b696c7f46f2 -r 38ff5a3ce176 source/encoder/bitcost.cpp
--- a/source/encoder/bitcost.cpp	Tue Apr 15 14:07:33 2014 -0500
+++ b/source/encoder/bitcost.cpp	Thu Mar 27 00:45:36 2014 -0500
@@ -42,7 +42,7 @@
 
             CalculateLogs();
             s_costs[qp] = new uint16_t[2 * BC_MAX_MV] + BC_MAX_MV;
-            double lambda = sqrt(x265_lambda2_non_I[qp]);
+            double lambda = x265_lambda_tab[qp];
 
             // estimate same cost for negative and positive MVD
             for (int i = 0; i < BC_MAX_MV; i++)
diff -r 0b696c7f46f2 -r 38ff5a3ce176 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Tue Apr 15 14:07:33 2014 -0500
+++ b/source/encoder/frameencoder.cpp	Thu Mar 27 00:45:36 2014 -0500
@@ -329,36 +329,20 @@
 void FrameEncoder::setLambda(int qp, int row)
 {
     TComSlice*  slice = m_pic->getSlice();
-    TComPicYuv* fenc  = slice->getPic()->getPicYuvOrg();
     int         chFmt = slice->getSPS()->getChromaFormatIdc();
 
-    double lambda = 0;
-
-    if (m_pic->getSlice()->getSliceType() == I_SLICE)
-    {
-        lambda = X265_MAX(1, x265_lambda2_tab_I[qp]);
-    }
-    else
-    {
-        lambda = X265_MAX(1, x265_lambda2_non_I[qp]);
-    }
-
     // for RDO
     // in RdCost there is only one lambda because the luma and chroma bits are not separated,
     // instead we weight the distortion of chroma.
     int chromaQPOffset = slice->getPPS()->getChromaCbQpOffset() + slice->getSliceQpDeltaCb();
     int qpc = Clip3(0, MAX_MAX_QP, qp + chromaQPOffset);
     double cbWeight = pow(2.0, (qp - g_chromaScale[chFmt][qpc]) / 3.0); // takes into account of the chroma qp mapping and chroma qp Offset
+
     chromaQPOffset = slice->getPPS()->getChromaCrQpOffset() + slice->getSliceQpDeltaCr();
     qpc = Clip3(0, MAX_MAX_QP, qp + chromaQPOffset);
     double crWeight = pow(2.0, (qp - g_chromaScale[chFmt][qpc]) / 3.0); // takes into account of the chroma qp mapping and chroma qp Offset
-    double chromaLambda = lambda / crWeight;
 
-    m_rows[row].m_search.setQPLambda(qp, lambda, chromaLambda);
-    m_rows[row].m_search.m_me.setSourcePlane(fenc->getLumaAddr(), fenc->getStride());
-    m_rows[row].m_rdCost.setLambda(lambda);
-    m_rows[row].m_rdCost.setCbDistortionWeight(cbWeight);
-    m_rows[row].m_rdCost.setCrDistortionWeight(crWeight);
+    m_rows[row].m_search.setQP(qp, crWeight, cbWeight);
 }
 
 void FrameEncoder::compressFrame()
@@ -392,15 +376,6 @@
         m_nalCount += getStreamHeaders(m_nalList + m_nalCount);
 
     int qp = slice->getSliceQp();
-    double lambda = 0;
-    if (slice->getSliceType() == I_SLICE)
-    {
-        lambda = X265_MAX(1, x265_lambda2_tab_I[qp]);
-    }
-    else
-    {
-        lambda = X265_MAX(1, x265_lambda2_non_I[qp]);
-    }
 
     // for RDO
     // in RdCost there is only one lambda because the luma and chroma bits are not separated,
@@ -409,9 +384,12 @@
     int chromaQPOffset = slice->getPPS()->getChromaCbQpOffset() + slice->getSliceQpDeltaCb();
     qpc = Clip3(0, MAX_MAX_QP, qp + chromaQPOffset);
     double cbWeight = pow(2.0, (qp - g_chromaScale[chFmt][qpc]) / 3.0); // takes into account of the chroma qp mapping and chroma qp Offset
+
     chromaQPOffset = slice->getPPS()->getChromaCrQpOffset() + slice->getSliceQpDeltaCr();
     qpc = Clip3(0, MAX_MAX_QP, qp + chromaQPOffset);
     double crWeight = pow(2.0, (qp - g_chromaScale[chFmt][qpc]) / 3.0); // takes into account of the chroma qp mapping and chroma qp Offset
+
+    double lambda = x265_lambda2_tab[qp];
     double chromaLambda = lambda / crWeight;
 
     // NOTE: set SAO lambda every Frame
@@ -421,11 +399,8 @@
     TComPicYuv *fenc = slice->getPic()->getPicYuvOrg();
     for (int i = 0; i < m_numRows; i++)
     {
-        m_rows[i].m_search.setQPLambda(qp, lambda, chromaLambda);
         m_rows[i].m_search.m_me.setSourcePlane(fenc->getLumaAddr(), fenc->getStride());
-        m_rows[i].m_rdCost.setLambda(lambda);
-        m_rows[i].m_rdCost.setCbDistortionWeight(cbWeight);
-        m_rows[i].m_rdCost.setCrDistortionWeight(crWeight);
+        m_rows[i].m_search.setQP(qp, crWeight, cbWeight);
     }
 
     m_frameFilter.m_sao.lumaLambda = lambda;
diff -r 0b696c7f46f2 -r 38ff5a3ce176 source/encoder/slicetype.h
--- a/source/encoder/slicetype.h	Tue Apr 15 14:07:33 2014 -0500
+++ b/source/encoder/slicetype.h	Thu Mar 27 00:45:36 2014 -0500
@@ -70,7 +70,7 @@
         me.setSubpelRefine(1);
         predictions = X265_MALLOC(pixel, 35 * 8 * 8);
         merange = 16;
-        lookAheadLambda = (int)x265_lambda2_non_I[X265_LOOKAHEAD_QP];
+        lookAheadLambda = (int)x265_lambda_tab[X265_LOOKAHEAD_QP];
     }
 
     ~EstimateRow()
diff -r 0b696c7f46f2 -r 38ff5a3ce176 source/encoder/weightPrediction.cpp
--- a/source/encoder/weightPrediction.cpp	Tue Apr 15 14:07:33 2014 -0500
+++ b/source/encoder/weightPrediction.cpp	Thu Mar 27 00:45:36 2014 -0500
@@ -238,7 +238,7 @@
         return;
     pixel *weightTemp = mcbuf + fencYuv->getStride() * fencYuv->getHeight();
 
-    int lambda = (int)x265_lambda2_non_I[X265_LOOKAHEAD_QP];
+    int lambda = (int)x265_lambda_tab[X265_LOOKAHEAD_QP];
     int curPoc = slice.getPOC();
     const float epsilon = 1.f / 128.f;