[x265] [PATCH RFC2] adapt psy-rd from x264

Wed May 14 23:02:10 CEST 2014

# HG changeset patch
# User Sumalatha Polureddy<sumalatha at multicorewareinc.com>
# Date 1399547936 -19800
#      Thu May 08 16:48:56 2014 +0530
# Node ID 53c9b4e2e16e5e60f3d7465cb858acdb54fbc929
# Parent  890b34705c95e934a5e47f37b08d46e4e367157e
adapt psy-rd from x264

In this initial implementation, we only use sa8d to estimate the energy of the
source and reconstructed blocks. This version removes the DC component of the
sa8d-measured energy and updates more RDO checks to use psy-rd.

diff -r 890b34705c95 -r 53c9b4e2e16e doc/reST/cli.rst

--- a/doc/reST/cli.rst	Wed May 14 14:13:08 2014 +0900
+++ b/doc/reST/cli.rst	Thu May 08 16:48:56 2014 +0530
@@ -619,6 +619,12 @@
 
 	**Range of values:** 0: least .. 6: full RDO analysis
 
+.. option:: --psy-rd <float>
+
+	Influence rate distortion optimizations to preserve the energy of
+	the source image in the encoded image, at the expense of compression
+	efficiency. Only takes effect at --rd 4 or higher. Range 0 .. 2.0. Default 1.0
+
 .. option:: --signhide, --no-signhide
 
 	Hide sign bit of one coeff per TU (rdo). Default enabled
diff -r 890b34705c95 -r 53c9b4e2e16e source/Lib/TLibCommon/TComRdCost.h
--- a/source/Lib/TLibCommon/TComRdCost.h	Wed May 14 14:13:08 2014 +0900
+++ b/source/Lib/TLibCommon/TComRdCost.h	Thu May 08 16:48:56 2014 +0530
@@ -62,8 +62,12 @@
 
     uint64_t  m_crDistortionWeight;
 
+    uint64_t  m_psyRdScale;            // Psy RD strength w/ 8 bits of fraction
+
 public:
 
+    static const pixel zeroPel[MAX_CU_SIZE * MAX_CU_SIZE];
+
     void setLambda(double lambda2, double lambda)
     {
         m_lambdaMotionSSE = (uint64_t)floor(256.0 * lambda2);
@@ -80,6 +84,16 @@
         m_crDistortionWeight = (uint64_t)floor(256.0 * crDistortionWeight);
     }
 
+    void setPsyRdScale(double scale) // psy-rd strength, kept as fixed point with 8 fractional bits
+    {
+        m_psyRdScale = static_cast<uint64_t>(floor(scale * 256.0));
+    }
+
+    inline bool psyRdEnabled() const // true when the stored psy-rd scale is non-zero
+    {
+        return m_psyRdScale != 0;
+    }
+
     inline uint64_t calcRdCost(uint32_t distortion, uint32_t bits)
     {
         X265_CHECK(abs(distortion + ((bits * m_lambdaMotionSSE + 128) >> 8)) -
@@ -88,6 +102,31 @@
         return distortion + ((bits * m_lambdaMotionSSE + 128) >> 8);
     }
 
+    /* Return the absolute difference in energy (sa8d minus a DC estimate) between the source and recon blocks.
+     * size indexes primitives.sa8d[]; callers pass g_convertToBit[cuSize] (assumed log2(dim) - 2 -- confirm) */
+    inline uint32_t psyCost(int size, pixel *source, intptr_t sstride, pixel *recon, intptr_t rstride)
+    {
+        int dim = 1 << (size + 2); // square block: width == height, in pixels
+        int part = partitionFromSizes(dim, dim);
+        int dc = 2 * primitives.sad[part](source, sstride, (pixel*)zeroPel, MAX_CU_SIZE) / (dim * dim);
+        int sEnergy = primitives.sa8d[size](source, sstride, (pixel*)zeroPel, MAX_CU_SIZE) - dc;

+        dc = 2 * primitives.sad[part](recon, rstride, (pixel*)zeroPel, MAX_CU_SIZE) / (dim * dim);
+        int rEnergy = primitives.sa8d[size](recon, rstride, (pixel*)zeroPel, MAX_CU_SIZE) - dc;

+        return abs(sEnergy - rEnergy);
+    }
+
+    /* return the RD cost of this prediction; psycost is scaled by m_psyRdScale and added to bits before lambda */
+    inline uint64_t calcPsyRdCost(uint32_t distortion, uint32_t bits, uint32_t psycost)
+    {
+        uint64_t tot = bits + (((psycost * m_psyRdScale) + 128) >> 8); // drop the 8 fractional bits, rounding
+        X265_CHECK(abs(distortion + ((tot * m_lambdaMotionSSE + 128) >> 8)) -
+                      (distortion + (float)tot * m_lambdaMotionSSE / 256.0) < 2,
+                   "calcPsyRdCost wrap detected dist: %d, tot %d, lambda: %d\n", distortion, (int)tot, (int)m_lambdaMotionSSE);
+        return distortion + ((tot * m_lambdaMotionSSE + 128) >> 8);
+    }
+
     inline uint64_t calcRdSADCost(uint32_t sadCost, uint32_t bits)
     {
         X265_CHECK(abs(sadCost + ((bits * m_lambdaMotionSAD + 128) >> 8)) -
diff -r 890b34705c95 -r 53c9b4e2e16e source/Lib/TLibEncoder/TEncCu.cpp
--- a/source/Lib/TLibEncoder/TEncCu.cpp	Wed May 14 14:13:08 2014 +0900
+++ b/source/Lib/TLibEncoder/TEncCu.cpp	Thu May 08 16:48:56 2014 +0530
@@ -1392,9 +1392,20 @@
     m_entropyCoder->encodeCoeff(outTempCU, 0, depth, outTempCU->getCUSize(0), outTempCU->getCUSize(0), bCodeDQP);
 
     m_rdGoOnSbacCoder->store(m_rdSbacCoders[depth][CI_TEMP_BEST]);
+    outTempCU->m_totalBits = m_entropyCoder->getNumberOfWrittenBits();
 
-    outTempCU->m_totalBits = m_entropyCoder->getNumberOfWrittenBits();
-    outTempCU->m_totalCost = m_rdCost->calcRdCost(outTempCU->m_totalDistortion, outTempCU->m_totalBits);
+    if (m_rdCost->psyRdEnabled())
+    {
+        int part = g_convertToBit[outTempCU->getCUSize(0)];
+        TComPicYuv *recon = outTempCU->getPic()->getPicYuvRec();
+        uint32_t psyRdCost = m_rdCost->psyCost(part, m_origYuv[depth]->getLumaAddr(), m_origYuv[depth]->getStride(),
+                                                     recon->getLumaAddr(outTempCU->getAddr()), recon->getStride());
+        outTempCU->m_totalCost = m_rdCost->calcPsyRdCost(outTempCU->m_totalDistortion, outTempCU->m_totalBits, psyRdCost);
+    }
+    else
+    {
+        outTempCU->m_totalCost = m_rdCost->calcRdCost(outTempCU->m_totalDistortion, outTempCU->m_totalBits);
+    }
 
     xCheckDQP(outTempCU);
     xCheckBestMode(outBestCU, outTempCU, depth);
@@ -1429,12 +1440,21 @@
     // Encode Coefficients
     bool bCodeDQP = getdQPFlag();
     m_entropyCoder->encodeCoeff(outTempCU, 0, depth, outTempCU->getCUSize(0), outTempCU->getCUSize(0), bCodeDQP);
+    m_rdGoOnSbacCoder->store(m_rdSbacCoders[depth][CI_TEMP_BEST]);
+    outTempCU->m_totalBits = m_entropyCoder->getNumberOfWrittenBits();
 
-    m_rdGoOnSbacCoder->store(m_rdSbacCoders[depth][CI_TEMP_BEST]);
-
-    outTempCU->m_totalBits = m_entropyCoder->getNumberOfWrittenBits();
-    outTempCU->m_totalCost = m_rdCost->calcRdCost(outTempCU->m_totalDistortion, outTempCU->m_totalBits);
-
+    if (m_rdCost->psyRdEnabled())
+    {
+        int part = g_convertToBit[outTempCU->getCUSize(0)];
+        TComPicYuv *recon = outTempCU->getPic()->getPicYuvRec();
+        uint32_t psyRdCost = m_rdCost->psyCost(part, m_origYuv[depth]->getLumaAddr(), m_origYuv[depth]->getStride(),
+            recon->getLumaAddr(outTempCU->getAddr()), recon->getStride());
+        outTempCU->m_totalCost = m_rdCost->calcPsyRdCost(outTempCU->m_totalDistortion, outTempCU->m_totalBits, psyRdCost);
+    }
+    else
+    {
+        outTempCU->m_totalCost = m_rdCost->calcRdCost(outTempCU->m_totalDistortion, outTempCU->m_totalBits);
+    }
     xCheckDQP(outTempCU);
     xCheckBestMode(outBestCU, outTempCU, depth);
 }
diff -r 890b34705c95 -r 53c9b4e2e16e source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp	Wed May 14 14:13:08 2014 +0900
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp	Thu May 08 16:48:56 2014 +0530
@@ -47,8 +47,7 @@
 
 using namespace x265;
 
-//! \ingroup TLibEncoder
-//! \{
+ALIGN_VAR_32(const pixel, TComRdCost::zeroPel[MAX_CU_SIZE * MAX_CU_SIZE]) = { 0 };
 
 TEncSearch::TEncSearch()
 {
@@ -2682,7 +2681,17 @@
 
         cu->m_totalBits       = bits;
         cu->m_totalDistortion = distortion;
-        cu->m_totalCost       = m_rdCost->calcRdCost(distortion, bits);
+        if (m_rdCost->psyRdEnabled())
+        {
+            int size = g_convertToBit[cu->getCUSize(0)];
+            uint32_t psyRdCost = m_rdCost->psyCost(size, fencYuv->getLumaAddr(), fencYuv->getStride(),
+                                                         outReconYuv->getLumaAddr(), outReconYuv->getStride());
+            cu->m_totalCost = m_rdCost->calcPsyRdCost(cu->m_totalDistortion, cu->m_totalBits, psyRdCost);
+        }
+        else
+        {
+            cu->m_totalCost = m_rdCost->calcRdCost(cu->m_totalDistortion, cu->m_totalBits);
+        }
 
         m_rdGoOnSbacCoder->store(m_rdSbacCoders[cu->getDepth(0)][CI_TEMP_BEST]);
 
@@ -2767,8 +2776,17 @@
     part = partitionFromSizes(width >> cu->getHorzChromaShift(), height >> cu->getVertChromaShift());
     bdist += m_rdCost->scaleChromaDistCb(primitives.sse_pp[part](fencYuv->getCbAddr(), fencYuv->getCStride(), outReconYuv->getCbAddr(), outReconYuv->getCStride()));
     bdist += m_rdCost->scaleChromaDistCr(primitives.sse_pp[part](fencYuv->getCrAddr(), fencYuv->getCStride(), outReconYuv->getCrAddr(), outReconYuv->getCStride()));
-    bcost = m_rdCost->calcRdCost(bdist, bestBits);
-
+    if (m_rdCost->psyRdEnabled())
+    {
+        int size = g_convertToBit[cu->getCUSize(0)];
+        uint32_t psyRdCost = m_rdCost->psyCost(size, fencYuv->getLumaAddr(), fencYuv->getStride(),
+            outReconYuv->getLumaAddr(), outReconYuv->getStride());
+        bcost = m_rdCost->calcPsyRdCost(bdist, bestBits, psyRdCost);
+    }
+    else
+    {
+        bcost = m_rdCost->calcRdCost(bdist, bestBits);
+    }
     cu->m_totalBits       = bestBits;
     cu->m_totalDistortion = bdist;
     cu->m_totalCost       = bcost;
@@ -3153,8 +3171,6 @@
 
         const uint32_t numSamplesLuma = 1 << (trSizeLog2 << 1);
 
-        ALIGN_VAR_32(static const pixel, zeroPel[MAX_CU_SIZE * MAX_CU_SIZE]) = { 0 };
-
         for (uint32_t subTUIndex = 0; subTUIndex < 2; subTUIndex++)
         {
             minCost[TEXT_LUMA][subTUIndex]     = MAX_INT64;
@@ -3163,7 +3179,7 @@
         }
 
         int partSize = partitionFromSizes(trWidth, trHeight);
-        uint32_t distY = primitives.sse_sp[partSize](resiYuv->getLumaAddr(absTUPartIdx), resiYuv->m_width, (pixel*)zeroPel, trWidth);
+        uint32_t distY = primitives.sse_sp[partSize](resiYuv->getLumaAddr(absTUPartIdx), resiYuv->m_width, (pixel*)TComRdCost::zeroPel, trWidth);
 
         if (outZeroDist)
         {
@@ -3247,7 +3263,7 @@
             {
                 uint32_t subTUBufferOffset = widthC * heightC * tuIterator.m_section;
 
-                distU = m_rdCost->scaleChromaDistCb(primitives.sse_sp[partSizeC](resiYuv->getCbAddr(tuIterator.m_absPartIdxTURelCU), resiYuv->m_cwidth, (pixel*)zeroPel, widthC));
+                distU = m_rdCost->scaleChromaDistCb(primitives.sse_sp[partSizeC](resiYuv->getCbAddr(tuIterator.m_absPartIdxTURelCU), resiYuv->m_cwidth, (pixel*)TComRdCost::zeroPel, widthC));
 
                 if (outZeroDist)
                 {
@@ -3317,7 +3333,7 @@
                     primitives.blockfill_s[(int)g_convertToBit[widthC]](ptr, stride, 0);
                 }
 
-                distV = m_rdCost->scaleChromaDistCr(primitives.sse_sp[partSizeC](resiYuv->getCrAddr(tuIterator.m_absPartIdxTURelCU), resiYuv->m_cwidth, (pixel*)zeroPel, widthC));
+                distV = m_rdCost->scaleChromaDistCr(primitives.sse_sp[partSizeC](resiYuv->getCrAddr(tuIterator.m_absPartIdxTURelCU), resiYuv->m_cwidth, (pixel*)TComRdCost::zeroPel, widthC));
                 if (outZeroDist)
                 {
                     *outZeroDist += distV;
diff -r 890b34705c95 -r 53c9b4e2e16e source/common/param.cpp
--- a/source/common/param.cpp	Wed May 14 14:13:08 2014 +0900
+++ b/source/common/param.cpp	Thu May 08 16:48:56 2014 +0530
@@ -156,6 +156,7 @@
     param->cbQpOffset = 0;
     param->crQpOffset = 0;
     param->rdPenalty = 0;
+    param->psyRd = 1.0;
 
     /* Rate control options */
     param->rc.vbvMaxBitrate = 0;
@@ -588,6 +589,7 @@
     OPT("cbqpoffs") p->cbQpOffset = atoi(value);
     OPT("crqpoffs") p->crQpOffset = atoi(value);
     OPT("rd") p->rdLevel = atoi(value);
+    OPT("psy-rd") p->psyRd = atof(value);
     OPT("signhide") p->bEnableSignHiding = atobool(value);
     OPT("lft") p->bEnableLoopFilter = atobool(value);
     OPT("sao") p->bEnableSAO = atobool(value);
@@ -915,7 +917,7 @@
           "Aq-Mode is out of range");
     CHECK(param->rc.aqStrength < 0 || param->rc.aqStrength > 3,
           "Aq-Strength is out of range");
-
+    CHECK(param->psyRd < 0 || 2.0 < param->psyRd, "Psy-rd strength must be between 0 and 2.0");
     CHECK(param->bEnableWavefront < 0, "WaveFrontSynchro cannot be negative");
     CHECK((param->vui.aspectRatioIdc < 0
            || param->vui.aspectRatioIdc > 16)
@@ -1061,6 +1063,7 @@
     x265_log(param, X265_LOG_INFO, "Lookahead / bframes / badapt        : %d / %d / %d\n", param->lookaheadDepth, param->bframes, param->bFrameAdaptive);
     x265_log(param, X265_LOG_INFO, "b-pyramid / weightp / weightb / refs: %d / %d / %d / %d\n",
              param->bBPyramid, param->bEnableWeightedPred, param->bEnableWeightedBiPred, param->maxNumReferences);
+
     switch (param->rc.rateControlMode)
     {
     case X265_RC_ABR:
@@ -1091,6 +1094,7 @@
     TOOLOPT(param->bEnableConstrainedIntra, "cip");
     TOOLOPT(param->bEnableEarlySkip, "esd");
     fprintf(stderr, "rd=%d ", param->rdLevel);
+    fprintf(stderr, "psyrd=%.1lf ", param->psyRd);
 
     TOOLOPT(param->bEnableLoopFilter, "lft");
     if (param->bEnableSAO)
diff -r 890b34705c95 -r 53c9b4e2e16e source/encoder/cturow.cpp
--- a/source/encoder/cturow.cpp	Wed May 14 14:13:08 2014 +0900
+++ b/source/encoder/cturow.cpp	Thu May 08 16:48:56 2014 +0530
@@ -34,7 +34,7 @@
     m_rdGoOnSbacCoder.init(&m_rdGoOnBinCodersCABAC);
     m_sbacCoder.init(&m_binCoderCABAC);
     m_trQuant.init(1 << top->m_quadtreeTULog2MaxSize, top->bEnableRDOQ, top->bEnableRDOQTS, top->param->bEnableTSkipFast);
-
+    m_rdCost.setPsyRdScale(top->param->rdLevel >= 4 ? top->param->psyRd : 0);
     m_rdSbacCoders = new TEncSbac * *[g_maxCUDepth + 1];
     m_binCodersCABAC = new TEncBinCABAC * *[g_maxCUDepth + 1];
 
diff -r 890b34705c95 -r 53c9b4e2e16e source/x265.cpp
--- a/source/x265.cpp	Wed May 14 14:13:08 2014 +0900
+++ b/source/x265.cpp	Thu May 08 16:48:56 2014 +0530
@@ -140,6 +140,7 @@
     { "cbqpoffs",       required_argument, NULL, 0 },
     { "crqpoffs",       required_argument, NULL, 0 },
     { "rd",             required_argument, NULL, 0 },
+    { "psy-rd",         required_argument, NULL, 0 },
     { "no-signhide",          no_argument, NULL, 0 },
     { "signhide",             no_argument, NULL, 0 },
     { "no-lft",               no_argument, NULL, 0 },
@@ -379,6 +380,7 @@
     H0("   --cbqpoffs <integer>          Chroma Cb QP Offset. Default %d\n", param->cbQpOffset);
     H0("   --crqpoffs <integer>          Chroma Cr QP Offset. Default %d\n", param->crQpOffset);
     H0("   --rd <0..6>                   Level of RD in mode decision 0:least....6:full RDO. Default %d\n", param->rdLevel);
+    H0("   --psy-rd <0..2.0>             Strength of psycho-visual optimization. Default %f\n", param->psyRd);
     H0("   --[no-]signhide               Hide sign bit of one coeff per TU (rdo). Default %s\n", OPT(param->bEnableSignHiding));
     H0("\nLoop filters (deblock and SAO):\n");
     H0("   --[no-]lft                    Enable Deblocking Loop Filter. Default %s\n", OPT(param->bEnableLoopFilter));
diff -r 890b34705c95 -r 53c9b4e2e16e source/x265.h
--- a/source/x265.h	Wed May 14 14:13:08 2014 +0900
+++ b/source/x265.h	Thu May 08 16:48:56 2014 +0530
@@ -579,6 +579,11 @@
      * efficiency at a major cost of performance. Default is no RDO (0) */
     int       rdLevel;
 
+    /* Psycho-visual rate-distortion strength. Only has an effect when rdLevel
+     * is 4 or higher. It makes mode decision favor options which preserve the
+     * energy of the source, at the cost of compression efficiency. Default 1.0 */
+    double     psyRd;
+
     /*== Coding tools ==*/
 
     /* Enable the implicit signaling of the sign bit of the last coefficient of