[x265-commits] [x265] psy-rd: add sad_square primitive array to optimize psy-rd

Steve Borho steve at borho.org
Fri May 16 11:40:04 CEST 2014


details:   http://hg.videolan.org/x265/rev/48af10fff12b
branches:  
changeset: 6868:48af10fff12b
user:      Steve Borho <steve at borho.org>
date:      Fri May 16 10:42:23 2014 +0530
description:
psy-rd: add sad_square primitive array to optimize psy-rd

This fixes the DC component calculation and, at the same time, makes the
calculation more efficient.
Subject: [x265] common: make a global X265_LL macro for printing uint64_t

details:   http://hg.videolan.org/x265/rev/5167067ed452
branches:  
changeset: 6869:5167067ed452
user:      Steve Borho <steve at borho.org>
date:      Fri May 16 11:03:21 2014 +0530
description:
common: make a global X265_LL macro for printing uint64_t
Subject: [x265] nits

details:   http://hg.videolan.org/x265/rev/0bd90aaaa3a7
branches:  
changeset: 6870:0bd90aaaa3a7
user:      Steve Borho <steve at borho.org>
date:      Fri May 16 14:52:12 2014 +0530
description:
nits
Subject: [x265] cli: introduce --[no]-b-intra which enables/disables intra modes in B frames

details:   http://hg.videolan.org/x265/rev/7d11f60c5dba
branches:  
changeset: 6871:7d11f60c5dba
user:      Sumalatha Polureddy <sumalatha at multicorewareinc.com>
date:      Fri May 16 14:31:01 2014 +0530
description:
cli: introduce --[no]-b-intra which enables/disables intra modes in B frames

diffstat:

 doc/reST/cli.rst                      |   6 ++++++
 source/CMakeLists.txt                 |   5 +++++
 source/Lib/TLibCommon/TComRdCost.h    |  26 ++++++++++++++------------
 source/Lib/TLibEncoder/TEncCu.cpp     |   8 +++-----
 source/Lib/TLibEncoder/TEncSearch.cpp |   2 +-
 source/common/common.h                |   8 ++++++++
 source/common/param.cpp               |   3 +++
 source/common/primitives.h            |   1 +
 source/common/x86/asm-primitives.cpp  |   6 ++++++
 source/encoder/CMakeLists.txt         |   5 -----
 source/encoder/encoder.cpp            |  22 +++++++---------------
 source/x265.cpp                       |   3 +++
 source/x265.h                         |   6 ++++++
 13 files changed, 63 insertions(+), 38 deletions(-)

diffs (truncated from 339 to 300 lines):

diff -r 7533425d5060 -r 7d11f60c5dba doc/reST/cli.rst
--- a/doc/reST/cli.rst	Fri May 16 08:00:23 2014 +0530
+++ b/doc/reST/cli.rst	Fri May 16 14:31:01 2014 +0530
@@ -414,6 +414,12 @@ Spatial/intra options
 
 	**Values:** 0:disabled 1:RD-penalty 2:maximum
 
+.. option:: --b-intra, --no-b-intra
+
+	Enables the use of intra modes in very slow presets (rdLevel 5 or
+	6). Presets slow to ultrafast do not try intra in B frames
+	regardless of this setting. Default enabled.
+
 .. option:: --tskip, --no-tskip
 
 	Enable intra transform skipping (encode residual as coefficients)
diff -r 7533425d5060 -r 7d11f60c5dba source/CMakeLists.txt
--- a/source/CMakeLists.txt	Fri May 16 08:00:23 2014 +0530
+++ b/source/CMakeLists.txt	Fri May 16 14:31:01 2014 +0530
@@ -103,6 +103,11 @@ if(MSVC)
     endif()
 endif(MSVC)
 
+check_include_files(inttypes.h HAVE_INT_TYPES_H)
+if(HAVE_INT_TYPES_H)
+    add_definitions(-DHAVE_INT_TYPES_H=1)
+endif()
+
 if(INTEL_CXX AND UNIX)
     # treat icpc roughly like gcc
     set(GCC 1)
diff -r 7533425d5060 -r 7d11f60c5dba source/Lib/TLibCommon/TComRdCost.h
--- a/source/Lib/TLibCommon/TComRdCost.h	Fri May 16 08:00:23 2014 +0530
+++ b/source/Lib/TLibCommon/TComRdCost.h	Fri May 16 14:31:01 2014 +0530
@@ -105,15 +105,14 @@ public:
     /* return the difference in energy between the source block and the recon block */
     inline uint32_t psyCost(int size, pixel *source, intptr_t sstride, pixel *recon, intptr_t rstride)
     {
-        int width, height;
-        width = height = 1 << (size * 2);
-        int part = partitionFromSizes(width, height);
-        int dc = 2 * primitives.sad[part](source, sstride, (pixel*)zeroPel, MAX_CU_SIZE) / (width * height);
-        int sEnergy = primitives.sa8d[size](source, sstride, (pixel*)zeroPel, MAX_CU_SIZE) - dc;
+        int pixelCountShift = (size + 2) * 2 - 1; // width * height / satd-scale(2)
+        int sdc = primitives.sad_square[size](source, sstride, (pixel*)zeroPel, MAX_CU_SIZE) >> pixelCountShift;
+        int sEnergy = primitives.sa8d[size](source, sstride, (pixel*)zeroPel, MAX_CU_SIZE) - sdc;
 
-        dc = 2 * primitives.sad[part](recon, rstride, (pixel*)zeroPel, MAX_CU_SIZE) / (width * height);
-        int rEnergy = primitives.sa8d[size](recon, rstride, (pixel*)zeroPel, MAX_CU_SIZE) - dc;
+        int rdc = primitives.sad_square[size](recon, rstride, (pixel*)zeroPel, MAX_CU_SIZE) >> pixelCountShift;
+        int rEnergy = primitives.sa8d[size](recon, rstride, (pixel*)zeroPel, MAX_CU_SIZE) - rdc;
 
+        X265_CHECK(sdc <= sEnergy && rdc <= rEnergy, "DC component of energy is more than total cost\n")
         return abs(sEnergy - rEnergy);
     }
 
@@ -123,7 +122,7 @@ public:
         uint64_t tot = bits + (((psycost * m_psyRdScale) + 128) >> 8);
         X265_CHECK(abs((float)((tot * m_lambdaMotionSSE + 128) >> 8) -
                        (float)tot * m_lambdaMotionSSE / 256.0) < 2,
-                   "calcPsyRdCost wrap detected dist: %d, tot %d, lambda: %d\n", distortion, (int)tot, (int)m_lambdaMotionSSE);
+                   "calcPsyRdCost wrap detected tot: "X265_LL", lambda: "X265_LL"\n", tot, m_lambdaMotionSSE);
         return distortion + ((tot * m_lambdaMotionSSE + 128) >> 8);
     }
 
@@ -131,17 +130,20 @@ public:
     {
         X265_CHECK(abs((float)((bits * m_lambdaMotionSAD + 128) >> 8) -
                        (float)bits * m_lambdaMotionSAD / 256.0) < 2,
-                   "calcRdSADCost wrap detected dist: %d, bits %d, lambda: %d\n", sadCost, bits, (int)m_lambdaMotionSAD);
+                   "calcRdSADCost wrap detected dist: %d, bits %d, lambda: "X265_LL"\n", sadCost, bits, m_lambdaMotionSAD);
         return sadCost + ((bits * m_lambdaMotionSAD + 128) >> 8);
     }
 
-    inline uint32_t getCost(uint32_t bits)                     { return (uint32_t)((bits * m_lambdaMotionSAD + 128) >> 8); }
+    inline uint32_t getCost(uint32_t bits)
+    {
+        return (uint32_t)((bits * m_lambdaMotionSAD + 128) >> 8);
+    }
 
     inline uint32_t scaleChromaDistCb(uint32_t dist)
     {
         X265_CHECK(abs((float)((dist * m_cbDistortionWeight + 128) >> 8) -
                        (float)dist * m_cbDistortionWeight / 256.0) < 2,
-                   "scaleChromaDistCb wrap detected dist: %d, lambda: %d\n", dist, (int)m_cbDistortionWeight);
+                   "scaleChromaDistCb wrap detected dist: %d, lambda: "X265_LL"\n", dist, m_cbDistortionWeight);
         return (uint32_t)(((dist * m_cbDistortionWeight) + 128) >> 8);
     }
 
@@ -149,7 +151,7 @@ public:
     {
         X265_CHECK(abs((float)((dist * m_crDistortionWeight + 128) >> 8) -
                        (float)dist * m_crDistortionWeight / 256.0) < 2,
-                   "scaleChromaDistCr wrap detected dist: %d, lambda: %d\n", dist, (int)m_crDistortionWeight);
+                   "scaleChromaDistCr wrap detected dist: %d, lambda: "X265_LL"\n", dist, m_crDistortionWeight);
         return (uint32_t)(((dist * m_crDistortionWeight) + 128) >> 8);
     }
 };
diff -r 7533425d5060 -r 7d11f60c5dba source/Lib/TLibEncoder/TEncCu.cpp
--- a/source/Lib/TLibEncoder/TEncCu.cpp	Fri May 16 08:00:23 2014 +0530
+++ b/source/Lib/TLibEncoder/TEncCu.cpp	Fri May 16 14:31:01 2014 +0530
@@ -887,10 +887,10 @@ void TEncCu::xCompressCU(TComDataCU*& ou
 
             // do normal intra modes
             // speedup for inter frames
-            if (slice->getSliceType() == I_SLICE ||
+            if ((slice->getSliceType() == I_SLICE ||
                 outBestCU->getCbf(0, TEXT_LUMA) != 0   ||
                 outBestCU->getCbf(0, TEXT_CHROMA_U) != 0   ||
-                outBestCU->getCbf(0, TEXT_CHROMA_V) != 0) // avoid very complex intra if it is unlikely
+                outBestCU->getCbf(0, TEXT_CHROMA_V) != 0) && m_param->bIntraInBFrames) // avoid very complex intra if it is unlikely
             {
                 xCheckRDCostIntraInInter(outBestCU, outTempCU, SIZE_2Nx2N);
                 outTempCU->initEstData(depth);
@@ -1386,7 +1386,6 @@ void TEncCu::xCheckRDCostIntra(TComDataC
     // Encode Coefficients
     bool bCodeDQP = getdQPFlag();
     m_entropyCoder->encodeCoeff(outTempCU, 0, depth, outTempCU->getCUSize(0), outTempCU->getCUSize(0), bCodeDQP);
-
     m_rdGoOnSbacCoder->store(m_rdSbacCoders[depth][CI_TEMP_BEST]);
     outTempCU->m_totalBits = m_entropyCoder->getNumberOfWrittenBits();
 
@@ -1402,7 +1401,6 @@ void TEncCu::xCheckRDCostIntra(TComDataC
     {
         outTempCU->m_totalCost = m_rdCost->calcRdCost(outTempCU->m_totalDistortion, outTempCU->m_totalBits);
     }
-
     xCheckDQP(outTempCU);
     xCheckBestMode(outBestCU, outTempCU, depth);
 }
@@ -1444,7 +1442,7 @@ void TEncCu::xCheckRDCostIntraInInter(TC
         int part = g_convertToBit[outTempCU->getCUSize(0)];
         TComPicYuv *recon = outTempCU->getPic()->getPicYuvRec();
         uint32_t psyRdCost = m_rdCost->psyCost(part, m_origYuv[depth]->getLumaAddr(), m_origYuv[depth]->getStride(),
-            recon->getLumaAddr(outTempCU->getAddr()), recon->getStride());
+                                                     recon->getLumaAddr(outTempCU->getAddr()), recon->getStride());
         outTempCU->m_totalCost = m_rdCost->calcPsyRdCost(outTempCU->m_totalDistortion, outTempCU->m_totalBits, psyRdCost);
     }
     else
diff -r 7533425d5060 -r 7d11f60c5dba source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp	Fri May 16 08:00:23 2014 +0530
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp	Fri May 16 14:31:01 2014 +0530
@@ -2780,7 +2780,7 @@ void TEncSearch::encodeResAndCalcRdInter
     {
         int size = g_convertToBit[cu->getCUSize(0)];
         uint32_t psyRdCost = m_rdCost->psyCost(size, fencYuv->getLumaAddr(), fencYuv->getStride(),
-            outReconYuv->getLumaAddr(), outReconYuv->getStride());
+                                                     outReconYuv->getLumaAddr(), outReconYuv->getStride());
         bcost = m_rdCost->calcPsyRdCost(bdist, bestBits, psyRdCost);
     }
     else
diff -r 7533425d5060 -r 7d11f60c5dba source/common/common.h
--- a/source/common/common.h	Fri May 16 08:00:23 2014 +0530
+++ b/source/common/common.h	Fri May 16 14:31:01 2014 +0530
@@ -64,6 +64,14 @@ extern "C" intptr_t x265_stack_align(voi
 
 #endif // if defined(__GNUC__)
 
+#if HAVE_INT_TYPES_H
+#define __STDC_FORMAT_MACROS
+#include <inttypes.h>
+#define X265_LL "%" PRIu64
+#else
+#define X265_LL "%lld"
+#endif
+
 /* If compiled with CHECKED_BUILD perform run-time checks and log any that
  * fail, both to stderr and to a file */
 #if CHECKED_BUILD || _DEBUG
diff -r 7533425d5060 -r 7d11f60c5dba source/common/param.cpp
--- a/source/common/param.cpp	Fri May 16 08:00:23 2014 +0530
+++ b/source/common/param.cpp	Fri May 16 14:31:01 2014 +0530
@@ -157,6 +157,7 @@ void x265_param_default(x265_param *para
     param->crQpOffset = 0;
     param->rdPenalty = 0;
     param->psyRd = 0.0;
+    param->bIntraInBFrames = 1;
 
     /* Rate control options */
     param->rc.vbvMaxBitrate = 0;
@@ -384,6 +385,7 @@ int x265_param_default_preset(x265_param
             param->bEnableSAO = 0;
             param->bEnableWeightedPred = 0;
             param->bEnableWeightedBiPred = 0;
+            param->bIntraInBFrames = 0;
         }
         else if (!strcmp(tune, "zerolatency") ||
                  !strcmp(tune, "zero-latency"))
@@ -591,6 +593,7 @@ int x265_param_parse(x265_param *p, cons
     OPT("rd") p->rdLevel = atoi(value);
     OPT("psy-rd") p->psyRd = atof(value);
     OPT("signhide") p->bEnableSignHiding = atobool(value);
+    OPT("b-intra") p->bIntraInBFrames = atobool(value);
     OPT("lft") p->bEnableLoopFilter = atobool(value);
     OPT("sao") p->bEnableSAO = atobool(value);
     OPT("sao-lcu-bounds") p->saoLcuBoundary = atoi(value);
diff -r 7533425d5060 -r 7d11f60c5dba source/common/primitives.h
--- a/source/common/primitives.h	Fri May 16 08:00:23 2014 +0530
+++ b/source/common/primitives.h	Fri May 16 14:31:01 2014 +0530
@@ -190,6 +190,7 @@ struct EncoderPrimitives
     pixelcmp_t      satd[NUM_LUMA_PARTITIONS];       // Sum of Transformed differences (HADAMARD)
     pixelcmp_t      sa8d_inter[NUM_LUMA_PARTITIONS]; // sa8d primitives for motion search partitions
     pixelcmp_t      sa8d[NUM_SQUARE_BLOCKS];         // sa8d primitives for square intra blocks
+    pixelcmp_t      sad_square[NUM_SQUARE_BLOCKS];   // sad primitives for square coding blocks
 
     blockfill_s_t   blockfill_s[NUM_SQUARE_BLOCKS];  // block fill with value
     blockcpy_pp_t   blockcpy_pp;                     // block copy pixel from pixel
diff -r 7533425d5060 -r 7d11f60c5dba source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Fri May 16 08:00:23 2014 +0530
+++ b/source/common/x86/asm-primitives.cpp	Fri May 16 14:31:01 2014 +0530
@@ -1332,6 +1332,12 @@ void Setup_Assembly_Primitives(EncoderPr
     primitives.sa8d[BLOCK_32x32] = primitives.sa8d_inter[LUMA_32x32];
     primitives.sa8d[BLOCK_64x64] = primitives.sa8d_inter[LUMA_64x64];
 
+    primitives.sad_square[BLOCK_4x4]   = primitives.sad[LUMA_4x4];
+    primitives.sad_square[BLOCK_8x8]   = primitives.sad[LUMA_8x8];
+    primitives.sad_square[BLOCK_16x16] = primitives.sad[LUMA_16x16];
+    primitives.sad_square[BLOCK_32x32] = primitives.sad[LUMA_32x32];
+    primitives.sad_square[BLOCK_64x64] = primitives.sad[LUMA_64x64];
+
     // SA8D devolves to SATD for blocks not even multiples of 8x8
     primitives.sa8d_inter[LUMA_4x4]   = primitives.satd[LUMA_4x4];
     primitives.sa8d_inter[LUMA_4x8]   = primitives.satd[LUMA_4x8];
diff -r 7533425d5060 -r 7d11f60c5dba source/encoder/CMakeLists.txt
--- a/source/encoder/CMakeLists.txt	Fri May 16 08:00:23 2014 +0530
+++ b/source/encoder/CMakeLists.txt	Fri May 16 14:31:01 2014 +0530
@@ -39,11 +39,6 @@ if(MSVC)
         "/wd4244 /wd4389 /wd4018 /wd4800")
 endif(MSVC)
 
-check_include_files(inttypes.h HAVE_INT_TYPES_H)
-if(HAVE_INT_TYPES_H)
-    add_definitions(-DHAVE_INT_TYPES_H=1)
-endif()
-
 add_library(encoder OBJECT ../x265.h
     ${LIBENCODER_SRC} ${LIBENCODER_HDR}
     bitcost.cpp bitcost.h
diff -r 7533425d5060 -r 7d11f60c5dba source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Fri May 16 08:00:23 2014 +0530
+++ b/source/encoder/encoder.cpp	Fri May 16 14:31:01 2014 +0530
@@ -41,14 +41,6 @@
 
 #include "x265.h"
 
-#if HAVE_INT_TYPES_H
-#define __STDC_FORMAT_MACROS
-#include <inttypes.h>
-#define LL "%" PRIu64
-#else
-#define LL "%lld"
-#endif
-
 using namespace x265;
 
 Encoder::Encoder()
@@ -637,39 +629,39 @@ void Encoder::printSummary()
             int len = 0;
             if (sliceType != I_SLICE)
             {
-                len += sprintf(stats + len, "EncCU "LL "%% Merge "LL "%%", encCu, cntSkipCu);
+                len += sprintf(stats + len, "EncCU "X265_LL "%% Merge "X265_LL "%%", encCu, cntSkipCu);
             }
             if (cntInter)
             {
-                len += sprintf(stats + len, " Inter "LL "%%", cntInter);
+                len += sprintf(stats + len, " Inter "X265_LL "%%", cntInter);
                 if (param->bEnableAMP)
-                    len += sprintf(stats + len, "(%dx%d "LL "%% %dx%d "LL "%% %dx%d "LL "%% AMP "LL "%%)",
+                    len += sprintf(stats + len, "(%dx%d "X265_LL "%% %dx%d "X265_LL "%% %dx%d "X265_LL "%% AMP "X265_LL "%%)",
                                    cuSize, cuSize, cuInterDistribution[0],
                                    cuSize / 2, cuSize, cuInterDistribution[2],
                                    cuSize, cuSize / 2, cuInterDistribution[1],
                                    cuInterDistribution[3]);
                 else if (param->bEnableRectInter)
-                    len += sprintf(stats + len, "(%dx%d "LL "%% %dx%d "LL "%% %dx%d "LL "%%)",
+                    len += sprintf(stats + len, "(%dx%d "X265_LL "%% %dx%d "X265_LL "%% %dx%d "X265_LL "%%)",
                                    cuSize, cuSize, cuInterDistribution[0],
                                    cuSize / 2, cuSize, cuInterDistribution[2],
                                    cuSize, cuSize / 2, cuInterDistribution[1]);
             }
             if (cntIntra)
             {
-                len += sprintf(stats + len, " Intra "LL "%%(DC "LL "%% P "LL "%% Ang "LL "%%",
+                len += sprintf(stats + len, " Intra "X265_LL "%%(DC "X265_LL "%% P "X265_LL "%% Ang "X265_LL "%%",
                                cntIntra, cuIntraDistribution[0],
                                cuIntraDistribution[1], cuIntraDistribution[2]);
                 if (sliceType != I_SLICE)
                 {
                     if (depth == (int)g_maxCUDepth - 1)
-                        len += sprintf(stats + len, " %dx%d "LL "%%", cuSize / 2, cuSize / 2, cntIntraNxN);
+                        len += sprintf(stats + len, " %dx%d "X265_LL "%%", cuSize / 2, cuSize / 2, cntIntraNxN);
                 }
 
                 len += sprintf(stats + len, ")");
                 if (sliceType == I_SLICE)
                 {
                     if (depth == (int)g_maxCUDepth - 1)
-                        len += sprintf(stats + len, " %dx%d: "LL "%%", cuSize / 2, cuSize / 2, cntIntraNxN);
+                        len += sprintf(stats + len, " %dx%d: "X265_LL "%%", cuSize / 2, cuSize / 2, cntIntraNxN);


More information about the x265-commits mailing list