[x265-commits] [x265] TEncCU: inserting runtime checking for m_totalPsyCost

Deepthi Nandakumar deepthi at multicorewareinc.com
Thu May 29 03:40:16 CEST 2014


details:   http://hg.videolan.org/x265/rev/503a359e874c
branches:  
changeset: 6929:503a359e874c
user:      Deepthi Nandakumar <deepthi at multicorewareinc.com>
date:      Wed May 28 13:55:05 2014 +0530
description:
TEncCU: inserting runtime checking for m_totalPsyCost
Subject: [x265] psy-rd: bug fix in merge mode, use psyCosts for all decisions whenever psy-rd is enabled

details:   http://hg.videolan.org/x265/rev/306d3e6b5185
branches:  
changeset: 6930:306d3e6b5185
user:      Deepthi Nandakumar <deepthi at multicorewareinc.com>
date:      Wed May 28 13:56:38 2014 +0530
description:
psy-rd: bug fix in merge mode, use psyCosts for all decisions whenever psy-rd is enabled
Subject: [x265] TEncCu: nit

details:   http://hg.videolan.org/x265/rev/e6ba953dcb1a
branches:  
changeset: 6931:e6ba953dcb1a
user:      Steve Borho <steve at borho.org>
date:      Wed May 28 09:36:37 2014 -0500
description:
TEncCu: nit
Subject: [x265] rest: add missing --no-repeat-headers option

details:   http://hg.videolan.org/x265/rev/6df1a5bb11fc
branches:  
changeset: 6932:6df1a5bb11fc
user:      Steve Borho <steve at borho.org>
date:      Wed May 28 15:17:56 2014 -0500
description:
rest: add missing --no-repeat-headers option
Subject: [x265] pool: allow thread private data structures

details:   http://hg.videolan.org/x265/rev/77f788046989
branches:  
changeset: 6933:77f788046989
user:      Steve Borho <steve at borho.org>
date:      Wed May 28 17:42:58 2014 -0500
description:
pool: allow thread private data structures

Pass the worker's threadId to JobProvider::findJob() and allow job providers to use
this ID as they see fit to keep thread-local data. No behavior changes; this is
just laying the plumbing for future optimizations.
Subject: [x265] nits

details:   http://hg.videolan.org/x265/rev/eb236aec3757
branches:  
changeset: 6934:eb236aec3757
user:      Steve Borho <steve at borho.org>
date:      Wed May 28 17:43:30 2014 -0500
description:
nits
Subject: [x265] nits

details:   http://hg.videolan.org/x265/rev/e9776dfd1471
branches:  
changeset: 6935:e9776dfd1471
user:      Steve Borho <steve at borho.org>
date:      Wed May 28 20:39:40 2014 -0500
description:
nits

diffstat:

 doc/reST/cli.rst                      |   2 +-
 source/Lib/TLibEncoder/TEncCu.cpp     |  22 +++++++++++++++++++---
 source/Lib/TLibEncoder/TEncSearch.cpp |   8 ++++----
 source/common/threadpool.cpp          |   2 +-
 source/common/threadpool.h            |   2 +-
 source/common/wavefront.cpp           |   4 ++--
 source/common/wavefront.h             |   4 ++--
 source/encoder/frameencoder.cpp       |   8 ++++----
 source/encoder/frameencoder.h         |   6 +++---
 source/encoder/slicetype.cpp          |   8 ++++----
 source/encoder/slicetype.h            |   4 ++--
 source/test/testpool.cpp              |   4 ++--
 12 files changed, 45 insertions(+), 29 deletions(-)

diffs (300 lines):

diff -r 807ee7f1597b -r e9776dfd1471 doc/reST/cli.rst
--- a/doc/reST/cli.rst	Tue May 27 23:22:21 2014 +0530
+++ b/doc/reST/cli.rst	Wed May 28 20:39:40 2014 -0500
@@ -839,7 +839,7 @@ VUI fields must be manually specified.
 Bitstream options
 =================
 
-.. option:: --repeat-headers
+.. option:: --repeat-headers, --no-repeat-headers
 
 	If enabled, x265 will emit VPS, SPS, and PPS headers with every
 	keyframe. This is intended for use when you do not have a container
diff -r 807ee7f1597b -r e9776dfd1471 source/Lib/TLibEncoder/TEncCu.cpp
--- a/source/Lib/TLibEncoder/TEncCu.cpp	Tue May 27 23:22:21 2014 +0530
+++ b/source/Lib/TLibEncoder/TEncCu.cpp	Wed May 28 20:39:40 2014 -0500
@@ -706,7 +706,14 @@ void TEncCu::xCompressIntraCU(TComDataCU
 
     X265_CHECK(outBestCU->getPartitionSize(0) != SIZE_NONE, "no best partition size\n");
     X265_CHECK(outBestCU->getPredictionMode(0) != MODE_NONE, "no best partition mode\n");
-    X265_CHECK(outBestCU->m_totalRDCost != MAX_INT64, "no best partition cost\n");
+    if (m_rdCost->psyRdEnabled())
+    {
+        X265_CHECK(outBestCU->m_totalPsyCost != MAX_INT64, "no best partition cost\n");
+    }
+    else
+    {
+        X265_CHECK(outBestCU->m_totalRDCost != MAX_INT64, "no best partition cost\n");
+    }
 }
 
 void TEncCu::xCompressCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth, bool bInsidePicture, PartSize parentSize)
@@ -1058,7 +1065,14 @@ void TEncCu::xCompressCU(TComDataCU*& ou
 
     X265_CHECK(outBestCU->getPartitionSize(0) != SIZE_NONE, "no best partition size\n");
     X265_CHECK(outBestCU->getPredictionMode(0) != MODE_NONE, "no best partition mode\n");
-    X265_CHECK(outBestCU->m_totalRDCost != MAX_INT64, "no best partition cost\n");
+    if (m_rdCost->psyRdEnabled())
+    {
+        X265_CHECK(outBestCU->m_totalPsyCost != MAX_INT64, "no best partition cost\n");
+    }
+    else
+    {
+        X265_CHECK(outBestCU->m_totalRDCost != MAX_INT64, "no best partition cost\n");
+    }
 }
 
 /** finish encoding a cu and handle end-of-slice conditions
@@ -1317,7 +1331,9 @@ void TEncCu::xCheckRDCostMerge2Nx2N(TCom
                     outTempCU->setSkipFlagSubParts(!outTempCU->getQtRootCbf(0), 0, depth);
                     int origQP = outTempCU->getQP(0);
                     xCheckDQP(outTempCU);
-                    if (outTempCU->m_totalRDCost < outBestCU->m_totalRDCost)
+                    uint64_t tempCost = m_rdCost->psyRdEnabled() ? outTempCU->m_totalPsyCost : outTempCU->m_totalRDCost;
+                    uint64_t bestCost = m_rdCost->psyRdEnabled() ? outBestCU->m_totalPsyCost : outBestCU->m_totalRDCost;    
+                    if (tempCost < bestCost)
                     {
                         TComDataCU* tmp = outTempCU;
                         outTempCU = outBestCU;
diff -r 807ee7f1597b -r e9776dfd1471 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp	Tue May 27 23:22:21 2014 +0530
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp	Wed May 28 20:39:40 2014 -0500
@@ -453,7 +453,7 @@ void TEncSearch::xIntraCodingLumaBlk(TCo
     int lastPos = -1;
     cu->setTrIdxSubParts(trDepth, absPartIdx, fullDepth);
 
-    int      chFmt        = cu->getChromaFormat();
+    int chFmt = cu->getChromaFormat();
     m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
     m_trQuant->selectLambda(TEXT_LUMA);
 
@@ -1959,8 +1959,8 @@ void TEncSearch::estIntraPredChromaQT(TC
                 m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_CURR_BEST]);
             }
 
-            uint32_t   bits = xGetIntraBitsQT(cu, initTrDepth, tuIterator.m_absPartIdxTURelCU, tuIterator.m_absPartIdxStep, false, true);
-            uint64_t cost  = m_rdCost->calcRdCost(dist, bits);
+            uint32_t bits = xGetIntraBitsQT(cu, initTrDepth, tuIterator.m_absPartIdxTURelCU, tuIterator.m_absPartIdxStep, false, true);
+            uint64_t cost = m_rdCost->calcRdCost(dist, bits);
 
             //----- compare -----
             if (cost < bestCost)
@@ -2168,7 +2168,7 @@ void TEncSearch::IPCMSearch(TComDataCU* 
     m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_CURR_BEST]);
 
     cu->m_totalBits       = bits;
-    cu->m_totalRDCost       = cost;
+    cu->m_totalRDCost     = cost;
     cu->m_totalDistortion = distortion;
 
     cu->copyToPic(depth, 0, 0);
diff -r 807ee7f1597b -r e9776dfd1471 source/common/threadpool.cpp
--- a/source/common/threadpool.cpp	Tue May 27 23:22:21 2014 +0530
+++ b/source/common/threadpool.cpp	Wed May 28 20:39:40 2014 -0500
@@ -156,7 +156,7 @@ void PoolThread::threadMain()
         {
             // FindJob() may perform actual work and return true.  If
             // it does we restart the job search
-            if (cur->findJob() == true)
+            if (cur->findJob(m_id) == true)
                 break;
 
             cur = cur->m_nextProvider;
diff -r 807ee7f1597b -r e9776dfd1471 source/common/threadpool.h
--- a/source/common/threadpool.h	Tue May 27 23:22:21 2014 +0530
+++ b/source/common/threadpool.h	Wed May 28 20:39:40 2014 -0500
@@ -62,7 +62,7 @@ public:
 
     // Worker threads will call this method to find a job.  Must return true if
     // work was completed.  False if no work was available.
-    virtual bool findJob() = 0;
+    virtual bool findJob(int threadId) = 0;
 
     // All derived objects that call Enqueue *MUST* call flush before allowing
     // their object to be destroyed, otherwise you will see random crashes involving
diff -r 807ee7f1597b -r e9776dfd1471 source/common/wavefront.cpp
--- a/source/common/wavefront.cpp	Tue May 27 23:22:21 2014 +0530
+++ b/source/common/wavefront.cpp	Wed May 28 20:39:40 2014 -0500
@@ -112,7 +112,7 @@ bool WaveFront::dequeueRow(int row)
     return ATOMIC_CAS(&m_internalDependencyBitmap[row >> 6], oldval, newval) == oldval;
 }
 
-bool WaveFront::findJob()
+bool WaveFront::findJob(int threadId)
 {
     unsigned long id;
 
@@ -130,7 +130,7 @@ bool WaveFront::findJob()
             if (ATOMIC_CAS(&m_internalDependencyBitmap[w], oldval, newval) == oldval)
             {
                 // we cleared the bit, process row
-                processRow(w * 64 + id);
+                processRow(w * 64 + id, threadId);
                 return true;
             }
             // some other thread cleared the bit, try another bit
diff -r 807ee7f1597b -r e9776dfd1471 source/common/wavefront.h
--- a/source/common/wavefront.h	Tue May 27 23:22:21 2014 +0530
+++ b/source/common/wavefront.h	Wed May 28 20:39:40 2014 -0500
@@ -87,11 +87,11 @@ public:
     // WaveFront's implementation of JobProvider::findJob. Consults
     // m_queuedBitmap and calls ProcessRow(row) for lowest numbered queued row
     // or returns false
-    bool findJob();
+    bool findJob(int threadId);
 
     // Start or resume encode processing of this row, must be implemented by
     // derived classes.
-    virtual void processRow(int row) = 0;
+    virtual void processRow(int row, int threadId) = 0;
 
     // Returns true if a row above curRow is available for processing.  The processRow()
     // method may call this function periodically and voluntarily exit
diff -r 807ee7f1597b -r e9776dfd1471 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Tue May 27 23:22:21 2014 +0530
+++ b/source/encoder/frameencoder.cpp	Wed May 28 20:39:40 2014 -0500
@@ -1088,13 +1088,13 @@ void FrameEncoder::compressCTURows()
                     }
                 }
 
-                processRow(i * 2 + 0);
+                processRow(i * 2 + 0, -1);
             }
 
             // Filter
             if (i >= m_filterRowDelay)
             {
-                processRow((i - m_filterRowDelay) * 2 + 1);
+                processRow((i - m_filterRowDelay) * 2 + 1, -1);
             }
         }
     }
@@ -1103,7 +1103,7 @@ void FrameEncoder::compressCTURows()
 }
 
 // Called by worker threads
-void FrameEncoder::processRowEncoder(int row)
+void FrameEncoder::processRowEncoder(int row, const int /* threadId */)
 {
     PPAScopeEvent(Thread_ProcessRow);
 
@@ -1124,7 +1124,7 @@ void FrameEncoder::processRowEncoder(int
              * believe the problem is fixed, but are leaving this check in place
              * to prevent crashes in case it is not */
             x265_log(m_cfg->param, X265_LOG_WARNING,
-                     "internal error - simulaneous row access detected. Please report HW to x265-devel at videolan.org\n");
+                     "internal error - simultaneous row access detected. Please report HW to x265-devel at videolan.org\n");
             return;
         }
         curRow.m_busy = true;
diff -r 807ee7f1597b -r e9776dfd1471 source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h	Tue May 27 23:22:21 2014 +0530
+++ b/source/encoder/frameencoder.h	Wed May 28 20:39:40 2014 -0500
@@ -63,7 +63,7 @@ public:
 
     void destroy();
 
-    void processRowEncoder(int row);
+    void processRowEncoder(int row, const int threadId);
 
     void processRowFilter(int row)
     {
@@ -90,7 +90,7 @@ public:
         WaveFront::enableRow(row * 2 + 1);
     }
 
-    void processRow(int row)
+    void processRow(int row, int threadId)
     {
         const int realRow = row >> 1;
         const int typeNum = row & 1;
@@ -98,7 +98,7 @@ public:
         // TODO: use switch when more type
         if (typeNum == 0)
         {
-            processRowEncoder(realRow);
+            processRowEncoder(realRow, threadId);
         }
         else
         {
diff -r 807ee7f1597b -r e9776dfd1471 source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp	Tue May 27 23:22:21 2014 +0530
+++ b/source/encoder/slicetype.cpp	Wed May 28 20:39:40 2014 -0500
@@ -189,7 +189,7 @@ TComPic* Lookahead::getDecidedPicture()
 }
 
 /* Called by pool worker threads */
-bool Lookahead::findJob()
+bool Lookahead::findJob(int)
 {
     if (bReady && ATOMIC_CAS32(&bReady, 1, 0) == 1)
     {
@@ -1289,7 +1289,7 @@ int64_t CostEstimate::estimateFrameCost(
             enqueueRow(0);
             while (!bFrameCompleted)
             {
-                WaveFront::findJob();
+                WaveFront::findJob(-1);
             }
 
             WaveFront::dequeue();
@@ -1298,7 +1298,7 @@ int64_t CostEstimate::estimateFrameCost(
         {
             for (int row = 0; row < heightInCU; row++)
             {
-                processRow(row);
+                processRow(row, -1);
             }
 
             x265_emms();
@@ -1455,7 +1455,7 @@ void CostEstimate::weightsAnalyse(Lowres
     }
 }
 
-void CostEstimate::processRow(int row)
+void CostEstimate::processRow(int row, int /*threadId*/)
 {
     int realrow = heightInCU - 1 - row;
     Lowres **frames = curframes;
diff -r 807ee7f1597b -r e9776dfd1471 source/encoder/slicetype.h
--- a/source/encoder/slicetype.h	Tue May 27 23:22:21 2014 +0530
+++ b/source/encoder/slicetype.h	Wed May 28 20:39:40 2014 -0500
@@ -110,7 +110,7 @@ struct CostEstimate : public WaveFront
     volatile bool    bFrameCompleted;
     int              curb, curp0, curp1;
 
-    void     processRow(int row);
+    void     processRow(int row, int threadId);
     int64_t  estimateFrameCost(Lowres **frames, int p0, int p1, int b, bool bIntraPenalty);
 
 protected:
@@ -155,7 +155,7 @@ protected:
     volatile bool bFilling;
     volatile bool bFlushed;
 
-    bool findJob();
+    bool findJob(int);
 
     /* called by addPicture() or flush() to trigger slice decisions */
     void slicetypeDecide();
diff -r 807ee7f1597b -r e9776dfd1471 source/test/testpool.cpp
--- a/source/test/testpool.cpp	Tue May 27 23:22:21 2014 +0530
+++ b/source/test/testpool.cpp	Wed May 28 20:39:40 2014 -0500
@@ -87,7 +87,7 @@ public:
 
     void encode();
 
-    void processRow(int row);
+    void processRow(int row, int threadid);
 };
 
 void MD5Frame::initialize(int cols, int rows)
@@ -130,7 +130,7 @@ void MD5Frame::encode()
         std::cout << "Bad hash: " << ss.str() << std::endl;
 }
 
-void MD5Frame::processRow(int rownum)
+void MD5Frame::processRow(int rownum, int)
 {
     // Called by worker thread
     RowData &curRow = this->row[rownum];


More information about the x265-commits mailing list