[x265-commits] [x265] analysis: fix binary mismatch for share intra save and load mode with same cli

Tue Nov 18 05:41:00 CET 2014

details:   http://hg.videolan.org/x265/rev/10b8d3fbe408
branches:  
changeset: 8842:10b8d3fbe408
user:      Gopu Govindaswamy <gopu at multicorewareinc.com>
date:      Wed Nov 12 17:06:23 2014 +0530
description:
analysis: fix binary mismatch for share intra save and load mode with same cli
Subject: [x265] analysis: cleanups, init pointers, variable names are made self-explanatory

details:   http://hg.videolan.org/x265/rev/ed2ba7a90567
branches:  
changeset: 8843:ed2ba7a90567
user:      Deepthi Nandakumar <deepthi at multicorewareinc.com>
date:      Sun Nov 16 22:49:06 2014 +0530
description:
analysis: cleanups, init pointers, variable names are made self-explanatory
Subject: [x265] vbv: tune vbv predictors for better mapping of predicted bits to encoded bits

details:   http://hg.videolan.org/x265/rev/27d36c4b4a27
branches:  
changeset: 8844:27d36c4b4a27
user:      Aarthi Thirumalai
date:      Mon Nov 17 01:30:26 2014 +0530
description:
vbv: tune vbv predictors for better mapping of predicted bits to encoded bits
Subject: [x265] modify MV default constructor to do nothing

details:   http://hg.videolan.org/x265/rev/7a1ec67bd004
branches:  
changeset: 8845:7a1ec67bd004
user:      Satoshi Nakagawa <nakagawa424 at oki.com>
date:      Mon Nov 17 19:44:35 2014 +0900
description:
modify MV default constructor to do nothing
Subject: [x265] encoder: force slicetype using analysis file

details:   http://hg.videolan.org/x265/rev/05d824463602
branches:  
changeset: 8846:05d824463602
user:      Gopu Govindaswamy <gopu at multicorewareinc.com>
date:      Mon Nov 17 16:38:52 2014 +0530
description:
encoder: force slicetype using analysis file

diffstat:

 source/common/cudata.cpp       |   6 +-
 source/common/lowres.h         |  16 +++---
 source/common/mv.h             |  22 +++++----
 source/encoder/analysis.cpp    |  91 ++++++++++++++++++++---------------------
 source/encoder/analysis.h      |   4 +-
 source/encoder/bitcost.h       |   2 +-
 source/encoder/encoder.cpp     |   7 ++-
 source/encoder/motion.cpp      |  81 +++++++++++++++++++-----------------
 source/encoder/motion.h        |   1 -
 source/encoder/ratecontrol.cpp |  51 ++++-------------------
 source/encoder/slicetype.cpp   |   9 ++-
 11 files changed, 133 insertions(+), 157 deletions(-)

diffs (truncated from 719 to 300 lines):

diff -r 8191e0d02455 -r 05d824463602 source/common/cudata.cpp

--- a/source/common/cudata.cpp	Fri Nov 14 16:14:39 2014 -0600
+++ b/source/common/cudata.cpp	Mon Nov 17 16:38:52 2014 +0530
@@ -1237,7 +1237,7 @@ void CUData::getMvField(const CUData* cu
     else
     {
         // OUT OF BOUNDARY
-        outMvField.mv.word = 0;
+        outMvField.mv = 0;
         outMvField.refIdx = REF_NOT_VALID;
     }
 }
@@ -1399,6 +1399,8 @@ uint32_t CUData::getInterMergeCandidates
 
     for (uint32_t i = 0; i < maxNumMergeCand; ++i)
     {
+        mvFieldNeighbours[i][0].mv = 0;
+        mvFieldNeighbours[i][1].mv = 0;
         mvFieldNeighbours[i][0].refIdx = REF_NOT_VALID;
         mvFieldNeighbours[i][1].refIdx = REF_NOT_VALID;
     }
@@ -1646,7 +1648,7 @@ uint32_t CUData::getInterMergeCandidates
     while (count < maxNumMergeCand)
     {
         interDirNeighbours[count] = 1;
-        mvFieldNeighbours[count][0].mv.word = 0;
+        mvFieldNeighbours[count][0].mv = 0;
         mvFieldNeighbours[count][0].refIdx = r;
 
         if (isInterB)
diff -r 8191e0d02455 -r 05d824463602 source/common/lowres.h
--- a/source/common/lowres.h	Fri Nov 14 16:14:39 2014 -0600
+++ b/source/common/lowres.h	Mon Nov 17 16:38:52 2014 +0530
@@ -56,11 +56,10 @@ struct ReferencePlanes
         {
             int hpelA = (qmv.y & 2) | ((qmv.x & 2) >> 1);
             pixel *frefA = lowresPlane[hpelA] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * lumaStride;
-
-            MV qmvB = qmv + MV((qmv.x & 1) * 2, (qmv.y & 1) * 2);
-            int hpelB = (qmvB.y & 2) | ((qmvB.x & 2) >> 1);
-
-            pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvB.x >> 2) + (qmvB.y >> 2) * lumaStride;
+            int qmvx = qmv.x + (qmv.x & 1);
+            int qmvy = qmv.y + (qmv.y & 1);
+            int hpelB = (qmvy & 2) | ((qmvx & 2) >> 1);
+            pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvx >> 2) + (qmvy >> 2) * lumaStride;
             primitives.pixelavg_pp[LUMA_8x8](buf, outstride, frefA, lumaStride, frefB, lumaStride, 32);
             return buf;
         }
@@ -79,9 +78,10 @@ struct ReferencePlanes
             ALIGN_VAR_16(pixel, subpelbuf[8 * 8]);
             int hpelA = (qmv.y & 2) | ((qmv.x & 2) >> 1);
             pixel *frefA = lowresPlane[hpelA] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * lumaStride;
-            MV qmvB = qmv + MV((qmv.x & 1) * 2, (qmv.y & 1) * 2);
-            int hpelB = (qmvB.y & 2) | ((qmvB.x & 2) >> 1);
-            pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvB.x >> 2) + (qmvB.y >> 2) * lumaStride;
+            int qmvx = qmv.x + (qmv.x & 1);
+            int qmvy = qmv.y + (qmv.y & 1);
+            int hpelB = (qmvy & 2) | ((qmvx & 2) >> 1);
+            pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvx >> 2) + (qmvy >> 2) * lumaStride;
             primitives.pixelavg_pp[LUMA_8x8](subpelbuf, 8, frefA, lumaStride, frefB, lumaStride, 32);
             return comp(fenc, FENC_STRIDE, subpelbuf, 8);
         }
diff -r 8191e0d02455 -r 05d824463602 source/common/mv.h
--- a/source/common/mv.h	Fri Nov 14 16:14:39 2014 -0600
+++ b/source/common/mv.h	Mon Nov 17 16:38:52 2014 +0530
@@ -44,19 +44,19 @@ public:
         int32_t word;
     };
 
-    MV() : word(0)                             {}
-
+    MV()                                       {}
+    MV(int32_t w) : word(w)                    {}
     MV(int16_t _x, int16_t _y) : x(_x), y(_y)  {}
 
-    const MV& operator =(uint32_t w)           { word = w; return *this; }
+    MV& operator =(uint32_t w)                 { word = w; return *this; }
 
-    const MV& operator +=(const MV& other)     { x += other.x; y += other.y; return *this; }
+    MV& operator +=(const MV& other)           { x += other.x; y += other.y; return *this; }
 
-    const MV& operator -=(const MV& other)     { x -= other.x; y -= other.y; return *this; }
+    MV& operator -=(const MV& other)           { x -= other.x; y -= other.y; return *this; }
 
-    const MV& operator >>=(int i)              { x >>= i; y >>= i; return *this; }
+    MV& operator >>=(int i)                    { x >>= i; y >>= i; return *this; }
 
-    const MV& operator <<=(int i)              { x <<= i; y <<= i; return *this; }
+    MV& operator <<=(int i)                    { x <<= i; y <<= i; return *this; }
 
     MV operator >>(int i) const                { return MV(x >> i, y >> i); }
 
@@ -64,16 +64,18 @@ public:
 
     MV operator *(int16_t i) const             { return MV(x * i, y * i); }
 
-    const MV operator -(const MV& other) const { return MV(x - other.x, y - other.y); }
+    MV operator -(const MV& other) const       { return MV(x - other.x, y - other.y); }
 
-    const MV operator +(const MV& other) const { return MV(x + other.x, y + other.y); }
+    MV operator +(const MV& other) const       { return MV(x + other.x, y + other.y); }
 
     bool operator ==(const MV& other) const    { return word == other.word; }
 
     bool operator !=(const MV& other) const    { return word != other.word; }
 
+    bool operator !() const                    { return !word; }
+
     // Scale down a QPEL mv to FPEL mv, rounding up by one HPEL offset
-    MV roundToFPel() const                     { return MV(x + 2, y + 2) >> 2; }
+    MV roundToFPel() const                     { return MV((x + 2) >> 2, (y + 2) >> 2); }
 
     // Scale up an FPEL mv to QPEL by shifting up two bits
     MV toQPel() const                          { return *this << 2; }
diff -r 8191e0d02455 -r 05d824463602 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Fri Nov 14 16:14:39 2014 -0600
+++ b/source/encoder/analysis.cpp	Mon Nov 17 16:38:52 2014 +0530
@@ -71,6 +71,8 @@ using namespace x265;
 Analysis::Analysis()
 {
     m_totalNumJobs = m_numAcquiredJobs = m_numCompletedJobs = 0;
+    m_reuseIntraDataCTU = NULL;
+    m_reuseInterDataCTU = NULL;
 }
 
 bool Analysis::create(ThreadLocalData *tld)
@@ -129,26 +131,21 @@ Mode& Analysis::compressCTU(CUData& ctu,
     uint32_t numPartition = ctu.m_numPartitions;
     if (m_param->analysisMode)
     {
-        m_intraDataCTU = (analysis_intra_data *)m_frame->m_analysisData.intraData;
+        m_reuseIntraDataCTU = (analysis_intra_data *)m_frame->m_analysisData.intraData;
         int numPredDir = m_slice->isInterP() ? 1 : 2;
-        m_interDataCTU = (analysis_inter_data *)m_frame->m_analysisData.interData + ctu.m_cuAddr * X265_MAX_PRED_MODE_PER_CTU * numPredDir;
+        m_reuseInterDataCTU = (analysis_inter_data *)m_frame->m_analysisData.interData + ctu.m_cuAddr * X265_MAX_PRED_MODE_PER_CTU * numPredDir;
     }
 
     if (m_slice->m_sliceType == I_SLICE)
     {
         uint32_t zOrder = 0;
-        if (m_param->analysisMode == X265_ANALYSIS_LOAD)
-            compressIntraCU(ctu, cuGeom, m_intraDataCTU, zOrder);
-        else
+        compressIntraCU(ctu, cuGeom, m_reuseIntraDataCTU, zOrder);
+        if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_frame->m_analysisData.intraData)
         {
-            compressIntraCU(ctu, cuGeom, NULL, zOrder);
-            if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_frame->m_analysisData.intraData)
-            {
-                CUData *bestCU = &m_modeDepth[0].bestMode->cu;
-                memcpy(&m_intraDataCTU->depth[ctu.m_cuAddr * numPartition], bestCU->m_cuDepth, sizeof(uint8_t) * numPartition);
-                memcpy(&m_intraDataCTU->modes[ctu.m_cuAddr * numPartition], bestCU->m_lumaIntraDir, sizeof(uint8_t) * numPartition);
-                memcpy(&m_intraDataCTU->partSizes[ctu.m_cuAddr * numPartition], bestCU->m_partSize, sizeof(uint8_t) * numPartition);
-            }
+            CUData *bestCU = &m_modeDepth[0].bestMode->cu;
+            memcpy(&m_reuseIntraDataCTU->depth[ctu.m_cuAddr * numPartition], bestCU->m_cuDepth, sizeof(uint8_t) * numPartition);
+            memcpy(&m_reuseIntraDataCTU->modes[ctu.m_cuAddr * numPartition], bestCU->m_lumaIntraDir, sizeof(uint8_t) * numPartition);
+            memcpy(&m_reuseIntraDataCTU->partSizes[ctu.m_cuAddr * numPartition], bestCU->m_partSize, sizeof(uint8_t) * numPartition);
         }
     }
     else
@@ -199,7 +196,7 @@ void Analysis::tryLossless(const CUGeom&
     }
 }
 
-void Analysis::compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom, analysis_intra_data* intraData, uint32_t& zOrder)
+void Analysis::compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom, analysis_intra_data* reuseIntraData, uint32_t& zOrder)
 {
     uint32_t depth = cuGeom.depth;
     ModeDepth& md = m_modeDepth[depth];
@@ -208,20 +205,20 @@ void Analysis::compressIntraCU(const CUD
     bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
     bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
 
-    if (intraData)
+    if (m_param->analysisMode == X265_ANALYSIS_LOAD)
     {
-        uint8_t* sharedDepth  = &intraData->depth[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
-        uint8_t* sharedModes  = &intraData->modes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
-        char* sharedPartSizes = &intraData->partSizes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
+        uint8_t* reuseDepth  = &reuseIntraData->depth[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
+        uint8_t* reuseModes  = &reuseIntraData->modes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
+        char* reusePartSizes = &reuseIntraData->partSizes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
 
-        if (mightNotSplit && depth == sharedDepth[zOrder] && zOrder == cuGeom.encodeIdx)
+        if (mightNotSplit && depth == reuseDepth[zOrder] && zOrder == cuGeom.encodeIdx)
         {
             m_quant.setQPforQuant(parentCTU);
 
-            PartSize size = (PartSize)sharedPartSizes[zOrder];
+            PartSize size = (PartSize)reusePartSizes[zOrder];
             Mode& mode = size == SIZE_2Nx2N ? md.pred[PRED_INTRA] : md.pred[PRED_INTRA_NxN];
             mode.cu.initSubCU(parentCTU, cuGeom);
-            checkIntra(mode, cuGeom, size, sharedModes);
+            checkIntra(mode, cuGeom, size, &reuseModes[zOrder]);
             checkBestMode(mode, depth);
 
             if (m_bTryLossless)
@@ -231,7 +228,7 @@ void Analysis::compressIntraCU(const CUD
                 addSplitFlagCost(*md.bestMode, cuGeom.depth);
 
             // increment zOrder offset to point to next best depth in sharedDepth buffer
-            zOrder += g_depthInc[g_maxCUDepth - 1][sharedDepth[zOrder]];
+            zOrder += g_depthInc[g_maxCUDepth - 1][reuseDepth[zOrder]];
             mightSplit = false;
         }
     }
@@ -276,7 +273,7 @@ void Analysis::compressIntraCU(const CUD
             {
                 m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.encodeIdx);
                 m_rqt[nextDepth].cur.load(*nextContext);
-                compressIntraCU(parentCTU, childGeom, intraData, zOrder);
+                compressIntraCU(parentCTU, childGeom, reuseIntraData, zOrder);
 
                 // Save best CU and pred data for this sub CU
                 splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
@@ -1450,18 +1447,18 @@ void Analysis::checkInter_rd0_4(Mode& in
     interMode.cu.setPredModeSubParts(MODE_INTER);
     int numPredDir = m_slice->isInterP() ? 1 : 2;
 
-    if (m_param->analysisMode == X265_ANALYSIS_LOAD && m_interDataCTU)
+    if (m_param->analysisMode == X265_ANALYSIS_LOAD && m_reuseInterDataCTU)
     {
         for (uint32_t part = 0; part < interMode.cu.getNumPartInter(); part++)
         {
             MotionData* bestME = interMode.bestME[part];
             for (int32_t i = 0; i < numPredDir; i++)
             {
-                bestME[i].mv.x = m_interDataCTU->mvx;
-                bestME[i].mv.y = m_interDataCTU->mvy;
-                bestME[i].ref = m_interDataCTU->ref;
-                bestME[i].costZero = !!m_interDataCTU->costZero;
-                m_interDataCTU++;
+                bestME[i].mv.x = m_reuseInterDataCTU->mvx;
+                bestME[i].mv.y = m_reuseInterDataCTU->mvy;
+                bestME[i].ref = m_reuseInterDataCTU->ref;
+                bestME[i].costZero = !!m_reuseInterDataCTU->costZero;
+                m_reuseInterDataCTU++;
             }
         }
     }
@@ -1473,18 +1470,18 @@ void Analysis::checkInter_rd0_4(Mode& in
         interMode.distortion = primitives.sa8d[cuGeom.log2CUSize - 2](fencYuv.m_buf[0], fencYuv.m_size, predYuv.m_buf[0], predYuv.m_size);
         interMode.sa8dCost = m_rdCost.calcRdSADCost(interMode.distortion, interMode.sa8dBits);
 
-        if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_interDataCTU)
+        if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_reuseInterDataCTU)
         {
             for (uint32_t part = 0; part < interMode.cu.getNumPartInter(); part++)
             {
                 MotionData* bestME = interMode.bestME[part];
                 for (int32_t i = 0; i < numPredDir; i++)
                 {
-                    m_interDataCTU->mvx = bestME[i].mv.x;
-                    m_interDataCTU->mvy = bestME[i].mv.y;
-                    m_interDataCTU->ref = bestME[i].ref;
-                    m_interDataCTU->costZero = bestME[i].costZero;
-                    m_interDataCTU++;
+                    m_reuseInterDataCTU->mvx = bestME[i].mv.x;
+                    m_reuseInterDataCTU->mvy = bestME[i].mv.y;
+                    m_reuseInterDataCTU->ref = bestME[i].ref;
+                    m_reuseInterDataCTU->costZero = bestME[i].costZero;
+                    m_reuseInterDataCTU++;
                 }
             }
         }
@@ -1503,18 +1500,18 @@ void Analysis::checkInter_rd5_6(Mode& in
     interMode.cu.setPredModeSubParts(MODE_INTER);
     int numPredDir = m_slice->isInterP() ? 1 : 2;
 
-    if (m_param->analysisMode == X265_ANALYSIS_LOAD && m_interDataCTU)
+    if (m_param->analysisMode == X265_ANALYSIS_LOAD && m_reuseInterDataCTU)
     {
         for (uint32_t part = 0; part < interMode.cu.getNumPartInter(); part++)
         {
             MotionData* bestME = interMode.bestME[part];
             for (int32_t i = 0; i < numPredDir; i++)
             {
-                bestME[i].mv.x = m_interDataCTU->mvx;
-                bestME[i].mv.y = m_interDataCTU->mvy;
-                bestME[i].ref = m_interDataCTU->ref;
-                bestME[i].costZero = !!m_interDataCTU->costZero;
-                m_interDataCTU++;
+                bestME[i].mv.x = m_reuseInterDataCTU->mvx;
+                bestME[i].mv.y = m_reuseInterDataCTU->mvy;
+                bestME[i].ref = m_reuseInterDataCTU->ref;
+                bestME[i].costZero = !!m_reuseInterDataCTU->costZero;
+                m_reuseInterDataCTU++;
             }
         }
     }
@@ -1522,18 +1519,18 @@ void Analysis::checkInter_rd5_6(Mode& in
     {
         /* predInterSearch sets interMode.sa8dBits, but this is ignored */
         encodeResAndCalcRdInterCU(interMode, cuGeom);
-        if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_interDataCTU)
+        if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_reuseInterDataCTU)
         {
             for (uint32_t part = 0; part < interMode.cu.getNumPartInter(); part++)