[x265-commits] [x265] entropy: pass context model (state) to bitsCodeBin as uint32_t

Sun Nov 9 07:29:29 CET 2014

details:   http://hg.videolan.org/x265/rev/a67b848d6c04
branches:  
changeset: 8807:a67b848d6c04
user:      Steve Borho <steve at borho.org>
date:      Fri Nov 07 13:03:07 2014 -0600
description:
entropy: pass context model (state) to bitsCodeBin as uint32_t

Should be slightly more efficient
Subject: [x265] nr: fix denoise offset memcopy size

details:   http://hg.videolan.org/x265/rev/0912563c4ac1
branches:  
changeset: 8808:0912563c4ac1
user:      Steve Borho <steve at borho.org>
date:      Fri Nov 07 16:43:19 2014 -0600
description:
nr: fix denoise offset memcopy size
Subject: [x265] motion: remove trivial set methods; make some members public

details:   http://hg.videolan.org/x265/rev/53c146f7eb9f
branches:  
changeset: 8809:53c146f7eb9f
user:      Steve Borho <steve at borho.org>
date:      Sat Nov 08 12:00:38 2014 -0600
description:
motion: remove trivial set methods; make some members public
Subject: [x265] search: keep AMVP candidates in mode structure

details:   http://hg.videolan.org/x265/rev/6124c837b3ab
branches:  
changeset: 8810:6124c837b3ab
user:      Steve Borho <steve at borho.org>
date:      Sat Nov 08 12:03:03 2014 -0600
description:
search: keep AMVP candidates in mode structure

This fixes some work replication in --pme and will also make handling BIDIR
as a separate prediction easier.
Subject: [x265] analysis: delay initialization of prediction cu until just before use

details:   http://hg.videolan.org/x265/rev/3f2d68368554
branches:  
changeset: 8811:3f2d68368554
user:      Steve Borho <steve at borho.org>
date:      Sat Nov 08 12:30:10 2014 -0600
description:
analysis: delay initialization of prediction cu until just before use

This avoids initializing CUs that may never be used because of various
early-outs
Subject: [x265] fix typo

details:   http://hg.videolan.org/x265/rev/3dc9857c59d3
branches:  
changeset: 8812:3dc9857c59d3
user:      Satoshi Nakagawa <nakagawa424 at oki.com>
date:      Sun Nov 09 10:55:45 2014 +0900
description:
fix typo
Subject: [x265] analysis: delay initialization of prediction cu in RD 5 and 6

details:   http://hg.videolan.org/x265/rev/b9147e641ce6
branches:  
changeset: 8813:b9147e641ce6
user:      Steve Borho <steve at borho.org>
date:      Sat Nov 08 13:14:48 2014 -0600
description:
analysis: delay initialization of prediction cu in RD 5 and 6
Subject: [x265] reference: add methods for querying CU/PU pointers

details:   http://hg.videolan.org/x265/rev/9687a9d1205a
branches:  
changeset: 8814:9687a9d1205a
user:      Steve Borho <steve at borho.org>
date:      Sat Nov 08 13:53:32 2014 -0600
description:
reference: add methods for querying CU/PU pointers

diffstat:

 source/encoder/analysis.cpp     |  41 ++++++++++++++++++----------------
 source/encoder/entropy.h        |   2 +-
 source/encoder/frameencoder.cpp |   2 +-
 source/encoder/motion.h         |  25 ++++++++------------
 source/encoder/reference.cpp    |   6 ++--
 source/encoder/reference.h      |   5 +++-
 source/encoder/search.cpp       |  48 +++++++++++++++++-----------------------
 source/encoder/search.h         |   2 +
 source/encoder/slicetype.h      |   4 +-
 9 files changed, 65 insertions(+), 70 deletions(-)

diffs (truncated from 493 to 300 lines):

diff -r b55799a2f5ad -r 9687a9d1205a source/encoder/analysis.cpp

--- a/source/encoder/analysis.cpp	Fri Nov 07 12:45:29 2014 -0600
+++ b/source/encoder/analysis.cpp	Sat Nov 08 13:53:32 2014 -0600
@@ -777,24 +777,9 @@ void Analysis::compressInterCU_rd0_4(con
     {
         bool bTryIntra = m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames;
 
-        /* Initialize all prediction CUs based on parentCTU */
-        md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom);
+        /* Compute Merge Cost */
         md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom);
         md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom);
-        if (m_param->bEnableRectInter)
-        {
-            md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom);
-            md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom);
-        }
-        if (m_slice->m_sps->maxAMPDepth > depth && cuGeom.log2CUSize < 6)
-        {
-            md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom);
-            md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom);
-            md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom);
-            md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom);
-        }
-
-        /* Compute Merge Cost */
         checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
 
         bool earlyskip = false;
@@ -803,14 +788,18 @@ void Analysis::compressInterCU_rd0_4(con
 
         if (!earlyskip)
         {
+            md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom);
             checkInter_rd0_4(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N);
             Mode *bestInter = &md.pred[PRED_2Nx2N];
 
             if (m_param->bEnableRectInter)
             {
+                md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom);
                 checkInter_rd0_4(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N);
                 if (md.pred[PRED_Nx2N].sa8dCost < bestInter->sa8dCost)
                     bestInter = &md.pred[PRED_Nx2N];
+
+                md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom);
                 checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN);
                 if (md.pred[PRED_2NxN].sa8dCost < bestInter->sa8dCost)
                     bestInter = &md.pred[PRED_2NxN];
@@ -832,18 +821,24 @@ void Analysis::compressInterCU_rd0_4(con
 
                 if (bHor)
                 {
+                    md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom);
                     checkInter_rd0_4(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU);
                     if (md.pred[PRED_2NxnU].sa8dCost < bestInter->sa8dCost)
                         bestInter = &md.pred[PRED_2NxnU];
+
+                    md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom);
                     checkInter_rd0_4(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD);
                     if (md.pred[PRED_2NxnD].sa8dCost < bestInter->sa8dCost)
                         bestInter = &md.pred[PRED_2NxnD];
                 }
                 if (bVer)
                 {
+                    md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom);
                     checkInter_rd0_4(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N);
                     if (md.pred[PRED_nLx2N].sa8dCost < bestInter->sa8dCost)
                         bestInter = &md.pred[PRED_nLx2N];
+
+                    md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom);
                     checkInter_rd0_4(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N);
                     if (md.pred[PRED_nRx2N].sa8dCost < bestInter->sa8dCost)
                         bestInter = &md.pred[PRED_nRx2N];
@@ -1046,14 +1041,14 @@ void Analysis::compressInterCU_rd5_6(con
 
     if (mightNotSplit)
     {
-        for (int i = 0; i < MAX_PRED_TYPES; i++)
-            md.pred[i].cu.initSubCU(parentCTU, cuGeom);
-
+        md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom);
+        md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom);
         checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
         bool earlySkip = m_param->bEnableEarlySkip && md.bestMode && !md.bestMode->cu.getQtRootCbf(0);
 
         if (!earlySkip)
         {
+            md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom);
             checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, false);
             checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
 
@@ -1062,11 +1057,13 @@ void Analysis::compressInterCU_rd5_6(con
                 // Nx2N rect
                 if (!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0))
                 {
+                    md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom);
                     checkInter_rd5_6(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N, false);
                     checkBestMode(md.pred[PRED_Nx2N], cuGeom.depth);
                 }
                 if (!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0))
                 {
+                    md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom);
                     checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, false);
                     checkBestMode(md.pred[PRED_2NxN], cuGeom.depth);
                 }
@@ -1092,11 +1089,13 @@ void Analysis::compressInterCU_rd5_6(con
                 {
                     if (!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0))
                     {
+                        md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom);
                         checkInter_rd5_6(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU, bMergeOnly);
                         checkBestMode(md.pred[PRED_2NxnU], cuGeom.depth);
                     }
                     if (!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0))
                     {
+                        md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom);
                         checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, bMergeOnly);
                         checkBestMode(md.pred[PRED_2NxnD], cuGeom.depth);
                     }
@@ -1105,11 +1104,13 @@ void Analysis::compressInterCU_rd5_6(con
                 {
                     if (!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0))
                     {
+                        md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom);
                         checkInter_rd5_6(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N, bMergeOnly);
                         checkBestMode(md.pred[PRED_nLx2N], cuGeom.depth);
                     }
                     if (!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0))
                     {
+                        md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom);
                         checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, bMergeOnly);
                         checkBestMode(md.pred[PRED_nRx2N], cuGeom.depth);
                     }
@@ -1119,11 +1120,13 @@ void Analysis::compressInterCU_rd5_6(con
             if ((m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames) &&
                 (!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0)))
             {
+                md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
                 checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL);
                 checkBestMode(md.pred[PRED_INTRA], depth);
 
                 if (depth == g_maxCUDepth && cuGeom.log2CUSize > m_slice->m_sps->quadtreeTULog2MinSize)
                 {
+                    md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom);
                     checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN, NULL);
                     checkBestMode(md.pred[PRED_INTRA_NxN], depth);
                 }
diff -r b55799a2f5ad -r 9687a9d1205a source/encoder/entropy.h
--- a/source/encoder/entropy.h	Fri Nov 07 12:45:29 2014 -0600
+++ b/source/encoder/entropy.h	Sat Nov 08 13:53:32 2014 -0600
@@ -207,7 +207,7 @@ private:
     void encodeBinTrm(uint32_t binValue);
 
     /* return the bits of encoding the context bin without updating */
-    inline uint32_t bitsCodeBin(uint32_t binValue, uint8_t ctxModel) const
+    inline uint32_t bitsCodeBin(uint32_t binValue, uint32_t ctxModel) const
     {
         uint64_t fracBits = (m_fracBits & 32767) + sbacGetEntropyBits(ctxModel, binValue);
         return (uint32_t)(fracBits >> 15);
diff -r b55799a2f5ad -r 9687a9d1205a source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Fri Nov 07 12:45:29 2014 -0600
+++ b/source/encoder/frameencoder.cpp	Sat Nov 08 13:53:32 2014 -0600
@@ -481,7 +481,7 @@ void FrameEncoder::compressFrame()
         for (int i = 0; i < m_top->m_numThreadLocalData; i++)
         {
             NoiseReduction* nr = &m_top->m_threadLocalData[i].analysis.m_quant.m_frameNr[m_frameEncoderID];
-            memcpy(nr->offsetDenoise, m_nr->offsetDenoise, sizeof(uint32_t) * MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
+            memcpy(nr->offsetDenoise, m_nr->offsetDenoise, sizeof(uint16_t) * MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
             memset(nr->count, 0, sizeof(uint32_t) * MAX_NUM_TR_CATEGORIES);
             memset(nr->residualSum, 0, sizeof(uint32_t) * MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
         }
diff -r b55799a2f5ad -r 9687a9d1205a source/encoder/motion.h
--- a/source/encoder/motion.h	Fri Nov 07 12:45:29 2014 -0600
+++ b/source/encoder/motion.h	Sat Nov 08 13:53:32 2014 -0600
@@ -37,7 +37,7 @@ class MotionEstimate : public BitCost
 protected:
 
     /* Aligned copy of original pixels, extra room for manual alignment */
-    pixel *fencplane;
+    pixel*   fencplane;
     intptr_t fencLumaStride;
 
     pixelcmp_t sad;
@@ -45,31 +45,26 @@ protected:
     pixelcmp_x3_t sad_x3;
     pixelcmp_x4_t sad_x4;
 
-    intptr_t blockOffset;
-    int partEnum;
-    int searchMethod;
-    int subpelRefine;
-
-    /* subpel generation buffers */
-    int blockwidth;
-    int blockheight;
-
     MotionEstimate& operator =(const MotionEstimate&);
 
 public:
 
+    intptr_t blockOffset;
+    int searchMethod;
+    int subpelRefine;
+
+    int blockwidth;
+    int blockheight;
+    int partEnum;
+
     static const int COST_MAX = 1 << 28;
 
-    pixel *fenc;
+    pixel*   fenc;
 
     MotionEstimate();
 
     ~MotionEstimate();
 
-    void setSearchMethod(int i) { searchMethod = i; }
-
-    void setSubpelRefine(int i) { subpelRefine = i; }
-
     /* Methods called at slice setup */
 
     void setSourcePlane(pixel *Y, intptr_t luma)
diff -r b55799a2f5ad -r 9687a9d1205a source/encoder/reference.cpp
--- a/source/encoder/reference.cpp	Fri Nov 07 12:45:29 2014 -0600
+++ b/source/encoder/reference.cpp	Sat Nov 08 13:53:32 2014 -0600
@@ -39,6 +39,7 @@ MotionReference::MotionReference()
 int MotionReference::init(PicYuv* recPic, WeightParam *w)
 {
     m_reconPic = recPic;
+    m_numWeightedRows = 0;
     lumaStride = recPic->m_stride;
     intptr_t startpad = recPic->m_lumaMarginY * lumaStride + recPic->m_lumaMarginX;
 
@@ -62,7 +63,6 @@ int MotionReference::init(PicYuv* recPic
         offset = w->inputOffset * (1 << (X265_DEPTH - 8));
         shift  = w->log2WeightDenom;
         round  = shift ? 1 << (shift - 1) : 0;
-        m_numWeightedRows = 0;
 
         /* use our buffer which will have weighted pixels written to it */
         fpelPlane = m_weightBuffer + startpad;
@@ -93,8 +93,7 @@ void MotionReference::applyWeight(int ro
     // Computing weighted CU rows
     int correction = IF_INTERNAL_PREC - X265_DEPTH; // intermediate interpolation depth
     int padwidth = (width + 15) & ~15;  // weightp assembly needs even 16 byte widths
-    primitives.weight_pp(src, dst, lumaStride, padwidth, height,
-                         weight, round << correction, shift + correction, offset);
+    primitives.weight_pp(src, dst, lumaStride, padwidth, height, weight, round << correction, shift + correction, offset);
 
     // Extending Left & Right
     primitives.extendRowBorder(dst, lumaStride, width, height, marginX);
@@ -114,5 +113,6 @@ void MotionReference::applyWeight(int ro
         for (int y = 0; y < marginY; y++)
             memcpy(pixY + (y + 1) * lumaStride, pixY, lumaStride * sizeof(pixel));
     }
+
     m_numWeightedRows = rows;
 }
diff -r b55799a2f5ad -r 9687a9d1205a source/encoder/reference.h
--- a/source/encoder/reference.h	Fri Nov 07 12:45:29 2014 -0600
+++ b/source/encoder/reference.h	Sat Nov 08 13:53:32 2014 -0600
@@ -25,13 +25,13 @@
 #define X265_REFERENCE_H
 
 #include "primitives.h"
+#include "picyuv.h"
 #include "lowres.h"
 #include "mv.h"
 
 namespace x265 {
 // private x265 namespace
 
-class PicYuv;
 struct WeightParam;
 
 class MotionReference : public ReferencePlanes
@@ -47,6 +47,9 @@ public:
     pixel*  m_weightBuffer;
     int     m_numWeightedRows;
 
+    pixel*  getLumaAddr(uint32_t ctuAddr)                      { return fpelPlane + m_reconPic->m_cuOffsetY[ctuAddr]; }
+    pixel*  getLumaAddr(uint32_t ctuAddr, uint32_t absPartIdx) { return fpelPlane + m_reconPic->m_cuOffsetY[ctuAddr] + m_reconPic->m_buOffsetY[absPartIdx]; }
+
 protected:
 
     MotionReference& operator =(const MotionReference&);
diff -r b55799a2f5ad -r 9687a9d1205a source/encoder/search.cpp
--- a/source/encoder/search.cpp	Fri Nov 07 12:45:29 2014 -0600
+++ b/source/encoder/search.cpp	Sat Nov 08 13:53:32 2014 -0600
@@ -68,8 +68,8 @@ bool Search::initSearch(const x265_param
     m_numLayers = g_log2Size[param.maxCUSize] - 2;
 
     m_rdCost.setPsyRdScale(param.psyRd);
-    m_me.setSearchMethod(param.searchMethod);
-    m_me.setSubpelRefine(param.subpelRefine);
+    m_me.searchMethod = param.searchMethod;
+    m_me.subpelRefine = param.subpelRefine;