[x265-commits] [x265] analysis save/load: refactor full implementation

Deepthi Nandakumar deepthi at multicorewareinc.com
Fri Nov 14 23:14:54 CET 2014


details:   http://hg.videolan.org/x265/rev/58c2e06c2e4a
branches:  
changeset: 8837:58c2e06c2e4a
user:      Deepthi Nandakumar <deepthi at multicorewareinc.com>
date:      Fri Nov 14 17:23:14 2014 +0530
description:
analysis save/load: refactor full implementation

1. Move analysis inter/intra data into encoder
2. Encoder allocates and frees memory for x265 analysis, remove api calls
3. Inter and intra data allocated based on sliceType only
4. frame record size is now variable
Subject: [x265] analysis: encodeResidue() directly write to reconPic

details:   http://hg.videolan.org/x265/rev/c3096034934f
branches:  
changeset: 8838:c3096034934f
user:      Satoshi Nakagawa <nakagawa424 at oki.com>
date:      Fri Nov 14 17:35:07 2014 +0900
description:
analysis: encodeResidue() directly write to reconPic
Subject: [x265] common: move analysis reuse structs to common.h

details:   http://hg.videolan.org/x265/rev/72f1222903a3
branches:  
changeset: 8839:72f1222903a3
user:      Steve Borho <steve at borho.org>
date:      Fri Nov 14 12:16:57 2014 -0600
description:
common: move analysis reuse structs to common.h

files in common/ shouldn't include encoder.h
Subject: [x265] encoder: add prefix to FREAD and FWRITE macros to avoid MacOSX macro conflict

details:   http://hg.videolan.org/x265/rev/b617dca5ce12
branches:  
changeset: 8840:b617dca5ce12
user:      Steve Borho <steve at borho.org>
date:      Fri Nov 14 12:20:18 2014 -0600
description:
encoder: add prefix to FREAD and FWRITE macros to avoid MacOSX macro conflict

/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.10.sdk/usr/include/sys/fcntl.h:111:9: note:
      previous definition is here
#define FWRITE          0x0002
Subject: [x265] cli: fix analysis filename argument

details:   http://hg.videolan.org/x265/rev/8191e0d02455
branches:  
changeset: 8841:8191e0d02455
user:      Steve Borho <steve at borho.org>
date:      Fri Nov 14 16:14:39 2014 -0600
description:
cli: fix analysis filename argument

This showed up as a GCC warning about an unused variable, but having the arg
handled here prevented the arg from being passed to x265_param_parse()

diffstat:

 source/CMakeLists.txt       |    2 +-
 source/common/common.h      |   18 +++
 source/common/frame.h       |   33 +++---
 source/common/param.cpp     |    2 +
 source/encoder/analysis.cpp |  136 +++++++++++++--------------
 source/encoder/analysis.h   |    6 +-
 source/encoder/api.cpp      |   25 -----
 source/encoder/encoder.cpp  |  215 +++++++++++++++++++++++++++++++++++++++++--
 source/encoder/encoder.h    |    9 +
 source/x265.cpp             |  126 +------------------------
 source/x265.def.in          |    2 -
 source/x265.h               |   43 +-------
 12 files changed, 332 insertions(+), 285 deletions(-)

diffs (truncated from 1052 to 300 lines):

diff -r 64314f8061f1 -r 8191e0d02455 source/CMakeLists.txt
--- a/source/CMakeLists.txt	Thu Nov 13 18:40:35 2014 +0900
+++ b/source/CMakeLists.txt	Fri Nov 14 16:14:39 2014 -0600
@@ -21,7 +21,7 @@ include(CheckSymbolExists)
 include(CheckCXXCompilerFlag)
 
 # X265_BUILD must be incremented each time the public API is changed
-set(X265_BUILD 36)
+set(X265_BUILD 37)
 configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
                "${PROJECT_BINARY_DIR}/x265.def")
 configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
diff -r 64314f8061f1 -r 8191e0d02455 source/common/common.h
--- a/source/common/common.h	Thu Nov 13 18:40:35 2014 +0900
+++ b/source/common/common.h	Fri Nov 14 16:14:39 2014 -0600
@@ -294,6 +294,7 @@ typedef int16_t  coeff_t;      // transf
 
 #define CHROMA_H_SHIFT(x) (x == X265_CSP_I420 || x == X265_CSP_I422)
 #define CHROMA_V_SHIFT(x) (x == X265_CSP_I420)
+#define X265_MAX_PRED_MODE_PER_CTU 85 * 2 * 8
 
 namespace x265 {
 
@@ -345,6 +346,23 @@ struct SAOParam
     }
 };
 
+/* Stores inter (motion estimation) analysis data for a single frame */
+struct analysis_inter_data
+{
+    int      ref;
+    int      costZero;
+    int16_t  mvx;
+    int16_t  mvy;
+};
+
+/* Stores intra analysis data for a single frame. This struct needs better packing */
+struct analysis_intra_data
+{
+    uint8_t*  depth;
+    uint8_t*  modes;
+    char*     partSizes;
+};
+
 enum TextType
 {
     TEXT_LUMA     = 0,  // luma
diff -r 64314f8061f1 -r 8191e0d02455 source/common/frame.h
--- a/source/common/frame.h	Thu Nov 13 18:40:35 2014 +0900
+++ b/source/common/frame.h	Fri Nov 14 16:14:39 2014 -0600
@@ -43,30 +43,29 @@ public:
 
     /* These two items will be NULL until the Frame begins to be encoded, at which point
      * it will be assigned a FrameData instance, which comes with a reconstructed image PicYuv */
-    FrameData*        m_encData;
-    PicYuv*           m_reconPic;
+    FrameData*             m_encData;
+    PicYuv*                m_reconPic;
 
     /* Data associated with x265_picture */
-    PicYuv*           m_fencPic;
-    int               m_poc;
-    int64_t           m_pts;                // user provided presentation time stamp
-    int64_t           m_reorderedPts;
-    int64_t           m_dts;
-    int32_t           m_forceqp;            // Force to use the qp specified in qp file
-    x265_intra_data*  m_intraData;
-    x265_inter_data*  m_interData;
-    void*             m_userData;           // user provided pointer passed in with this picture
+    PicYuv*                m_fencPic;
+    int                    m_poc;
+    int64_t                m_pts;                // user provided presentation time stamp
+    int64_t                m_reorderedPts;
+    int64_t                m_dts;
+    int32_t                m_forceqp;            // Force to use the qp specified in qp file
+    void*                  m_userData;           // user provided pointer passed in with this picture
 
-    Lowres            m_lowres;
-    bool              m_bChromaExtended;    // orig chroma planes motion extended for weight analysis
+    Lowres                 m_lowres;
+    bool                   m_bChromaExtended;    // orig chroma planes motion extended for weight analysis
 
     /* Frame Parallelism - notification between FrameEncoders of available motion reference rows */
-    ThreadSafeInteger m_reconRowCount;      // count of CTU rows completely reconstructed and extended for motion reference
-    volatile uint32_t m_countRefEncoders;   // count of FrameEncoder threads monitoring m_reconRowCount
+    ThreadSafeInteger      m_reconRowCount;      // count of CTU rows completely reconstructed and extended for motion reference
+    volatile uint32_t      m_countRefEncoders;   // count of FrameEncoder threads monitoring m_reconRowCount
 
-    Frame*            m_next;               // PicList doubly linked list pointers
-    Frame*            m_prev;
+    Frame*                 m_next;               // PicList doubly linked list pointers
+    Frame*                 m_prev;
 
+    x265_analysis_data     m_analysisData;
     Frame();
 
     bool create(x265_param *param);
diff -r 64314f8061f1 -r 8191e0d02455 source/common/param.cpp
--- a/source/common/param.cpp	Thu Nov 13 18:40:35 2014 +0900
+++ b/source/common/param.cpp	Fri Nov 14 16:14:39 2014 -0600
@@ -177,6 +177,7 @@ void x265_param_default(x265_param *para
     param->psyRd = 0.0;
     param->psyRdoq = 0.0;
     param->analysisMode = 0;
+    param->analysisFileName = NULL;
     param->bIntraInBFrames = 0;
     param->bLossless = 0;
     param->bCULossless = 0;
@@ -696,6 +697,7 @@ int x265_param_parse(x265_param *p, cons
     OPT("cutree")    p->rc.cuTree = atobool(value);
     OPT("slow-firstpass") p->rc.bEnableSlowFirstPass = atobool(value);
     OPT("analysis-mode") p->analysisMode = parseName(value, x265_analysis_names, bError);
+    OPT("analysis-file") p->analysisFileName = strdup(value);
     OPT("sar")
     {
         p->vui.aspectRatioIdc = parseName(value, x265_sar_names, bError);
diff -r 64314f8061f1 -r 8191e0d02455 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Thu Nov 13 18:40:35 2014 +0900
+++ b/source/encoder/analysis.cpp	Fri Nov 14 16:14:39 2014 -0600
@@ -127,35 +127,38 @@ Mode& Analysis::compressCTU(CUData& ctu,
     m_modeDepth[0].fencYuv.copyFromPicYuv(*m_frame->m_fencPic, ctu.m_cuAddr, 0);
 
     uint32_t numPartition = ctu.m_numPartitions;
+    if (m_param->analysisMode)
+    {
+        m_intraDataCTU = (analysis_intra_data *)m_frame->m_analysisData.intraData;
+        int numPredDir = m_slice->isInterP() ? 1 : 2;
+        m_interDataCTU = (analysis_inter_data *)m_frame->m_analysisData.interData + ctu.m_cuAddr * X265_MAX_PRED_MODE_PER_CTU * numPredDir;
+    }
+
     if (m_slice->m_sliceType == I_SLICE)
     {
         uint32_t zOrder = 0;
         if (m_param->analysisMode == X265_ANALYSIS_LOAD)
-            compressIntraCU(ctu, cuGeom, m_frame->m_intraData, zOrder);
+            compressIntraCU(ctu, cuGeom, m_intraDataCTU, zOrder);
         else
         {
             compressIntraCU(ctu, cuGeom, NULL, zOrder);
-
-            if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_frame->m_intraData)
+            if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_frame->m_analysisData.intraData)
             {
-                const CUData* bestCU = &m_modeDepth[0].bestMode->cu;
-                memcpy(&m_frame->m_intraData->depth[ctu.m_cuAddr * numPartition], bestCU->m_cuDepth, sizeof(uint8_t) * numPartition);
-                memcpy(&m_frame->m_intraData->modes[ctu.m_cuAddr * numPartition], bestCU->m_lumaIntraDir, sizeof(uint8_t) * numPartition);
-                memcpy(&m_frame->m_intraData->partSizes[ctu.m_cuAddr * numPartition], bestCU->m_partSize, sizeof(uint8_t) * numPartition);
+                CUData *bestCU = &m_modeDepth[0].bestMode->cu;
+                memcpy(&m_intraDataCTU->depth[ctu.m_cuAddr * numPartition], bestCU->m_cuDepth, sizeof(uint8_t) * numPartition);
+                memcpy(&m_intraDataCTU->modes[ctu.m_cuAddr * numPartition], bestCU->m_lumaIntraDir, sizeof(uint8_t) * numPartition);
+                memcpy(&m_intraDataCTU->partSizes[ctu.m_cuAddr * numPartition], bestCU->m_partSize, sizeof(uint8_t) * numPartition);
             }
         }
     }
     else
     {
-        if (m_param->analysisMode)
-            m_interAnalysisData = m_frame->m_interData + (ctu.m_cuAddr * (CUGeom::MAX_GEOMS * NUM_SIZES));
-
         if (!m_param->rdLevel)
         {
             /* In RD Level 0/1, copy source pixels into the reconstructed block so
-             * they are available for intra predictions */
+            * they are available for intra predictions */
             m_modeDepth[0].fencYuv.copyToPicYuv(*m_frame->m_reconPic, ctu.m_cuAddr, 0);
-            
+
             compressInterCU_rd0_4(ctu, cuGeom);
 
             /* generate residual for entire CTU at once and copy to reconPic */
@@ -196,7 +199,7 @@ void Analysis::tryLossless(const CUGeom&
     }
 }
 
-void Analysis::compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom, x265_intra_data* shared, uint32_t& zOrder)
+void Analysis::compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom, analysis_intra_data* intraData, uint32_t& zOrder)
 {
     uint32_t depth = cuGeom.depth;
     ModeDepth& md = m_modeDepth[depth];
@@ -205,11 +208,11 @@ void Analysis::compressIntraCU(const CUD
     bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
     bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
 
-    if (shared)
+    if (intraData)
     {
-        uint8_t* sharedDepth = &shared->depth[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
-        char* sharedPartSizes = &shared->partSizes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
-        uint8_t* sharedModes = &shared->modes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
+        uint8_t* sharedDepth  = &intraData->depth[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
+        uint8_t* sharedModes  = &intraData->modes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
+        char* sharedPartSizes = &intraData->partSizes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
 
         if (mightNotSplit && depth == sharedDepth[zOrder] && zOrder == cuGeom.encodeIdx)
         {
@@ -273,7 +276,7 @@ void Analysis::compressIntraCU(const CUD
             {
                 m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.encodeIdx);
                 m_rqt[nextDepth].cur.load(*nextContext);
-                compressIntraCU(parentCTU, childGeom, shared, zOrder);
+                compressIntraCU(parentCTU, childGeom, intraData, zOrder);
 
                 // Save best CU and pred data for this sub CU
                 splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
@@ -1447,19 +1450,19 @@ void Analysis::checkInter_rd0_4(Mode& in
     interMode.cu.setPredModeSubParts(MODE_INTER);
     int numPredDir = m_slice->isInterP() ? 1 : 2;
 
-    if (m_param->analysisMode == X265_ANALYSIS_LOAD && m_interAnalysisData)
+    if (m_param->analysisMode == X265_ANALYSIS_LOAD && m_interDataCTU)
     {
         for (uint32_t part = 0; part < interMode.cu.getNumPartInter(); part++)
         {
             MotionData* bestME = interMode.bestME[part];
             for (int32_t i = 0; i < numPredDir; i++)
             {
-                bestME[i].costZero = !!m_interAnalysisData->costZero[i];
-                bestME[i].mv.x = m_interAnalysisData->mvx[i];
-                bestME[i].mv.y = m_interAnalysisData->mvy[i];
-                bestME[i].ref = m_interAnalysisData->ref[i];
+                bestME[i].mv.x = m_interDataCTU->mvx;
+                bestME[i].mv.y = m_interDataCTU->mvy;
+                bestME[i].ref = m_interDataCTU->ref;
+                bestME[i].costZero = !!m_interDataCTU->costZero;
+                m_interDataCTU++;
             }
-            m_interAnalysisData++;
         }
     }
     if (predInterSearch(interMode, cuGeom, false, false))
@@ -1470,21 +1473,19 @@ void Analysis::checkInter_rd0_4(Mode& in
         interMode.distortion = primitives.sa8d[cuGeom.log2CUSize - 2](fencYuv.m_buf[0], fencYuv.m_size, predYuv.m_buf[0], predYuv.m_size);
         interMode.sa8dCost = m_rdCost.calcRdSADCost(interMode.distortion, interMode.sa8dBits);
 
-        if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_interAnalysisData)
+        if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_interDataCTU)
         {
             for (uint32_t part = 0; part < interMode.cu.getNumPartInter(); part++)
             {
                 MotionData* bestME = interMode.bestME[part];
                 for (int32_t i = 0; i < numPredDir; i++)
                 {
-                    m_interAnalysisData->costZero[i] = bestME[i].costZero;
-                    m_interAnalysisData->mvx[i] = bestME[i].mv.x;
-                    m_interAnalysisData->mvy[i] = bestME[i].mv.y;
-                    m_interAnalysisData->ref[i] = bestME[i].ref;
+                    m_interDataCTU->mvx = bestME[i].mv.x;
+                    m_interDataCTU->mvy = bestME[i].mv.y;
+                    m_interDataCTU->ref = bestME[i].ref;
+                    m_interDataCTU->costZero = bestME[i].costZero;
+                    m_interDataCTU++;
                 }
-                m_interAnalysisData->zOrder = cuGeom.encodeIdx;
-                m_interAnalysisData->depth  = cuGeom.depth;
-                m_interAnalysisData++;
             }
         }
     }
@@ -1502,40 +1503,38 @@ void Analysis::checkInter_rd5_6(Mode& in
     interMode.cu.setPredModeSubParts(MODE_INTER);
     int numPredDir = m_slice->isInterP() ? 1 : 2;
 
-    if (m_param->analysisMode == X265_ANALYSIS_LOAD && m_interAnalysisData)
+    if (m_param->analysisMode == X265_ANALYSIS_LOAD && m_interDataCTU)
     {
         for (uint32_t part = 0; part < interMode.cu.getNumPartInter(); part++)
         {
             MotionData* bestME = interMode.bestME[part];
             for (int32_t i = 0; i < numPredDir; i++)
             {
-                bestME[i].costZero = !!m_interAnalysisData->costZero[i];
-                bestME[i].mv.x = m_interAnalysisData->mvx[i];
-                bestME[i].mv.y = m_interAnalysisData->mvy[i];
-                bestME[i].ref = m_interAnalysisData->ref[i];
+                bestME[i].mv.x = m_interDataCTU->mvx;
+                bestME[i].mv.y = m_interDataCTU->mvy;
+                bestME[i].ref = m_interDataCTU->ref;
+                bestME[i].costZero = !!m_interDataCTU->costZero;
+                m_interDataCTU++;
             }
-            m_interAnalysisData++;
         }
     }
     if (predInterSearch(interMode, cuGeom, bMergeOnly, true))
     {
         /* predInterSearch sets interMode.sa8dBits, but this is ignored */
         encodeResAndCalcRdInterCU(interMode, cuGeom);
-        if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_interAnalysisData)
+        if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_interDataCTU)
         {
             for (uint32_t part = 0; part < interMode.cu.getNumPartInter(); part++)
             {
                 MotionData* bestME = interMode.bestME[part];
                 for (int32_t i = 0; i < numPredDir; i++)
                 {
-                    m_interAnalysisData->costZero[i] = bestME[i].costZero;
-                    m_interAnalysisData->mvx[i] = bestME[i].mv.x;
-                    m_interAnalysisData->mvy[i] = bestME[i].mv.y;
-                    m_interAnalysisData->ref[i] = bestME[i].ref;
+                    m_interDataCTU->mvx = bestME[i].mv.x;


More information about the x265-commits mailing list