[x265] [PATCH] cleanup: align NR buffer for asm, rearrange member variables to avoid padding

dnyaneshwar at multicorewareinc.com dnyaneshwar at multicorewareinc.com
Mon Sep 28 11:06:30 CEST 2015


# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1443431081 -19800
#      Mon Sep 28 14:34:41 2015 +0530
# Node ID b5bd2f7a29fdc602b2a9f6c6eee68e3e85a7ea0b
# Parent  f4c267f28487161fa78c43cabb30dc4f4f82570c
cleanup: align NR buffer for asm, rearrange member variables to avoid padding

diff -r f4c267f28487 -r b5bd2f7a29fd source/common/slice.h
--- a/source/common/slice.h	Mon Sep 28 13:38:33 2015 +0530
+++ b/source/common/slice.h	Mon Sep 28 14:34:41 2015 +0530
@@ -105,6 +105,12 @@
 
 struct ProfileTierLevel
 {
+    int      profileIdc;
+    int      levelIdc;
+    uint32_t minCrForLevel;
+    uint32_t maxLumaSrForLevel;
+    uint32_t bitDepthConstraint;
+    int      chromaFormatConstraint;
     bool     tierFlag;
     bool     progressiveSourceFlag;
     bool     interlacedSourceFlag;
@@ -114,12 +120,6 @@
     bool     intraConstraintFlag;
     bool     onePictureOnlyConstraintFlag;
     bool     lowerBitRateConstraintFlag;
-    int      profileIdc;
-    int      levelIdc;
-    uint32_t minCrForLevel;
-    uint32_t maxLumaSrForLevel;
-    uint32_t bitDepthConstraint;
-    int      chromaFormatConstraint;
 };
 
 struct HRDInfo
@@ -152,21 +152,21 @@
 
 struct VPS
 {
+    HRDInfo          hrdParameters;
+    ProfileTierLevel ptl;
     uint32_t         maxTempSubLayers;
     uint32_t         numReorderPics;
     uint32_t         maxDecPicBuffering;
     uint32_t         maxLatencyIncrease;
-    HRDInfo          hrdParameters;
-    ProfileTierLevel ptl;
 };
 
 struct Window
 {
-    bool bEnabled;
     int  leftOffset;
     int  rightOffset;
     int  topOffset;
     int  bottomOffset;
+    bool bEnabled;
 
     Window()
     {
@@ -176,35 +176,29 @@
 
 struct VUI
 {
-    bool       aspectRatioInfoPresentFlag;
     int        aspectRatioIdc;
     int        sarWidth;
     int        sarHeight;
-
-    bool       overscanInfoPresentFlag;
-    bool       overscanAppropriateFlag;
-
-    bool       videoSignalTypePresentFlag;
     int        videoFormat;
-    bool       videoFullRangeFlag;
-
-    bool       colourDescriptionPresentFlag;
     int        colourPrimaries;
     int        transferCharacteristics;
     int        matrixCoefficients;
-
-    bool       chromaLocInfoPresentFlag;
     int        chromaSampleLocTypeTopField;
     int        chromaSampleLocTypeBottomField;
 
-    Window     defaultDisplayWindow;
-
+    bool       aspectRatioInfoPresentFlag;
+    bool       overscanInfoPresentFlag;
+    bool       overscanAppropriateFlag;
+    bool       videoSignalTypePresentFlag;
+    bool       videoFullRangeFlag;
+    bool       colourDescriptionPresentFlag;
+    bool       chromaLocInfoPresentFlag;
     bool       frameFieldInfoPresentFlag;
     bool       fieldSeqFlag;
+    bool       hrdParametersPresentFlag;
 
-    bool       hrdParametersPresentFlag;
     HRDInfo    hrdParameters;
-
+    Window     defaultDisplayWindow;
     TimingInfo timingInfo;
 };
 
@@ -236,8 +230,6 @@
     uint32_t quadtreeTUMaxDepthInter; // use param
     uint32_t quadtreeTUMaxDepthIntra; // use param
 
-    bool     bUseSAO; // use param
-    bool     bUseAMP; // use param
     uint32_t maxAMPDepth;
 
     uint32_t maxTempSubLayers;   // max number of Temporal Sub layers
@@ -245,6 +237,8 @@
     uint32_t maxLatencyIncrease;
     int      numReorderPics;
 
+    bool     bUseSAO; // use param
+    bool     bUseAMP; // use param
     bool     bUseStrongIntraSmoothing; // use param
     bool     bTemporalMVPEnabled;
 
@@ -270,6 +264,8 @@
     uint32_t maxCuDQPDepth;
 
     int      chromaQpOffset[2];      // use param
+    int      deblockingFilterBetaOffsetDiv2;
+    int      deblockingFilterTcOffsetDiv2;
 
     bool     bUseWeightPred;         // use param
     bool     bUseWeightedBiPred;     // use param
@@ -283,17 +279,15 @@
 
     bool     bDeblockingFilterControlPresent;
     bool     bPicDisableDeblockingFilter;
-    int      deblockingFilterBetaOffsetDiv2;
-    int      deblockingFilterTcOffsetDiv2;
 };
 
 struct WeightParam
 {
     // Explicit weighted prediction parameters parsed in slice header,
-    bool     bPresentFlag;
     uint32_t log2WeightDenom;
     int      inputWeight;
     int      inputOffset;
+    bool     bPresentFlag;
 
     /* makes a non-h265 weight (i.e. fix7), into an h265 weight */
     void setFromWeightAndOffset(int w, int o, int denom, bool bNormalize)
@@ -325,6 +319,9 @@
 
     const SPS*  m_sps;
     const PPS*  m_pps;
+    Frame*      m_refFrameList[2][MAX_NUM_REF + 1];
+    PicYuv*     m_refReconPicList[2][MAX_NUM_REF + 1];
+
     WeightParam m_weightPredTable[2][MAX_NUM_REF][3]; // [list][refIdx][0:Y, 1:U, 2:V]
     MotionReference (*m_mref)[MAX_NUM_REF + 1];
     RPS         m_rps;
@@ -333,21 +330,19 @@
     SliceType   m_sliceType;
     int         m_sliceQp;
     int         m_poc;
-    
     int         m_lastIDR;
 
+    uint32_t    m_colRefIdx;       // never modified
+
+    int         m_numRefIdx[2];
+    int         m_refPOCList[2][MAX_NUM_REF + 1];
+
+    uint32_t    m_maxNumMergeCand; // use param
+    uint32_t    m_endCUAddr;
+
     bool        m_bCheckLDC;       // TODO: is this necessary?
     bool        m_sLFaseFlag;      // loop filter boundary flag
     bool        m_colFromL0Flag;   // collocated picture from List0 or List1 flag
-    uint32_t    m_colRefIdx;       // never modified
-    
-    int         m_numRefIdx[2];
-    Frame*      m_refFrameList[2][MAX_NUM_REF + 1];
-    PicYuv*     m_refReconPicList[2][MAX_NUM_REF + 1];
-    int         m_refPOCList[2][MAX_NUM_REF + 1];
-
-    uint32_t    m_maxNumMergeCand; // use param
-    uint32_t    m_endCUAddr;
 
     Slice()
     {
diff -r f4c267f28487 -r b5bd2f7a29fd source/encoder/encoder.h
--- a/source/encoder/encoder.h	Mon Sep 28 13:38:33 2015 +0530
+++ b/source/encoder/encoder.h	Mon Sep 28 14:34:41 2015 +0530
@@ -45,10 +45,10 @@
     double        m_psnrSumV;
     double        m_globalSsim;
     double        m_totalQp;
+    double        m_maxFALL;
     uint64_t      m_accBits;
     uint32_t      m_numPics;
     uint16_t      m_maxCLL;
-    double        m_maxFALL;
 
     EncStats()
     {
@@ -79,62 +79,58 @@
 {
 public:
 
+    ALIGN_VAR_16(uint32_t, m_residualSumEmergency[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS]);
+    uint32_t           m_countEmergency[MAX_NUM_TR_CATEGORIES];
+    uint16_t           (*m_offsetEmergency)[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
+
+    int64_t            m_firstPts;
+    int64_t            m_bframeDelayTime;
+    int64_t            m_prevReorderedPts[2];
+    int64_t            m_encodeStartTime;
+
     int                m_pocLast;         // time index (POC)
     int                m_encodedFrameNum;
     int                m_outputCount;
+    int                m_bframeDelay;
+    int                m_numPools;
+    int                m_curEncoder;
 
-    int                m_bframeDelay;
-    int64_t            m_firstPts;
-    int64_t            m_bframeDelayTime;
-    int64_t            m_prevReorderedPts[2];
+    // weighted prediction
+    int                m_numLumaWPFrames;    // number of P frames with weighted luma reference
+    int                m_numChromaWPFrames;  // number of P frames with weighted chroma reference
+    int                m_numLumaWPBiFrames;  // number of B frames with weighted luma reference
+    int                m_numChromaWPBiFrames; // number of B frames with weighted chroma reference
+    int                m_conformanceMode;
+    int                m_lastBPSEI;
+    uint32_t           m_numDelayedPic;
 
     ThreadPool*        m_threadPool;
     FrameEncoder*      m_frameEncoder[X265_MAX_FRAME_THREADS];
     DPB*               m_dpb;
-
     Frame*             m_exportedPic;
-
-    int                m_numPools;
-    int                m_curEncoder;
+    FILE*              m_analysisFile;
+    x265_param*        m_param;
+    x265_param*        m_latestParam;
+    RateControl*       m_rateControl;
+    Lookahead*         m_lookahead;
 
     /* Collect statistics globally */
     EncStats           m_analyzeAll;
     EncStats           m_analyzeI;
     EncStats           m_analyzeP;
     EncStats           m_analyzeB;
-    int64_t            m_encodeStartTime;
-
-    // weighted prediction
-    int                m_numLumaWPFrames;    // number of P frames with weighted luma reference
-    int                m_numChromaWPFrames;  // number of P frames with weighted chroma reference
-    int                m_numLumaWPBiFrames;  // number of B frames with weighted luma reference
-    int                m_numChromaWPBiFrames; // number of B frames with weighted chroma reference
-    FILE*              m_analysisFile;
-    int                m_conformanceMode;
     VPS                m_vps;
     SPS                m_sps;
     PPS                m_pps;
     NALList            m_nalList;
     ScalingList        m_scalingList;      // quantization matrix information
+    Window             m_conformanceWindow;
 
     bool               m_emitCLLSEI;
-    int                m_lastBPSEI;
-    uint32_t           m_numDelayedPic;
-
-    x265_param*        m_param;
-    x265_param*        m_latestParam;
-    RateControl*       m_rateControl;
-    Lookahead*         m_lookahead;
-    Window             m_conformanceWindow;
-
     bool               m_bZeroLatency;     // x265_encoder_encode() returns NALs for the input picture, zero lag
     bool               m_aborted;          // fatal error detected
     bool               m_reconfigured;      // reconfigure of encoder detected
 
-    uint32_t           m_residualSumEmergency[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
-    uint16_t           (*m_offsetEmergency)[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
-    uint32_t           m_countEmergency[MAX_NUM_TR_CATEGORIES];
-
     Encoder();
     ~Encoder() {}
 
diff -r f4c267f28487 -r b5bd2f7a29fd source/encoder/slicetype.h
--- a/source/encoder/slicetype.h	Mon Sep 28 13:38:33 2015 +0530
+++ b/source/encoder/slicetype.h	Mon Sep 28 14:34:41 2015 +0530
@@ -104,22 +104,14 @@
     PicList       m_outputQueue;     // pictures to be encoded, in encode order
     Lock          m_inputLock;
     Lock          m_outputLock;
-
-    /* pre-lookahead */
-    int           m_fullQueueSize;
-    bool          m_isActive;
-    bool          m_sliceTypeBusy;
-    bool          m_bAdaptiveQuant;
-    bool          m_outputSignalRequired;
-    bool          m_bBatchMotionSearch;
-    bool          m_bBatchFrameCosts;
     Event         m_outputSignal;
-
     LookaheadTLD* m_tld;
     x265_param*   m_param;
     Lowres*       m_lastNonB;
     int*          m_scratch;         // temp buffer for cutree propagate
-    
+
+    /* pre-lookahead */
+    int           m_fullQueueSize;
     int           m_histogram[X265_BFRAME_MAX + 1];
     int           m_lastKeyframe;
     int           m_8x8Width;
@@ -127,6 +119,13 @@
     int           m_8x8Blocks;
     int           m_numCoopSlices;
     int           m_numRowsPerSlice;
+
+    bool          m_isActive;
+    bool          m_sliceTypeBusy;
+    bool          m_bAdaptiveQuant;
+    bool          m_outputSignalRequired;
+    bool          m_bBatchMotionSearch;
+    bool          m_bBatchFrameCosts;
     bool          m_filled;
     bool          m_isSceneTransition;
     Lookahead(x265_param *param, ThreadPool *pool);


More information about the x265-devel mailing list