[x265] [PATCH] cleanup: align NR buffer for asm, rearrange member variables to avoid padding
dnyaneshwar at multicorewareinc.com
dnyaneshwar at multicorewareinc.com
Mon Sep 28 11:06:30 CEST 2015
# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1443431081 -19800
# Mon Sep 28 14:34:41 2015 +0530
# Node ID b5bd2f7a29fdc602b2a9f6c6eee68e3e85a7ea0b
# Parent f4c267f28487161fa78c43cabb30dc4f4f82570c
cleanup: align NR buffer for asm, rearrange member variables to avoid padding
diff -r f4c267f28487 -r b5bd2f7a29fd source/common/slice.h
--- a/source/common/slice.h Mon Sep 28 13:38:33 2015 +0530
+++ b/source/common/slice.h Mon Sep 28 14:34:41 2015 +0530
@@ -105,6 +105,12 @@
struct ProfileTierLevel
{
+ int profileIdc;
+ int levelIdc;
+ uint32_t minCrForLevel;
+ uint32_t maxLumaSrForLevel;
+ uint32_t bitDepthConstraint;
+ int chromaFormatConstraint;
bool tierFlag;
bool progressiveSourceFlag;
bool interlacedSourceFlag;
@@ -114,12 +120,6 @@
bool intraConstraintFlag;
bool onePictureOnlyConstraintFlag;
bool lowerBitRateConstraintFlag;
- int profileIdc;
- int levelIdc;
- uint32_t minCrForLevel;
- uint32_t maxLumaSrForLevel;
- uint32_t bitDepthConstraint;
- int chromaFormatConstraint;
};
struct HRDInfo
@@ -152,21 +152,21 @@
struct VPS
{
+ HRDInfo hrdParameters;
+ ProfileTierLevel ptl;
uint32_t maxTempSubLayers;
uint32_t numReorderPics;
uint32_t maxDecPicBuffering;
uint32_t maxLatencyIncrease;
- HRDInfo hrdParameters;
- ProfileTierLevel ptl;
};
struct Window
{
- bool bEnabled;
int leftOffset;
int rightOffset;
int topOffset;
int bottomOffset;
+ bool bEnabled;
Window()
{
@@ -176,35 +176,29 @@
struct VUI
{
- bool aspectRatioInfoPresentFlag;
int aspectRatioIdc;
int sarWidth;
int sarHeight;
-
- bool overscanInfoPresentFlag;
- bool overscanAppropriateFlag;
-
- bool videoSignalTypePresentFlag;
int videoFormat;
- bool videoFullRangeFlag;
-
- bool colourDescriptionPresentFlag;
int colourPrimaries;
int transferCharacteristics;
int matrixCoefficients;
-
- bool chromaLocInfoPresentFlag;
int chromaSampleLocTypeTopField;
int chromaSampleLocTypeBottomField;
- Window defaultDisplayWindow;
-
+ bool aspectRatioInfoPresentFlag;
+ bool overscanInfoPresentFlag;
+ bool overscanAppropriateFlag;
+ bool videoSignalTypePresentFlag;
+ bool videoFullRangeFlag;
+ bool colourDescriptionPresentFlag;
+ bool chromaLocInfoPresentFlag;
bool frameFieldInfoPresentFlag;
bool fieldSeqFlag;
+ bool hrdParametersPresentFlag;
- bool hrdParametersPresentFlag;
HRDInfo hrdParameters;
-
+ Window defaultDisplayWindow;
TimingInfo timingInfo;
};
@@ -236,8 +230,6 @@
uint32_t quadtreeTUMaxDepthInter; // use param
uint32_t quadtreeTUMaxDepthIntra; // use param
- bool bUseSAO; // use param
- bool bUseAMP; // use param
uint32_t maxAMPDepth;
uint32_t maxTempSubLayers; // max number of Temporal Sub layers
@@ -245,6 +237,8 @@
uint32_t maxLatencyIncrease;
int numReorderPics;
+ bool bUseSAO; // use param
+ bool bUseAMP; // use param
bool bUseStrongIntraSmoothing; // use param
bool bTemporalMVPEnabled;
@@ -270,6 +264,8 @@
uint32_t maxCuDQPDepth;
int chromaQpOffset[2]; // use param
+ int deblockingFilterBetaOffsetDiv2;
+ int deblockingFilterTcOffsetDiv2;
bool bUseWeightPred; // use param
bool bUseWeightedBiPred; // use param
@@ -283,17 +279,15 @@
bool bDeblockingFilterControlPresent;
bool bPicDisableDeblockingFilter;
- int deblockingFilterBetaOffsetDiv2;
- int deblockingFilterTcOffsetDiv2;
};
struct WeightParam
{
// Explicit weighted prediction parameters parsed in slice header,
- bool bPresentFlag;
uint32_t log2WeightDenom;
int inputWeight;
int inputOffset;
+ bool bPresentFlag;
/* makes a non-h265 weight (i.e. fix7), into an h265 weight */
void setFromWeightAndOffset(int w, int o, int denom, bool bNormalize)
@@ -325,6 +319,9 @@
const SPS* m_sps;
const PPS* m_pps;
+ Frame* m_refFrameList[2][MAX_NUM_REF + 1];
+ PicYuv* m_refReconPicList[2][MAX_NUM_REF + 1];
+
WeightParam m_weightPredTable[2][MAX_NUM_REF][3]; // [list][refIdx][0:Y, 1:U, 2:V]
MotionReference (*m_mref)[MAX_NUM_REF + 1];
RPS m_rps;
@@ -333,21 +330,19 @@
SliceType m_sliceType;
int m_sliceQp;
int m_poc;
-
int m_lastIDR;
+ uint32_t m_colRefIdx; // never modified
+
+ int m_numRefIdx[2];
+ int m_refPOCList[2][MAX_NUM_REF + 1];
+
+ uint32_t m_maxNumMergeCand; // use param
+ uint32_t m_endCUAddr;
+
bool m_bCheckLDC; // TODO: is this necessary?
bool m_sLFaseFlag; // loop filter boundary flag
bool m_colFromL0Flag; // collocated picture from List0 or List1 flag
- uint32_t m_colRefIdx; // never modified
-
- int m_numRefIdx[2];
- Frame* m_refFrameList[2][MAX_NUM_REF + 1];
- PicYuv* m_refReconPicList[2][MAX_NUM_REF + 1];
- int m_refPOCList[2][MAX_NUM_REF + 1];
-
- uint32_t m_maxNumMergeCand; // use param
- uint32_t m_endCUAddr;
Slice()
{
diff -r f4c267f28487 -r b5bd2f7a29fd source/encoder/encoder.h
--- a/source/encoder/encoder.h Mon Sep 28 13:38:33 2015 +0530
+++ b/source/encoder/encoder.h Mon Sep 28 14:34:41 2015 +0530
@@ -45,10 +45,10 @@
double m_psnrSumV;
double m_globalSsim;
double m_totalQp;
+ double m_maxFALL;
uint64_t m_accBits;
uint32_t m_numPics;
uint16_t m_maxCLL;
- double m_maxFALL;
EncStats()
{
@@ -79,62 +79,58 @@
{
public:
+ ALIGN_VAR_16(uint32_t, m_residualSumEmergency[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS]);
+ uint32_t m_countEmergency[MAX_NUM_TR_CATEGORIES];
+ uint16_t (*m_offsetEmergency)[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
+
+ int64_t m_firstPts;
+ int64_t m_bframeDelayTime;
+ int64_t m_prevReorderedPts[2];
+ int64_t m_encodeStartTime;
+
int m_pocLast; // time index (POC)
int m_encodedFrameNum;
int m_outputCount;
+ int m_bframeDelay;
+ int m_numPools;
+ int m_curEncoder;
- int m_bframeDelay;
- int64_t m_firstPts;
- int64_t m_bframeDelayTime;
- int64_t m_prevReorderedPts[2];
+ // weighted prediction
+ int m_numLumaWPFrames; // number of P frames with weighted luma reference
+ int m_numChromaWPFrames; // number of P frames with weighted chroma reference
+ int m_numLumaWPBiFrames; // number of B frames with weighted luma reference
+ int m_numChromaWPBiFrames; // number of B frames with weighted chroma reference
+ int m_conformanceMode;
+ int m_lastBPSEI;
+ uint32_t m_numDelayedPic;
ThreadPool* m_threadPool;
FrameEncoder* m_frameEncoder[X265_MAX_FRAME_THREADS];
DPB* m_dpb;
-
Frame* m_exportedPic;
-
- int m_numPools;
- int m_curEncoder;
+ FILE* m_analysisFile;
+ x265_param* m_param;
+ x265_param* m_latestParam;
+ RateControl* m_rateControl;
+ Lookahead* m_lookahead;
/* Collect statistics globally */
EncStats m_analyzeAll;
EncStats m_analyzeI;
EncStats m_analyzeP;
EncStats m_analyzeB;
- int64_t m_encodeStartTime;
-
- // weighted prediction
- int m_numLumaWPFrames; // number of P frames with weighted luma reference
- int m_numChromaWPFrames; // number of P frames with weighted chroma reference
- int m_numLumaWPBiFrames; // number of B frames with weighted luma reference
- int m_numChromaWPBiFrames; // number of B frames with weighted chroma reference
- FILE* m_analysisFile;
- int m_conformanceMode;
VPS m_vps;
SPS m_sps;
PPS m_pps;
NALList m_nalList;
ScalingList m_scalingList; // quantization matrix information
+ Window m_conformanceWindow;
bool m_emitCLLSEI;
- int m_lastBPSEI;
- uint32_t m_numDelayedPic;
-
- x265_param* m_param;
- x265_param* m_latestParam;
- RateControl* m_rateControl;
- Lookahead* m_lookahead;
- Window m_conformanceWindow;
-
bool m_bZeroLatency; // x265_encoder_encode() returns NALs for the input picture, zero lag
bool m_aborted; // fatal error detected
bool m_reconfigured; // reconfigure of encoder detected
- uint32_t m_residualSumEmergency[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
- uint16_t (*m_offsetEmergency)[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
- uint32_t m_countEmergency[MAX_NUM_TR_CATEGORIES];
-
Encoder();
~Encoder() {}
diff -r f4c267f28487 -r b5bd2f7a29fd source/encoder/slicetype.h
--- a/source/encoder/slicetype.h Mon Sep 28 13:38:33 2015 +0530
+++ b/source/encoder/slicetype.h Mon Sep 28 14:34:41 2015 +0530
@@ -104,22 +104,14 @@
PicList m_outputQueue; // pictures to be encoded, in encode order
Lock m_inputLock;
Lock m_outputLock;
-
- /* pre-lookahead */
- int m_fullQueueSize;
- bool m_isActive;
- bool m_sliceTypeBusy;
- bool m_bAdaptiveQuant;
- bool m_outputSignalRequired;
- bool m_bBatchMotionSearch;
- bool m_bBatchFrameCosts;
Event m_outputSignal;
-
LookaheadTLD* m_tld;
x265_param* m_param;
Lowres* m_lastNonB;
int* m_scratch; // temp buffer for cutree propagate
-
+
+ /* pre-lookahead */
+ int m_fullQueueSize;
int m_histogram[X265_BFRAME_MAX + 1];
int m_lastKeyframe;
int m_8x8Width;
@@ -127,6 +119,13 @@
int m_8x8Blocks;
int m_numCoopSlices;
int m_numRowsPerSlice;
+
+ bool m_isActive;
+ bool m_sliceTypeBusy;
+ bool m_bAdaptiveQuant;
+ bool m_outputSignalRequired;
+ bool m_bBatchMotionSearch;
+ bool m_bBatchFrameCosts;
bool m_filled;
bool m_isSceneTransition;
Lookahead(x265_param *param, ThreadPool *pool);
More information about the x265-devel mailing list