[x265-commits] [x265] nal: greatly simplify NAL header generation, do not use TComOutputBitstream
Steve Borho
steve at borho.org
Thu Jun 12 07:05:17 CEST 2014
details: http://hg.videolan.org/x265/rev/7868d22e535d
branches:
changeset: 7032:7868d22e535d
user: Steve Borho <steve at borho.org>
date: Wed Jun 11 12:46:04 2014 -0500
description:
nal: greatly simplify NAL header generation, do not use TComOutputBitstream
This saves a malloc/free and a great deal of needless overhead
Subject: [x265] nal: msvc and its integer conversion warnings
details: http://hg.videolan.org/x265/rev/b76c4e415cdd
branches:
changeset: 7033:b76c4e415cdd
user: Steve Borho <steve at borho.org>
date: Wed Jun 11 14:17:51 2014 -0500
description:
nal: msvc and its integer conversion warnings
Subject: [x265] frameencoder: remove obsolete check
details: http://hg.videolan.org/x265/rev/ea6f7da090ef
branches:
changeset: 7034:ea6f7da090ef
user: Steve Borho <steve at borho.org>
date: Wed Jun 11 21:27:17 2014 -0500
description:
frameencoder: remove obsolete check
Subject: [x265] TComDataCU: nits
details: http://hg.videolan.org/x265/rev/ebe26cc5fdd8
branches:
changeset: 7035:ebe26cc5fdd8
user: Steve Borho <steve at borho.org>
date: Wed Jun 11 21:27:24 2014 -0500
description:
TComDataCU: nits
Subject: [x265] compress: remove floating point math to avoid needing to use EMMS
details: http://hg.videolan.org/x265/rev/8da75c4dbbc0
branches:
changeset: 7036:8da75c4dbbc0
user: Steve Borho <steve at borho.org>
date: Wed Jun 11 21:35:08 2014 -0500
description:
compress: remove floating point math to avoid needing to use EMMS
Subject: [x265] weight: ensure weight table is initialized on failure, simplify weightAnalyse()
details: http://hg.videolan.org/x265/rev/10a4c2d42d3f
branches:
changeset: 7037:10a4c2d42d3f
user: Steve Borho <steve at borho.org>
date: Wed Jun 11 21:42:36 2014 -0500
description:
weight: ensure weight table is initialized on failure, simplify weightAnalyse()
Subject: [x265] compress: nit comment spacing
details: http://hg.videolan.org/x265/rev/e8df9b57eb09
branches:
changeset: 7038:e8df9b57eb09
user: Steve Borho <steve at borho.org>
date: Wed Jun 11 21:35:54 2014 -0500
description:
compress: nit comment spacing
diffstat:
source/Lib/TLibCommon/TComDataCU.h | 16 +++++++-------
source/encoder/compress.cpp | 41 ++++++++++++++++--------------------
source/encoder/frameencoder.cpp | 5 ----
source/encoder/nal.cpp | 25 +++++++++------------
source/encoder/weightPrediction.cpp | 28 +++++++++++-------------
5 files changed, 50 insertions(+), 65 deletions(-)
diffs (truncated from 344 to 300 lines):
diff -r 20d74192e097 -r e8df9b57eb09 source/Lib/TLibCommon/TComDataCU.h
--- a/source/Lib/TLibCommon/TComDataCU.h Tue Jun 10 20:07:24 2014 -0500
+++ b/source/Lib/TLibCommon/TComDataCU.h Wed Jun 11 21:35:54 2014 -0500
@@ -222,7 +222,7 @@ public:
char* getPartitionSize() { return m_partSizes; }
- PartSize getPartitionSize(uint32_t idx) { return static_cast<PartSize>(m_partSizes[idx]); }
+ PartSize getPartitionSize(uint32_t idx) { return static_cast<PartSize>(m_partSizes[idx]); }
void setPartSizeSubParts(PartSize eMode, uint32_t absPartIdx, uint32_t depth);
void setCUTransquantBypassSubParts(bool flag, uint32_t absPartIdx, uint32_t depth);
@@ -235,23 +235,23 @@ public:
char* getPredictionMode() { return m_predModes; }
- PredMode getPredictionMode(uint32_t idx) { return static_cast<PredMode>(m_predModes[idx]); }
+ PredMode getPredictionMode(uint32_t idx) { return static_cast<PredMode>(m_predModes[idx]); }
bool* getCUTransquantBypass() { return m_cuTransquantBypass; }
- bool getCUTransquantBypass(uint32_t idx) { return m_cuTransquantBypass[idx]; }
+ bool getCUTransquantBypass(uint32_t idx) { return m_cuTransquantBypass[idx]; }
void setPredModeSubParts(PredMode eMode, uint32_t absPartIdx, uint32_t depth);
uint8_t* getCUSize() { return m_cuSize; }
- uint8_t getCUSize(uint32_t idx) { return m_cuSize[idx]; }
+ uint8_t getCUSize(uint32_t idx) { return m_cuSize[idx]; }
- char* getQP() { return m_qp; }
+ char* getQP() { return m_qp; }
- char getQP(uint32_t idx) { return m_qp[idx]; }
+ char getQP(uint32_t idx) { return m_qp[idx]; }
- void setQP(uint32_t idx, char value) { m_qp[idx] = value; }
+ void setQP(uint32_t idx, char value) { m_qp[idx] = value; }
void setQPSubParts(int qp, uint32_t absPartIdx, uint32_t depth);
int getLastValidPartIdx(int absPartIdx);
@@ -447,7 +447,7 @@ public:
// member functions for RD cost storage
// -------------------------------------------------------------------------------------------------------------------
- uint32_t& getTotalNumPart() { return m_numPartitions; }
+ uint32_t& getTotalNumPart() { return m_numPartitions; }
uint32_t getCoefScanIdx(uint32_t absPartIdx, uint32_t log2TrSize, bool bIsLuma, bool bIsIntra);
diff -r 20d74192e097 -r e8df9b57eb09 source/encoder/compress.cpp
--- a/source/encoder/compress.cpp Tue Jun 10 20:07:24 2014 -0500
+++ b/source/encoder/compress.cpp Wed Jun 11 21:35:54 2014 -0500
@@ -383,7 +383,7 @@ void TEncCu::xCompressInterCU(TComDataCU
char currentQP = outTempCU->getQP(0);
char previousQP = colocated0->getQP(0);
uint8_t delta = 0, minDepth0 = 4, minDepth1 = 4;
- double sum0 = 0, sum1 = 0, avgDepth0 = 0, avgDepth1 = 0, avgDepth = 0;
+ uint32_t sum0 = 0, sum1 = 0;
for (uint32_t i = 0; i < outTempCU->getTotalNumPart(); i = i + 4)
{
if (colocated0 && colocated0->getDepth(i) < minDepth0)
@@ -396,12 +396,9 @@ void TEncCu::xCompressInterCU(TComDataCU
sum1 += (colocated1->getDepth(i) * 4);
}
- avgDepth0 = sum0 / outTempCU->getTotalNumPart();
- avgDepth1 = sum1 / outTempCU->getTotalNumPart();
- avgDepth = (avgDepth0 + avgDepth1) / 2;
-
+ uint32_t avgDepth2 = (sum0 + sum1) / outTempCU->getTotalNumPart();
minDepth = X265_MIN(minDepth0, minDepth1);
- if (((currentQP - previousQP) < 0) || (((currentQP - previousQP) >= 0) && ((avgDepth - minDepth) > 0.5)))
+ if (((currentQP - previousQP) < 0) || (((currentQP - previousQP) >= 0) && ((avgDepth2 - 2 * minDepth) > 1)))
delta = 0;
else
delta = 1;
@@ -441,17 +438,17 @@ void TEncCu::xCompressInterCU(TComDataCU
if (!earlyskip)
{
- /*Compute 2Nx2N mode costs*/
+ /* Compute 2Nx2N mode costs */
{
xComputeCostInter(m_interCU_2Nx2N[depth], m_modePredYuv[0][depth], SIZE_2Nx2N);
- /*Choose best mode; initialise outBestCU to 2Nx2N*/
+ /* Choose best mode; initialise outBestCU to 2Nx2N */
outBestCU = m_interCU_2Nx2N[depth];
tempYuv = m_modePredYuv[0][depth];
m_modePredYuv[0][depth] = m_bestPredYuv[depth];
m_bestPredYuv[depth] = tempYuv;
}
- /*Compute Rect costs*/
+ /* Compute Rect costs */
if (m_param->bEnableRectInter)
{
xComputeCostInter(m_interCU_Nx2N[depth], m_modePredYuv[1][depth], SIZE_Nx2N);
@@ -476,7 +473,7 @@ void TEncCu::xCompressInterCU(TComDataCU
if (m_param->rdLevel > 2)
{
- //calculate the motion compensation for chroma for the best mode selected
+ // calculate the motion compensation for chroma for the best mode selected
int numPart = outBestCU->getNumPartInter();
for (int partIdx = 0; partIdx < numPart; partIdx++)
{
@@ -501,7 +498,7 @@ void TEncCu::xCompressInterCU(TComDataCU
/* Check for Intra in inter frames only if its a P-slice*/
if (slice->getSliceType() == P_SLICE)
{
- /*compute intra cost */
+ /* compute intra cost */
bool bdoIntra = true;
if (m_param->rdLevel > 2)
@@ -612,7 +609,7 @@ void TEncCu::xCompressInterCU(TComDataCU
m_bestMergeRecoYuv[depth] = tempYuv;
}
- if (m_param->rdLevel > 0) //checkDQP can be done only after residual encoding is done
+ if (m_param->rdLevel > 0) // checkDQP can be done only after residual encoding is done
xCheckDQP(outBestCU);
/* Disable recursive analysis for whole CUs temporarily */
if ((outBestCU != 0) && (outBestCU->isSkipped(0)))
@@ -649,7 +646,6 @@ void TEncCu::xCompressInterCU(TComDataCU
#endif
{
uint64_t totalCostNeigh = 0, totalCostCU = 0, totalCountNeigh = 0, totalCountCU = 0;
- double avgCost = 0;
TComDataCU* above = outTempCU->getCUAbove();
TComDataCU* aboveLeft = outTempCU->getCUAboveLeft();
TComDataCU* aboveRight = outTempCU->getCUAboveRight();
@@ -679,13 +675,12 @@ void TEncCu::xCompressInterCU(TComDataCU
totalCountNeigh += left->m_count[depth];
}
- //giving 60% weight to all CU's and 40% weight to neighbour CU's
+ // give 60% weight to all CU's and 40% weight to neighbour CU's
+ uint64_t avgCost = 0;
if (totalCountNeigh + totalCountCU)
- avgCost = ((0.6 * totalCostCU) + (0.4 * totalCostNeigh)) / ((0.6 * totalCountCU) + (0.4 * totalCountNeigh));
+ avgCost = ((3 * totalCostCU) + (2 * totalCostNeigh)) / ((3 * totalCountCU) + (2 * totalCountNeigh));
- float lambda = 1.0f;
-
- if (outBestCU->m_totalRDCost < lambda * avgCost && avgCost != 0 && depth != 0)
+ if (outBestCU->m_totalRDCost < avgCost && avgCost != 0 && depth != 0)
{
/* Copy Best data to Picture for next partition prediction. */
outBestCU->copyToPic((uint8_t)depth);
@@ -710,7 +705,7 @@ void TEncCu::xCompressInterCU(TComDataCU
((subTempPartCU->getCUPelX() < slice->getSPS()->getPicWidthInLumaSamples()) &&
(subTempPartCU->getCUPelY() < slice->getSPS()->getPicHeightInLumaSamples())))
{
- if (0 == nextDepth_partIndex) //initialize RD with previous depth buffer
+ if (0 == nextDepth_partIndex) // initialize RD with previous depth buffer
{
m_rdSbacCoders[nextDepth][CI_CURR_BEST]->load(m_rdSbacCoders[depth][CI_CURR_BEST]);
}
@@ -873,7 +868,7 @@ void TEncCu::encodeResidue(TComDataCU* l
{
if (!lcu->getSkipFlag(absPartIdx))
{
- //Calculate Residue
+ // Calculate Residue
pixel* src2 = m_bestPredYuv[0]->getLumaAddr(absPartIdx);
pixel* src1 = m_origYuv[0]->getLumaAddr(absPartIdx);
int16_t* dst = m_tmpResiYuv[depth]->getLumaAddr();
@@ -897,7 +892,7 @@ void TEncCu::encodeResidue(TComDataCU* l
dststride = m_tmpResiYuv[depth]->m_cwidth;
primitives.chroma[m_param->internalCsp].sub_ps[part](dst, dststride, src1, src2, src1stride, src2stride);
- //Residual encoding
+ // Residual encoding
m_search->residualTransformQuantInter(cu, 0, m_tmpResiYuv[depth], cu->getDepth(0), true);
xCheckDQP(cu);
@@ -910,7 +905,7 @@ void TEncCu::encodeResidue(TComDataCU* l
{
cu->copyCodedToPic(depth);
- //Generate Recon
+ // Generate Recon
pixel* pred = m_bestPredYuv[0]->getLumaAddr(absPartIdx);
int16_t* res = m_tmpResiYuv[depth]->getLumaAddr();
pixel* reco = m_bestRecoYuv[depth]->getLumaAddr();
@@ -937,7 +932,7 @@ void TEncCu::encodeResidue(TComDataCU* l
}
}
- //Generate Recon
+ // Generate Recon
TComPicYuv* rec = pic->getPicYuvRec();
int part = partitionFromSize(cu->getCUSize(0));
pixel* src = m_bestPredYuv[0]->getLumaAddr(absPartIdx);
diff -r 20d74192e097 -r e8df9b57eb09 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp Tue Jun 10 20:07:24 2014 -0500
+++ b/source/encoder/frameencoder.cpp Wed Jun 11 21:35:54 2014 -0500
@@ -494,14 +494,9 @@ void FrameEncoder::compressFrame()
bool bUseWeightP = slice->getSliceType() == P_SLICE && slice->getPPS()->getUseWP();
bool bUseWeightB = slice->getSliceType() == B_SLICE && slice->getPPS()->getWPBiPred();
if (bUseWeightP || bUseWeightB)
- {
- X265_CHECK(slice->getPPS()->getUseWP(), "weightp not enabled in PPS, but in use\n");
weightAnalyse(*slice, *m_cfg->m_param);
- }
else
- {
slice->resetWpScaling();
- }
// Generate motion references
int numPredDir = slice->isInterP() ? 1 : slice->isInterB() ? 2 : 0;
diff -r 20d74192e097 -r e8df9b57eb09 source/encoder/nal.cpp
--- a/source/encoder/nal.cpp Tue Jun 10 20:07:24 2014 -0500
+++ b/source/encoder/nal.cpp Wed Jun 11 21:35:54 2014 -0500
@@ -30,27 +30,24 @@ namespace x265 {
void NALUnit::serialize(NalUnitType nalUnitType, const TComOutputBitstream& bs)
{
- TComOutputBitstream header;
- header.write(0, 1); // forbidden_zero_bit
- header.write(nalUnitType, 6); // nal_unit_type
- header.write(0, 6); // nuh_reserved_zero_6bits
- header.write(1, 3); // nuh_temporal_id_plus1
-
- uint32_t headerSize = header.getNumberOfWrittenBytes();
- const uint8_t* hpayload = header.getFIFO();
-
uint32_t bitsSize = bs.getNumberOfWrittenBytes();
const uint8_t* bpayload = bs.getFIFO();
- if (!bpayload || !hpayload)
+ if (!bpayload)
return;
/* padded allocation for emulation prevention bytes */
- uint8_t* out = m_nalUnitData = X265_MALLOC(uint8_t, headerSize + bitsSize + (bitsSize >> 1));
+ uint8_t* out = m_nalUnitData = X265_MALLOC(uint8_t, 2 + bitsSize + (bitsSize >> 1));
if (!out)
return;
- memcpy(out, hpayload, headerSize);
- uint32_t bytes = headerSize;
+ /* 16bit NAL header:
+ * forbidden_zero_bit 1-bit
+ * nal_unit_type 6-bits
+ * nuh_reserved_zero_6bits 6-bits
+ * nuh_temporal_id_plus1 3-bits */
+ out[0] = (uint8_t)nalUnitType << 1;
+ out[1] = 1;
+ uint32_t bytes = 2;
/* 7.4.1 ...
* Within the NAL unit, the following three-byte sequences shall not occur at
@@ -80,7 +77,7 @@ void NALUnit::serialize(NalUnitType nalU
if (!out[bytes - 1])
out[bytes++] = 0x03;
- X265_CHECK(bytes <= headerSize + bitsSize + (bitsSize >> 1), "NAL buffer overflow\n");
+ X265_CHECK(bytes <= 2 + bitsSize + (bitsSize >> 1), "NAL buffer overflow\n");
m_nalUnitType = nalUnitType;
m_packetSize = bytes;
diff -r 20d74192e097 -r e8df9b57eb09 source/encoder/weightPrediction.cpp
--- a/source/encoder/weightPrediction.cpp Tue Jun 10 20:07:24 2014 -0500
+++ b/source/encoder/weightPrediction.cpp Wed Jun 11 21:35:54 2014 -0500
@@ -30,7 +30,7 @@
#include "bitstream.h"
using namespace x265;
-namespace weightp {
+namespace {
struct Cache
{
const int * intraCost;
@@ -225,13 +225,16 @@ uint32_t weightCost(pixel * fenc
x265_emms();
return cost;
}
+}
-void analyzeWeights(TComSlice& slice, x265_param& param, wpScalingParam wp[2][MAX_NUM_REF][3])
+namespace x265 {
+void weightAnalyse(TComSlice& slice, x265_param& param)
{
+ wpScalingParam wp[2][MAX_NUM_REF][3];
TComPicYuv *fencYuv = slice.getPic()->getPicYuvOrg();
Lowres& fenc = slice.getPic()->m_lowres;
- weightp::Cache cache;
+ Cache cache;
memset(&cache, 0, sizeof(cache));
cache.intraCost = fenc.intraCost;
@@ -245,7 +248,10 @@ void analyzeWeights(TComSlice& slice, x2
/* Use single allocation for motion compensated ref and weight buffers */
pixel *mcbuf = X265_MALLOC(pixel, 2 * fencYuv->getStride() * fencYuv->getHeight());
if (!mcbuf)
More information about the x265-commits mailing list