[x265] refine intra neighbors
Satoshi Nakagawa
nakagawa424 at oki.com
Tue Dec 23 06:53:49 CET 2014
# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1419313799 -32400
# Tue Dec 23 14:49:59 2014 +0900
# Node ID 6b59452a17d75c42c1750d47e2318c8da80c39fb
# Parent 8d2f418829c894c25da79daa861f16c61e5060d7
refine intra neighbors
diff -r 8d2f418829c8 -r 6b59452a17d7 source/common/common.h
--- a/source/common/common.h Sat Dec 20 21:27:14 2014 +0900
+++ b/source/common/common.h Tue Dec 23 14:49:59 2014 +0900
@@ -163,6 +163,9 @@
template<typename T>
inline T x265_max(T a, T b) { return a > b ? a : b; }
+template<typename T> // x265_clip3: clamp a into [minVal, maxVal]; the lower bound is applied first, then the upper bound
+inline T x265_clip3(T minVal, T maxVal, T a) { return x265_min(x265_max(minVal, a), maxVal); }
+
typedef int16_t coeff_t; // transform coefficient
#define X265_MIN(a, b) ((a) < (b) ? (a) : (b))
diff -r 8d2f418829c8 -r 6b59452a17d7 source/common/cudata.cpp
--- a/source/common/cudata.cpp Sat Dec 20 21:27:14 2014 +0900
+++ b/source/common/cudata.cpp Tue Dec 23 14:49:59 2014 +0900
@@ -608,7 +608,7 @@
{
if (curPartUnitIdx > g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + 1])
{
- uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1;
+ uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1;
arPartUnitIdx = g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + 1];
if (isEqualRowOrCol(absPartIdxRT, absZorderCUIdx, s_numPartInCUSize))
return m_encData->getPicCTU(m_cuAddr);
@@ -689,8 +689,6 @@
return NULL;
}
blPartUnitIdx = g_rasterToZscan[absPartIdxLB + (1 + partUnitOffset) * s_numPartInCUSize - 1];
- if (!m_cuLeft || !m_cuLeft->m_slice)
- return NULL;
return m_cuLeft;
}
@@ -723,8 +721,6 @@
return NULL;
}
arPartUnitIdx = g_rasterToZscan[absPartIdxRT + NUM_CU_PARTITIONS - s_numPartInCUSize + partUnitOffset];
- if (!m_cuAbove || !m_cuAbove->m_slice)
- return NULL;
return m_cuAbove;
}
@@ -732,8 +728,6 @@
return NULL;
arPartUnitIdx = g_rasterToZscan[NUM_CU_PARTITIONS - s_numPartInCUSize + partUnitOffset - 1];
- if ((m_cuAboveRight == NULL || m_cuAboveRight->m_slice == NULL || (m_cuAboveRight->m_cuAddr) > m_cuAddr))
- return NULL;
return m_cuAboveRight;
}
@@ -904,7 +898,7 @@
tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize;
tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize;
- tuDepthRange[0] = X265_MAX(tuDepthRange[0], X265_MIN(log2CUSize - (m_slice->m_sps->quadtreeTUMaxDepthIntra - 1 + splitFlag), tuDepthRange[1]));
+ tuDepthRange[0] = x265_clip3(tuDepthRange[0], tuDepthRange[1], log2CUSize - (m_slice->m_sps->quadtreeTUMaxDepthIntra - 1 + splitFlag));
}
void CUData::getInterTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const
@@ -916,7 +910,7 @@
tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize;
tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize;
- tuDepthRange[0] = X265_MAX(tuDepthRange[0], X265_MIN(log2CUSize - (quadtreeTUMaxDepth - 1 + splitFlag), tuDepthRange[1]));
+ tuDepthRange[0] = x265_clip3(tuDepthRange[0], tuDepthRange[1], log2CUSize - (quadtreeTUMaxDepth - 1 + splitFlag));
}
uint32_t CUData::getCtxSkipFlag(uint32_t absPartIdx) const
@@ -1363,14 +1357,6 @@
return outPartIdxRB;
}
-void CUData::deriveLeftRightTopIdxAdi(uint32_t& outPartIdxLT, uint32_t& outPartIdxRT, uint32_t partOffset, uint32_t partDepth) const
-{
- uint32_t numPartInWidth = 1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - partDepth);
-
- outPartIdxLT = m_absIdxInCTU + partOffset;
- outPartIdxRT = g_rasterToZscan[g_zscanToRaster[outPartIdxLT] + numPartInWidth - 1];
-}
-
bool CUData::hasEqualMotion(uint32_t absPartIdx, const CUData& candCU, uint32_t candAbsPartIdx) const
{
if (m_interDir[absPartIdx] != candCU.m_interDir[candAbsPartIdx])
diff -r 8d2f418829c8 -r 6b59452a17d7 source/common/cudata.h
--- a/source/common/cudata.h Sat Dec 20 21:27:14 2014 +0900
+++ b/source/common/cudata.h Tue Dec 23 14:49:59 2014 +0900
@@ -212,7 +212,6 @@
void getAllowedChromaDir(uint32_t absPartIdx, uint32_t* modeList) const;
int getIntraDirLumaPredictor(uint32_t absPartIdx, uint32_t* intraDirPred) const;
- void deriveLeftRightTopIdxAdi(uint32_t& partIdxLT, uint32_t& partIdxRT, uint32_t partOffset, uint32_t partDepth) const;
uint32_t getSCUAddr() const { return (m_cuAddr << g_maxFullDepth * 2) + m_absIdxInCTU; }
uint32_t getCtxSplitFlag(uint32_t absPartIdx, uint32_t depth) const;
diff -r 8d2f418829c8 -r 6b59452a17d7 source/common/predict.cpp
--- a/source/common/predict.cpp Sat Dec 20 21:27:14 2014 +0900
+++ b/source/common/predict.cpp Tue Dec 23 14:49:59 2014 +0900
@@ -654,11 +654,8 @@
}
}
-void Predict::initAdiPattern(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t partDepth, int dirMode)
+void Predict::initAdiPattern(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, const IntraNeighbors& intraNeighbors, int dirMode)
{
- IntraNeighbors intraNeighbors;
- initIntraNeighbors(cu, absPartIdx, partDepth, true, &intraNeighbors);
-
pixel* adiBuf = m_predBuf;
pixel* refAbove = m_refAbove;
pixel* refLeft = m_refLeft;
@@ -700,12 +697,12 @@
int refTL = refAbove[0];
int refTR = refAbove[trSize2];
bStrongSmoothing = (abs(refBL + refTL - 2 * refLeft[trSize]) < threshold &&
- abs(refTL + refTR - 2 * refAbove[trSize]) < threshold);
+ abs(refTL + refTR - 2 * refAbove[trSize]) < threshold);
if (bStrongSmoothing)
{
// bilinear interpolation
- const int shift = 5 + 1; // intraNeighbors.log2TrSize + 1;
+ const int shift = 5 + 1; // log2TrSize + 1;
int init = (refTL << shift) + tuSize;
int delta;
@@ -738,10 +735,8 @@
}
}
-void Predict::initAdiPatternChroma(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t partDepth, uint32_t chromaId)
+void Predict::initAdiPatternChroma(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, const IntraNeighbors& intraNeighbors, uint32_t chromaId)
{
- IntraNeighbors intraNeighbors;
- initIntraNeighbors(cu, absPartIdx, partDepth, false, &intraNeighbors);
uint32_t tuSize = intraNeighbors.tuSize;
const pixel* adiOrigin = cu.m_encData->m_reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.encodeIdx + absPartIdx);
@@ -751,9 +746,9 @@
fillReferenceSamples(adiOrigin, picStride, adiRef, intraNeighbors);
}
-void Predict::initIntraNeighbors(const CUData& cu, uint32_t absPartIdx, uint32_t partDepth, bool isLuma, IntraNeighbors *intraNeighbors)
+void Predict::initIntraNeighbors(const CUData& cu, uint32_t absPartIdx, uint32_t tuDepth, bool isLuma, IntraNeighbors *intraNeighbors)
{
- uint32_t log2TrSize = cu.m_log2CUSize[0] - partDepth;
+ uint32_t log2TrSize = cu.m_log2CUSize[0] - tuDepth;
int log2UnitWidth = LOG2_UNIT_SIZE;
int log2UnitHeight = LOG2_UNIT_SIZE;
@@ -764,12 +759,12 @@
log2UnitHeight -= cu.m_vChromaShift;
}
- int numIntraNeighbor = 0;
+ int numIntraNeighbor;
bool* bNeighborFlags = intraNeighbors->bNeighborFlags;
- uint32_t partIdxLT, partIdxRT, partIdxLB;
-
- cu.deriveLeftRightTopIdxAdi(partIdxLT, partIdxRT, absPartIdx, partDepth);
+ uint32_t numPartInWidth = 1 << (cu.m_log2CUSize[0] - LOG2_UNIT_SIZE - tuDepth);
+ uint32_t partIdxLT = cu.m_absIdxInCTU + absPartIdx;
+ uint32_t partIdxRT = g_rasterToZscan[g_zscanToRaster[partIdxLT] + numPartInWidth - 1];
uint32_t tuSize = 1 << log2TrSize;
int tuWidthInUnits = tuSize >> log2UnitWidth;
@@ -777,14 +772,26 @@
int aboveUnits = tuWidthInUnits << 1;
int leftUnits = tuHeightInUnits << 1;
int partIdxStride = cu.m_slice->m_sps->numPartInCUSize;
- partIdxLB = g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((tuHeightInUnits - 1) * partIdxStride)];
+ uint32_t partIdxLB = g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((tuHeightInUnits - 1) * partIdxStride)];
- bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu, partIdxLT);
- numIntraNeighbor += (int)(bNeighborFlags[leftUnits]);
- numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1));
- numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + tuWidthInUnits));
- numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1));
- numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1 - tuHeightInUnits));
+ if (cu.m_slice->isIntra() || !cu.m_slice->m_pps->bConstrainedIntraPred)
+ {
+ bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu, partIdxLT);
+ numIntraNeighbor = (int)(bNeighborFlags[leftUnits]);
+ numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT, bNeighborFlags + leftUnits + 1);
+ numIntraNeighbor += isAboveRightAvailable(cu, partIdxRT, bNeighborFlags + leftUnits + 1 + tuWidthInUnits, tuWidthInUnits);
+ numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB, bNeighborFlags + leftUnits - 1);
+ numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLB, bNeighborFlags + tuHeightInUnits - 1, tuHeightInUnits);
+ }
+ else
+ {
+ bNeighborFlags[leftUnits] = isAboveLeftAvailableCIP(cu, partIdxLT);
+ numIntraNeighbor = (int)(bNeighborFlags[leftUnits]);
+ numIntraNeighbor += isAboveAvailableCIP(cu, partIdxLT, partIdxRT, bNeighborFlags + leftUnits + 1);
+ numIntraNeighbor += isAboveRightAvailableCIP(cu, partIdxRT, bNeighborFlags + leftUnits + 1 + tuWidthInUnits, tuWidthInUnits);
+ numIntraNeighbor += isLeftAvailableCIP(cu, partIdxLT, partIdxLB, bNeighborFlags + leftUnits - 1);
+ numIntraNeighbor += isBelowLeftAvailableCIP(cu, partIdxLB, bNeighborFlags + tuHeightInUnits - 1, tuHeightInUnits);
+ }
intraNeighbors->numIntraNeighbor = numIntraNeighbor;
intraNeighbors->totalUnits = aboveUnits + leftUnits + 1;
@@ -793,7 +800,6 @@
intraNeighbors->unitWidth = 1 << log2UnitWidth;
intraNeighbors->unitHeight = 1 << log2UnitHeight;
intraNeighbors->tuSize = tuSize;
- intraNeighbors->log2TrSize = log2TrSize;
}
void Predict::fillReferenceSamples(const pixel* adiOrigin, intptr_t picStride, pixel* adiRef, const IntraNeighbors& intraNeighbors)
@@ -953,33 +959,27 @@
uint32_t partAboveLeft;
const CUData* cuAboveLeft = cu.getPUAboveLeft(partAboveLeft, partIdxLT);
- if (!cu.m_slice->m_pps->bConstrainedIntraPred)
- return cuAboveLeft ? true : false;
- else
- return cuAboveLeft && cuAboveLeft->isIntra(partAboveLeft);
+ return !!cuAboveLeft;
}
int Predict::isAboveAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxRT, bool* bValidFlags)
{
const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT];
- const uint32_t rasterPartEnd = g_zscanToRaster[partIdxRT] + 1;
+ const uint32_t rasterPartEnd = g_zscanToRaster[partIdxRT];
const uint32_t idxStep = 1;
- bool* validFlagPtr = bValidFlags;
int numIntra = 0;
- for (uint32_t rasterPart = rasterPartBegin; rasterPart < rasterPartEnd; rasterPart += idxStep)
+ for (uint32_t rasterPart = rasterPartBegin; rasterPart <= rasterPartEnd; rasterPart += idxStep, bValidFlags++)
{
uint32_t partAbove;
const CUData* cuAbove = cu.getPUAbove(partAbove, g_rasterToZscan[rasterPart]);
- if (cuAbove && (!cu.m_slice->m_pps->bConstrainedIntraPred || cuAbove->isIntra(partAbove)))
+ if (cuAbove)
{
numIntra++;
- *validFlagPtr = true;
+ *bValidFlags = true;
}
else
- *validFlagPtr = false;
-
- validFlagPtr++;
+ *bValidFlags = false;
}
return numIntra;
@@ -988,73 +988,156 @@
int Predict::isLeftAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxLB, bool* bValidFlags)
{
const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT];
- const uint32_t rasterPartEnd = g_zscanToRaster[partIdxLB] + 1;
+ const uint32_t rasterPartEnd = g_zscanToRaster[partIdxLB];
const uint32_t idxStep = cu.m_slice->m_sps->numPartInCUSize;
- bool* validFlagPtr = bValidFlags;
int numIntra = 0;
- for (uint32_t rasterPart = rasterPartBegin; rasterPart < rasterPartEnd; rasterPart += idxStep)
+ for (uint32_t rasterPart = rasterPartBegin; rasterPart <= rasterPartEnd; rasterPart += idxStep, bValidFlags--) // opposite direction
{
uint32_t partLeft;
const CUData* cuLeft = cu.getPULeft(partLeft, g_rasterToZscan[rasterPart]);
- if (cuLeft && (!cu.m_slice->m_pps->bConstrainedIntraPred || cuLeft->isIntra(partLeft)))
+ if (cuLeft)
{
numIntra++;
- *validFlagPtr = true;
+ *bValidFlags = true;
}
else
- *validFlagPtr = false;
-
- validFlagPtr--; // opposite direction
+ *bValidFlags = false;
}
return numIntra;
}
-int Predict::isAboveRightAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxRT, bool* bValidFlags)
+int Predict::isAboveRightAvailable(const CUData& cu, uint32_t partIdxRT, bool* bValidFlags, uint32_t numUnits)
{
- const uint32_t numUnitsInPU = g_zscanToRaster[partIdxRT] - g_zscanToRaster[partIdxLT] + 1;
- bool* validFlagPtr = bValidFlags;
int numIntra = 0;
- for (uint32_t offset = 1; offset <= numUnitsInPU; offset++)
+ for (uint32_t offset = 1; offset <= numUnits; offset++, bValidFlags++)
{
uint32_t partAboveRight;
const CUData* cuAboveRight = cu.getPUAboveRightAdi(partAboveRight, partIdxRT, offset);
- if (cuAboveRight && (!cu.m_slice->m_pps->bConstrainedIntraPred || cuAboveRight->isIntra(partAboveRight)))
+ if (cuAboveRight)
{
numIntra++;
- *validFlagPtr = true;
+ *bValidFlags = true;
}
else
- *validFlagPtr = false;
-
- validFlagPtr++;
+ *bValidFlags = false;
}
return numIntra;
}
-int Predict::isBelowLeftAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxLB, bool* bValidFlags)
+int Predict::isBelowLeftAvailable(const CUData& cu, uint32_t partIdxLB, bool* bValidFlags, uint32_t numUnits)
{
- const uint32_t numUnitsInPU = (g_zscanToRaster[partIdxLB] - g_zscanToRaster[partIdxLT]) / cu.m_slice->m_sps->numPartInCUSize + 1;
- bool* validFlagPtr = bValidFlags;
int numIntra = 0;
- for (uint32_t offset = 1; offset <= numUnitsInPU; offset++)
+ for (uint32_t offset = 1; offset <= numUnits; offset++, bValidFlags--) // opposite direction
{
uint32_t partBelowLeft;
const CUData* cuBelowLeft = cu.getPUBelowLeftAdi(partBelowLeft, partIdxLB, offset);
- if (cuBelowLeft && (!cu.m_slice->m_pps->bConstrainedIntraPred || cuBelowLeft->isIntra(partBelowLeft)))
+ if (cuBelowLeft)
{
numIntra++;
- *validFlagPtr = true;
+ *bValidFlags = true;
}
else
- *validFlagPtr = false;
-
- validFlagPtr--; // opposite direction
+ *bValidFlags = false;
}
return numIntra;
}
+
+bool Predict::isAboveLeftAvailableCIP(const CUData& cu, uint32_t partIdxLT)
+{
+ uint32_t partAboveLeft; // out-parameter of getPUAboveLeft; used below as the index passed to isIntra
+ const CUData* cuAboveLeft = cu.getPUAboveLeft(partAboveLeft, partIdxLT);
+ // constrained intra pred variant: the above-left neighbor counts only if it exists and is intra coded
+ return cuAboveLeft && cuAboveLeft->isIntra(partAboveLeft);
+}
+
+int Predict::isAboveAvailableCIP(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxRT, bool* bValidFlags)
+{
+ const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT]; // raster index of the first unit visited
+ const uint32_t rasterPartEnd = g_zscanToRaster[partIdxRT]; // raster index of the last unit visited (inclusive)
+ const uint32_t idxStep = 1; // advance one raster index per unit
+ int numIntra = 0; // number of flags set to true; this is the return value
+ // constrained intra pred variant: for each unit from LT to RT, flag it only when the CU above exists and is intra coded
+ for (uint32_t rasterPart = rasterPartBegin; rasterPart <= rasterPartEnd; rasterPart += idxStep, bValidFlags++) // flags are written at increasing addresses
+ {
+ uint32_t partAbove; // out-parameter of getPUAbove; index passed to isIntra
+ const CUData* cuAbove = cu.getPUAbove(partAbove, g_rasterToZscan[rasterPart]);
+ if (cuAbove && cuAbove->isIntra(partAbove))
+ {
+ numIntra++;
+ *bValidFlags = true;
+ }
+ else
+ *bValidFlags = false;
+ }
+
+ return numIntra;
+}
+
+int Predict::isLeftAvailableCIP(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxLB, bool* bValidFlags)
+{
+ const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT]; // raster index of the first unit visited
+ const uint32_t rasterPartEnd = g_zscanToRaster[partIdxLB]; // raster index of the last unit visited (inclusive)
+ const uint32_t idxStep = cu.m_slice->m_sps->numPartInCUSize; // raster stride used to step from LT towards LB
+ int numIntra = 0; // number of flags set to true; this is the return value
+ // constrained intra pred variant: for each unit from LT to LB, flag it only when the CU to the left exists and is intra coded
+ for (uint32_t rasterPart = rasterPartBegin; rasterPart <= rasterPartEnd; rasterPart += idxStep, bValidFlags--) // opposite direction: flags are written at decreasing addresses
+ {
+ uint32_t partLeft; // out-parameter of getPULeft; index passed to isIntra
+ const CUData* cuLeft = cu.getPULeft(partLeft, g_rasterToZscan[rasterPart]);
+ if (cuLeft && cuLeft->isIntra(partLeft))
+ {
+ numIntra++;
+ *bValidFlags = true;
+ }
+ else
+ *bValidFlags = false;
+ }
+
+ return numIntra;
+}
+
+int Predict::isAboveRightAvailableCIP(const CUData& cu, uint32_t partIdxRT, bool* bValidFlags, uint32_t numUnits)
+{
+ int numIntra = 0; // number of flags set to true; this is the return value
+ // constrained intra pred variant: probe numUnits above-right units relative to partIdxRT; a unit is flagged only when its CU exists and is intra coded
+ for (uint32_t offset = 1; offset <= numUnits; offset++, bValidFlags++) // offsets 1..numUnits, flags written at increasing addresses
+ {
+ uint32_t partAboveRight; // out-parameter of getPUAboveRightAdi; index passed to isIntra
+ const CUData* cuAboveRight = cu.getPUAboveRightAdi(partAboveRight, partIdxRT, offset);
+ if (cuAboveRight && cuAboveRight->isIntra(partAboveRight))
+ {
+ numIntra++;
+ *bValidFlags = true;
+ }
+ else
+ *bValidFlags = false;
+ }
+
+ return numIntra;
+}
+
+int Predict::isBelowLeftAvailableCIP(const CUData& cu, uint32_t partIdxLB, bool* bValidFlags, uint32_t numUnits)
+{
+ int numIntra = 0; // number of flags set to true; this is the return value
+ // constrained intra pred variant: probe numUnits below-left units relative to partIdxLB; a unit is flagged only when its CU exists and is intra coded
+ for (uint32_t offset = 1; offset <= numUnits; offset++, bValidFlags--) // opposite direction: flags are written at decreasing addresses
+ {
+ uint32_t partBelowLeft; // out-parameter of getPUBelowLeftAdi; index passed to isIntra
+ const CUData* cuBelowLeft = cu.getPUBelowLeftAdi(partBelowLeft, partIdxLB, offset);
+ if (cuBelowLeft && cuBelowLeft->isIntra(partBelowLeft))
+ {
+ numIntra++;
+ *bValidFlags = true;
+ }
+ else
+ *bValidFlags = false;
+ }
+
+ return numIntra;
+}
diff -r 8d2f418829c8 -r 6b59452a17d7 source/common/predict.h
--- a/source/common/predict.h Sat Dec 20 21:27:14 2014 +0900
+++ b/source/common/predict.h Tue Dec 23 14:49:59 2014 +0900
@@ -57,7 +57,6 @@
int unitWidth;
int unitHeight;
int tuSize;
- uint32_t log2TrSize;
bool bNeighborFlags[4 * MAX_NUM_SPU_W + 1];
};
@@ -105,14 +104,20 @@
void addWeightUni(Yuv& predYuv, const ShortYuv& srcYuv, const WeightValues wp[3], bool bLuma, bool bChroma) const;
/* Intra prediction helper functions */
- static void initIntraNeighbors(const CUData& cu, uint32_t zOrderIdxInPart, uint32_t partDepth, bool isLuma, IntraNeighbors *IntraNeighbors);
+ static void initIntraNeighbors(const CUData& cu, uint32_t absPartIdx, uint32_t tuDepth, bool isLuma, IntraNeighbors *IntraNeighbors);
static void fillReferenceSamples(const pixel* adiOrigin, intptr_t picStride, pixel* adiRef, const IntraNeighbors& intraNeighbors);
static bool isAboveLeftAvailable(const CUData& cu, uint32_t partIdxLT);
static int isAboveAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxRT, bool* bValidFlags);
static int isLeftAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxLB, bool* bValidFlags);
- static int isAboveRightAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxRT, bool* bValidFlags);
- static int isBelowLeftAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxLB, bool* bValidFlags);
+ static int isAboveRightAvailable(const CUData& cu, uint32_t partIdxRT, bool* bValidFlags, uint32_t numUnits);
+ static int isBelowLeftAvailable(const CUData& cu, uint32_t partIdxLB, bool* bValidFlags, uint32_t numUnits);
+
+ static bool isAboveLeftAvailableCIP(const CUData& cu, uint32_t partIdxLT);
+ static int isAboveAvailableCIP(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxRT, bool* bValidFlags);
+ static int isLeftAvailableCIP(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxLB, bool* bValidFlags);
+ static int isAboveRightAvailableCIP(const CUData& cu, uint32_t partIdxRT, bool* bValidFlags, uint32_t numUnits);
+ static int isBelowLeftAvailableCIP(const CUData& cu, uint32_t partIdxLB, bool* bValidFlags, uint32_t numUnits);
public:
@@ -125,8 +130,8 @@
void predIntraLumaAng(uint32_t dirMode, pixel* pred, intptr_t stride, uint32_t log2TrSize);
void predIntraChromaAng(pixel* src, uint32_t dirMode, pixel* pred, intptr_t stride, uint32_t log2TrSizeC, int chFmt);
- void initAdiPattern(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t partDepth, int dirMode);
- void initAdiPatternChroma(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t partDepth, uint32_t chromaId);
+ void initAdiPattern(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, const IntraNeighbors& intraNeighbors, int dirMode);
+ void initAdiPatternChroma(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, const IntraNeighbors& intraNeighbors, uint32_t chromaId);
pixel* getAdiChromaBuf(uint32_t chromaId, int tuSize)
{
return m_predBuf + (chromaId == 1 ? 0 : 2 * ADI_BUF_STRIDE * (tuSize * 2 + 1));
diff -r 8d2f418829c8 -r 6b59452a17d7 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Sat Dec 20 21:27:14 2014 +0900
+++ b/source/encoder/analysis.cpp Tue Dec 23 14:49:59 2014 +0900
@@ -914,7 +914,7 @@
cu.getInterTUQtDepthRange(tuDepthRange, 0);
m_rqt[cuGeom.depth].tmpResiYuv.subtract(*md.bestMode->fencYuv, md.bestMode->predYuv, cuGeom.log2CUSize);
- residualTransformQuantInter(*md.bestMode, cuGeom, 0, cuGeom.depth, tuDepthRange);
+ residualTransformQuantInter(*md.bestMode, cuGeom, 0, 0, tuDepthRange);
if (cu.getQtRootCbf(0))
md.bestMode->reconYuv.addClip(md.bestMode->predYuv, m_rqt[cuGeom.depth].tmpResiYuv, cu.m_log2CUSize[0]);
else
@@ -938,8 +938,7 @@
uint32_t tuDepthRange[2];
cu.getIntraTUQtDepthRange(tuDepthRange, 0);
- uint32_t initTuDepth = cu.m_partSize[0] != SIZE_2Nx2N;
- residualTransformQuantIntra(*md.bestMode, cuGeom, initTuDepth, 0, tuDepthRange);
+ residualTransformQuantIntra(*md.bestMode, cuGeom, 0, 0, tuDepthRange);
getBestIntraModeChroma(*md.bestMode, cuGeom);
residualQTIntraChroma(*md.bestMode, cuGeom, 0, 0);
md.bestMode->reconYuv.copyFromPicYuv(*m_frame->m_reconPic, cu.m_cuAddr, cuGeom.encodeIdx); // TODO:
@@ -1702,8 +1701,7 @@
uint32_t tuDepthRange[2];
cu.getIntraTUQtDepthRange(tuDepthRange, 0);
- uint32_t initTuDepth = cu.m_partSize[0] != SIZE_2Nx2N;
- residualTransformQuantIntra(*bestMode, cuGeom, initTuDepth, 0, tuDepthRange);
+ residualTransformQuantIntra(*bestMode, cuGeom, 0, 0, tuDepthRange);
getBestIntraModeChroma(*bestMode, cuGeom);
residualQTIntraChroma(*bestMode, cuGeom, 0, 0);
}
@@ -1736,7 +1734,7 @@
uint32_t tuDepthRange[2];
cu.getInterTUQtDepthRange(tuDepthRange, 0);
- residualTransformQuantInter(*bestMode, cuGeom, 0, cuGeom.depth, tuDepthRange);
+ residualTransformQuantInter(*bestMode, cuGeom, 0, 0, tuDepthRange);
if (cu.m_mergeFlag[0] && cu.m_partSize[0] == SIZE_2Nx2N && !cu.getQtRootCbf(0))
cu.setPredModeSubParts(MODE_SKIP);
diff -r 8d2f418829c8 -r 6b59452a17d7 source/encoder/search.cpp
--- a/source/encoder/search.cpp Sat Dec 20 21:27:14 2014 +0900
+++ b/source/encoder/search.cpp Tue Dec 23 14:49:59 2014 +0900
@@ -239,7 +239,8 @@
void Search::codeIntraLumaQT(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, bool bAllowSplit, Cost& outCost, const uint32_t depthRange[2])
{
- uint32_t fullDepth = mode.cu.m_cuDepth[0] + tuDepth;
+ CUData& cu = mode.cu;
+ uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth;
uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
uint32_t qtLayer = log2TrSize - 2;
uint32_t sizeIdx = log2TrSize - 2;
@@ -253,8 +254,6 @@
mightSplit = true;
}
- CUData& cu = mode.cu;
-
Cost fullCost;
uint32_t bCBF = 0;
@@ -273,7 +272,9 @@
// init availability pattern
uint32_t lumaPredMode = cu.m_lumaIntraDir[absPartIdx];
- initAdiPattern(cu, cuGeom, absPartIdx, tuDepth, lumaPredMode);
+ IntraNeighbors intraNeighbors;
+ initIntraNeighbors(cu, absPartIdx, tuDepth, true, &intraNeighbors);
+ initAdiPattern(cu, cuGeom, absPartIdx, intraNeighbors, lumaPredMode);
// get prediction signal
predIntraLumaAng(lumaPredMode, pred, stride, log2TrSize);
@@ -365,7 +366,7 @@
m_entropyCoder.load(m_rqt[fullDepth].rqtRoot); // prep state of split encode
}
- // code split block
+ /* code split block */
uint32_t qNumParts = 1 << (log2TrSize - 1 - LOG2_UNIT_SIZE) * 2;
int checkTransformSkip = m_slice->m_pps->bTransformSkipEnabled && (log2TrSize - 1) <= MAX_LOG2_TS_SIZE && !cu.m_tqBypass[0];
@@ -451,11 +452,13 @@
pixel* pred = predYuv->getLumaAddr(absPartIdx);
int16_t* residual = m_rqt[cuGeom.depth].tmpResiYuv.getLumaAddr(absPartIdx);
uint32_t stride = fencYuv->m_size;
- int sizeIdx = log2TrSize - 2;
+ uint32_t sizeIdx = log2TrSize - 2;
// init availability pattern
uint32_t lumaPredMode = cu.m_lumaIntraDir[absPartIdx];
- initAdiPattern(cu, cuGeom, absPartIdx, tuDepth, lumaPredMode);
+ IntraNeighbors intraNeighbors;
+ initIntraNeighbors(cu, absPartIdx, tuDepth, true, &intraNeighbors);
+ initAdiPattern(cu, cuGeom, absPartIdx, intraNeighbors, lumaPredMode);
// get prediction signal
predIntraLumaAng(lumaPredMode, pred, stride, log2TrSize);
@@ -597,13 +600,12 @@
}
/* fast luma intra residual generation. Only perform the minimum number of TU splits required by the CU size */
-void Search::residualTransformQuantIntra(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, const uint32_t depthRange[2])
+void Search::residualTransformQuantIntra(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth, const uint32_t depthRange[2])
{
CUData& cu = mode.cu;
-
- uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth;
- uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
- bool bCheckFull = log2TrSize <= depthRange[1];
+ uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth;
+ uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
+ bool bCheckFull = log2TrSize <= depthRange[1];
X265_CHECK(m_slice->m_sliceType != I_SLICE, "residualTransformQuantIntra not intended for I slices\n");
@@ -614,28 +616,36 @@
if (bCheckFull)
{
- const pixel* fenc = mode.fencYuv->getLumaAddr(absPartIdx);
- pixel* pred = mode.predYuv.getLumaAddr(absPartIdx);
- int16_t* residual = m_rqt[cuGeom.depth].tmpResiYuv.getLumaAddr(absPartIdx);
+ const pixel* fenc = mode.fencYuv->getLumaAddr(absPartIdx);
+ pixel* pred = mode.predYuv.getLumaAddr(absPartIdx);
+ int16_t* residual = m_rqt[cuGeom.depth].tmpResiYuv.getLumaAddr(absPartIdx);
+ uint32_t stride = mode.fencYuv->m_size;
+
+ // init availability pattern
+ uint32_t lumaPredMode = cu.m_lumaIntraDir[absPartIdx];
+ IntraNeighbors intraNeighbors;
+ initIntraNeighbors(cu, absPartIdx, tuDepth, true, &intraNeighbors);
+ initAdiPattern(cu, cuGeom, absPartIdx, intraNeighbors, lumaPredMode);
+
+ // get prediction signal
+ predIntraLumaAng(lumaPredMode, pred, stride, log2TrSize);
+
+ X265_CHECK(!cu.m_transformSkip[TEXT_LUMA][absPartIdx], "unexpected tskip flag in residualTransformQuantIntra\n");
+ cu.setTUDepthSubParts(tuDepth, absPartIdx, fullDepth);
+
+ uint32_t coeffOffsetY = absPartIdx << (LOG2_UNIT_SIZE * 2);
+ coeff_t* coeffY = cu.m_trCoeff[0] + coeffOffsetY;
+
+ uint32_t sizeIdx = log2TrSize - 2;
+ primitives.calcresidual[sizeIdx](fenc, pred, residual, stride);
+
pixel* picReconY = m_frame->m_reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx + absPartIdx);
intptr_t picStride = m_frame->m_reconPic->m_stride;
- uint32_t stride = mode.fencYuv->m_size;
- uint32_t sizeIdx = log2TrSize - 2;
- uint32_t lumaPredMode = cu.m_lumaIntraDir[absPartIdx];
- uint32_t coeffOffsetY = absPartIdx << (LOG2_UNIT_SIZE * 2);
- coeff_t* coeff = cu.m_trCoeff[TEXT_LUMA] + coeffOffsetY;
-
- initAdiPattern(cu, cuGeom, absPartIdx, tuDepth, lumaPredMode);
- predIntraLumaAng(lumaPredMode, pred, stride, log2TrSize);
-
- X265_CHECK(!cu.m_transformSkip[TEXT_LUMA][absPartIdx], "unexpected tskip flag in residualTransformQuantIntra\n");
- cu.setTUDepthSubParts(tuDepth, absPartIdx, fullDepth);
-
- primitives.calcresidual[sizeIdx](fenc, pred, residual, stride);
- uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeff, log2TrSize, TEXT_LUMA, absPartIdx, false);
+
+ uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffY, log2TrSize, TEXT_LUMA, absPartIdx, false);
if (numSig)
{
- m_quant.invtransformNxN(cu.m_tqBypass[absPartIdx], residual, stride, coeff, log2TrSize, TEXT_LUMA, true, false, numSig);
+ m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
primitives.luma_add_ps[sizeIdx](picReconY, picStride, pred, residual, stride, stride);
cu.setCbfSubParts(1 << tuDepth, TEXT_LUMA, absPartIdx, fullDepth);
}
@@ -654,11 +664,11 @@
uint32_t cbf = 0;
for (uint32_t qIdx = 0, qPartIdx = absPartIdx; qIdx < 4; ++qIdx, qPartIdx += qNumParts)
{
- residualTransformQuantIntra(mode, cuGeom, tuDepth + 1, qPartIdx, depthRange);
+ residualTransformQuantIntra(mode, cuGeom, qPartIdx, tuDepth + 1, depthRange);
cbf |= cu.getCbf(qPartIdx, TEXT_LUMA, tuDepth + 1);
}
for (uint32_t offs = 0; offs < 4 * qNumParts; offs++)
- cu.m_cbf[TEXT_LUMA][absPartIdx + offs] |= (cbf << tuDepth);
+ cu.m_cbf[0][absPartIdx + offs] |= (cbf << tuDepth);
}
}
@@ -739,15 +749,14 @@
}
for (uint32_t offs = 0; offs < 4 * qNumParts; offs++)
{
- cu.m_cbf[TEXT_CHROMA_U][absPartIdx + offs] |= (splitCbfU << tuDepth);
- cu.m_cbf[TEXT_CHROMA_V][absPartIdx + offs] |= (splitCbfV << tuDepth);
+ cu.m_cbf[1][absPartIdx + offs] |= (splitCbfU << tuDepth);
+ cu.m_cbf[2][absPartIdx + offs] |= (splitCbfV << tuDepth);
}
return outDist;
}
uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
-
uint32_t tuDepthC = tuDepth;
if (log2TrSizeC < 2)
{
@@ -766,46 +775,48 @@
if (checkTransformSkip)
return codeIntraChromaTSkip(mode, cuGeom, tuDepth, tuDepthC, absPartIdx, psyEnergy);
+ ShortYuv& resiYuv = m_rqt[cuGeom.depth].tmpResiYuv;
uint32_t qtLayer = log2TrSize - 2;
uint32_t tuSize = 1 << log2TrSizeC;
+ uint32_t stride = mode.fencYuv->m_csize;
+ const uint32_t sizeIdxC = log2TrSizeC - 2;
uint32_t outDist = 0;
uint32_t curPartNum = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + tuDepthC) << 1);
const SplitType splitType = (m_csp == X265_CSP_I422) ? VERTICAL_SPLIT : DONT_SPLIT;
- for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
+ TURecurse tuIterator(splitType, curPartNum, absPartIdx);
+ do
{
- TextType ttype = (TextType)chromaId;
-
- TURecurse tuIterator(splitType, curPartNum, absPartIdx);
- do
+ uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU;
+
+ IntraNeighbors intraNeighbors;
+ initIntraNeighbors(cu, absPartIdxC, tuDepthC, false, &intraNeighbors);
+
+ for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
{
- uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU;
+ TextType ttype = (TextType)chromaId;
const pixel* fenc = mode.fencYuv->getChromaAddr(chromaId, absPartIdxC);
pixel* pred = mode.predYuv.getChromaAddr(chromaId, absPartIdxC);
- int16_t* residual = m_rqt[cuGeom.depth].tmpResiYuv.getChromaAddr(chromaId, absPartIdxC);
- uint32_t stride = mode.fencYuv->m_csize;
- uint32_t sizeIdxC = log2TrSizeC - 2;
-
+ int16_t* residual = resiYuv.getChromaAddr(chromaId, absPartIdxC);
uint32_t coeffOffsetC = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (m_hChromaShift + m_vChromaShift));
coeff_t* coeffC = m_rqt[qtLayer].coeffRQT[chromaId] + coeffOffsetC;
pixel* reconQt = m_rqt[qtLayer].reconQtYuv.getChromaAddr(chromaId, absPartIdxC);
uint32_t reconQtStride = m_rqt[qtLayer].reconQtYuv.m_csize;
-
pixel* picReconC = m_frame->m_reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.encodeIdx + absPartIdxC);
intptr_t picStride = m_frame->m_reconPic->m_strideC;
- // init availability pattern
- initAdiPatternChroma(cu, cuGeom, absPartIdxC, tuDepthC, chromaId);
- pixel* chromaPred = getAdiChromaBuf(chromaId, tuSize);
-
uint32_t chromaPredMode = cu.m_chromaIntraDir[absPartIdxC];
if (chromaPredMode == DM_CHROMA_IDX)
chromaPredMode = cu.m_lumaIntraDir[(m_csp == X265_CSP_I444) ? absPartIdxC : 0];
if (m_csp == X265_CSP_I422)
chromaPredMode = g_chroma422IntraAngleMappingTable[chromaPredMode];
+ // init availability pattern
+ initAdiPatternChroma(cu, cuGeom, absPartIdxC, intraNeighbors, chromaId);
+ pixel* chromaPred = getAdiChromaBuf(chromaId, tuSize);
+
// get prediction signal
predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, log2TrSizeC, m_csp);
@@ -813,7 +824,6 @@
primitives.calcresidual[sizeIdxC](fenc, pred, residual, stride);
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffC, log2TrSizeC, ttype, absPartIdxC, false);
- uint32_t tmpDist;
if (numSig)
{
m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
@@ -827,7 +837,7 @@
cu.setCbfPartRange(0, ttype, absPartIdxC, tuIterator.absPartIdxStep);
}
- tmpDist = primitives.sse_pp[sizeIdxC](reconQt, reconQtStride, fenc, stride);
+ uint32_t tmpDist = primitives.sse_pp[sizeIdxC](reconQt, reconQtStride, fenc, stride);
outDist += (ttype == TEXT_CHROMA_U) ? m_rdCost.scaleChromaDistCb(tmpDist) : m_rdCost.scaleChromaDistCr(tmpDist);
if (m_rdCost.m_psyRd)
@@ -835,10 +845,13 @@
primitives.luma_copy_pp[sizeIdxC](picReconC, picStride, reconQt, reconQtStride);
}
- while (tuIterator.isNextSection());
-
- if (splitType == VERTICAL_SPLIT)
- offsetSubTUCBFs(cu, ttype, tuDepth, absPartIdx);
+ }
+ while (tuIterator.isNextSection());
+
+ if (splitType == VERTICAL_SPLIT)
+ {
+ offsetSubTUCBFs(cu, TEXT_CHROMA_U, tuDepth, absPartIdx);
+ offsetSubTUCBFs(cu, TEXT_CHROMA_V, tuDepth, absPartIdx);
}
return outDist;
@@ -866,14 +879,17 @@
uint32_t curPartNum = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + tuDepthC) << 1);
const SplitType splitType = (m_csp == X265_CSP_I422) ? VERTICAL_SPLIT : DONT_SPLIT;
- for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
+ TURecurse tuIterator(splitType, curPartNum, absPartIdx);
+ do
{
- TextType ttype = (TextType)chromaId;
-
- TURecurse tuIterator(splitType, curPartNum, absPartIdx);
- do
+ uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU;
+
+ IntraNeighbors intraNeighbors;
+ initIntraNeighbors(cu, absPartIdxC, tuDepthC, false, &intraNeighbors);
+
+ for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
{
- uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU;
+ TextType ttype = (TextType)chromaId;
const pixel* fenc = mode.fencYuv->getChromaAddr(chromaId, absPartIdxC);
pixel* pred = mode.predYuv.getChromaAddr(chromaId, absPartIdxC);
@@ -887,7 +903,7 @@
uint32_t reconQtStride = m_rqt[qtLayer].reconQtYuv.m_csize;
// init availability pattern
- initAdiPatternChroma(cu, cuGeom, absPartIdxC, tuDepthC, chromaId);
+ initAdiPatternChroma(cu, cuGeom, absPartIdxC, intraNeighbors, chromaId);
pixel* chromaPred = getAdiChromaBuf(chromaId, tuSize);
uint32_t chromaPredMode = cu.m_chromaIntraDir[absPartIdxC];
@@ -980,10 +996,13 @@
outDist += bDist;
psyEnergy += bEnergy;
}
- while (tuIterator.isNextSection());
-
- if (splitType == VERTICAL_SPLIT)
- offsetSubTUCBFs(cu, ttype, tuDepth, absPartIdx);
+ }
+ while (tuIterator.isNextSection());
+
+ if (splitType == VERTICAL_SPLIT)
+ {
+ offsetSubTUCBFs(cu, TEXT_CHROMA_U, tuDepth, absPartIdx);
+ offsetSubTUCBFs(cu, TEXT_CHROMA_V, tuDepth, absPartIdx);
}
m_entropyCoder.load(m_rqt[fullDepth].rqtRoot);
@@ -1022,91 +1041,18 @@
}
}
-void Search::residualQTIntraChroma(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx)
+void Search::residualQTIntraChroma(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth)
{
CUData& cu = mode.cu;
- uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth;
- uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
-
- if (tuDepth == cu.m_tuDepth[absPartIdx])
- {
- uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
- uint32_t tuDepthC = tuDepth;
- if (log2TrSizeC < 2)
- {
- X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && tuDepth, "invalid tuDepth\n");
- if (absPartIdx & 3)
- return;
- log2TrSizeC = 2;
- tuDepthC--;
- }
-
- ShortYuv& resiYuv = m_rqt[cuGeom.depth].tmpResiYuv;
- uint32_t tuSize = 1 << log2TrSizeC;
- uint32_t stride = mode.fencYuv->m_csize;
- const int sizeIdxC = log2TrSizeC - 2;
-
- uint32_t curPartNum = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + tuDepthC) << 1);
- const SplitType splitType = (m_csp == X265_CSP_I422) ? VERTICAL_SPLIT : DONT_SPLIT;
-
- for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
- {
- TextType ttype = (TextType)chromaId;
-
- TURecurse tuIterator(splitType, curPartNum, absPartIdx);
- do
- {
- uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU;
-
- const pixel* fenc = mode.fencYuv->getChromaAddr(chromaId, absPartIdxC);
- pixel* pred = mode.predYuv.getChromaAddr(chromaId, absPartIdxC);
- int16_t* residual = resiYuv.getChromaAddr(chromaId, absPartIdxC);
- pixel* recon = mode.reconYuv.getChromaAddr(chromaId, absPartIdxC); // TODO: needed?
- uint32_t coeffOffsetC = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (m_hChromaShift + m_vChromaShift));
- coeff_t* coeff = cu.m_trCoeff[ttype] + coeffOffsetC;
- pixel* picReconC = m_frame->m_reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.encodeIdx + absPartIdxC);
- uint32_t picStride = m_frame->m_reconPic->m_strideC;
-
- uint32_t chromaPredMode = cu.m_chromaIntraDir[absPartIdxC];
- if (chromaPredMode == DM_CHROMA_IDX)
- chromaPredMode = cu.m_lumaIntraDir[(m_csp == X265_CSP_I444) ? absPartIdxC : 0];
- chromaPredMode = (m_csp == X265_CSP_I422) ? g_chroma422IntraAngleMappingTable[chromaPredMode] : chromaPredMode;
- initAdiPatternChroma(cu, cuGeom, absPartIdxC, tuDepthC, chromaId);
- pixel* chromaPred = getAdiChromaBuf(chromaId, tuSize);
-
- predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, log2TrSizeC, m_csp);
-
- X265_CHECK(!cu.m_transformSkip[ttype][0], "transform skip not supported at low RD levels\n");
-
- primitives.calcresidual[sizeIdxC](fenc, pred, residual, stride);
- uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeff, log2TrSizeC, ttype, absPartIdxC, false);
- if (numSig)
- {
- m_quant.invtransformNxN(cu.m_tqBypass[absPartIdxC], residual, stride, coeff, log2TrSizeC, ttype, true, false, numSig);
- primitives.luma_add_ps[sizeIdxC](recon, stride, pred, residual, stride, stride);
- primitives.luma_copy_pp[sizeIdxC](picReconC, picStride, recon, stride);
- cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
- }
- else
- {
- primitives.luma_copy_pp[sizeIdxC](recon, stride, pred, stride);
- primitives.luma_copy_pp[sizeIdxC](picReconC, picStride, pred, stride);
- cu.setCbfPartRange(0, ttype, absPartIdxC, tuIterator.absPartIdxStep);
- }
- }
- while (tuIterator.isNextSection());
-
- if (splitType == VERTICAL_SPLIT)
- offsetSubTUCBFs(cu, (TextType)chromaId, tuDepth, absPartIdx);
- }
- }
- else
+ uint32_t log2TrSize = cu.m_log2CUSize[absPartIdx] - tuDepth;
+
+ if (tuDepth < cu.m_tuDepth[absPartIdx])
{
uint32_t qNumParts = 1 << (log2TrSize - 1 - LOG2_UNIT_SIZE) * 2;
uint32_t splitCbfU = 0, splitCbfV = 0;
for (uint32_t qIdx = 0, qPartIdx = absPartIdx; qIdx < 4; ++qIdx, qPartIdx += qNumParts)
{
- residualQTIntraChroma(mode, cuGeom, tuDepth + 1, qPartIdx);
+ residualQTIntraChroma(mode, cuGeom, qPartIdx, tuDepth + 1);
splitCbfU |= cu.getCbf(qPartIdx, TEXT_CHROMA_U, tuDepth + 1);
splitCbfV |= cu.getCbf(qPartIdx, TEXT_CHROMA_V, tuDepth + 1);
}
@@ -1115,12 +1061,91 @@
cu.m_cbf[1][absPartIdx + offs] |= (splitCbfU << tuDepth);
cu.m_cbf[2][absPartIdx + offs] |= (splitCbfV << tuDepth);
}
+
+ return;
+ }
+
+ uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
+ uint32_t tuDepthC = tuDepth;
+ if (log2TrSizeC < 2)
+ {
+ X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && tuDepth, "invalid tuDepth\n");
+ if (absPartIdx & 3)
+ return;
+ log2TrSizeC = 2;
+ tuDepthC--;
+ }
+
+ ShortYuv& resiYuv = m_rqt[cuGeom.depth].tmpResiYuv;
+ uint32_t tuSize = 1 << log2TrSizeC;
+ uint32_t stride = mode.fencYuv->m_csize;
+ const uint32_t sizeIdxC = log2TrSizeC - 2;
+
+ uint32_t curPartNum = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + tuDepthC) << 1);
+ const SplitType splitType = (m_csp == X265_CSP_I422) ? VERTICAL_SPLIT : DONT_SPLIT;
+
+ TURecurse tuIterator(splitType, curPartNum, absPartIdx);
+ do
+ {
+ uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU;
+
+ IntraNeighbors intraNeighbors;
+ initIntraNeighbors(cu, absPartIdxC, tuDepthC, false, &intraNeighbors);
+
+ for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
+ {
+ TextType ttype = (TextType)chromaId;
+
+ const pixel* fenc = mode.fencYuv->getChromaAddr(chromaId, absPartIdxC);
+ pixel* pred = mode.predYuv.getChromaAddr(chromaId, absPartIdxC);
+ int16_t* residual = resiYuv.getChromaAddr(chromaId, absPartIdxC);
+ uint32_t coeffOffsetC = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (m_hChromaShift + m_vChromaShift));
+ coeff_t* coeffC = cu.m_trCoeff[ttype] + coeffOffsetC;
+ pixel* picReconC = m_frame->m_reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.encodeIdx + absPartIdxC);
+ intptr_t picStride = m_frame->m_reconPic->m_strideC;
+
+ uint32_t chromaPredMode = cu.m_chromaIntraDir[absPartIdxC];
+ if (chromaPredMode == DM_CHROMA_IDX)
+ chromaPredMode = cu.m_lumaIntraDir[(m_csp == X265_CSP_I444) ? absPartIdxC : 0];
+ if (m_csp == X265_CSP_I422)
+ chromaPredMode = g_chroma422IntraAngleMappingTable[chromaPredMode];
+
+ // init availability pattern
+ initAdiPatternChroma(cu, cuGeom, absPartIdxC, intraNeighbors, chromaId);
+ pixel* chromaPred = getAdiChromaBuf(chromaId, tuSize);
+
+ // get prediction signal
+ predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, log2TrSizeC, m_csp);
+
+ X265_CHECK(!cu.m_transformSkip[ttype][0], "transform skip not supported at low RD levels\n");
+
+ primitives.calcresidual[sizeIdxC](fenc, pred, residual, stride);
+ uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffC, log2TrSizeC, ttype, absPartIdxC, false);
+ if (numSig)
+ {
+ m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
+ primitives.luma_add_ps[sizeIdxC](picReconC, picStride, pred, residual, stride, stride);
+ cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
+ }
+ else
+ {
+ // no coded residual, recon = pred
+ primitives.luma_copy_pp[sizeIdxC](picReconC, picStride, pred, stride);
+ cu.setCbfPartRange(0, ttype, absPartIdxC, tuIterator.absPartIdxStep);
+ }
+ }
+ }
+ while (tuIterator.isNextSection());
+
+ if (splitType == VERTICAL_SPLIT)
+ {
+ offsetSubTUCBFs(cu, TEXT_CHROMA_U, tuDepth, absPartIdx);
+ offsetSubTUCBFs(cu, TEXT_CHROMA_V, tuDepth, absPartIdx);
}
}
void Search::checkIntra(Mode& intraMode, const CUGeom& cuGeom, PartSize partSize, uint8_t* sharedModes)
{
- uint32_t depth = cuGeom.depth;
CUData& cu = intraMode.cu;
cu.setPartSizeSubParts(partSize);
@@ -1143,7 +1168,7 @@
m_entropyCoder.codePredMode(cu.m_predMode[0]);
}
- m_entropyCoder.codePartSize(cu, 0, depth);
+ m_entropyCoder.codePartSize(cu, 0, cuGeom.depth);
m_entropyCoder.codePredInfo(cu, 0);
intraMode.mvBits = m_entropyCoder.getNumberOfWrittenBits();
@@ -1153,7 +1178,10 @@
intraMode.totalBits = m_entropyCoder.getNumberOfWrittenBits();
intraMode.coeffBits = intraMode.totalBits - intraMode.mvBits;
if (m_rdCost.m_psyRd)
- intraMode.psyEnergy = m_rdCost.psyCost(cuGeom.log2CUSize - 2, intraMode.fencYuv->m_buf[0], intraMode.fencYuv->m_size, intraMode.reconYuv.m_buf[0], intraMode.reconYuv.m_size);
+ {
+ const Yuv* fencYuv = intraMode.fencYuv;
+ intraMode.psyEnergy = m_rdCost.psyCost(cuGeom.log2CUSize - 2, fencYuv->m_buf[0], fencYuv->m_size, intraMode.reconYuv.m_buf[0], intraMode.reconYuv.m_size);
+ }
updateModeCost(intraMode);
}
@@ -1174,7 +1202,9 @@
const uint32_t absPartIdx = 0;
// Reference sample smoothing
- initAdiPattern(cu, cuGeom, absPartIdx, initTuDepth, ALL_IDX);
+ IntraNeighbors intraNeighbors;
+ initIntraNeighbors(cu, absPartIdx, initTuDepth, true, &intraNeighbors);
+ initAdiPattern(cu, cuGeom, absPartIdx, intraNeighbors, ALL_IDX);
const pixel* fenc = intraMode.fencYuv->m_buf[0];
uint32_t stride = intraMode.fencYuv->m_size;
@@ -1335,7 +1365,6 @@
{
CUData& cu = intraMode.cu;
Yuv* reconYuv = &intraMode.reconYuv;
- const Yuv* fencYuv = intraMode.fencYuv;
X265_CHECK(cu.m_partSize[0] == SIZE_2Nx2N, "encodeIntraInInter does not expect NxN intra\n");
X265_CHECK(!m_slice->isIntra(), "encodeIntraInInter does not expect to be used in I slices\n");
@@ -1369,7 +1398,10 @@
intraMode.totalBits = m_entropyCoder.getNumberOfWrittenBits();
intraMode.coeffBits = intraMode.totalBits - intraMode.mvBits;
if (m_rdCost.m_psyRd)
+ {
+ const Yuv* fencYuv = intraMode.fencYuv;
intraMode.psyEnergy = m_rdCost.psyCost(cuGeom.log2CUSize - 2, fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
+ }
m_entropyCoder.store(intraMode.contexts);
updateModeCost(intraMode);
@@ -1404,7 +1436,9 @@
else
{
// Reference sample smoothing
- initAdiPattern(cu, cuGeom, absPartIdx, initTuDepth, ALL_IDX);
+ IntraNeighbors intraNeighbors;
+ initIntraNeighbors(cu, absPartIdx, initTuDepth, true, &intraNeighbors);
+ initAdiPattern(cu, cuGeom, absPartIdx, intraNeighbors, ALL_IDX);
// determine set of modes to be tested (using prediction signal only)
const pixel* fenc = fencYuv->getLumaAddr(absPartIdx);
@@ -1602,8 +1636,10 @@
log2TrSizeC = 5;
}
- Predict::initAdiPatternChroma(cu, cuGeom, 0, tuDepth, 1);
- Predict::initAdiPatternChroma(cu, cuGeom, 0, tuDepth, 2);
+ IntraNeighbors intraNeighbors;
+ initIntraNeighbors(cu, 0, tuDepth, false, &intraNeighbors);
+ Predict::initAdiPatternChroma(cu, cuGeom, 0, intraNeighbors, 1); // U
+ Predict::initAdiPatternChroma(cu, cuGeom, 0, intraNeighbors, 2); // V
cu.getAllowedChromaDir(0, modeList);
// check chroma modes
@@ -2581,16 +2617,16 @@
updateModeCost(interMode);
}
-void Search::residualTransformQuantInter(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, const uint32_t depthRange[2])
+void Search::residualTransformQuantInter(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth, const uint32_t depthRange[2])
{
+ uint32_t depth = cuGeom.depth + tuDepth;
CUData& cu = mode.cu;
X265_CHECK(cu.m_cuDepth[0] == cu.m_cuDepth[absPartIdx], "invalid depth\n");
uint32_t log2TrSize = g_maxLog2CUSize - depth;
- uint32_t tuDepth = depth - cu.m_cuDepth[0];
bool bCheckFull = log2TrSize <= depthRange[1];
- if (cu.m_partSize[0] != SIZE_2Nx2N && depth == cu.m_cuDepth[absPartIdx] && log2TrSize > depthRange[0])
+ if (cu.m_partSize[0] != SIZE_2Nx2N && !tuDepth && log2TrSize > depthRange[0])
bCheckFull = false;
if (bCheckFull)
@@ -2611,7 +2647,7 @@
uint32_t setCbf = 1 << tuDepth;
uint32_t coeffOffsetY = absPartIdx << (LOG2_UNIT_SIZE * 2);
- coeff_t *coeffCurY = cu.m_trCoeff[0] + coeffOffsetY;
+ coeff_t* coeffCurY = cu.m_trCoeff[0] + coeffOffsetY;
uint32_t sizeIdx = log2TrSize - 2;
@@ -2644,8 +2680,8 @@
uint32_t strideResiC = resiYuv.m_csize;
uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
- coeff_t *coeffCurU = cu.m_trCoeff[1] + coeffOffsetC;
- coeff_t *coeffCurV = cu.m_trCoeff[2] + coeffOffsetC;
+ coeff_t* coeffCurU = cu.m_trCoeff[1] + coeffOffsetC;
+ coeff_t* coeffCurV = cu.m_trCoeff[2] + coeffOffsetC;
bool splitIntoSubTUs = (m_csp == X265_CSP_I422);
TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, absPartIdxStep, absPartIdx);
@@ -2702,16 +2738,16 @@
uint32_t ycbf = 0, ucbf = 0, vcbf = 0;
for (uint32_t qIdx = 0, qPartIdx = absPartIdx; qIdx < 4; ++qIdx, qPartIdx += qNumParts)
{
- residualTransformQuantInter(mode, cuGeom, qPartIdx, depth + 1, depthRange);
- ycbf |= cu.getCbf(qPartIdx, TEXT_LUMA, tuDepth + 1);
+ residualTransformQuantInter(mode, cuGeom, qPartIdx, tuDepth + 1, depthRange);
+ ycbf |= cu.getCbf(qPartIdx, TEXT_LUMA, tuDepth + 1);
ucbf |= cu.getCbf(qPartIdx, TEXT_CHROMA_U, tuDepth + 1);
vcbf |= cu.getCbf(qPartIdx, TEXT_CHROMA_V, tuDepth + 1);
}
- for (uint32_t i = 0; i < 4 * qNumParts; i++)
+ for (uint32_t i = 0; i < 4 * qNumParts; ++i)
{
- cu.m_cbf[TEXT_LUMA][absPartIdx + i] |= ycbf << tuDepth;
- cu.m_cbf[TEXT_CHROMA_U][absPartIdx + i] |= ucbf << tuDepth;
- cu.m_cbf[TEXT_CHROMA_V][absPartIdx + i] |= vcbf << tuDepth;
+ cu.m_cbf[0][absPartIdx + i] |= ycbf << tuDepth;
+ cu.m_cbf[1][absPartIdx + i] |= ucbf << tuDepth;
+ cu.m_cbf[2][absPartIdx + i] |= vcbf << tuDepth;
}
}
}
@@ -2769,7 +2805,7 @@
uint32_t trSize = 1 << log2TrSize;
const bool splitIntoSubTUs = (m_csp == X265_CSP_I422);
- uint32_t absPartIdxStep = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + tuDepthC) << 1);
+ uint32_t absPartIdxStep = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + tuDepthC) << 1);
const Yuv* fencYuv = mode.fencYuv;
// code full block
@@ -3127,16 +3163,19 @@
//Encode cbf flags
if (bCodeChroma)
{
- for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
+ if (!splitIntoSubTUs)
{
- if (!splitIntoSubTUs)
- m_entropyCoder.codeQtCbfChroma(cbfFlag[chromaId][0], tuDepth);
- else
- {
- offsetSubTUCBFs(cu, (TextType)chromaId, tuDepth, absPartIdx);
- m_entropyCoder.codeQtCbfChroma(cbfFlag[chromaId][0], tuDepth);
- m_entropyCoder.codeQtCbfChroma(cbfFlag[chromaId][1], tuDepth);
- }
+ m_entropyCoder.codeQtCbfChroma(cbfFlag[TEXT_CHROMA_U][0], tuDepth);
+ m_entropyCoder.codeQtCbfChroma(cbfFlag[TEXT_CHROMA_V][0], tuDepth);
+ }
+ else
+ {
+ offsetSubTUCBFs(cu, TEXT_CHROMA_U, tuDepth, absPartIdx);
+ offsetSubTUCBFs(cu, TEXT_CHROMA_V, tuDepth, absPartIdx);
+ m_entropyCoder.codeQtCbfChroma(cbfFlag[TEXT_CHROMA_U][0], tuDepth);
+ m_entropyCoder.codeQtCbfChroma(cbfFlag[TEXT_CHROMA_U][1], tuDepth);
+ m_entropyCoder.codeQtCbfChroma(cbfFlag[TEXT_CHROMA_V][0], tuDepth);
+ m_entropyCoder.codeQtCbfChroma(cbfFlag[TEXT_CHROMA_V][1], tuDepth);
}
}
diff -r 8d2f418829c8 -r 6b59452a17d7 source/encoder/search.h
--- a/source/encoder/search.h Sat Dec 20 21:27:14 2014 +0900
+++ b/source/encoder/search.h Tue Dec 23 14:49:59 2014 +0900
@@ -178,9 +178,9 @@
void encodeResAndCalcRdSkipCU(Mode& interMode);
// encode residual without rd-cost
- void residualTransformQuantInter(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, const uint32_t depthRange[2]);
- void residualTransformQuantIntra(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, const uint32_t depthRange[2]);
- void residualQTIntraChroma(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx);
+ void residualTransformQuantInter(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth, const uint32_t depthRange[2]);
+ void residualTransformQuantIntra(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth, const uint32_t depthRange[2]);
+ void residualQTIntraChroma(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth);
// pick be chroma mode from available using just sa8d costs
void getBestIntraModeChroma(Mode& intraMode, const CUGeom& cuGeom);
More information about the x265-devel
mailing list