[x265] more use CUGeom
Satoshi Nakagawa
nakagawa424 at oki.com
Sat Jan 17 10:36:30 CET 2015
# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1421487172 -32400
# Sat Jan 17 18:32:52 2015 +0900
# Node ID 270c9786681069d34c8eb709b74412843e37373a
# Parent 65e71f08c55a0e9303d51691b3435cb5fdf6c6a1
more use CUGeom
diff -r 65e71f08c55a -r 270c97866810 source/common/cudata.cpp
--- a/source/common/cudata.cpp Sat Jan 17 10:12:34 2015 +0530
+++ b/source/common/cudata.cpp Sat Jan 17 18:32:52 2015 +0900
@@ -57,51 +57,51 @@
void bcast256(uint8_t* dst, uint8_t val) { memset(dst, val, 256); }
/* Check whether 2 addresses point to the same column */
-inline bool isEqualCol(int addrA, int addrB, int numUnitsPerRow)
+inline bool isEqualCol(int addrA, int addrB, int numUnits)
{
- // addrA % numUnitsPerRow == addrB % numUnitsPerRow
- return ((addrA ^ addrB) & (numUnitsPerRow - 1)) == 0;
+ // addrA % numUnits == addrB % numUnits
+ return ((addrA ^ addrB) & (numUnits - 1)) == 0;
}
/* Check whether 2 addresses point to the same row */
-inline bool isEqualRow(int addrA, int addrB, int numUnitsPerRow)
+inline bool isEqualRow(int addrA, int addrB, int numUnits)
{
- // addrA / numUnitsPerRow == addrB / numUnitsPerRow
- return ((addrA ^ addrB) & ~(numUnitsPerRow - 1)) == 0;
+ // addrA / numUnits == addrB / numUnits
+ return ((addrA ^ addrB) & ~(numUnits - 1)) == 0;
}
/* Check whether 2 addresses point to the same row or column */
-inline bool isEqualRowOrCol(int addrA, int addrB, int numUnitsPerRow)
+inline bool isEqualRowOrCol(int addrA, int addrB, int numUnits)
{
- return isEqualCol(addrA, addrB, numUnitsPerRow) | isEqualRow(addrA, addrB, numUnitsPerRow);
+ return isEqualCol(addrA, addrB, numUnits) | isEqualRow(addrA, addrB, numUnits);
}
/* Check whether one address points to the first column */
-inline bool isZeroCol(int addr, int numUnitsPerRow)
+inline bool isZeroCol(int addr, int numUnits)
{
- // addr % numUnitsPerRow == 0
- return (addr & (numUnitsPerRow - 1)) == 0;
+ // addr % numUnits == 0
+ return (addr & (numUnits - 1)) == 0;
}
/* Check whether one address points to the first row */
-inline bool isZeroRow(int addr, int numUnitsPerRow)
+inline bool isZeroRow(int addr, int numUnits)
{
- // addr / numUnitsPerRow == 0
- return (addr & ~(numUnitsPerRow - 1)) == 0;
+ // addr / numUnits == 0
+ return (addr & ~(numUnits - 1)) == 0;
}
/* Check whether one address points to a column whose index is smaller than a given value */
-inline bool lessThanCol(int addr, int val, int numUnitsPerRow)
+inline bool lessThanCol(int addr, int val, int numUnits)
{
- // addr % numUnitsPerRow < val
- return (addr & (numUnitsPerRow - 1)) < val;
+ // addr % numUnits < val
+ return (addr & (numUnits - 1)) < val;
}
/* Check whether one address points to a row whose index is smaller than a given value */
-inline bool lessThanRow(int addr, int val, int numUnitsPerRow)
+inline bool lessThanRow(int addr, int val, int numUnits)
{
- // addr / numUnitsPerRow < val
- return addr < val * numUnitsPerRow;
+ // addr / numUnits < val
+ return addr < val * numUnits;
}
inline MV scaleMv(MV mv, int scale)
@@ -1533,17 +1533,17 @@
m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples)
{
uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB];
- uint32_t numPartInCUSize = s_numPartInCUSize;
- bool bNotLastCol = lessThanCol(absPartIdxRB, numPartInCUSize - 1, numPartInCUSize); // is not at the last column of CTU
- bool bNotLastRow = lessThanRow(absPartIdxRB, numPartInCUSize - 1, numPartInCUSize); // is not at the last row of CTU
+ uint32_t numUnits = s_numPartInCUSize;
+ bool bNotLastCol = lessThanCol(absPartIdxRB, numUnits - 1, numUnits); // is not at the last column of CTU
+ bool bNotLastRow = lessThanRow(absPartIdxRB, numUnits - 1, numUnits); // is not at the last row of CTU
if (bNotLastCol && bNotLastRow)
{
- absPartAddr = g_rasterToZscan[absPartIdxRB + numPartInCUSize + 1];
+ absPartAddr = g_rasterToZscan[absPartIdxRB + numUnits + 1];
ctuIdx = m_cuAddr;
}
else if (bNotLastCol)
- absPartAddr = g_rasterToZscan[(absPartIdxRB + numPartInCUSize + 1) & (numPartInCUSize - 1)];
+ absPartAddr = g_rasterToZscan[(absPartIdxRB + numUnits + 1) & (numUnits - 1)];
else if (bNotLastRow)
{
absPartAddr = g_rasterToZscan[absPartIdxRB + 1];
@@ -1760,17 +1760,17 @@
m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples)
{
uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB];
- uint32_t numPartInCUSize = s_numPartInCUSize;
- bool bNotLastCol = lessThanCol(absPartIdxRB, numPartInCUSize - 1, numPartInCUSize); // is not at the last column of CTU
- bool bNotLastRow = lessThanRow(absPartIdxRB, numPartInCUSize - 1, numPartInCUSize); // is not at the last row of CTU
+ uint32_t numUnits = s_numPartInCUSize;
+ bool bNotLastCol = lessThanCol(absPartIdxRB, numUnits - 1, numUnits); // is not at the last column of CTU
+ bool bNotLastRow = lessThanRow(absPartIdxRB, numUnits - 1, numUnits); // is not at the last row of CTU
if (bNotLastCol && bNotLastRow)
{
- absPartAddr = g_rasterToZscan[absPartIdxRB + numPartInCUSize + 1];
+ absPartAddr = g_rasterToZscan[absPartIdxRB + numUnits + 1];
ctuIdx = m_cuAddr;
}
else if (bNotLastCol)
- absPartAddr = g_rasterToZscan[(absPartIdxRB + numPartInCUSize + 1) & (numPartInCUSize - 1)];
+ absPartAddr = g_rasterToZscan[(absPartIdxRB + numUnits + 1) & (numUnits - 1)];
else if (bNotLastRow)
{
absPartAddr = g_rasterToZscan[absPartIdxRB + 1];
diff -r 65e71f08c55a -r 270c97866810 source/common/deblock.cpp
--- a/source/common/deblock.cpp Sat Jan 17 10:12:34 2015 +0530
+++ b/source/common/deblock.cpp Sat Jan 17 18:32:52 2015 +0900
@@ -33,13 +33,13 @@
#define DEBLOCK_SMALLEST_BLOCK 8
#define DEFAULT_INTRA_TC_OFFSET 2
-void Deblock::deblockCTU(const CUData* ctu, int32_t dir)
+void Deblock::deblockCTU(const CUData* ctu, const CUGeom& cuGeom, int32_t dir)
{
uint8_t blockStrength[MAX_NUM_PARTITIONS];
- memset(blockStrength, 0, sizeof(uint8_t) * m_numPartitions);
+ memset(blockStrength, 0, sizeof(uint8_t) * cuGeom.numPartitions);
- deblockCU(ctu, 0, 0, dir, blockStrength);
+ deblockCU(ctu, cuGeom, dir, blockStrength);
}
static inline uint8_t bsCuEdge(const CUData* cu, uint32_t absPartIdx, int32_t dir)
@@ -68,32 +68,31 @@
/* Deblocking filter process in CU-based (the same function as conventional's)
* param Edge the direction of the edge in block boundary (horizonta/vertical), which is added newly */
-void Deblock::deblockCU(const CUData* cu, uint32_t absPartIdx, uint32_t depth, const int32_t dir, uint8_t blockStrength[])
+void Deblock::deblockCU(const CUData* cu, const CUGeom& cuGeom, const int32_t dir, uint8_t blockStrength[])
{
+ uint32_t absPartIdx = cuGeom.encodeIdx;
+ uint32_t depth = cuGeom.depth;
if (cu->m_predMode[absPartIdx] == MODE_NONE)
return;
- uint32_t curNumParts = NUM_CU_PARTITIONS >> (depth << 1);
-
- const SPS& sps = *cu->m_slice->m_sps;
-
if (cu->m_cuDepth[absPartIdx] > depth)
{
- uint32_t qNumParts = curNumParts >> 2;
- uint32_t xmax = sps.picWidthInLumaSamples - cu->m_cuPelX;
- uint32_t ymax = sps.picHeightInLumaSamples - cu->m_cuPelY;
- for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absPartIdx += qNumParts)
- if (g_zscanToPelX[absPartIdx] < xmax && g_zscanToPelY[absPartIdx] < ymax)
- deblockCU(cu, absPartIdx, depth + 1, dir, blockStrength);
+ for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
+ {
+ const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
+ if (childGeom.flags & CUGeom::PRESENT)
+ deblockCU(cu, childGeom, dir, blockStrength);
+ }
return;
}
- const uint32_t numUnits = sps.numPartInCUSize >> depth;
+ uint32_t numUnits = 1 << (cuGeom.log2CUSize - LOG2_UNIT_SIZE);
setEdgefilterPU(cu, absPartIdx, dir, blockStrength, numUnits);
- setEdgefilterTU(cu, absPartIdx, depth, dir, blockStrength);
+ setEdgefilterTU(cu, absPartIdx, 0, dir, blockStrength);
setEdgefilterMultiple(cu, absPartIdx, dir, 0, bsCuEdge(cu, absPartIdx, dir), blockStrength, numUnits);
- for (uint32_t partIdx = absPartIdx; partIdx < absPartIdx + curNumParts; partIdx++)
+ uint32_t numParts = cuGeom.numPartitions;
+ for (uint32_t partIdx = absPartIdx; partIdx < absPartIdx + numParts; partIdx++)
{
uint32_t bsCheck = !(partIdx & (1 << dir));
@@ -102,12 +101,11 @@
}
const uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE;
- uint32_t sizeInPU = sps.numPartInCUSize >> depth;
uint32_t shiftFactor = (dir == EDGE_VER) ? cu->m_hChromaShift : cu->m_vChromaShift;
uint32_t chromaMask = ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) >> LOG2_UNIT_SIZE) - 1;
uint32_t e0 = (dir == EDGE_VER ? g_zscanToPelX[absPartIdx] : g_zscanToPelY[absPartIdx]) >> LOG2_UNIT_SIZE;
- for (uint32_t e = 0; e < sizeInPU; e += partIdxIncr)
+ for (uint32_t e = 0; e < numUnits; e += partIdxIncr)
{
edgeFilterLuma(cu, absPartIdx, depth, dir, e, blockStrength);
if (!((e0 + e) & chromaMask))
@@ -117,12 +115,12 @@
static inline uint32_t calcBsIdx(const CUData* cu, uint32_t absPartIdx, int32_t dir, int32_t edgeIdx, int32_t baseUnitIdx)
{
- uint32_t numPartInCUSize = cu->m_slice->m_sps->numPartInCUSize;
+ uint32_t numUnits = cu->m_slice->m_sps->numPartInCUSize;
if (dir)
- return g_rasterToZscan[g_zscanToRaster[absPartIdx] + edgeIdx * numPartInCUSize + baseUnitIdx];
+ return g_rasterToZscan[g_zscanToRaster[absPartIdx] + edgeIdx * numUnits + baseUnitIdx];
else
- return g_rasterToZscan[g_zscanToRaster[absPartIdx] + baseUnitIdx * numPartInCUSize + edgeIdx];
+ return g_rasterToZscan[g_zscanToRaster[absPartIdx] + baseUnitIdx * numUnits + edgeIdx];
}
void Deblock::setEdgefilterMultiple(const CUData* cu, uint32_t scanIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockStrength[], uint32_t numUnits)
@@ -135,19 +133,18 @@
}
}
-void Deblock::setEdgefilterTU(const CUData* cu, uint32_t absPartIdx, uint32_t depth, int32_t dir, uint8_t blockStrength[])
+void Deblock::setEdgefilterTU(const CUData* cu, uint32_t absPartIdx, uint32_t tuDepth, int32_t dir, uint8_t blockStrength[])
{
- if ((uint32_t)cu->m_tuDepth[absPartIdx] + cu->m_cuDepth[absPartIdx] > depth)
+ uint32_t log2TrSize = cu->m_log2CUSize[absPartIdx] - tuDepth;
+ if (cu->m_tuDepth[absPartIdx] > tuDepth)
{
- const uint32_t curNumParts = NUM_CU_PARTITIONS >> (depth << 1);
- const uint32_t qNumParts = curNumParts >> 2;
-
- for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absPartIdx += qNumParts)
- setEdgefilterTU(cu, absPartIdx, depth + 1, dir, blockStrength);
+ uint32_t qNumParts = 1 << (log2TrSize - LOG2_UNIT_SIZE - 1) * 2;
+ for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
+ setEdgefilterTU(cu, absPartIdx, tuDepth + 1, dir, blockStrength);
return;
}
- uint32_t numUnits = 1 << (cu->m_log2CUSize[absPartIdx] - cu->m_tuDepth[absPartIdx] - LOG2_UNIT_SIZE);
+ uint32_t numUnits = 1 << (log2TrSize - LOG2_UNIT_SIZE);
setEdgefilterMultiple(cu, absPartIdx, dir, 0, 2, blockStrength, numUnits);
}
@@ -501,7 +498,6 @@
srcChroma[1] = reconPic->m_picOrg[2] + srcOffset;
uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> (depth + chromaShift);
-
for (uint32_t idx = 0; idx < numUnits; idx++)
{
uint32_t partQ = calcBsIdx(cuQ, absPartIdx, dir, edge, idx << chromaShift);
diff -r 65e71f08c55a -r 270c97866810 source/common/deblock.h
--- a/source/common/deblock.h Sat Jan 17 10:12:34 2015 +0530
+++ b/source/common/deblock.h Sat Jan 17 18:32:52 2015 +0900
@@ -30,27 +30,22 @@
// private namespace
class CUData;
+struct CUGeom;
class Deblock
{
public:
enum { EDGE_VER, EDGE_HOR };
- uint32_t m_numPartitions;
-
- Deblock() : m_numPartitions(0) {}
-
- void init() { m_numPartitions = 1 << (g_maxFullDepth * 2); }
-
- void deblockCTU(const CUData* ctu, int32_t dir);
+ void deblockCTU(const CUData* ctu, const CUGeom& cuGeom, int32_t dir);
protected:
// CU-level deblocking function
- void deblockCU(const CUData* cu, uint32_t absPartIdx, uint32_t depth, const int32_t dir, uint8_t blockStrength[]);
+ void deblockCU(const CUData* cu, const CUGeom& cuGeom, const int32_t dir, uint8_t blockStrength[]);
// set filtering functions
- void setEdgefilterTU(const CUData* cu, uint32_t absPartIdx, uint32_t depth, int32_t dir, uint8_t blockStrength[]);
+ void setEdgefilterTU(const CUData* cu, uint32_t absPartIdx, uint32_t tuDepth, int32_t dir, uint8_t blockStrength[]);
void setEdgefilterPU(const CUData* cu, uint32_t absPartIdx, int32_t dir, uint8_t blockStrength[], uint32_t numUnits);
void setEdgefilterMultiple(const CUData* cu, uint32_t absPartIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockStrength[], uint32_t numUnits);
diff -r 65e71f08c55a -r 270c97866810 source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp Sat Jan 17 10:12:34 2015 +0530
+++ b/source/encoder/framefilter.cpp Sat Jan 17 18:32:52 2015 +0900
@@ -63,8 +63,6 @@
m_saoRowDelay = m_param->bEnableLoopFilter ? 1 : 0;
m_lastHeight = m_param->sourceHeight % g_maxCUSize ? m_param->sourceHeight % g_maxCUSize : g_maxCUSize;
- m_deblock.init();
-
if (m_param->bEnableSAO)
if (!m_sao.create(m_param))
m_param->bEnableSAO = 0;
@@ -96,22 +94,24 @@
if (m_param->bEnableLoopFilter)
{
+ const CUGeom* cuGeoms = m_frameEncoder->m_cuGeoms;
+ const uint32_t* ctuGeomMap = m_frameEncoder->m_ctuGeomMap;
+
for (uint32_t col = 0; col < numCols; col++)
{
uint32_t cuAddr = lineStartCUAddr + col;
const CUData* ctu = encData.getPicCTU(cuAddr);
-
- m_deblock.deblockCTU(ctu, Deblock::EDGE_VER);
+ deblockCTU(ctu, cuGeoms[ctuGeomMap[cuAddr]], Deblock::EDGE_VER);
if (col > 0)
{
const CUData* ctuPrev = encData.getPicCTU(cuAddr - 1);
- m_deblock.deblockCTU(ctuPrev, Deblock::EDGE_HOR);
+ deblockCTU(ctuPrev, cuGeoms[ctuGeomMap[cuAddr - 1]], Deblock::EDGE_HOR);
}
}
const CUData* ctuPrev = encData.getPicCTU(lineStartCUAddr + numCols - 1);
- m_deblock.deblockCTU(ctuPrev, Deblock::EDGE_HOR);
+ deblockCTU(ctuPrev, cuGeoms[ctuGeomMap[lineStartCUAddr + numCols - 1]], Deblock::EDGE_HOR);
}
// SAO
@@ -394,23 +394,24 @@
}
/* restore original YUV samples to recon after SAO (if lossless) */
-static void restoreOrigLosslessYuv(const CUData* cu, Frame& frame, uint32_t absPartIdx, uint32_t depth)
+static void restoreOrigLosslessYuv(const CUData* cu, Frame& frame, uint32_t absPartIdx)
{
- int size = g_maxLog2CUSize - depth - 2;
+ int size = cu->m_log2CUSize[absPartIdx] - 2;
+ uint32_t cuAddr = cu->m_cuAddr;
PicYuv* reconPic = frame.m_reconPic;
PicYuv* fencPic = frame.m_fencPic;
- pixel* dst = reconPic->getLumaAddr(cu->m_cuAddr, absPartIdx);
- pixel* src = fencPic->getLumaAddr(cu->m_cuAddr, absPartIdx);
+ pixel* dst = reconPic->getLumaAddr(cuAddr, absPartIdx);
+ pixel* src = fencPic->getLumaAddr(cuAddr, absPartIdx);
primitives.cu[size].copy_pp(dst, reconPic->m_stride, src, fencPic->m_stride);
- pixel* dstCb = reconPic->getCbAddr(cu->m_cuAddr, absPartIdx);
- pixel* srcCb = fencPic->getCbAddr(cu->m_cuAddr, absPartIdx);
+ pixel* dstCb = reconPic->getCbAddr(cuAddr, absPartIdx);
+ pixel* srcCb = fencPic->getCbAddr(cuAddr, absPartIdx);
- pixel* dstCr = reconPic->getCrAddr(cu->m_cuAddr, absPartIdx);
- pixel* srcCr = fencPic->getCrAddr(cu->m_cuAddr, absPartIdx);
+ pixel* dstCr = reconPic->getCrAddr(cuAddr, absPartIdx);
+ pixel* srcCr = fencPic->getCrAddr(cuAddr, absPartIdx);
int csp = fencPic->m_picCsp;
primitives.chroma[csp].cu[size].copy_pp(dstCb, reconPic->m_strideC, srcCb, fencPic->m_strideC);
@@ -418,34 +419,29 @@
}
/* Original YUV restoration for CU in lossless coding */
-static void origCUSampleRestoration(const CUData* cu, Frame& frame, uint32_t absPartIdx, uint32_t depth)
+static void origCUSampleRestoration(const CUData* cu, const CUGeom& cuGeom, Frame& frame)
{
- if (cu->m_cuDepth[absPartIdx] > depth)
+ uint32_t absPartIdx = cuGeom.encodeIdx;
+ if (cu->m_cuDepth[absPartIdx] > cuGeom.depth)
{
- /* TODO: this could use cuGeom.numPartition and flags */
- uint32_t curNumParts = NUM_CU_PARTITIONS >> (depth << 1);
- uint32_t qNumParts = curNumParts >> 2;
- uint32_t xmax = cu->m_slice->m_sps->picWidthInLumaSamples - cu->m_cuPelX;
- uint32_t ymax = cu->m_slice->m_sps->picHeightInLumaSamples - cu->m_cuPelY;
-
- /* process four split sub-cu at next depth */
- for (int subPartIdx = 0; subPartIdx < 4; subPartIdx++, absPartIdx += qNumParts)
+ for (int subPartIdx = 0; subPartIdx < 4; subPartIdx++)
{
- if (g_zscanToPelX[absPartIdx] < xmax && g_zscanToPelY[absPartIdx] < ymax)
- origCUSampleRestoration(cu, frame, absPartIdx, depth + 1);
+ const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
+ if (childGeom.flags & CUGeom::PRESENT)
+ origCUSampleRestoration(cu, childGeom, frame);
}
-
return;
}
// restore original YUV samples
if (cu->m_tqBypass[absPartIdx])
- restoreOrigLosslessYuv(cu, frame, absPartIdx, depth);
+ restoreOrigLosslessYuv(cu, frame, absPartIdx);
}
void FrameFilter::processSao(int row)
{
- SAOParam* saoParam = m_frame->m_encData->m_saoParam;
+ FrameData& encData = *m_frame->m_encData;
+ SAOParam* saoParam = encData.m_saoParam;
if (saoParam->bSaoFlag[0])
m_sao.processSaoUnitRow(saoParam->ctuParam[0], row, 0);
@@ -456,12 +452,19 @@
m_sao.processSaoUnitRow(saoParam->ctuParam[2], row, 2);
}
- if (m_frame->m_encData->m_slice->m_pps->bTransquantBypassEnabled)
+ if (encData.m_slice->m_pps->bTransquantBypassEnabled)
{
- uint32_t numCols = m_frame->m_encData->m_slice->m_sps->numCuInWidth;
+ uint32_t numCols = encData.m_slice->m_sps->numCuInWidth;
uint32_t lineStartCUAddr = row * numCols;
+ const CUGeom* cuGeoms = m_frameEncoder->m_cuGeoms;
+ const uint32_t* ctuGeomMap = m_frameEncoder->m_ctuGeomMap;
+
for (uint32_t col = 0; col < numCols; col++)
- origCUSampleRestoration(m_frame->m_encData->getPicCTU(lineStartCUAddr + col), *m_frame, 0, 0);
+ {
+ uint32_t cuAddr = lineStartCUAddr + col;
+ const CUData* ctu = encData.getPicCTU(cuAddr);
+ origCUSampleRestoration(ctu, cuGeoms[ctuGeomMap[cuAddr]], *m_frame);
+ }
}
}
diff -r 65e71f08c55a -r 270c97866810 source/encoder/framefilter.h
--- a/source/encoder/framefilter.h Sat Jan 17 10:12:34 2015 +0530
+++ b/source/encoder/framefilter.h Sat Jan 17 18:32:52 2015 +0900
@@ -39,7 +39,7 @@
struct ThreadLocalData;
// Manages the processing of a single frame loopfilter
-class FrameFilter
+class FrameFilter : public Deblock
{
public:
@@ -50,7 +50,6 @@
int m_vChromaShift;
int m_pad[2];
- Deblock m_deblock;
SAO m_sao;
int m_numRows;
int m_saoRowDelay;
diff -r 65e71f08c55a -r 270c97866810 source/encoder/search.cpp
--- a/source/encoder/search.cpp Sat Jan 17 10:12:34 2015 +0530
+++ b/source/encoder/search.cpp Sat Jan 17 18:32:52 2015 +0900
@@ -63,6 +63,7 @@
bool Search::initSearch(const x265_param& param, ScalingList& scalingList)
{
+ uint32_t maxLog2CUSize = g_log2Size[param.maxCUSize];
m_param = &param;
m_bEnableRDOQ = param.rdLevel >= 4;
m_bFrameParallel = param.frameNumThreads > 1;
@@ -81,9 +82,9 @@
* available for motion reference. See refLagRows in FrameEncoder::compressCTURows() */
m_refLagPixels = m_bFrameParallel ? param.searchRange : param.sourceHeight;
- uint32_t sizeL = 1 << (g_maxLog2CUSize * 2);
+ uint32_t sizeL = 1 << (maxLog2CUSize * 2);
uint32_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift);
- uint32_t numPartitions = NUM_CU_PARTITIONS;
+ uint32_t numPartitions = 1 << (maxLog2CUSize - LOG2_UNIT_SIZE) * 2;
/* these are indexed by qtLayer (log2size - 2) so nominally 0=4x4, 1=8x8, 2=16x16, 3=32x32
* the coeffRQT and reconQtYuv are allocated to the max CU size at every depth. The parts
@@ -167,9 +168,8 @@
void Search::codeSubdivCbfQTChroma(const CUData& cu, uint32_t tuDepth, uint32_t absPartIdx)
{
- uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth;
uint32_t subdiv = tuDepth < cu.m_tuDepth[absPartIdx];
- uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
+ uint32_t log2TrSize = cu.m_log2CUSize[0] - tuDepth;
if (!(log2TrSize - m_hChromaShift < 2))
{
@@ -192,8 +192,7 @@
if (!cu.getCbf(absPartIdx, ttype, tuDepth))
return;
- uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth;
- uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
+ uint32_t log2TrSize = cu.m_log2CUSize[0] - tuDepth;
if (tuDepth < cu.m_tuDepth[absPartIdx])
{
@@ -241,8 +240,8 @@
void Search::codeIntraLumaQT(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, bool bAllowSplit, Cost& outCost, const uint32_t depthRange[2])
{
CUData& cu = mode.cu;
- uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth;
- uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
+ uint32_t fullDepth = cuGeom.depth + tuDepth;
+ uint32_t log2TrSize = cuGeom.log2CUSize - tuDepth;
uint32_t qtLayer = log2TrSize - 2;
uint32_t sizeIdx = log2TrSize - 2;
bool mightNotSplit = log2TrSize <= depthRange[1];
@@ -317,7 +316,7 @@
m_entropyCoder.codePredMode(cu.m_predMode[0]);
}
- m_entropyCoder.codePartSize(cu, 0, cu.m_cuDepth[0]);
+ m_entropyCoder.codePartSize(cu, 0, cuGeom.depth);
}
if (cu.m_partSize[0] == SIZE_2Nx2N)
{
@@ -434,8 +433,8 @@
void Search::codeIntraLumaTSkip(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, Cost& outCost)
{
- uint32_t fullDepth = mode.cu.m_cuDepth[0] + tuDepth;
- uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
+ uint32_t fullDepth = cuGeom.depth + tuDepth;
+ uint32_t log2TrSize = cuGeom.log2CUSize - tuDepth;
uint32_t tuSize = 1 << log2TrSize;
X265_CHECK(tuSize == MAX_TS_SIZE, "transform skip is only possible at 4x4 TUs\n");
@@ -528,7 +527,7 @@
m_entropyCoder.codePredMode(cu.m_predMode[0]);
}
- m_entropyCoder.codePartSize(cu, 0, cu.m_cuDepth[0]);
+ m_entropyCoder.codePartSize(cu, 0, cuGeom.depth);
}
if (cu.m_partSize[0] == SIZE_2Nx2N)
{
@@ -604,8 +603,8 @@
void Search::residualTransformQuantIntra(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth, const uint32_t depthRange[2])
{
CUData& cu = mode.cu;
- uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth;
- uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
+ uint32_t fullDepth = cuGeom.depth + tuDepth;
+ uint32_t log2TrSize = cuGeom.log2CUSize - tuDepth;
bool bCheckFull = log2TrSize <= depthRange[1];
X265_CHECK(m_slice->m_sliceType != I_SLICE, "residualTransformQuantIntra not intended for I slices\n");
@@ -675,8 +674,7 @@
void Search::extractIntraResultQT(CUData& cu, Yuv& reconYuv, uint32_t tuDepth, uint32_t absPartIdx)
{
- uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth;
- uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
+ uint32_t log2TrSize = cu.m_log2CUSize[0] - tuDepth;
if (tuDepth == cu.m_tuDepth[absPartIdx])
{
@@ -709,9 +707,7 @@
/* 4:2:2 post-TU split processing */
void Search::offsetSubTUCBFs(CUData& cu, TextType ttype, uint32_t tuDepth, uint32_t absPartIdx)
{
- uint32_t depth = cu.m_cuDepth[0];
- uint32_t fullDepth = depth + tuDepth;
- uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
+ uint32_t log2TrSize = cu.m_log2CUSize[0] - tuDepth;
if (log2TrSize == 2)
{
@@ -735,8 +731,7 @@
uint32_t Search::codeIntraChromaQt(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, uint32_t& psyEnergy)
{
CUData& cu = mode.cu;
- uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth;
- uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
+ uint32_t log2TrSize = cuGeom.log2CUSize - tuDepth;
if (tuDepth < cu.m_tuDepth[absPartIdx])
{
@@ -782,7 +777,7 @@
const uint32_t sizeIdxC = log2TrSizeC - 2;
uint32_t outDist = 0;
- uint32_t curPartNum = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + tuDepthC) << 1);
+ uint32_t curPartNum = cuGeom.numPartitions >> tuDepthC * 2;
const SplitType splitType = (m_csp == X265_CSP_I422) ? VERTICAL_SPLIT : DONT_SPLIT;
TURecurse tuIterator(splitType, curPartNum, absPartIdx);
@@ -858,8 +853,8 @@
uint32_t Search::codeIntraChromaTSkip(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t tuDepthC, uint32_t absPartIdx, uint32_t& psyEnergy)
{
CUData& cu = mode.cu;
- uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth;
- uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
+ uint32_t fullDepth = cuGeom.depth + tuDepth;
+ uint32_t log2TrSize = cuGeom.log2CUSize - tuDepth;
const uint32_t log2TrSizeC = 2;
uint32_t qtLayer = log2TrSize - 2;
uint32_t outDist = 0;
@@ -872,7 +867,7 @@
ALIGN_VAR_32(coeff_t, tskipCoeffC[MAX_TS_SIZE * MAX_TS_SIZE]);
ALIGN_VAR_32(pixel, tskipReconC[MAX_TS_SIZE * MAX_TS_SIZE]);
- uint32_t curPartNum = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + tuDepthC) << 1);
+ uint32_t curPartNum = cuGeom.numPartitions >> tuDepthC * 2;
const SplitType splitType = (m_csp == X265_CSP_I422) ? VERTICAL_SPLIT : DONT_SPLIT;
TURecurse tuIterator(splitType, curPartNum, absPartIdx);
@@ -1006,9 +1001,8 @@
void Search::extractIntraResultChromaQT(CUData& cu, Yuv& reconYuv, uint32_t absPartIdx, uint32_t tuDepth)
{
- uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth;
uint32_t tuDepthL = cu.m_tuDepth[absPartIdx];
- uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
+ uint32_t log2TrSize = cu.m_log2CUSize[0] - tuDepth;
uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
if (tuDepthL == tuDepth || log2TrSizeC == 2)
@@ -1075,7 +1069,7 @@
uint32_t stride = mode.fencYuv->m_csize;
const uint32_t sizeIdxC = log2TrSizeC - 2;
- uint32_t curPartNum = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + tuDepthC) << 1);
+ uint32_t curPartNum = cuGeom.numPartitions >> tuDepthC * 2;
const SplitType splitType = (m_csp == X265_CSP_I422) ? VERTICAL_SPLIT : DONT_SPLIT;
TURecurse tuIterator(splitType, curPartNum, absPartIdx);
@@ -1184,13 +1178,13 @@
void Search::checkIntraInInter(Mode& intraMode, const CUGeom& cuGeom)
{
CUData& cu = intraMode.cu;
- uint32_t depth = cu.m_cuDepth[0];
+ uint32_t depth = cuGeom.depth;
cu.setPartSizeSubParts(SIZE_2Nx2N);
cu.setPredModeSubParts(MODE_INTRA);
const uint32_t initTuDepth = 0;
- uint32_t log2TrSize = cu.m_log2CUSize[0] - initTuDepth;
+ uint32_t log2TrSize = cuGeom.log2CUSize - initTuDepth;
uint32_t tuSize = 1 << log2TrSize;
const uint32_t absPartIdx = 0;
@@ -1403,10 +1397,10 @@
Yuv* predYuv = &intraMode.predYuv;
const Yuv* fencYuv = intraMode.fencYuv;
- uint32_t depth = cu.m_cuDepth[0];
+ uint32_t depth = cuGeom.depth;
uint32_t initTuDepth = cu.m_partSize[0] != SIZE_2Nx2N;
uint32_t numPU = 1 << (2 * initTuDepth);
- uint32_t log2TrSize = cu.m_log2CUSize[0] - initTuDepth;
+ uint32_t log2TrSize = cuGeom.log2CUSize - initTuDepth;
uint32_t tuSize = 1 << log2TrSize;
uint32_t qNumParts = cuGeom.numPartitions >> 2;
uint32_t sizeIdx = log2TrSize - 2;
@@ -1657,7 +1651,7 @@
}
}
- cu.setChromIntraDirSubParts(bestMode, 0, cu.m_cuDepth[0]);
+ cu.setChromIntraDirSubParts(bestMode, 0, cuGeom.depth);
}
uint32_t Search::estIntraPredChromaQT(Mode &intraMode, const CUGeom& cuGeom)
@@ -1665,10 +1659,10 @@
CUData& cu = intraMode.cu;
Yuv& reconYuv = intraMode.reconYuv;
- uint32_t depth = cu.m_cuDepth[0];
+ uint32_t depth = cuGeom.depth;
uint32_t initTuDepth = cu.m_partSize[0] != SIZE_2Nx2N && m_csp == X265_CSP_I444;
- uint32_t log2TrSize = cu.m_log2CUSize[0] - initTuDepth;
- uint32_t absPartStep = (NUM_CU_PARTITIONS >> (depth << 1));
+ uint32_t log2TrSize = cuGeom.log2CUSize - initTuDepth;
+ uint32_t absPartStep = cuGeom.numPartitions;
uint32_t totalDistortion = 0;
int size = partitionFromLog2Size(log2TrSize);
@@ -2490,13 +2484,13 @@
CUData& cu = interMode.cu;
Yuv* reconYuv = &interMode.reconYuv;
Yuv* predYuv = &interMode.predYuv;
- ShortYuv* resiYuv = &m_rqt[cuGeom.depth].tmpResiYuv;
+ uint32_t depth = cuGeom.depth;
+ ShortYuv* resiYuv = &m_rqt[depth].tmpResiYuv;
const Yuv* fencYuv = interMode.fencYuv;
X265_CHECK(!cu.isIntra(0), "intra CU not expected\n");
- uint32_t log2CUSize = cu.m_log2CUSize[0];
- uint32_t depth = cu.m_cuDepth[0];
+ uint32_t log2CUSize = cuGeom.log2CUSize;
int sizeIdx = log2CUSize - 2;
m_quant.setQPforQuant(interMode.cu);
@@ -2509,7 +2503,7 @@
m_entropyCoder.load(m_rqt[depth].cur);
Cost costs;
- estimateResidualQT(interMode, cuGeom, 0, depth, *resiYuv, costs, tuDepthRange);
+ estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs, tuDepthRange);
if (!cu.m_tqBypass[0])
{
@@ -2541,7 +2535,7 @@
}
if (cu.getQtRootCbf(0))
- saveResidualQTData(cu, *resiYuv, 0, depth);
+ saveResidualQTData(cu, *resiYuv, 0, 0);
/* calculate signal bits for inter/merge/skip coded CU */
m_entropyCoder.load(m_rqt[depth].cur);
@@ -2567,7 +2561,7 @@
m_entropyCoder.codeCUTransquantBypassFlag(cu.m_tqBypass[0]);
m_entropyCoder.codeSkipFlag(cu, 0);
m_entropyCoder.codePredMode(cu.m_predMode[0]);
- m_entropyCoder.codePartSize(cu, 0, cu.m_cuDepth[0]);
+ m_entropyCoder.codePartSize(cu, 0, cuGeom.depth);
m_entropyCoder.codePredInfo(cu, 0);
uint32_t mvBits = m_entropyCoder.getNumberOfWrittenBits();
@@ -2603,9 +2597,7 @@
{
uint32_t depth = cuGeom.depth + tuDepth;
CUData& cu = mode.cu;
- X265_CHECK(cu.m_cuDepth[0] == cu.m_cuDepth[absPartIdx], "invalid depth\n");
-
- uint32_t log2TrSize = g_maxLog2CUSize - depth;
+ uint32_t log2TrSize = cuGeom.log2CUSize - tuDepth;
bool bCheckFull = log2TrSize <= depthRange[1];
if (cu.m_partSize[0] != SIZE_2Nx2N && !tuDepth && log2TrSize > depthRange[0])
@@ -2625,7 +2617,7 @@
bCodeChroma = !(absPartIdx & 3);
}
- uint32_t absPartIdxStep = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + tuDepthC) << 1);
+ uint32_t absPartIdxStep = cuGeom.numPartitions >> tuDepthC * 2;
uint32_t setCbf = 1 << tuDepth;
uint32_t coeffOffsetY = absPartIdx << (LOG2_UNIT_SIZE * 2);
@@ -2633,7 +2625,7 @@
uint32_t sizeIdx = log2TrSize - 2;
- cu.setTUDepthSubParts(depth - cu.m_cuDepth[0], absPartIdx, depth);
+ cu.setTUDepthSubParts(tuDepth, absPartIdx, depth);
cu.setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, depth);
ShortYuv& resiYuv = m_rqt[cuGeom.depth].tmpResiYuv;
@@ -2744,22 +2736,21 @@
return m_rdCost.calcRdCost(dist, nullBits);
}
-void Search::estimateResidualQT(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, ShortYuv& resiYuv, Cost& outCosts, const uint32_t depthRange[2])
+void Search::estimateResidualQT(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth, ShortYuv& resiYuv, Cost& outCosts, const uint32_t depthRange[2])
{
CUData& cu = mode.cu;
- uint32_t log2TrSize = g_maxLog2CUSize - depth;
+ uint32_t depth = cuGeom.depth + tuDepth;
+ uint32_t log2TrSize = cuGeom.log2CUSize - tuDepth;
bool bCheckSplit = log2TrSize > depthRange[0];
bool bCheckFull = log2TrSize <= depthRange[1];
bool bSplitPresentFlag = bCheckSplit && bCheckFull;
- if (cu.m_partSize[0] != SIZE_2Nx2N && depth == cu.m_cuDepth[absPartIdx] && bCheckSplit)
+ if (cu.m_partSize[0] != SIZE_2Nx2N && !tuDepth && bCheckSplit)
bCheckFull = false;
X265_CHECK(bCheckFull || bCheckSplit, "check-full or check-split must be set\n");
- X265_CHECK(cu.m_cuDepth[0] == cu.m_cuDepth[absPartIdx], "depth not matching\n");
-
- uint32_t tuDepth = depth - cu.m_cuDepth[0];
+
uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
bool bCodeChroma = true;
uint32_t tuDepthC = tuDepth;
@@ -2787,7 +2778,7 @@
uint32_t trSize = 1 << log2TrSize;
const bool splitIntoSubTUs = (m_csp == X265_CSP_I422);
- uint32_t absPartIdxStep = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + tuDepthC) << 1);
+ uint32_t absPartIdxStep = cuGeom.numPartitions >> tuDepthC * 2;
const Yuv* fencYuv = mode.fencYuv;
// code full block
@@ -2804,7 +2795,7 @@
bool checkTransformSkipY = checkTransformSkip && log2TrSize <= MAX_LOG2_TS_SIZE;
bool checkTransformSkipC = checkTransformSkip && log2TrSizeC <= MAX_LOG2_TS_SIZE;
- cu.setTUDepthSubParts(depth - cu.m_cuDepth[0], absPartIdx, depth);
+ cu.setTUDepthSubParts(tuDepth, absPartIdx, depth);
cu.setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, depth);
if (m_bEnableRDOQ)
@@ -3215,7 +3206,7 @@
uint32_t ycbf = 0, ucbf = 0, vcbf = 0;
for (uint32_t qIdx = 0, qPartIdx = absPartIdx; qIdx < 4; ++qIdx, qPartIdx += qNumParts)
{
- estimateResidualQT(mode, cuGeom, qPartIdx, depth + 1, resiYuv, splitCost, depthRange);
+ estimateResidualQT(mode, cuGeom, qPartIdx, tuDepth + 1, resiYuv, splitCost, depthRange);
ycbf |= cu.getCbf(qPartIdx, TEXT_LUMA, tuDepth + 1);
ucbf |= cu.getCbf(qPartIdx, TEXT_CHROMA_U, tuDepth + 1);
vcbf |= cu.getCbf(qPartIdx, TEXT_CHROMA_V, tuDepth + 1);
@@ -3234,7 +3225,7 @@
m_entropyCoder.load(m_rqt[depth].rqtRoot);
m_entropyCoder.resetBits();
- codeInterSubdivCbfQT(cu, absPartIdx, depth, depthRange);
+ codeInterSubdivCbfQT(cu, absPartIdx, tuDepth, depthRange);
uint32_t splitCbfBits = m_entropyCoder.getNumberOfWrittenBits();
splitCost.bits += splitCbfBits;
@@ -3307,14 +3298,12 @@
outCosts.energy += fullCost.energy;
}
-void Search::codeInterSubdivCbfQT(CUData& cu, uint32_t absPartIdx, const uint32_t depth, const uint32_t depthRange[2])
+void Search::codeInterSubdivCbfQT(CUData& cu, uint32_t absPartIdx, const uint32_t tuDepth, const uint32_t depthRange[2])
{
- X265_CHECK(cu.m_cuDepth[0] == cu.m_cuDepth[absPartIdx], "depth not matching\n");
X265_CHECK(cu.isInter(absPartIdx), "codeInterSubdivCbfQT() with intra block\n");
- const uint32_t tuDepth = depth - cu.m_cuDepth[0];
- const bool bSubdiv = tuDepth != cu.m_tuDepth[absPartIdx];
- const uint32_t log2TrSize = g_maxLog2CUSize - depth;
+ const bool bSubdiv = tuDepth < cu.m_tuDepth[absPartIdx];
+ uint32_t log2TrSize = cu.m_log2CUSize[0] - tuDepth;
if (!(log2TrSize - m_hChromaShift < 2))
{
@@ -3337,102 +3326,19 @@
{
uint32_t qNumParts = 1 << (log2TrSize -1 - LOG2_UNIT_SIZE) * 2;
for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
- codeInterSubdivCbfQT(cu, absPartIdx, depth + 1, depthRange);
+ codeInterSubdivCbfQT(cu, absPartIdx, tuDepth + 1, depthRange);
}
}
-void Search::encodeResidualQT(CUData& cu, uint32_t absPartIdx, const uint32_t depth, TextType ttype, const uint32_t depthRange[2])
+void Search::saveResidualQTData(CUData& cu, ShortYuv& resiYuv, uint32_t absPartIdx, uint32_t tuDepth)
{
- X265_CHECK(cu.m_cuDepth[0] == cu.m_cuDepth[absPartIdx], "depth not matching\n");
- X265_CHECK(cu.isInter(absPartIdx), "encodeResidualQT() with intra block\n");
-
- const uint32_t curTuDepth = depth - cu.m_cuDepth[0];
- const uint32_t tuDepth = cu.m_tuDepth[absPartIdx];
- const bool bSubdiv = curTuDepth != tuDepth;
- const uint32_t log2TrSize = g_maxLog2CUSize - depth;
-
- if (bSubdiv)
- {
- if (cu.getCbf(absPartIdx, ttype, curTuDepth))
- {
- uint32_t qNumParts = 1 << (log2TrSize - 1 - LOG2_UNIT_SIZE) * 2;
- for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
- encodeResidualQT(cu, absPartIdx, depth + 1, ttype, depthRange);
- }
- return;
- }
- else
- {
- const bool splitIntoSubTUs = (m_csp == X265_CSP_I422);
- uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
-
- // Luma
- const uint32_t qtLayer = log2TrSize - 2;
- uint32_t coeffOffsetY = absPartIdx << (LOG2_UNIT_SIZE * 2);
- coeff_t* coeffCurY = m_rqt[qtLayer].coeffRQT[0] + coeffOffsetY;
-
- // Chroma
- bool bCodeChroma = true;
- uint32_t tuDepthC = tuDepth;
- if (log2TrSize == 2 && m_csp != X265_CSP_I444)
- {
- X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && tuDepth, "invalid tuDepth\n");
- log2TrSizeC++;
- tuDepthC--;
- bCodeChroma = !(absPartIdx & 3);
- }
-
- if (ttype == TEXT_LUMA && cu.getCbf(absPartIdx, TEXT_LUMA, tuDepth))
- m_entropyCoder.codeCoeffNxN(cu, coeffCurY, absPartIdx, log2TrSize, TEXT_LUMA);
-
- if (bCodeChroma)
- {
- uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
- coeff_t* coeffCurU = m_rqt[qtLayer].coeffRQT[1] + coeffOffsetC;
- coeff_t* coeffCurV = m_rqt[qtLayer].coeffRQT[2] + coeffOffsetC;
-
- if (!splitIntoSubTUs)
- {
- if (ttype == TEXT_CHROMA_U && cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth))
- m_entropyCoder.codeCoeffNxN(cu, coeffCurU, absPartIdx, log2TrSizeC, TEXT_CHROMA_U);
- if (ttype == TEXT_CHROMA_V && cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth))
- m_entropyCoder.codeCoeffNxN(cu, coeffCurV, absPartIdx, log2TrSizeC, TEXT_CHROMA_V);
- }
- else
- {
- uint32_t tuNumParts = 2 << ((log2TrSizeC - LOG2_UNIT_SIZE) * 2);
- uint32_t subTUSize = 1 << (log2TrSizeC * 2);
- if (ttype == TEXT_CHROMA_U && cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth))
- {
- if (cu.getCbf(absPartIdx, ttype, tuDepth + 1))
- m_entropyCoder.codeCoeffNxN(cu, coeffCurU, absPartIdx, log2TrSizeC, TEXT_CHROMA_U);
- if (cu.getCbf(absPartIdx + tuNumParts, ttype, tuDepth + 1))
- m_entropyCoder.codeCoeffNxN(cu, coeffCurU + subTUSize, absPartIdx + tuNumParts, log2TrSizeC, TEXT_CHROMA_U);
- }
- if (ttype == TEXT_CHROMA_V && cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth))
- {
- if (cu.getCbf(absPartIdx, ttype, tuDepth + 1))
- m_entropyCoder.codeCoeffNxN(cu, coeffCurV, absPartIdx, log2TrSizeC, TEXT_CHROMA_V);
- if (cu.getCbf(absPartIdx + tuNumParts, ttype, tuDepth + 1))
- m_entropyCoder.codeCoeffNxN(cu, coeffCurV + subTUSize, absPartIdx + tuNumParts, log2TrSizeC, TEXT_CHROMA_V);
- }
- }
- }
- }
-}
-
-void Search::saveResidualQTData(CUData& cu, ShortYuv& resiYuv, uint32_t absPartIdx, uint32_t depth)
-{
- X265_CHECK(cu.m_cuDepth[0] == cu.m_cuDepth[absPartIdx], "depth not matching\n");
- const uint32_t curTrMode = depth - cu.m_cuDepth[0];
- const uint32_t tuDepth = cu.m_tuDepth[absPartIdx];
- const uint32_t log2TrSize = g_maxLog2CUSize - depth;
-
- if (curTrMode < tuDepth)
+ const uint32_t log2TrSize = cu.m_log2CUSize[0] - tuDepth;
+
+ if (tuDepth < cu.m_tuDepth[absPartIdx])
{
uint32_t qNumParts = 1 << (log2TrSize - 1 - LOG2_UNIT_SIZE) * 2;
for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
- saveResidualQTData(cu, resiYuv, absPartIdx, depth + 1);
+ saveResidualQTData(cu, resiYuv, absPartIdx, tuDepth + 1);
return;
}
diff -r 65e71f08c55a -r 270c97866810 source/encoder/search.h
--- a/source/encoder/search.h Sat Jan 17 10:12:34 2015 +0530
+++ b/source/encoder/search.h Sat Jan 17 18:32:52 2015 +0900
@@ -201,7 +201,7 @@
bool m_bJobsQueued;
void singleMotionEstimation(Search& master, Mode& interMode, const CUGeom& cuGeom, int part, int list, int ref);
- void saveResidualQTData(CUData& cu, ShortYuv& resiYuv, uint32_t absPartIdx, uint32_t depth);
+ void saveResidualQTData(CUData& cu, ShortYuv& resiYuv, uint32_t absPartIdx, uint32_t tuDepth);
// RDO search of luma intra modes; result is fully encoded luma. luma distortion is returned
uint32_t estIntraPredQT(Mode &intraMode, const CUGeom& cuGeom, const uint32_t depthRange[2], uint8_t* sharedModes);
@@ -210,7 +210,7 @@
uint32_t estIntraPredChromaQT(Mode &intraMode, const CUGeom& cuGeom);
void codeSubdivCbfQTChroma(const CUData& cu, uint32_t tuDepth, uint32_t absPartIdx);
- void codeInterSubdivCbfQT(CUData& cu, uint32_t absPartIdx, const uint32_t depth, const uint32_t depthRange[2]);
+ void codeInterSubdivCbfQT(CUData& cu, uint32_t absPartIdx, const uint32_t tuDepth, const uint32_t depthRange[2]);
void codeCoeffQTChroma(const CUData& cu, uint32_t tuDepth, uint32_t absPartIdx, TextType ttype);
struct Cost
@@ -225,9 +225,6 @@
uint64_t estimateNullCbfCost(uint32_t &dist, uint32_t &psyEnergy, uint32_t tuDepth, TextType compId);
void estimateResidualQT(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, ShortYuv& resiYuv, Cost& costs, const uint32_t depthRange[2]);
- // estimate bit cost of residual QT
- void encodeResidualQT(CUData& cu, uint32_t absPartIdx, uint32_t depth, TextType ttype, const uint32_t depthRange[2]);
-
// generate prediction, generate residual and recon. if bAllowSplit, find optimal RQT splits
void codeIntraLumaQT(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, bool bAllowSplit, Cost& costs, const uint32_t depthRange[2]);
void codeIntraLumaTSkip(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, Cost& costs);
More information about the x265-devel
mailing list