[x265] more use CUGeom
Steve Borho
steve at borho.org
Sat Jan 17 12:48:02 CET 2015
On 01/17, Satoshi Nakagawa wrote:
> # HG changeset patch
> # User Satoshi Nakagawa <nakagawa424 at oki.com>
> # Date 1421487172 -32400
> # Sat Jan 17 18:32:52 2015 +0900
> # Node ID 270c9786681069d34c8eb709b74412843e37373a
> # Parent 65e71f08c55a0e9303d51691b3435cb5fdf6c6a1
> more use CUGeom
looks good, queued
> diff -r 65e71f08c55a -r 270c97866810 source/common/cudata.cpp
> --- a/source/common/cudata.cpp Sat Jan 17 10:12:34 2015 +0530
> +++ b/source/common/cudata.cpp Sat Jan 17 18:32:52 2015 +0900
> @@ -57,51 +57,51 @@
> void bcast256(uint8_t* dst, uint8_t val) { memset(dst, val, 256); }
>
> /* Check whether 2 addresses point to the same column */
> -inline bool isEqualCol(int addrA, int addrB, int numUnitsPerRow)
> +inline bool isEqualCol(int addrA, int addrB, int numUnits)
> {
> - // addrA % numUnitsPerRow == addrB % numUnitsPerRow
> - return ((addrA ^ addrB) & (numUnitsPerRow - 1)) == 0;
> + // addrA % numUnits == addrB % numUnits
> + return ((addrA ^ addrB) & (numUnits - 1)) == 0;
> }
>
> /* Check whether 2 addresses point to the same row */
> -inline bool isEqualRow(int addrA, int addrB, int numUnitsPerRow)
> +inline bool isEqualRow(int addrA, int addrB, int numUnits)
> {
> - // addrA / numUnitsPerRow == addrB / numUnitsPerRow
> - return ((addrA ^ addrB) & ~(numUnitsPerRow - 1)) == 0;
> + // addrA / numUnits == addrB / numUnits
> + return ((addrA ^ addrB) & ~(numUnits - 1)) == 0;
> }
>
> /* Check whether 2 addresses point to the same row or column */
> -inline bool isEqualRowOrCol(int addrA, int addrB, int numUnitsPerRow)
> +inline bool isEqualRowOrCol(int addrA, int addrB, int numUnits)
> {
> - return isEqualCol(addrA, addrB, numUnitsPerRow) | isEqualRow(addrA, addrB, numUnitsPerRow);
> + return isEqualCol(addrA, addrB, numUnits) | isEqualRow(addrA, addrB, numUnits);
> }
>
> /* Check whether one address points to the first column */
> -inline bool isZeroCol(int addr, int numUnitsPerRow)
> +inline bool isZeroCol(int addr, int numUnits)
> {
> - // addr % numUnitsPerRow == 0
> - return (addr & (numUnitsPerRow - 1)) == 0;
> + // addr % numUnits == 0
> + return (addr & (numUnits - 1)) == 0;
> }
>
> /* Check whether one address points to the first row */
> -inline bool isZeroRow(int addr, int numUnitsPerRow)
> +inline bool isZeroRow(int addr, int numUnits)
> {
> - // addr / numUnitsPerRow == 0
> - return (addr & ~(numUnitsPerRow - 1)) == 0;
> + // addr / numUnits == 0
> + return (addr & ~(numUnits - 1)) == 0;
> }
>
> /* Check whether one address points to a column whose index is smaller than a given value */
> -inline bool lessThanCol(int addr, int val, int numUnitsPerRow)
> +inline bool lessThanCol(int addr, int val, int numUnits)
> {
> - // addr % numUnitsPerRow < val
> - return (addr & (numUnitsPerRow - 1)) < val;
> + // addr % numUnits < val
> + return (addr & (numUnits - 1)) < val;
> }
>
> /* Check whether one address points to a row whose index is smaller than a given value */
> -inline bool lessThanRow(int addr, int val, int numUnitsPerRow)
> +inline bool lessThanRow(int addr, int val, int numUnits)
> {
> - // addr / numUnitsPerRow < val
> - return addr < val * numUnitsPerRow;
> + // addr / numUnits < val
> + return addr < val * numUnits;
> }
>
> inline MV scaleMv(MV mv, int scale)
> @@ -1533,17 +1533,17 @@
> m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples)
> {
> uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB];
> - uint32_t numPartInCUSize = s_numPartInCUSize;
> - bool bNotLastCol = lessThanCol(absPartIdxRB, numPartInCUSize - 1, numPartInCUSize); // is not at the last column of CTU
> - bool bNotLastRow = lessThanRow(absPartIdxRB, numPartInCUSize - 1, numPartInCUSize); // is not at the last row of CTU
> + uint32_t numUnits = s_numPartInCUSize;
> + bool bNotLastCol = lessThanCol(absPartIdxRB, numUnits - 1, numUnits); // is not at the last column of CTU
> + bool bNotLastRow = lessThanRow(absPartIdxRB, numUnits - 1, numUnits); // is not at the last row of CTU
>
> if (bNotLastCol && bNotLastRow)
> {
> - absPartAddr = g_rasterToZscan[absPartIdxRB + numPartInCUSize + 1];
> + absPartAddr = g_rasterToZscan[absPartIdxRB + numUnits + 1];
> ctuIdx = m_cuAddr;
> }
> else if (bNotLastCol)
> - absPartAddr = g_rasterToZscan[(absPartIdxRB + numPartInCUSize + 1) & (numPartInCUSize - 1)];
> + absPartAddr = g_rasterToZscan[(absPartIdxRB + numUnits + 1) & (numUnits - 1)];
> else if (bNotLastRow)
> {
> absPartAddr = g_rasterToZscan[absPartIdxRB + 1];
> @@ -1760,17 +1760,17 @@
> m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples)
> {
> uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB];
> - uint32_t numPartInCUSize = s_numPartInCUSize;
> - bool bNotLastCol = lessThanCol(absPartIdxRB, numPartInCUSize - 1, numPartInCUSize); // is not at the last column of CTU
> - bool bNotLastRow = lessThanRow(absPartIdxRB, numPartInCUSize - 1, numPartInCUSize); // is not at the last row of CTU
> + uint32_t numUnits = s_numPartInCUSize;
> + bool bNotLastCol = lessThanCol(absPartIdxRB, numUnits - 1, numUnits); // is not at the last column of CTU
> + bool bNotLastRow = lessThanRow(absPartIdxRB, numUnits - 1, numUnits); // is not at the last row of CTU
>
> if (bNotLastCol && bNotLastRow)
> {
> - absPartAddr = g_rasterToZscan[absPartIdxRB + numPartInCUSize + 1];
> + absPartAddr = g_rasterToZscan[absPartIdxRB + numUnits + 1];
> ctuIdx = m_cuAddr;
> }
> else if (bNotLastCol)
> - absPartAddr = g_rasterToZscan[(absPartIdxRB + numPartInCUSize + 1) & (numPartInCUSize - 1)];
> + absPartAddr = g_rasterToZscan[(absPartIdxRB + numUnits + 1) & (numUnits - 1)];
> else if (bNotLastRow)
> {
> absPartAddr = g_rasterToZscan[absPartIdxRB + 1];
> diff -r 65e71f08c55a -r 270c97866810 source/common/deblock.cpp
> --- a/source/common/deblock.cpp Sat Jan 17 10:12:34 2015 +0530
> +++ b/source/common/deblock.cpp Sat Jan 17 18:32:52 2015 +0900
> @@ -33,13 +33,13 @@
> #define DEBLOCK_SMALLEST_BLOCK 8
> #define DEFAULT_INTRA_TC_OFFSET 2
>
> -void Deblock::deblockCTU(const CUData* ctu, int32_t dir)
> +void Deblock::deblockCTU(const CUData* ctu, const CUGeom& cuGeom, int32_t dir)
> {
> uint8_t blockStrength[MAX_NUM_PARTITIONS];
>
> - memset(blockStrength, 0, sizeof(uint8_t) * m_numPartitions);
> + memset(blockStrength, 0, sizeof(uint8_t) * cuGeom.numPartitions);
>
> - deblockCU(ctu, 0, 0, dir, blockStrength);
> + deblockCU(ctu, cuGeom, dir, blockStrength);
> }
>
> static inline uint8_t bsCuEdge(const CUData* cu, uint32_t absPartIdx, int32_t dir)
> @@ -68,32 +68,31 @@
>
> /* Deblocking filter process in CU-based (the same function as conventional's)
> * param Edge the direction of the edge in block boundary (horizonta/vertical), which is added newly */
> -void Deblock::deblockCU(const CUData* cu, uint32_t absPartIdx, uint32_t depth, const int32_t dir, uint8_t blockStrength[])
> +void Deblock::deblockCU(const CUData* cu, const CUGeom& cuGeom, const int32_t dir, uint8_t blockStrength[])
> {
> + uint32_t absPartIdx = cuGeom.encodeIdx;
> + uint32_t depth = cuGeom.depth;
> if (cu->m_predMode[absPartIdx] == MODE_NONE)
> return;
>
> - uint32_t curNumParts = NUM_CU_PARTITIONS >> (depth << 1);
> -
> - const SPS& sps = *cu->m_slice->m_sps;
> -
> if (cu->m_cuDepth[absPartIdx] > depth)
> {
> - uint32_t qNumParts = curNumParts >> 2;
> - uint32_t xmax = sps.picWidthInLumaSamples - cu->m_cuPelX;
> - uint32_t ymax = sps.picHeightInLumaSamples - cu->m_cuPelY;
> - for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absPartIdx += qNumParts)
> - if (g_zscanToPelX[absPartIdx] < xmax && g_zscanToPelY[absPartIdx] < ymax)
> - deblockCU(cu, absPartIdx, depth + 1, dir, blockStrength);
> + for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
> + {
> + const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
> + if (childGeom.flags & CUGeom::PRESENT)
> + deblockCU(cu, childGeom, dir, blockStrength);
> + }
> return;
> }
>
> - const uint32_t numUnits = sps.numPartInCUSize >> depth;
> + uint32_t numUnits = 1 << (cuGeom.log2CUSize - LOG2_UNIT_SIZE);
> setEdgefilterPU(cu, absPartIdx, dir, blockStrength, numUnits);
> - setEdgefilterTU(cu, absPartIdx, depth, dir, blockStrength);
> + setEdgefilterTU(cu, absPartIdx, 0, dir, blockStrength);
> setEdgefilterMultiple(cu, absPartIdx, dir, 0, bsCuEdge(cu, absPartIdx, dir), blockStrength, numUnits);
>
> - for (uint32_t partIdx = absPartIdx; partIdx < absPartIdx + curNumParts; partIdx++)
> + uint32_t numParts = cuGeom.numPartitions;
> + for (uint32_t partIdx = absPartIdx; partIdx < absPartIdx + numParts; partIdx++)
> {
> uint32_t bsCheck = !(partIdx & (1 << dir));
>
> @@ -102,12 +101,11 @@
> }
>
> const uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE;
> - uint32_t sizeInPU = sps.numPartInCUSize >> depth;
> uint32_t shiftFactor = (dir == EDGE_VER) ? cu->m_hChromaShift : cu->m_vChromaShift;
> uint32_t chromaMask = ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) >> LOG2_UNIT_SIZE) - 1;
> uint32_t e0 = (dir == EDGE_VER ? g_zscanToPelX[absPartIdx] : g_zscanToPelY[absPartIdx]) >> LOG2_UNIT_SIZE;
>
> - for (uint32_t e = 0; e < sizeInPU; e += partIdxIncr)
> + for (uint32_t e = 0; e < numUnits; e += partIdxIncr)
> {
> edgeFilterLuma(cu, absPartIdx, depth, dir, e, blockStrength);
> if (!((e0 + e) & chromaMask))
> @@ -117,12 +115,12 @@
>
> static inline uint32_t calcBsIdx(const CUData* cu, uint32_t absPartIdx, int32_t dir, int32_t edgeIdx, int32_t baseUnitIdx)
> {
> - uint32_t numPartInCUSize = cu->m_slice->m_sps->numPartInCUSize;
> + uint32_t numUnits = cu->m_slice->m_sps->numPartInCUSize;
>
> if (dir)
> - return g_rasterToZscan[g_zscanToRaster[absPartIdx] + edgeIdx * numPartInCUSize + baseUnitIdx];
> + return g_rasterToZscan[g_zscanToRaster[absPartIdx] + edgeIdx * numUnits + baseUnitIdx];
> else
> - return g_rasterToZscan[g_zscanToRaster[absPartIdx] + baseUnitIdx * numPartInCUSize + edgeIdx];
> + return g_rasterToZscan[g_zscanToRaster[absPartIdx] + baseUnitIdx * numUnits + edgeIdx];
> }
>
> void Deblock::setEdgefilterMultiple(const CUData* cu, uint32_t scanIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockStrength[], uint32_t numUnits)
> @@ -135,19 +133,18 @@
> }
> }
>
> -void Deblock::setEdgefilterTU(const CUData* cu, uint32_t absPartIdx, uint32_t depth, int32_t dir, uint8_t blockStrength[])
> +void Deblock::setEdgefilterTU(const CUData* cu, uint32_t absPartIdx, uint32_t tuDepth, int32_t dir, uint8_t blockStrength[])
> {
> - if ((uint32_t)cu->m_tuDepth[absPartIdx] + cu->m_cuDepth[absPartIdx] > depth)
> + uint32_t log2TrSize = cu->m_log2CUSize[absPartIdx] - tuDepth;
> + if (cu->m_tuDepth[absPartIdx] > tuDepth)
> {
> - const uint32_t curNumParts = NUM_CU_PARTITIONS >> (depth << 1);
> - const uint32_t qNumParts = curNumParts >> 2;
> -
> - for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absPartIdx += qNumParts)
> - setEdgefilterTU(cu, absPartIdx, depth + 1, dir, blockStrength);
> + uint32_t qNumParts = 1 << (log2TrSize - LOG2_UNIT_SIZE - 1) * 2;
> + for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
> + setEdgefilterTU(cu, absPartIdx, tuDepth + 1, dir, blockStrength);
> return;
> }
>
> - uint32_t numUnits = 1 << (cu->m_log2CUSize[absPartIdx] - cu->m_tuDepth[absPartIdx] - LOG2_UNIT_SIZE);
> + uint32_t numUnits = 1 << (log2TrSize - LOG2_UNIT_SIZE);
> setEdgefilterMultiple(cu, absPartIdx, dir, 0, 2, blockStrength, numUnits);
> }
>
> @@ -501,7 +498,6 @@
> srcChroma[1] = reconPic->m_picOrg[2] + srcOffset;
>
> uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> (depth + chromaShift);
> -
> for (uint32_t idx = 0; idx < numUnits; idx++)
> {
> uint32_t partQ = calcBsIdx(cuQ, absPartIdx, dir, edge, idx << chromaShift);
> diff -r 65e71f08c55a -r 270c97866810 source/common/deblock.h
> --- a/source/common/deblock.h Sat Jan 17 10:12:34 2015 +0530
> +++ b/source/common/deblock.h Sat Jan 17 18:32:52 2015 +0900
> @@ -30,27 +30,22 @@
> // private namespace
>
> class CUData;
> +struct CUGeom;
>
> class Deblock
> {
> public:
> enum { EDGE_VER, EDGE_HOR };
>
> - uint32_t m_numPartitions;
> -
> - Deblock() : m_numPartitions(0) {}
> -
> - void init() { m_numPartitions = 1 << (g_maxFullDepth * 2); }
> -
> - void deblockCTU(const CUData* ctu, int32_t dir);
> + void deblockCTU(const CUData* ctu, const CUGeom& cuGeom, int32_t dir);
>
> protected:
>
> // CU-level deblocking function
> - void deblockCU(const CUData* cu, uint32_t absPartIdx, uint32_t depth, const int32_t dir, uint8_t blockStrength[]);
> + void deblockCU(const CUData* cu, const CUGeom& cuGeom, const int32_t dir, uint8_t blockStrength[]);
>
> // set filtering functions
> - void setEdgefilterTU(const CUData* cu, uint32_t absPartIdx, uint32_t depth, int32_t dir, uint8_t blockStrength[]);
> + void setEdgefilterTU(const CUData* cu, uint32_t absPartIdx, uint32_t tuDepth, int32_t dir, uint8_t blockStrength[]);
> void setEdgefilterPU(const CUData* cu, uint32_t absPartIdx, int32_t dir, uint8_t blockStrength[], uint32_t numUnits);
> void setEdgefilterMultiple(const CUData* cu, uint32_t absPartIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockStrength[], uint32_t numUnits);
>
> diff -r 65e71f08c55a -r 270c97866810 source/encoder/framefilter.cpp
> --- a/source/encoder/framefilter.cpp Sat Jan 17 10:12:34 2015 +0530
> +++ b/source/encoder/framefilter.cpp Sat Jan 17 18:32:52 2015 +0900
> @@ -63,8 +63,6 @@
> m_saoRowDelay = m_param->bEnableLoopFilter ? 1 : 0;
> m_lastHeight = m_param->sourceHeight % g_maxCUSize ? m_param->sourceHeight % g_maxCUSize : g_maxCUSize;
>
> - m_deblock.init();
> -
> if (m_param->bEnableSAO)
> if (!m_sao.create(m_param))
> m_param->bEnableSAO = 0;
> @@ -96,22 +94,24 @@
>
> if (m_param->bEnableLoopFilter)
> {
> + const CUGeom* cuGeoms = m_frameEncoder->m_cuGeoms;
> + const uint32_t* ctuGeomMap = m_frameEncoder->m_ctuGeomMap;
> +
> for (uint32_t col = 0; col < numCols; col++)
> {
> uint32_t cuAddr = lineStartCUAddr + col;
> const CUData* ctu = encData.getPicCTU(cuAddr);
> -
> - m_deblock.deblockCTU(ctu, Deblock::EDGE_VER);
> + deblockCTU(ctu, cuGeoms[ctuGeomMap[cuAddr]], Deblock::EDGE_VER);
>
> if (col > 0)
> {
> const CUData* ctuPrev = encData.getPicCTU(cuAddr - 1);
> - m_deblock.deblockCTU(ctuPrev, Deblock::EDGE_HOR);
> + deblockCTU(ctuPrev, cuGeoms[ctuGeomMap[cuAddr - 1]], Deblock::EDGE_HOR);
> }
> }
>
> const CUData* ctuPrev = encData.getPicCTU(lineStartCUAddr + numCols - 1);
> - m_deblock.deblockCTU(ctuPrev, Deblock::EDGE_HOR);
> + deblockCTU(ctuPrev, cuGeoms[ctuGeomMap[lineStartCUAddr + numCols - 1]], Deblock::EDGE_HOR);
> }
>
> // SAO
> @@ -394,23 +394,24 @@
> }
>
> /* restore original YUV samples to recon after SAO (if lossless) */
> -static void restoreOrigLosslessYuv(const CUData* cu, Frame& frame, uint32_t absPartIdx, uint32_t depth)
> +static void restoreOrigLosslessYuv(const CUData* cu, Frame& frame, uint32_t absPartIdx)
> {
> - int size = g_maxLog2CUSize - depth - 2;
> + int size = cu->m_log2CUSize[absPartIdx] - 2;
> + uint32_t cuAddr = cu->m_cuAddr;
>
> PicYuv* reconPic = frame.m_reconPic;
> PicYuv* fencPic = frame.m_fencPic;
>
> - pixel* dst = reconPic->getLumaAddr(cu->m_cuAddr, absPartIdx);
> - pixel* src = fencPic->getLumaAddr(cu->m_cuAddr, absPartIdx);
> + pixel* dst = reconPic->getLumaAddr(cuAddr, absPartIdx);
> + pixel* src = fencPic->getLumaAddr(cuAddr, absPartIdx);
>
> primitives.cu[size].copy_pp(dst, reconPic->m_stride, src, fencPic->m_stride);
>
> - pixel* dstCb = reconPic->getCbAddr(cu->m_cuAddr, absPartIdx);
> - pixel* srcCb = fencPic->getCbAddr(cu->m_cuAddr, absPartIdx);
> + pixel* dstCb = reconPic->getCbAddr(cuAddr, absPartIdx);
> + pixel* srcCb = fencPic->getCbAddr(cuAddr, absPartIdx);
>
> - pixel* dstCr = reconPic->getCrAddr(cu->m_cuAddr, absPartIdx);
> - pixel* srcCr = fencPic->getCrAddr(cu->m_cuAddr, absPartIdx);
> + pixel* dstCr = reconPic->getCrAddr(cuAddr, absPartIdx);
> + pixel* srcCr = fencPic->getCrAddr(cuAddr, absPartIdx);
>
> int csp = fencPic->m_picCsp;
> primitives.chroma[csp].cu[size].copy_pp(dstCb, reconPic->m_strideC, srcCb, fencPic->m_strideC);
> @@ -418,34 +419,29 @@
> }
>
> /* Original YUV restoration for CU in lossless coding */
> -static void origCUSampleRestoration(const CUData* cu, Frame& frame, uint32_t absPartIdx, uint32_t depth)
> +static void origCUSampleRestoration(const CUData* cu, const CUGeom& cuGeom, Frame& frame)
> {
> - if (cu->m_cuDepth[absPartIdx] > depth)
> + uint32_t absPartIdx = cuGeom.encodeIdx;
> + if (cu->m_cuDepth[absPartIdx] > cuGeom.depth)
> {
> - /* TODO: this could use cuGeom.numPartition and flags */
> - uint32_t curNumParts = NUM_CU_PARTITIONS >> (depth << 1);
> - uint32_t qNumParts = curNumParts >> 2;
> - uint32_t xmax = cu->m_slice->m_sps->picWidthInLumaSamples - cu->m_cuPelX;
> - uint32_t ymax = cu->m_slice->m_sps->picHeightInLumaSamples - cu->m_cuPelY;
> -
> - /* process four split sub-cu at next depth */
> - for (int subPartIdx = 0; subPartIdx < 4; subPartIdx++, absPartIdx += qNumParts)
> + for (int subPartIdx = 0; subPartIdx < 4; subPartIdx++)
> {
> - if (g_zscanToPelX[absPartIdx] < xmax && g_zscanToPelY[absPartIdx] < ymax)
> - origCUSampleRestoration(cu, frame, absPartIdx, depth + 1);
> + const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
> + if (childGeom.flags & CUGeom::PRESENT)
> + origCUSampleRestoration(cu, childGeom, frame);
> }
> -
> return;
> }
>
> // restore original YUV samples
> if (cu->m_tqBypass[absPartIdx])
> - restoreOrigLosslessYuv(cu, frame, absPartIdx, depth);
> + restoreOrigLosslessYuv(cu, frame, absPartIdx);
> }
>
> void FrameFilter::processSao(int row)
> {
> - SAOParam* saoParam = m_frame->m_encData->m_saoParam;
> + FrameData& encData = *m_frame->m_encData;
> + SAOParam* saoParam = encData.m_saoParam;
>
> if (saoParam->bSaoFlag[0])
> m_sao.processSaoUnitRow(saoParam->ctuParam[0], row, 0);
> @@ -456,12 +452,19 @@
> m_sao.processSaoUnitRow(saoParam->ctuParam[2], row, 2);
> }
>
> - if (m_frame->m_encData->m_slice->m_pps->bTransquantBypassEnabled)
> + if (encData.m_slice->m_pps->bTransquantBypassEnabled)
> {
> - uint32_t numCols = m_frame->m_encData->m_slice->m_sps->numCuInWidth;
> + uint32_t numCols = encData.m_slice->m_sps->numCuInWidth;
> uint32_t lineStartCUAddr = row * numCols;
>
> + const CUGeom* cuGeoms = m_frameEncoder->m_cuGeoms;
> + const uint32_t* ctuGeomMap = m_frameEncoder->m_ctuGeomMap;
> +
> for (uint32_t col = 0; col < numCols; col++)
> - origCUSampleRestoration(m_frame->m_encData->getPicCTU(lineStartCUAddr + col), *m_frame, 0, 0);
> + {
> + uint32_t cuAddr = lineStartCUAddr + col;
> + const CUData* ctu = encData.getPicCTU(cuAddr);
> + origCUSampleRestoration(ctu, cuGeoms[ctuGeomMap[cuAddr]], *m_frame);
> + }
> }
> }
> diff -r 65e71f08c55a -r 270c97866810 source/encoder/framefilter.h
> --- a/source/encoder/framefilter.h Sat Jan 17 10:12:34 2015 +0530
> +++ b/source/encoder/framefilter.h Sat Jan 17 18:32:52 2015 +0900
> @@ -39,7 +39,7 @@
> struct ThreadLocalData;
>
> // Manages the processing of a single frame loopfilter
> -class FrameFilter
> +class FrameFilter : public Deblock
> {
> public:
>
> @@ -50,7 +50,6 @@
> int m_vChromaShift;
> int m_pad[2];
>
> - Deblock m_deblock;
> SAO m_sao;
> int m_numRows;
> int m_saoRowDelay;
> diff -r 65e71f08c55a -r 270c97866810 source/encoder/search.cpp
> --- a/source/encoder/search.cpp Sat Jan 17 10:12:34 2015 +0530
> +++ b/source/encoder/search.cpp Sat Jan 17 18:32:52 2015 +0900
> @@ -63,6 +63,7 @@
>
> bool Search::initSearch(const x265_param& param, ScalingList& scalingList)
> {
> + uint32_t maxLog2CUSize = g_log2Size[param.maxCUSize];
> m_param = &param;
> m_bEnableRDOQ = param.rdLevel >= 4;
> m_bFrameParallel = param.frameNumThreads > 1;
> @@ -81,9 +82,9 @@
> * available for motion reference. See refLagRows in FrameEncoder::compressCTURows() */
> m_refLagPixels = m_bFrameParallel ? param.searchRange : param.sourceHeight;
>
> - uint32_t sizeL = 1 << (g_maxLog2CUSize * 2);
> + uint32_t sizeL = 1 << (maxLog2CUSize * 2);
> uint32_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift);
> - uint32_t numPartitions = NUM_CU_PARTITIONS;
> + uint32_t numPartitions = 1 << (maxLog2CUSize - LOG2_UNIT_SIZE) * 2;
>
> /* these are indexed by qtLayer (log2size - 2) so nominally 0=4x4, 1=8x8, 2=16x16, 3=32x32
> * the coeffRQT and reconQtYuv are allocated to the max CU size at every depth. The parts
> @@ -167,9 +168,8 @@
>
> void Search::codeSubdivCbfQTChroma(const CUData& cu, uint32_t tuDepth, uint32_t absPartIdx)
> {
> - uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth;
> uint32_t subdiv = tuDepth < cu.m_tuDepth[absPartIdx];
> - uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
> + uint32_t log2TrSize = cu.m_log2CUSize[0] - tuDepth;
>
> if (!(log2TrSize - m_hChromaShift < 2))
> {
> @@ -192,8 +192,7 @@
> if (!cu.getCbf(absPartIdx, ttype, tuDepth))
> return;
>
> - uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth;
> - uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
> + uint32_t log2TrSize = cu.m_log2CUSize[0] - tuDepth;
>
> if (tuDepth < cu.m_tuDepth[absPartIdx])
> {
> @@ -241,8 +240,8 @@
> void Search::codeIntraLumaQT(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, bool bAllowSplit, Cost& outCost, const uint32_t depthRange[2])
> {
> CUData& cu = mode.cu;
> - uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth;
> - uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
> + uint32_t fullDepth = cuGeom.depth + tuDepth;
> + uint32_t log2TrSize = cuGeom.log2CUSize - tuDepth;
> uint32_t qtLayer = log2TrSize - 2;
> uint32_t sizeIdx = log2TrSize - 2;
> bool mightNotSplit = log2TrSize <= depthRange[1];
> @@ -317,7 +316,7 @@
> m_entropyCoder.codePredMode(cu.m_predMode[0]);
> }
>
> - m_entropyCoder.codePartSize(cu, 0, cu.m_cuDepth[0]);
> + m_entropyCoder.codePartSize(cu, 0, cuGeom.depth);
> }
> if (cu.m_partSize[0] == SIZE_2Nx2N)
> {
> @@ -434,8 +433,8 @@
>
> void Search::codeIntraLumaTSkip(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, Cost& outCost)
> {
> - uint32_t fullDepth = mode.cu.m_cuDepth[0] + tuDepth;
> - uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
> + uint32_t fullDepth = cuGeom.depth + tuDepth;
> + uint32_t log2TrSize = cuGeom.log2CUSize - tuDepth;
> uint32_t tuSize = 1 << log2TrSize;
>
> X265_CHECK(tuSize == MAX_TS_SIZE, "transform skip is only possible at 4x4 TUs\n");
> @@ -528,7 +527,7 @@
> m_entropyCoder.codePredMode(cu.m_predMode[0]);
> }
>
> - m_entropyCoder.codePartSize(cu, 0, cu.m_cuDepth[0]);
> + m_entropyCoder.codePartSize(cu, 0, cuGeom.depth);
> }
> if (cu.m_partSize[0] == SIZE_2Nx2N)
> {
> @@ -604,8 +603,8 @@
> void Search::residualTransformQuantIntra(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth, const uint32_t depthRange[2])
> {
> CUData& cu = mode.cu;
> - uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth;
> - uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
> + uint32_t fullDepth = cuGeom.depth + tuDepth;
> + uint32_t log2TrSize = cuGeom.log2CUSize - tuDepth;
> bool bCheckFull = log2TrSize <= depthRange[1];
>
> X265_CHECK(m_slice->m_sliceType != I_SLICE, "residualTransformQuantIntra not intended for I slices\n");
> @@ -675,8 +674,7 @@
>
> void Search::extractIntraResultQT(CUData& cu, Yuv& reconYuv, uint32_t tuDepth, uint32_t absPartIdx)
> {
> - uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth;
> - uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
> + uint32_t log2TrSize = cu.m_log2CUSize[0] - tuDepth;
>
> if (tuDepth == cu.m_tuDepth[absPartIdx])
> {
> @@ -709,9 +707,7 @@
> /* 4:2:2 post-TU split processing */
> void Search::offsetSubTUCBFs(CUData& cu, TextType ttype, uint32_t tuDepth, uint32_t absPartIdx)
> {
> - uint32_t depth = cu.m_cuDepth[0];
> - uint32_t fullDepth = depth + tuDepth;
> - uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
> + uint32_t log2TrSize = cu.m_log2CUSize[0] - tuDepth;
>
> if (log2TrSize == 2)
> {
> @@ -735,8 +731,7 @@
> uint32_t Search::codeIntraChromaQt(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, uint32_t& psyEnergy)
> {
> CUData& cu = mode.cu;
> - uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth;
> - uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
> + uint32_t log2TrSize = cuGeom.log2CUSize - tuDepth;
>
> if (tuDepth < cu.m_tuDepth[absPartIdx])
> {
> @@ -782,7 +777,7 @@
> const uint32_t sizeIdxC = log2TrSizeC - 2;
> uint32_t outDist = 0;
>
> - uint32_t curPartNum = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + tuDepthC) << 1);
> + uint32_t curPartNum = cuGeom.numPartitions >> tuDepthC * 2;
> const SplitType splitType = (m_csp == X265_CSP_I422) ? VERTICAL_SPLIT : DONT_SPLIT;
>
> TURecurse tuIterator(splitType, curPartNum, absPartIdx);
> @@ -858,8 +853,8 @@
> uint32_t Search::codeIntraChromaTSkip(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t tuDepthC, uint32_t absPartIdx, uint32_t& psyEnergy)
> {
> CUData& cu = mode.cu;
> - uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth;
> - uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
> + uint32_t fullDepth = cuGeom.depth + tuDepth;
> + uint32_t log2TrSize = cuGeom.log2CUSize - tuDepth;
> const uint32_t log2TrSizeC = 2;
> uint32_t qtLayer = log2TrSize - 2;
> uint32_t outDist = 0;
> @@ -872,7 +867,7 @@
> ALIGN_VAR_32(coeff_t, tskipCoeffC[MAX_TS_SIZE * MAX_TS_SIZE]);
> ALIGN_VAR_32(pixel, tskipReconC[MAX_TS_SIZE * MAX_TS_SIZE]);
>
> - uint32_t curPartNum = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + tuDepthC) << 1);
> + uint32_t curPartNum = cuGeom.numPartitions >> tuDepthC * 2;
> const SplitType splitType = (m_csp == X265_CSP_I422) ? VERTICAL_SPLIT : DONT_SPLIT;
>
> TURecurse tuIterator(splitType, curPartNum, absPartIdx);
> @@ -1006,9 +1001,8 @@
>
> void Search::extractIntraResultChromaQT(CUData& cu, Yuv& reconYuv, uint32_t absPartIdx, uint32_t tuDepth)
> {
> - uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth;
> uint32_t tuDepthL = cu.m_tuDepth[absPartIdx];
> - uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
> + uint32_t log2TrSize = cu.m_log2CUSize[0] - tuDepth;
> uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
>
> if (tuDepthL == tuDepth || log2TrSizeC == 2)
> @@ -1075,7 +1069,7 @@
> uint32_t stride = mode.fencYuv->m_csize;
> const uint32_t sizeIdxC = log2TrSizeC - 2;
>
> - uint32_t curPartNum = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + tuDepthC) << 1);
> + uint32_t curPartNum = cuGeom.numPartitions >> tuDepthC * 2;
> const SplitType splitType = (m_csp == X265_CSP_I422) ? VERTICAL_SPLIT : DONT_SPLIT;
>
> TURecurse tuIterator(splitType, curPartNum, absPartIdx);
> @@ -1184,13 +1178,13 @@
> void Search::checkIntraInInter(Mode& intraMode, const CUGeom& cuGeom)
> {
> CUData& cu = intraMode.cu;
> - uint32_t depth = cu.m_cuDepth[0];
> + uint32_t depth = cuGeom.depth;
>
> cu.setPartSizeSubParts(SIZE_2Nx2N);
> cu.setPredModeSubParts(MODE_INTRA);
>
> const uint32_t initTuDepth = 0;
> - uint32_t log2TrSize = cu.m_log2CUSize[0] - initTuDepth;
> + uint32_t log2TrSize = cuGeom.log2CUSize - initTuDepth;
> uint32_t tuSize = 1 << log2TrSize;
> const uint32_t absPartIdx = 0;
>
> @@ -1403,10 +1397,10 @@
> Yuv* predYuv = &intraMode.predYuv;
> const Yuv* fencYuv = intraMode.fencYuv;
>
> - uint32_t depth = cu.m_cuDepth[0];
> + uint32_t depth = cuGeom.depth;
> uint32_t initTuDepth = cu.m_partSize[0] != SIZE_2Nx2N;
> uint32_t numPU = 1 << (2 * initTuDepth);
> - uint32_t log2TrSize = cu.m_log2CUSize[0] - initTuDepth;
> + uint32_t log2TrSize = cuGeom.log2CUSize - initTuDepth;
> uint32_t tuSize = 1 << log2TrSize;
> uint32_t qNumParts = cuGeom.numPartitions >> 2;
> uint32_t sizeIdx = log2TrSize - 2;
> @@ -1657,7 +1651,7 @@
> }
> }
>
> - cu.setChromIntraDirSubParts(bestMode, 0, cu.m_cuDepth[0]);
> + cu.setChromIntraDirSubParts(bestMode, 0, cuGeom.depth);
> }
>
> uint32_t Search::estIntraPredChromaQT(Mode &intraMode, const CUGeom& cuGeom)
> @@ -1665,10 +1659,10 @@
> CUData& cu = intraMode.cu;
> Yuv& reconYuv = intraMode.reconYuv;
>
> - uint32_t depth = cu.m_cuDepth[0];
> + uint32_t depth = cuGeom.depth;
> uint32_t initTuDepth = cu.m_partSize[0] != SIZE_2Nx2N && m_csp == X265_CSP_I444;
> - uint32_t log2TrSize = cu.m_log2CUSize[0] - initTuDepth;
> - uint32_t absPartStep = (NUM_CU_PARTITIONS >> (depth << 1));
> + uint32_t log2TrSize = cuGeom.log2CUSize - initTuDepth;
> + uint32_t absPartStep = cuGeom.numPartitions;
> uint32_t totalDistortion = 0;
>
> int size = partitionFromLog2Size(log2TrSize);
> @@ -2490,13 +2484,13 @@
> CUData& cu = interMode.cu;
> Yuv* reconYuv = &interMode.reconYuv;
> Yuv* predYuv = &interMode.predYuv;
> - ShortYuv* resiYuv = &m_rqt[cuGeom.depth].tmpResiYuv;
> + uint32_t depth = cuGeom.depth;
> + ShortYuv* resiYuv = &m_rqt[depth].tmpResiYuv;
> const Yuv* fencYuv = interMode.fencYuv;
>
> X265_CHECK(!cu.isIntra(0), "intra CU not expected\n");
>
> - uint32_t log2CUSize = cu.m_log2CUSize[0];
> - uint32_t depth = cu.m_cuDepth[0];
> + uint32_t log2CUSize = cuGeom.log2CUSize;
> int sizeIdx = log2CUSize - 2;
>
> m_quant.setQPforQuant(interMode.cu);
> @@ -2509,7 +2503,7 @@
> m_entropyCoder.load(m_rqt[depth].cur);
>
> Cost costs;
> - estimateResidualQT(interMode, cuGeom, 0, depth, *resiYuv, costs, tuDepthRange);
> + estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs, tuDepthRange);
>
> if (!cu.m_tqBypass[0])
> {
> @@ -2541,7 +2535,7 @@
> }
>
> if (cu.getQtRootCbf(0))
> - saveResidualQTData(cu, *resiYuv, 0, depth);
> + saveResidualQTData(cu, *resiYuv, 0, 0);
>
> /* calculate signal bits for inter/merge/skip coded CU */
> m_entropyCoder.load(m_rqt[depth].cur);
> @@ -2567,7 +2561,7 @@
> m_entropyCoder.codeCUTransquantBypassFlag(cu.m_tqBypass[0]);
> m_entropyCoder.codeSkipFlag(cu, 0);
> m_entropyCoder.codePredMode(cu.m_predMode[0]);
> - m_entropyCoder.codePartSize(cu, 0, cu.m_cuDepth[0]);
> + m_entropyCoder.codePartSize(cu, 0, cuGeom.depth);
> m_entropyCoder.codePredInfo(cu, 0);
> uint32_t mvBits = m_entropyCoder.getNumberOfWrittenBits();
>
> @@ -2603,9 +2597,7 @@
> {
> uint32_t depth = cuGeom.depth + tuDepth;
> CUData& cu = mode.cu;
> - X265_CHECK(cu.m_cuDepth[0] == cu.m_cuDepth[absPartIdx], "invalid depth\n");
> -
> - uint32_t log2TrSize = g_maxLog2CUSize - depth;
> + uint32_t log2TrSize = cuGeom.log2CUSize - tuDepth;
>
> bool bCheckFull = log2TrSize <= depthRange[1];
> if (cu.m_partSize[0] != SIZE_2Nx2N && !tuDepth && log2TrSize > depthRange[0])
> @@ -2625,7 +2617,7 @@
> bCodeChroma = !(absPartIdx & 3);
> }
>
> - uint32_t absPartIdxStep = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + tuDepthC) << 1);
> + uint32_t absPartIdxStep = cuGeom.numPartitions >> tuDepthC * 2;
> uint32_t setCbf = 1 << tuDepth;
>
> uint32_t coeffOffsetY = absPartIdx << (LOG2_UNIT_SIZE * 2);
> @@ -2633,7 +2625,7 @@
>
> uint32_t sizeIdx = log2TrSize - 2;
>
> - cu.setTUDepthSubParts(depth - cu.m_cuDepth[0], absPartIdx, depth);
> + cu.setTUDepthSubParts(tuDepth, absPartIdx, depth);
> cu.setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, depth);
>
> ShortYuv& resiYuv = m_rqt[cuGeom.depth].tmpResiYuv;
> @@ -2744,22 +2736,21 @@
> return m_rdCost.calcRdCost(dist, nullBits);
> }
>
> -void Search::estimateResidualQT(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, ShortYuv& resiYuv, Cost& outCosts, const uint32_t depthRange[2])
> +void Search::estimateResidualQT(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth, ShortYuv& resiYuv, Cost& outCosts, const uint32_t depthRange[2])
> {
> CUData& cu = mode.cu;
> - uint32_t log2TrSize = g_maxLog2CUSize - depth;
> + uint32_t depth = cuGeom.depth + tuDepth;
> + uint32_t log2TrSize = cuGeom.log2CUSize - tuDepth;
>
> bool bCheckSplit = log2TrSize > depthRange[0];
> bool bCheckFull = log2TrSize <= depthRange[1];
> bool bSplitPresentFlag = bCheckSplit && bCheckFull;
>
> - if (cu.m_partSize[0] != SIZE_2Nx2N && depth == cu.m_cuDepth[absPartIdx] && bCheckSplit)
> + if (cu.m_partSize[0] != SIZE_2Nx2N && !tuDepth && bCheckSplit)
> bCheckFull = false;
>
> X265_CHECK(bCheckFull || bCheckSplit, "check-full or check-split must be set\n");
> - X265_CHECK(cu.m_cuDepth[0] == cu.m_cuDepth[absPartIdx], "depth not matching\n");
> -
> - uint32_t tuDepth = depth - cu.m_cuDepth[0];
> +
> uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
> bool bCodeChroma = true;
> uint32_t tuDepthC = tuDepth;
> @@ -2787,7 +2778,7 @@
>
> uint32_t trSize = 1 << log2TrSize;
> const bool splitIntoSubTUs = (m_csp == X265_CSP_I422);
> - uint32_t absPartIdxStep = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + tuDepthC) << 1);
> + uint32_t absPartIdxStep = cuGeom.numPartitions >> tuDepthC * 2;
> const Yuv* fencYuv = mode.fencYuv;
>
> // code full block
> @@ -2804,7 +2795,7 @@
> bool checkTransformSkipY = checkTransformSkip && log2TrSize <= MAX_LOG2_TS_SIZE;
> bool checkTransformSkipC = checkTransformSkip && log2TrSizeC <= MAX_LOG2_TS_SIZE;
>
> - cu.setTUDepthSubParts(depth - cu.m_cuDepth[0], absPartIdx, depth);
> + cu.setTUDepthSubParts(tuDepth, absPartIdx, depth);
> cu.setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, depth);
>
> if (m_bEnableRDOQ)
> @@ -3215,7 +3206,7 @@
> uint32_t ycbf = 0, ucbf = 0, vcbf = 0;
> for (uint32_t qIdx = 0, qPartIdx = absPartIdx; qIdx < 4; ++qIdx, qPartIdx += qNumParts)
> {
> - estimateResidualQT(mode, cuGeom, qPartIdx, depth + 1, resiYuv, splitCost, depthRange);
> + estimateResidualQT(mode, cuGeom, qPartIdx, tuDepth + 1, resiYuv, splitCost, depthRange);
> ycbf |= cu.getCbf(qPartIdx, TEXT_LUMA, tuDepth + 1);
> ucbf |= cu.getCbf(qPartIdx, TEXT_CHROMA_U, tuDepth + 1);
> vcbf |= cu.getCbf(qPartIdx, TEXT_CHROMA_V, tuDepth + 1);
> @@ -3234,7 +3225,7 @@
> m_entropyCoder.load(m_rqt[depth].rqtRoot);
> m_entropyCoder.resetBits();
>
> - codeInterSubdivCbfQT(cu, absPartIdx, depth, depthRange);
> + codeInterSubdivCbfQT(cu, absPartIdx, tuDepth, depthRange);
> uint32_t splitCbfBits = m_entropyCoder.getNumberOfWrittenBits();
> splitCost.bits += splitCbfBits;
>
> @@ -3307,14 +3298,12 @@
> outCosts.energy += fullCost.energy;
> }
>
> -void Search::codeInterSubdivCbfQT(CUData& cu, uint32_t absPartIdx, const uint32_t depth, const uint32_t depthRange[2])
> +void Search::codeInterSubdivCbfQT(CUData& cu, uint32_t absPartIdx, const uint32_t tuDepth, const uint32_t depthRange[2])
> {
> - X265_CHECK(cu.m_cuDepth[0] == cu.m_cuDepth[absPartIdx], "depth not matching\n");
> X265_CHECK(cu.isInter(absPartIdx), "codeInterSubdivCbfQT() with intra block\n");
>
> - const uint32_t tuDepth = depth - cu.m_cuDepth[0];
> - const bool bSubdiv = tuDepth != cu.m_tuDepth[absPartIdx];
> - const uint32_t log2TrSize = g_maxLog2CUSize - depth;
> + const bool bSubdiv = tuDepth < cu.m_tuDepth[absPartIdx];
> + uint32_t log2TrSize = cu.m_log2CUSize[0] - tuDepth;
>
> if (!(log2TrSize - m_hChromaShift < 2))
> {
> @@ -3337,102 +3326,19 @@
> {
> uint32_t qNumParts = 1 << (log2TrSize -1 - LOG2_UNIT_SIZE) * 2;
> for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
> - codeInterSubdivCbfQT(cu, absPartIdx, depth + 1, depthRange);
> + codeInterSubdivCbfQT(cu, absPartIdx, tuDepth + 1, depthRange);
> }
> }
>
> -void Search::encodeResidualQT(CUData& cu, uint32_t absPartIdx, const uint32_t depth, TextType ttype, const uint32_t depthRange[2])
> +void Search::saveResidualQTData(CUData& cu, ShortYuv& resiYuv, uint32_t absPartIdx, uint32_t tuDepth)
> {
> - X265_CHECK(cu.m_cuDepth[0] == cu.m_cuDepth[absPartIdx], "depth not matching\n");
> - X265_CHECK(cu.isInter(absPartIdx), "encodeResidualQT() with intra block\n");
> -
> - const uint32_t curTuDepth = depth - cu.m_cuDepth[0];
> - const uint32_t tuDepth = cu.m_tuDepth[absPartIdx];
> - const bool bSubdiv = curTuDepth != tuDepth;
> - const uint32_t log2TrSize = g_maxLog2CUSize - depth;
> -
> - if (bSubdiv)
> - {
> - if (cu.getCbf(absPartIdx, ttype, curTuDepth))
> - {
> - uint32_t qNumParts = 1 << (log2TrSize - 1 - LOG2_UNIT_SIZE) * 2;
> - for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
> - encodeResidualQT(cu, absPartIdx, depth + 1, ttype, depthRange);
> - }
> - return;
> - }
> - else
> - {
> - const bool splitIntoSubTUs = (m_csp == X265_CSP_I422);
> - uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
> -
> - // Luma
> - const uint32_t qtLayer = log2TrSize - 2;
> - uint32_t coeffOffsetY = absPartIdx << (LOG2_UNIT_SIZE * 2);
> - coeff_t* coeffCurY = m_rqt[qtLayer].coeffRQT[0] + coeffOffsetY;
> -
> - // Chroma
> - bool bCodeChroma = true;
> - uint32_t tuDepthC = tuDepth;
> - if (log2TrSize == 2 && m_csp != X265_CSP_I444)
> - {
> - X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && tuDepth, "invalid tuDepth\n");
> - log2TrSizeC++;
> - tuDepthC--;
> - bCodeChroma = !(absPartIdx & 3);
> - }
> -
> - if (ttype == TEXT_LUMA && cu.getCbf(absPartIdx, TEXT_LUMA, tuDepth))
> - m_entropyCoder.codeCoeffNxN(cu, coeffCurY, absPartIdx, log2TrSize, TEXT_LUMA);
> -
> - if (bCodeChroma)
> - {
> - uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
> - coeff_t* coeffCurU = m_rqt[qtLayer].coeffRQT[1] + coeffOffsetC;
> - coeff_t* coeffCurV = m_rqt[qtLayer].coeffRQT[2] + coeffOffsetC;
> -
> - if (!splitIntoSubTUs)
> - {
> - if (ttype == TEXT_CHROMA_U && cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth))
> - m_entropyCoder.codeCoeffNxN(cu, coeffCurU, absPartIdx, log2TrSizeC, TEXT_CHROMA_U);
> - if (ttype == TEXT_CHROMA_V && cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth))
> - m_entropyCoder.codeCoeffNxN(cu, coeffCurV, absPartIdx, log2TrSizeC, TEXT_CHROMA_V);
> - }
> - else
> - {
> - uint32_t tuNumParts = 2 << ((log2TrSizeC - LOG2_UNIT_SIZE) * 2);
> - uint32_t subTUSize = 1 << (log2TrSizeC * 2);
> - if (ttype == TEXT_CHROMA_U && cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth))
> - {
> - if (cu.getCbf(absPartIdx, ttype, tuDepth + 1))
> - m_entropyCoder.codeCoeffNxN(cu, coeffCurU, absPartIdx, log2TrSizeC, TEXT_CHROMA_U);
> - if (cu.getCbf(absPartIdx + tuNumParts, ttype, tuDepth + 1))
> - m_entropyCoder.codeCoeffNxN(cu, coeffCurU + subTUSize, absPartIdx + tuNumParts, log2TrSizeC, TEXT_CHROMA_U);
> - }
> - if (ttype == TEXT_CHROMA_V && cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth))
> - {
> - if (cu.getCbf(absPartIdx, ttype, tuDepth + 1))
> - m_entropyCoder.codeCoeffNxN(cu, coeffCurV, absPartIdx, log2TrSizeC, TEXT_CHROMA_V);
> - if (cu.getCbf(absPartIdx + tuNumParts, ttype, tuDepth + 1))
> - m_entropyCoder.codeCoeffNxN(cu, coeffCurV + subTUSize, absPartIdx + tuNumParts, log2TrSizeC, TEXT_CHROMA_V);
> - }
> - }
> - }
> - }
> -}
> -
> -void Search::saveResidualQTData(CUData& cu, ShortYuv& resiYuv, uint32_t absPartIdx, uint32_t depth)
> -{
> - X265_CHECK(cu.m_cuDepth[0] == cu.m_cuDepth[absPartIdx], "depth not matching\n");
> - const uint32_t curTrMode = depth - cu.m_cuDepth[0];
> - const uint32_t tuDepth = cu.m_tuDepth[absPartIdx];
> - const uint32_t log2TrSize = g_maxLog2CUSize - depth;
> -
> - if (curTrMode < tuDepth)
> + const uint32_t log2TrSize = cu.m_log2CUSize[0] - tuDepth;
> +
> + if (tuDepth < cu.m_tuDepth[absPartIdx])
> {
> uint32_t qNumParts = 1 << (log2TrSize - 1 - LOG2_UNIT_SIZE) * 2;
> for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
> - saveResidualQTData(cu, resiYuv, absPartIdx, depth + 1);
> + saveResidualQTData(cu, resiYuv, absPartIdx, tuDepth + 1);
> return;
> }
>
> diff -r 65e71f08c55a -r 270c97866810 source/encoder/search.h
> --- a/source/encoder/search.h Sat Jan 17 10:12:34 2015 +0530
> +++ b/source/encoder/search.h Sat Jan 17 18:32:52 2015 +0900
> @@ -201,7 +201,7 @@
> bool m_bJobsQueued;
> void singleMotionEstimation(Search& master, Mode& interMode, const CUGeom& cuGeom, int part, int list, int ref);
>
> - void saveResidualQTData(CUData& cu, ShortYuv& resiYuv, uint32_t absPartIdx, uint32_t depth);
> + void saveResidualQTData(CUData& cu, ShortYuv& resiYuv, uint32_t absPartIdx, uint32_t tuDepth);
>
> // RDO search of luma intra modes; result is fully encoded luma. luma distortion is returned
> uint32_t estIntraPredQT(Mode &intraMode, const CUGeom& cuGeom, const uint32_t depthRange[2], uint8_t* sharedModes);
> @@ -210,7 +210,7 @@
> uint32_t estIntraPredChromaQT(Mode &intraMode, const CUGeom& cuGeom);
>
> void codeSubdivCbfQTChroma(const CUData& cu, uint32_t tuDepth, uint32_t absPartIdx);
> - void codeInterSubdivCbfQT(CUData& cu, uint32_t absPartIdx, const uint32_t depth, const uint32_t depthRange[2]);
> + void codeInterSubdivCbfQT(CUData& cu, uint32_t absPartIdx, const uint32_t tuDepth, const uint32_t depthRange[2]);
> void codeCoeffQTChroma(const CUData& cu, uint32_t tuDepth, uint32_t absPartIdx, TextType ttype);
>
> struct Cost
> @@ -225,9 +225,6 @@
> uint64_t estimateNullCbfCost(uint32_t &dist, uint32_t &psyEnergy, uint32_t tuDepth, TextType compId);
> void estimateResidualQT(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, ShortYuv& resiYuv, Cost& costs, const uint32_t depthRange[2]);
>
> - // estimate bit cost of residual QT
> - void encodeResidualQT(CUData& cu, uint32_t absPartIdx, uint32_t depth, TextType ttype, const uint32_t depthRange[2]);
> -
> // generate prediction, generate residual and recon. if bAllowSplit, find optimal RQT splits
> void codeIntraLumaQT(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, bool bAllowSplit, Cost& costs, const uint32_t depthRange[2]);
> void codeIntraLumaTSkip(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, Cost& costs);
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
--
Steve Borho
More information about the x265-devel
mailing list