[x265] [PATCH 1 of 7] replace global g_maxCUSize with param->maxCUSize
Pradeep Ramachandran
pradeep at multicorewareinc.com
Thu Jun 22 07:25:27 CEST 2017
On Wed, Jun 21, 2017 at 12:14 PM, <kavitha at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Kavitha Sampath <kavitha at multicorewareinc.com>
> # Date 1498023302 -19800
> # Wed Jun 21 11:05:02 2017 +0530
> # Node ID 68b27c44790d200ceb95b26962a84c8230d29eba
> # Parent 4436e1ca6f3987292dea608c7ecb2780fdcfc4df
> replace global g_maxCUSize with param->maxCUSize
>
This will enable running multiple instances of x265 in the same application
with different maxCUSize values. Thanks!
Pushed the full set of 7 patches into the default branch.
>
> diff -r 4436e1ca6f39 -r 68b27c44790d source/common/cudata.cpp
> --- a/source/common/cudata.cpp Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/common/cudata.cpp Wed Jun 21 11:05:02 2017 +0530
> @@ -119,8 +119,9 @@
> memset(this, 0, sizeof(*this));
> }
>
> -void CUData::initialize(const CUDataMemPool& dataPool, uint32_t depth,
> int csp, int instance)
> +void CUData::initialize(const CUDataMemPool& dataPool, uint32_t depth,
> const x265_param& param, int instance)
> {
> + int csp = param.internalCsp;
> m_chromaFormat = csp;
> m_hChromaShift = CHROMA_H_SHIFT(csp);
> m_vChromaShift = CHROMA_V_SHIFT(csp);
> @@ -221,7 +222,7 @@
>
> m_distortion = dataPool.distortionMemBlock + instance *
> m_numPartitions;
>
> - uint32_t cuSize = g_maxCUSize >> depth;
> + uint32_t cuSize = param.maxCUSize >> depth;
> m_trCoeff[0] = dataPool.trCoeffMemBlock + instance * (cuSize *
> cuSize);
> m_trCoeff[1] = m_trCoeff[2] = 0;
> m_transformSkip[1] = m_transformSkip[2] = m_cbf[1] = m_cbf[2] = 0;
> @@ -263,7 +264,7 @@
>
> m_distortion = dataPool.distortionMemBlock + instance *
> m_numPartitions;
>
> - uint32_t cuSize = g_maxCUSize >> depth;
> + uint32_t cuSize = param.maxCUSize >> depth;
> uint32_t sizeL = cuSize * cuSize;
> uint32_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift); //
> block chroma part
> m_trCoeff[0] = dataPool.trCoeffMemBlock + instance * (sizeL +
> sizeC * 2);
> @@ -1917,10 +1918,10 @@
> uint32_t offset = 8;
>
> int16_t xmax = (int16_t)((m_slice->m_sps->picWidthInLumaSamples +
> offset - m_cuPelX - 1) << mvshift);
> - int16_t xmin = -(int16_t)((g_maxCUSize + offset + m_cuPelX - 1) <<
> mvshift);
> + int16_t xmin = -(int16_t)((m_encData->m_param->maxCUSize + offset +
> m_cuPelX - 1) << mvshift);
>
> int16_t ymax = (int16_t)((m_slice->m_sps->picHeightInLumaSamples +
> offset - m_cuPelY - 1) << mvshift);
> - int16_t ymin = -(int16_t)((g_maxCUSize + offset + m_cuPelY - 1) <<
> mvshift);
> + int16_t ymin = -(int16_t)((m_encData->m_param->maxCUSize + offset +
> m_cuPelY - 1) << mvshift);
>
> outMV.x = X265_MIN(xmax, X265_MAX(xmin, outMV.x));
> outMV.y = X265_MIN(ymax, X265_MAX(ymin, outMV.y));
> diff -r 4436e1ca6f39 -r 68b27c44790d source/common/cudata.h
> --- a/source/common/cudata.h Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/common/cudata.h Wed Jun 21 11:05:02 2017 +0530
> @@ -225,7 +225,7 @@
>
> CUData();
>
> - void initialize(const CUDataMemPool& dataPool, uint32_t depth,
> int csp, int instance);
> + void initialize(const CUDataMemPool& dataPool, uint32_t depth,
> const x265_param& param, int instance);
> static void calcCTUGeoms(uint32_t ctuWidth, uint32_t ctuHeight,
> uint32_t maxCUSize, uint32_t minCUSize, CUGeom
> cuDataArray[CUGeom::MAX_GEOMS]);
>
> void initCTU(const Frame& frame, uint32_t cuAddr, int qp,
> uint32_t firstRowInSlice, uint32_t lastRowInSlice, uint32_t lastCUInSlice);
> @@ -350,10 +350,10 @@
>
> CUDataMemPool() { charMemBlock = NULL; trCoeffMemBlock = NULL;
> mvMemBlock = NULL; distortionMemBlock = NULL; }
>
> - bool create(uint32_t depth, uint32_t csp, uint32_t numInstances)
> + bool create(uint32_t depth, uint32_t csp, uint32_t numInstances,
> const x265_param& param)
> {
> uint32_t numPartition = NUM_4x4_PARTITIONS >> (depth * 2);
> - uint32_t cuSize = g_maxCUSize >> depth;
> + uint32_t cuSize = param.maxCUSize >> depth;
> uint32_t sizeL = cuSize * cuSize;
> if (csp == X265_CSP_I400)
> {
> diff -r 4436e1ca6f39 -r 68b27c44790d source/common/frame.cpp
> --- a/source/common/frame.cpp Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/common/frame.cpp Wed Jun 21 11:05:02 2017 +0530
> @@ -63,8 +63,8 @@
>
> if (param->bCTUInfo)
> {
> - uint32_t widthInCTU = (m_param->sourceWidth + g_maxCUSize - 1) >>
> g_maxLog2CUSize;
> - uint32_t heightInCTU = (m_param->sourceHeight + g_maxCUSize - 1)
> >> g_maxLog2CUSize;
> + uint32_t widthInCTU = (m_param->sourceWidth + param->maxCUSize -
> 1) >> g_maxLog2CUSize;
> + uint32_t heightInCTU = (m_param->sourceHeight + param->maxCUSize
> - 1) >> g_maxLog2CUSize;
> uint32_t numCTUsInFrame = widthInCTU * heightInCTU;
> CHECKED_MALLOC_ZERO(m_addOnDepth, uint8_t *, numCTUsInFrame);
> CHECKED_MALLOC_ZERO(m_addOnCtuInfo, uint8_t *, numCTUsInFrame);
> @@ -77,11 +77,10 @@
> }
> }
>
> - if (m_fencPic->create(param->sourceWidth, param->sourceHeight,
> param->internalCsp) &&
> - m_lowres.create(m_fencPic, param->bframes, !!param->rc.aqMode ||
> !!param->bAQMotion, param->rc.qgSize))
> + if (m_fencPic->create(param) && m_lowres.create(m_fencPic,
> param->bframes, !!param->rc.aqMode || !!param->bAQMotion, param->rc.qgSize))
> {
> X265_CHECK((m_reconColCount == NULL), "m_reconColCount was
> initialized");
> - m_numRows = (m_fencPic->m_picHeight + g_maxCUSize - 1) /
> g_maxCUSize;
> + m_numRows = (m_fencPic->m_picHeight + param->maxCUSize - 1) /
> param->maxCUSize;
> m_reconRowFlag = new ThreadSafeInteger[m_numRows];
> m_reconColCount = new ThreadSafeInteger[m_numRows];
>
> @@ -107,12 +106,12 @@
> m_reconPic = new PicYuv;
> m_param = param;
> m_encData->m_reconPic = m_reconPic;
> - bool ok = m_encData->create(*param, sps, m_fencPic->m_picCsp) &&
> m_reconPic->create(param->sourceWidth, param->sourceHeight,
> param->internalCsp);
> + bool ok = m_encData->create(*param, sps, m_fencPic->m_picCsp) &&
> m_reconPic->create(param);
> if (ok)
> {
> /* initialize right border of m_reconpicYuv as SAO may read
> beyond the
> * end of the picture accessing uninitialized pixels */
> - int maxHeight = sps.numCuInHeight * g_maxCUSize;
> + int maxHeight = sps.numCuInHeight * param->maxCUSize;
> memset(m_reconPic->m_picOrg[0], 0, sizeof(pixel)*
> m_reconPic->m_stride * maxHeight);
>
> /* use pre-calculated cu/pu offsets cached in the SPS structure */
> @@ -189,8 +188,8 @@
>
> if (m_ctuInfo)
> {
> - uint32_t widthInCU = (m_param->sourceWidth + g_maxCUSize - 1) >>
> g_maxLog2CUSize;
> - uint32_t heightInCU = (m_param->sourceHeight + g_maxCUSize - 1)
> >> g_maxLog2CUSize;
> + uint32_t widthInCU = (m_param->sourceWidth + m_param->maxCUSize -
> 1) >> g_maxLog2CUSize;
> + uint32_t heightInCU = (m_param->sourceHeight + m_param->maxCUSize
> - 1) >> g_maxLog2CUSize;
> uint32_t numCUsInFrame = widthInCU * heightInCU;
> for (uint32_t i = 0; i < numCUsInFrame; i++)
> {
> diff -r 4436e1ca6f39 -r 68b27c44790d source/common/framedata.cpp
> --- a/source/common/framedata.cpp Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/common/framedata.cpp Wed Jun 21 11:05:02 2017 +0530
> @@ -41,9 +41,9 @@
> if (param.rc.bStatWrite)
> m_spsrps = const_cast<RPS*>(sps.spsrps);
>
> - m_cuMemPool.create(0, param.internalCsp, sps.numCUsInFrame);
> + m_cuMemPool.create(0, param.internalCsp, sps.numCUsInFrame, param);
> for (uint32_t ctuAddr = 0; ctuAddr < sps.numCUsInFrame; ctuAddr++)
> - m_picCTU[ctuAddr].initialize(m_cuMemPool, 0, param.internalCsp,
> ctuAddr);
> + m_picCTU[ctuAddr].initialize(m_cuMemPool, 0, param, ctuAddr);
>
> CHECKED_MALLOC_ZERO(m_cuStat, RCStatCU, sps.numCUsInFrame);
> CHECKED_MALLOC(m_rowStat, RCStatRow, sps.numCuInHeight);
> diff -r 4436e1ca6f39 -r 68b27c44790d source/common/picyuv.cpp
> --- a/source/common/picyuv.cpp Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/common/picyuv.cpp Wed Jun 21 11:05:02 2017 +0530
> @@ -69,22 +69,26 @@
> m_vChromaShift = 0;
> }
>
> -bool PicYuv::create(uint32_t picWidth, uint32_t picHeight, uint32_t
> picCsp, pixel *pixelbuf)
> +bool PicYuv::create(x265_param* param, pixel *pixelbuf)
> {
> + m_param = param;
> + uint32_t picWidth = m_param->sourceWidth;
> + uint32_t picHeight = m_param->sourceHeight;
> + uint32_t picCsp = m_param->internalCsp;
> m_picWidth = picWidth;
> m_picHeight = picHeight;
> m_hChromaShift = CHROMA_H_SHIFT(picCsp);
> m_vChromaShift = CHROMA_V_SHIFT(picCsp);
> m_picCsp = picCsp;
>
> - uint32_t numCuInWidth = (m_picWidth + g_maxCUSize - 1) / g_maxCUSize;
> - uint32_t numCuInHeight = (m_picHeight + g_maxCUSize - 1) /
> g_maxCUSize;
> + uint32_t numCuInWidth = (m_picWidth + param->maxCUSize - 1) /
> param->maxCUSize;
> + uint32_t numCuInHeight = (m_picHeight + param->maxCUSize - 1) /
> param->maxCUSize;
>
> - m_lumaMarginX = g_maxCUSize + 32; // search margin and 8-tap filter
> half-length, padded for 32-byte alignment
> - m_lumaMarginY = g_maxCUSize + 16; // margin for 8-tap filter and
> infinite padding
> - m_stride = (numCuInWidth * g_maxCUSize) + (m_lumaMarginX << 1);
> + m_lumaMarginX = param->maxCUSize + 32; // search margin and 8-tap
> filter half-length, padded for 32-byte alignment
> + m_lumaMarginY = param->maxCUSize + 16; // margin for 8-tap filter and
> infinite padding
> + m_stride = (numCuInWidth * param->maxCUSize) + (m_lumaMarginX << 1);
>
> - int maxHeight = numCuInHeight * g_maxCUSize;
> + int maxHeight = numCuInHeight * param->maxCUSize;
> if (pixelbuf)
> m_picOrg[0] = pixelbuf;
> else
> @@ -97,7 +101,7 @@
> {
> m_chromaMarginX = m_lumaMarginX; // keep 16-byte alignment for
> chroma CTUs
> m_chromaMarginY = m_lumaMarginY >> m_vChromaShift;
> - m_strideC = ((numCuInWidth * g_maxCUSize) >> m_hChromaShift) +
> (m_chromaMarginX * 2);
> + m_strideC = ((numCuInWidth * m_param->maxCUSize) >>
> m_hChromaShift) + (m_chromaMarginX * 2);
>
> CHECKED_MALLOC(m_picBuf[1], pixel, m_strideC * ((maxHeight >>
> m_vChromaShift) + (m_chromaMarginY * 2)));
> CHECKED_MALLOC(m_picBuf[2], pixel, m_strideC * ((maxHeight >>
> m_vChromaShift) + (m_chromaMarginY * 2)));
> @@ -124,14 +128,14 @@
> m_vChromaShift = CHROMA_V_SHIFT(picCsp);
> m_picCsp = picCsp;
>
> - uint32_t numCuInWidth = (m_picWidth + g_maxCUSize - 1) / g_maxCUSize;
> - uint32_t numCuInHeight = (m_picHeight + g_maxCUSize - 1) /
> g_maxCUSize;
> + uint32_t numCuInWidth = (m_picWidth + m_param->maxCUSize - 1) /
> m_param->maxCUSize;
> + uint32_t numCuInHeight = (m_picHeight + m_param->maxCUSize - 1) /
> m_param->maxCUSize;
>
> - m_lumaMarginX = g_maxCUSize + 32; // search margin and 8-tap filter
> half-length, padded for 32-byte alignment
> - m_lumaMarginY = g_maxCUSize + 16; // margin for 8-tap filter and
> infinite padding
> - m_stride = (numCuInWidth * g_maxCUSize) + (m_lumaMarginX << 1);
> + m_lumaMarginX = m_param->maxCUSize + 32; // search margin and 8-tap
> filter half-length, padded for 32-byte alignment
> + m_lumaMarginY = m_param->maxCUSize + 16; // margin for 8-tap filter
> and infinite padding
> + m_stride = (numCuInWidth * m_param->maxCUSize) + (m_lumaMarginX << 1);
>
> - int maxHeight = numCuInHeight * g_maxCUSize;
> + int maxHeight = numCuInHeight * m_param->maxCUSize;
> int bufLen = (int)(m_stride * (maxHeight + (m_lumaMarginY * 2)));
>
> return bufLen;
> @@ -152,8 +156,8 @@
> {
> for (uint32_t cuCol = 0; cuCol < sps.numCuInWidth; cuCol++)
> {
> - m_cuOffsetY[cuRow * sps.numCuInWidth + cuCol] = m_stride
> * cuRow * g_maxCUSize + cuCol * g_maxCUSize;
> - m_cuOffsetC[cuRow * sps.numCuInWidth + cuCol] = m_strideC
> * cuRow * (g_maxCUSize >> m_vChromaShift) + cuCol * (g_maxCUSize >>
> m_hChromaShift);
> + m_cuOffsetY[cuRow * sps.numCuInWidth + cuCol] = m_stride
> * cuRow * m_param->maxCUSize + cuCol * m_param->maxCUSize;
> + m_cuOffsetC[cuRow * sps.numCuInWidth + cuCol] = m_strideC
> * cuRow * (m_param->maxCUSize >> m_vChromaShift) + cuCol *
> (m_param->maxCUSize >> m_hChromaShift);
> }
> }
>
> @@ -172,7 +176,7 @@
> CHECKED_MALLOC(m_cuOffsetY, intptr_t, sps.numCuInWidth *
> sps.numCuInHeight);
> for (uint32_t cuRow = 0; cuRow < sps.numCuInHeight; cuRow++)
> for (uint32_t cuCol = 0; cuCol < sps.numCuInWidth; cuCol++)
> - m_cuOffsetY[cuRow * sps.numCuInWidth + cuCol] = m_stride *
> cuRow * g_maxCUSize + cuCol * g_maxCUSize;
> + m_cuOffsetY[cuRow * sps.numCuInWidth + cuCol] = m_stride *
> cuRow * m_param->maxCUSize + cuCol * m_param->maxCUSize;
>
> CHECKED_MALLOC(m_buOffsetY, intptr_t, (size_t)numPartitions);
> for (uint32_t idx = 0; idx < numPartitions; ++idx)
> diff -r 4436e1ca6f39 -r 68b27c44790d source/common/picyuv.h
> --- a/source/common/picyuv.h Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/common/picyuv.h Wed Jun 21 11:05:02 2017 +0530
> @@ -71,10 +71,11 @@
> pixel m_maxChromaVLevel;
> pixel m_minChromaVLevel;
> double m_avgChromaVLevel;
> + x265_param *m_param;
>
> PicYuv();
>
> - bool create(uint32_t picWidth, uint32_t picHeight, uint32_t csp,
> pixel *pixelbuf = NULL);
> + bool create(x265_param* param, pixel *pixelbuf = NULL);
> bool createOffsets(const SPS& sps);
> void destroy();
> int getLumaBufLen(uint32_t picWidth, uint32_t picHeight, uint32_t
> picCsp);
> diff -r 4436e1ca6f39 -r 68b27c44790d source/common/slice.cpp
> --- a/source/common/slice.cpp Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/common/slice.cpp Wed Jun 21 11:05:02 2017 +0530
> @@ -187,8 +187,8 @@
> // Calculate end address
> uint32_t internalAddress = (endCUAddr - 1) % NUM_4x4_PARTITIONS;
> uint32_t externalAddress = (endCUAddr - 1) / NUM_4x4_PARTITIONS;
> - uint32_t xmax = m_sps->picWidthInLumaSamples - (externalAddress %
> m_sps->numCuInWidth) * g_maxCUSize;
> - uint32_t ymax = m_sps->picHeightInLumaSamples - (externalAddress /
> m_sps->numCuInWidth) * g_maxCUSize;
> + uint32_t xmax = m_sps->picWidthInLumaSamples - (externalAddress %
> m_sps->numCuInWidth) * m_param->maxCUSize;
> + uint32_t ymax = m_sps->picHeightInLumaSamples - (externalAddress /
> m_sps->numCuInWidth) * m_param->maxCUSize;
>
> while (g_zscanToPelX[internalAddress] >= xmax ||
> g_zscanToPelY[internalAddress] >= ymax)
> internalAddress--;
> diff -r 4436e1ca6f39 -r 68b27c44790d source/common/slice.h
> --- a/source/common/slice.h Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/common/slice.h Wed Jun 21 11:05:02 2017 +0530
> @@ -360,6 +360,7 @@
> int m_iPPSQpMinus26;
> int numRefIdxDefault[2];
> int m_iNumRPSInSPS;
> + const x265_param *m_param;
>
> Slice()
> {
> diff -r 4436e1ca6f39 -r 68b27c44790d source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/encoder/analysis.cpp Wed Jun 21 11:05:02 2017 +0530
> @@ -90,19 +90,19 @@
> cacheCost = X265_MALLOC(uint64_t, costArrSize);
>
> int csp = m_param->internalCsp;
> - uint32_t cuSize = g_maxCUSize;
> + uint32_t cuSize = m_param->maxCUSize;
>
> bool ok = true;
> for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++, cuSize >>= 1)
> {
> ModeDepth &md = m_modeDepth[depth];
>
> - md.cuMemPool.create(depth, csp, MAX_PRED_TYPES);
> + md.cuMemPool.create(depth, csp, MAX_PRED_TYPES, *m_param);
> ok &= md.fencYuv.create(cuSize, csp);
>
> for (int j = 0; j < MAX_PRED_TYPES; j++)
> {
> - md.pred[j].cu.initialize(md.cuMemPool, depth, csp, j);
> + md.pred[j].cu.initialize(md.cuMemPool, depth, *m_param, j);
> ok &= md.pred[j].predYuv.create(cuSize, csp);
> ok &= md.pred[j].reconYuv.create(cuSize, csp);
> md.pred[j].fencYuv = &md.fencYuv;
> @@ -236,8 +236,8 @@
> else
> {
> if (m_param->bIntraRefresh && m_slice->m_sliceType == P_SLICE &&
> - ctu.m_cuPelX / g_maxCUSize >= frame.m_encData->m_pir.
> pirStartCol
> - && ctu.m_cuPelX / g_maxCUSize < frame.m_encData->m_pir.
> pirEndCol)
> + ctu.m_cuPelX / m_param->maxCUSize >= frame.m_encData->m_pir.
> pirStartCol
> + && ctu.m_cuPelX / m_param->maxCUSize < frame.m_encData->m_pir.
> pirEndCol)
> compressIntraCU(ctu, cuGeom, qp);
> else if (!m_param->rdLevel)
> {
> @@ -2440,7 +2440,7 @@
> int safeX, maxSafeMv;
> if (m_param->bIntraRefresh && m_slice->m_sliceType == P_SLICE)
> {
> - safeX = m_slice->m_refFrameList[0][0]->m_encData->m_pir.pirEndCol
> * g_maxCUSize - 3;
> + safeX = m_slice->m_refFrameList[0][0]->m_encData->m_pir.pirEndCol
> * m_param->maxCUSize - 3;
> maxSafeMv = (safeX - tempPred->cu.m_cuPelX) * 4;
> }
> for (uint32_t i = 0; i < numMergeCand; ++i)
> @@ -2466,7 +2466,7 @@
> }
>
> if (m_param->bIntraRefresh && m_slice->m_sliceType == P_SLICE &&
> - tempPred->cu.m_cuPelX / g_maxCUSize <
> m_frame->m_encData->m_pir.pirEndCol &&
> + tempPred->cu.m_cuPelX / m_param->maxCUSize <
> m_frame->m_encData->m_pir.pirEndCol &&
> candMvField[i][0].mv.x > maxSafeMv)
> // skip merge candidates which reference beyond safe
> reference area
> continue;
> @@ -2570,7 +2570,7 @@
> int safeX, maxSafeMv;
> if (m_param->bIntraRefresh && m_slice->m_sliceType == P_SLICE)
> {
> - safeX = m_slice->m_refFrameList[0][0]->m_encData->m_pir.pirEndCol
> * g_maxCUSize - 3;
> + safeX = m_slice->m_refFrameList[0][0]->m_encData->m_pir.pirEndCol
> * m_param->maxCUSize - 3;
> maxSafeMv = (safeX - tempPred->cu.m_cuPelX) * 4;
> }
> for (uint32_t i = 0; i < numMergeCand; i++)
> @@ -2611,7 +2611,7 @@
> triedBZero = true;
> }
> if (m_param->bIntraRefresh && m_slice->m_sliceType == P_SLICE &&
> - tempPred->cu.m_cuPelX / g_maxCUSize <
> m_frame->m_encData->m_pir.pirEndCol &&
> + tempPred->cu.m_cuPelX / m_param->maxCUSize <
> m_frame->m_encData->m_pir.pirEndCol &&
> candMvField[i][0].mv.x > maxSafeMv)
> // skip merge candidates which reference beyond safe
> reference area
> continue;
> @@ -3236,7 +3236,7 @@
> uint32_t block_x = ctu.m_cuPelX + g_zscanToPelX[cuGeom.
> absPartIdx];
> uint32_t block_y = ctu.m_cuPelY + g_zscanToPelY[cuGeom.
> absPartIdx];
> uint32_t maxCols = (m_frame->m_fencPic->m_picWidth + (loopIncr -
> 1)) / loopIncr;
> - uint32_t blockSize = g_maxCUSize >> cuGeom.depth;
> + uint32_t blockSize = m_param->maxCUSize >> cuGeom.depth;
> double qp_offset = 0;
> uint32_t cnt = 0;
> uint32_t idx;
> diff -r 4436e1ca6f39 -r 68b27c44790d source/encoder/api.cpp
> --- a/source/encoder/api.cpp Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/encoder/api.cpp Wed Jun 21 11:05:02 2017 +0530
> @@ -361,8 +361,8 @@
>
> if (param->analysisMode)
> {
> - uint32_t widthInCU = (param->sourceWidth + g_maxCUSize -
> 1) >> g_maxLog2CUSize;
> - uint32_t heightInCU = (param->sourceHeight + g_maxCUSize -
> 1) >> g_maxLog2CUSize;
> + uint32_t widthInCU = (param->sourceWidth + param->maxCUSize - 1)
> >> g_maxLog2CUSize;
> + uint32_t heightInCU = (param->sourceHeight + param->maxCUSize -
> 1) >> g_maxLog2CUSize;
>
> uint32_t numCUsInFrame = widthInCU * heightInCU;
> pic->analysisData.numCUsInFrame = numCUsInFrame;
> diff -r 4436e1ca6f39 -r 68b27c44790d source/encoder/dpb.cpp
> --- a/source/encoder/dpb.cpp Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/encoder/dpb.cpp Wed Jun 21 11:05:02 2017 +0530
> @@ -107,8 +107,8 @@
>
> if (curFrame->m_ctuInfo != NULL)
> {
> - uint32_t widthInCU = (curFrame->m_param->sourceWidth +
> g_maxCUSize - 1) >> g_maxLog2CUSize;
> - uint32_t heightInCU = (curFrame->m_param->sourceHeight +
> g_maxCUSize - 1) >> g_maxLog2CUSize;
> + uint32_t widthInCU = (curFrame->m_param->sourceWidth +
> curFrame->m_param->maxCUSize - 1) >> g_maxLog2CUSize;
> + uint32_t heightInCU = (curFrame->m_param->sourceHeight +
> curFrame->m_param->maxCUSize - 1) >> g_maxLog2CUSize;
> uint32_t numCUsInFrame = widthInCU * heightInCU;
> for (uint32_t i = 0; i < numCUsInFrame; i++)
> {
> diff -r 4436e1ca6f39 -r 68b27c44790d source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/encoder/encoder.cpp Wed Jun 21 11:05:02 2017 +0530
> @@ -321,8 +321,8 @@
> else
> m_scalingList.setupQuantMatrices(m_sps.chromaFormatIdc);
>
> - int numRows = (m_param->sourceHeight + g_maxCUSize - 1) / g_maxCUSize;
> - int numCols = (m_param->sourceWidth + g_maxCUSize - 1) / g_maxCUSize;
> + int numRows = (m_param->sourceHeight + m_param->maxCUSize - 1) /
> m_param->maxCUSize;
> + int numCols = (m_param->sourceWidth + m_param->maxCUSize - 1) /
> m_param->maxCUSize;
> for (int i = 0; i < m_param->frameNumThreads; i++)
> {
> if (!m_frameEncoder[i]->init(this, numRows, numCols))
> @@ -1035,16 +1035,17 @@
> Slice* slice = frameEnc->m_encData->m_slice;
> slice->m_sps = &m_sps;
> slice->m_pps = &m_pps;
> + slice->m_param = m_param;
> slice->m_maxNumMergeCand = m_param->maxNumMergeCand;
> slice->m_endCUAddr = slice->realEndAddress(m_sps.numCUsInFrame
> * NUM_4x4_PARTITIONS);
> }
>
> if (m_param->searchMethod == X265_SEA &&
> frameEnc->m_lowres.sliceType != X265_TYPE_B)
> {
> - int padX = g_maxCUSize + 32;
> - int padY = g_maxCUSize + 16;
> - uint32_t numCuInHeight = (frameEnc->m_encData->m_reconPic->m_picHeight
> + g_maxCUSize - 1) / g_maxCUSize;
> - int maxHeight = numCuInHeight * g_maxCUSize;
> + int padX = m_param->maxCUSize + 32;
> + int padY = m_param->maxCUSize + 16;
> + uint32_t numCuInHeight = (frameEnc->m_encData->m_reconPic->m_picHeight
> + m_param->maxCUSize - 1) / m_param->maxCUSize;
> + int maxHeight = numCuInHeight * m_param->maxCUSize;
> for (int i = 0; i < INTEGRAL_PLANE_NUM; i++)
> {
> frameEnc->m_encData->m_meBuffer[i] =
> X265_MALLOC(uint32_t, frameEnc->m_reconPic->m_stride * (maxHeight + (2 *
> padY)));
> @@ -1108,8 +1109,8 @@
> x265_analysis_data* analysis = &frameEnc->m_analysisData;
> analysis->poc = frameEnc->m_poc;
> analysis->sliceType = frameEnc->m_lowres.sliceType;
> - uint32_t widthInCU = (m_param->sourceWidth +
> g_maxCUSize - 1) >> g_maxLog2CUSize;
> - uint32_t heightInCU = (m_param->sourceHeight +
> g_maxCUSize - 1) >> g_maxLog2CUSize;
> + uint32_t widthInCU = (m_param->sourceWidth +
> m_param->maxCUSize - 1) >> g_maxLog2CUSize;
> + uint32_t heightInCU = (m_param->sourceHeight +
> m_param->maxCUSize - 1) >> g_maxLog2CUSize;
>
> uint32_t numCUsInFrame = widthInCU * heightInCU;
> analysis->numCUsInFrame = numCUsInFrame;
> @@ -1182,8 +1183,8 @@
>
> void Encoder::copyCtuInfo(x265_ctu_info_t** frameCtuInfo, int poc)
> {
> - uint32_t widthInCU = (m_param->sourceWidth + g_maxCUSize - 1) >>
> g_maxLog2CUSize;
> - uint32_t heightInCU = (m_param->sourceHeight + g_maxCUSize - 1) >>
> g_maxLog2CUSize;
> + uint32_t widthInCU = (m_param->sourceWidth + m_param->maxCUSize - 1)
> >> g_maxLog2CUSize;
> + uint32_t heightInCU = (m_param->sourceHeight + m_param->maxCUSize -
> 1) >> g_maxLog2CUSize;
> Frame* curFrame;
> Frame* prevFrame = NULL;
> int32_t* frameCTU;
> @@ -1554,7 +1555,7 @@
> }
>
> x265_log(m_param, X265_LOG_INFO, "CU: " X265_LL " %dX%d CTUs
> compressed in %.3lf seconds, %.3lf CTUs per worker-second\n",
> - cuStats.totalCTUs, g_maxCUSize, g_maxCUSize,
> + cuStats.totalCTUs, m_param->maxCUSize, m_param->maxCUSize,
> ELAPSED_SEC(totalWorkerTime),
> cuStats.totalCTUs / ELAPSED_SEC(totalWorkerTime));
>
> @@ -1981,8 +1982,8 @@
> sps->chromaFormatIdc = m_param->internalCsp;
> sps->picWidthInLumaSamples = m_param->sourceWidth;
> sps->picHeightInLumaSamples = m_param->sourceHeight;
> - sps->numCuInWidth = (m_param->sourceWidth + g_maxCUSize - 1) /
> g_maxCUSize;
> - sps->numCuInHeight = (m_param->sourceHeight + g_maxCUSize - 1) /
> g_maxCUSize;
> + sps->numCuInWidth = (m_param->sourceWidth + m_param->maxCUSize - 1) /
> m_param->maxCUSize;
> + sps->numCuInHeight = (m_param->sourceHeight + m_param->maxCUSize - 1)
> / m_param->maxCUSize;
> sps->numCUsInFrame = sps->numCuInWidth * sps->numCuInHeight;
> sps->numPartitions = NUM_4x4_PARTITIONS;
> sps->numPartInCUSize = 1 << g_unitSizeDepth;
> @@ -2212,7 +2213,7 @@
> p->lookaheadDepth = p->totalFrames;
> if (p->bIntraRefresh)
> {
> - int numCuInWidth = (m_param->sourceWidth + g_maxCUSize - 1) /
> g_maxCUSize;
> + int numCuInWidth = (m_param->sourceWidth + m_param->maxCUSize -
> 1) / m_param->maxCUSize;
> if (p->maxNumReferences > 1)
> {
> x265_log(p, X265_LOG_WARNING, "Max References > 1 +
> intra-refresh is not supported , setting max num references = 1\n");
> @@ -2772,8 +2773,8 @@
> {
> analysis->analysisFramedata = NULL;
> analysis2PassFrameData *analysisFrameData = (analysis2PassFrameData*)
> analysis->analysisFramedata;
> - uint32_t widthInCU = (m_param->sourceWidth + g_maxCUSize - 1) >>
> g_maxLog2CUSize;
> - uint32_t heightInCU = (m_param->sourceHeight + g_maxCUSize - 1) >>
> g_maxLog2CUSize;
> + uint32_t widthInCU = (m_param->sourceWidth + m_param->maxCUSize - 1)
> >> g_maxLog2CUSize;
> + uint32_t heightInCU = (m_param->sourceHeight + m_param->maxCUSize -
> 1) >> g_maxLog2CUSize;
>
> uint32_t numCUsInFrame = widthInCU * heightInCU;
> CHECKED_MALLOC_ZERO(analysisFrameData, analysis2PassFrameData, 1);
> @@ -3074,8 +3075,8 @@
> }\
>
> uint32_t depthBytes = 0;
> - uint32_t widthInCU = (m_param->sourceWidth + g_maxCUSize - 1) >>
> g_maxLog2CUSize;
> - uint32_t heightInCU = (m_param->sourceHeight + g_maxCUSize - 1) >>
> g_maxLog2CUSize;
> + uint32_t widthInCU = (m_param->sourceWidth + m_param->maxCUSize - 1)
> >> g_maxLog2CUSize;
> + uint32_t heightInCU = (m_param->sourceHeight + m_param->maxCUSize -
> 1) >> g_maxLog2CUSize;
> uint32_t numCUsInFrame = widthInCU * heightInCU;
>
> int poc; uint32_t frameRecordSize;
> @@ -3384,8 +3385,8 @@
> }\
>
> uint32_t depthBytes = 0;
> - uint32_t widthInCU = (m_param->sourceWidth + g_maxCUSize - 1) >>
> g_maxLog2CUSize;
> - uint32_t heightInCU = (m_param->sourceHeight + g_maxCUSize - 1) >>
> g_maxLog2CUSize;
> + uint32_t widthInCU = (m_param->sourceWidth + m_param->maxCUSize - 1)
> >> g_maxLog2CUSize;
> + uint32_t heightInCU = (m_param->sourceHeight + m_param->maxCUSize -
> 1) >> g_maxLog2CUSize;
> uint32_t numCUsInFrame = widthInCU * heightInCU;
> analysis2PassFrameData* analysisFrameData = (analysis2PassFrameData*)
> analysis2Pass->analysisFramedata;
>
> diff -r 4436e1ca6f39 -r 68b27c44790d source/encoder/entropy.cpp
> --- a/source/encoder/entropy.cpp Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/encoder/entropy.cpp Wed Jun 21 11:05:02 2017 +0530
> @@ -888,7 +888,7 @@
> uint32_t cuAddr = ctu.getSCUAddr() + absPartIdx;
> X265_CHECK(realEndAddress == slice->realEndAddress(slice->m_endCUAddr),
> "real end address expected\n");
>
> - uint32_t granularityMask = g_maxCUSize - 1;
> + uint32_t granularityMask = ctu.m_encData->m_param->maxCUSize - 1;
> uint32_t cuSize = 1 << ctu.m_log2CUSize[absPartIdx];
> uint32_t rpelx = ctu.m_cuPelX + g_zscanToPelX[absPartIdx] + cuSize;
> uint32_t bpely = ctu.m_cuPelY + g_zscanToPelY[absPartIdx] + cuSize;
> diff -r 4436e1ca6f39 -r 68b27c44790d source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/encoder/frameencoder.cpp Wed Jun 21 11:05:02 2017 +0530
> @@ -124,7 +124,7 @@
> range += !!(m_param->searchMethod < 2); /* diamond/hex range check
> lag */
> range += NTAPS_LUMA / 2; /* subpel filter half-length
> */
> range += 2 + (MotionEstimate::hpelIterationCount(m_param->subpelRefine)
> + 1) / 2; /* subpel refine steps */
> - m_refLagRows = /*(m_param->maxSlices > 1 ? 1 : 0) +*/ 1 + ((range +
> g_maxCUSize - 1) / g_maxCUSize);
> + m_refLagRows = /*(m_param->maxSlices > 1 ? 1 : 0) +*/ 1 + ((range +
> m_param->maxCUSize - 1) / m_param->maxCUSize);
>
> // NOTE: 2 times of numRows because both Encoder and Filter in same
> queue
> if (!WaveFront::init(m_numRows * 2))
> @@ -837,7 +837,7 @@
> }
> else if (m_param->decodedPictureHashSEI == 3)
> {
> - uint32_t cuHeight = g_maxCUSize;
> + uint32_t cuHeight = m_param->maxCUSize;
>
> m_checksum[0] = 0;
>
> @@ -1246,7 +1246,7 @@
>
> uint32_t maxBlockCols = (m_frame->m_fencPic->m_picWidth + (16 - 1))
> / 16;
> uint32_t maxBlockRows = (m_frame->m_fencPic->m_picHeight + (16 - 1))
> / 16;
> - uint32_t noOfBlocks = g_maxCUSize / 16;
> + uint32_t noOfBlocks = m_param->maxCUSize / 16;
> const uint32_t bFirstRowInSlice = ((row == 0) || (m_rows[row -
> 1].sliceId != curRow.sliceId)) ? 1 : 0;
> const uint32_t bLastRowInSlice = ((row == m_numRows - 1) ||
> (m_rows[row + 1].sliceId != curRow.sliceId)) ? 1 : 0;
> const uint32_t sliceId = curRow.sliceId;
> @@ -1325,8 +1325,8 @@
> // TODO: specially case handle on first and last row
>
> // Initialize restrict on MV range in slices
> - tld.analysis.m_sliceMinY = -(int16_t)(rowInSlice * g_maxCUSize * 4) +
> 3 * 4;
> - tld.analysis.m_sliceMaxY = (int16_t)((endRowInSlicePlus1 - 1 - row) *
> (g_maxCUSize * 4) - 4 * 4);
> + tld.analysis.m_sliceMinY = -(int16_t)(rowInSlice * m_param->maxCUSize
> * 4) + 3 * 4;
> + tld.analysis.m_sliceMaxY = (int16_t)((endRowInSlicePlus1 - 1 - row) *
> (m_param->maxCUSize * 4) - 4 * 4);
>
> // Handle single row slice
> if (tld.analysis.m_sliceMaxY < tld.analysis.m_sliceMinY)
> @@ -1482,7 +1482,7 @@
> {
> /* 1 << shift == number of 8x8 blocks at current depth */
> int shift = 2 * (g_maxCUDepth - depth);
> - int cuSize = g_maxCUSize >> depth;
> + int cuSize = m_param->maxCUSize >> depth;
>
> if (cuSize == 8)
> curRow.rowStats.intra8x8Cnt +=
> (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN);
> diff -r 4436e1ca6f39 -r 68b27c44790d source/encoder/framefilter.cpp
> --- a/source/encoder/framefilter.cpp Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/encoder/framefilter.cpp Wed Jun 21 11:05:02 2017 +0530
> @@ -185,8 +185,8 @@
> m_pad[0] = top->m_sps.conformanceWindow.rightOffset;
> m_pad[1] = top->m_sps.conformanceWindow.bottomOffset;
> m_saoRowDelay = m_param->bEnableLoopFilter ? 1 : 0;
> - m_lastHeight = (m_param->sourceHeight % g_maxCUSize) ?
> (m_param->sourceHeight % g_maxCUSize) : g_maxCUSize;
> - m_lastWidth = (m_param->sourceWidth % g_maxCUSize) ?
> (m_param->sourceWidth % g_maxCUSize) : g_maxCUSize;
> + m_lastHeight = (m_param->sourceHeight % m_param->maxCUSize) ?
> (m_param->sourceHeight % m_param->maxCUSize) : m_param->maxCUSize;
> + m_lastWidth = (m_param->sourceWidth % m_param->maxCUSize) ?
> (m_param->sourceWidth % m_param->maxCUSize) : m_param->maxCUSize;
> integralCompleted.set(0);
>
> if (m_param->bEnableSsim)
> @@ -214,7 +214,7 @@
> for(int row = 0; row < numRows; row++)
> {
> // Setting maximum bound information
> - m_parallelFilter[row].m_rowHeight = (row == numRows - 1) ?
> m_lastHeight : g_maxCUSize;
> + m_parallelFilter[row].m_rowHeight = (row == numRows - 1) ?
> m_lastHeight : m_param->maxCUSize;
> m_parallelFilter[row].m_row = row;
> m_parallelFilter[row].m_rowAddr = row * numCols;
> m_parallelFilter[row].m_frameFilter = this;
> @@ -300,7 +300,7 @@
> void FrameFilter::ParallelFilter::copySaoAboveRef(const CUData *ctu,
> PicYuv* reconPic, uint32_t cuAddr, int col)
> {
> // Copy SAO Top Reference Pixels
> - int ctuWidth = g_maxCUSize;
> + int ctuWidth = ctu->m_encData->m_param->maxCUSize;
> const pixel* recY = reconPic->getPlaneAddr(0, cuAddr) -
> (ctu->m_bFirstRowInSlice ? 0 : reconPic->m_stride);
>
> // Luma
> @@ -701,8 +701,8 @@
> intptr_t stride2 = m_frame->m_fencPic->m_stride;
> uint32_t bEnd = ((row) == (this->m_numRows - 1));
> uint32_t bStart = (row == 0);
> - uint32_t minPixY = row * g_maxCUSize - 4 * !bStart;
> - uint32_t maxPixY = X265_MIN((row + 1) * g_maxCUSize - 4 * !bEnd,
> (uint32_t)m_param->sourceHeight);
> + uint32_t minPixY = row * m_param->maxCUSize - 4 * !bStart;
> + uint32_t maxPixY = X265_MIN((row + 1) * m_param->maxCUSize - 4 *
> !bEnd, (uint32_t)m_param->sourceHeight);
> uint32_t ssim_cnt;
> x265_emms();
>
> @@ -768,7 +768,7 @@
> uint32_t width = reconPic->m_picWidth;
> uint32_t height = m_parallelFilter[row].getCUHeight();
> intptr_t stride = reconPic->m_stride;
> - uint32_t cuHeight = g_maxCUSize;
> + uint32_t cuHeight = m_param->maxCUSize;
>
> if (!row)
> m_frameEncoder->m_checksum[0] = 0;
> @@ -812,18 +812,18 @@
> }
>
> int stride = (int)m_frame->m_reconPic->m_stride;
> - int padX = g_maxCUSize + 32;
> - int padY = g_maxCUSize + 16;
> + int padX = m_param->maxCUSize + 32;
> + int padY = m_param->maxCUSize + 16;
> int numCuInHeight = m_frame->m_encData->m_slice->
> m_sps->numCuInHeight;
> - int maxHeight = numCuInHeight * g_maxCUSize;
> + int maxHeight = numCuInHeight * m_param->maxCUSize;
> int startRow = 0;
>
> if (m_param->interlaceMode)
> - startRow = (row * g_maxCUSize >> 1);
> + startRow = (row * m_param->maxCUSize >> 1);
> else
> - startRow = row * g_maxCUSize;
> + startRow = row * m_param->maxCUSize;
>
> - int height = lastRow ? (maxHeight + g_maxCUSize *
> m_param->interlaceMode) : (((row + m_param->interlaceMode) * g_maxCUSize) +
> g_maxCUSize);
> + int height = lastRow ? (maxHeight + m_param->maxCUSize *
> m_param->interlaceMode) : (((row + m_param->interlaceMode) *
> m_param->maxCUSize) + m_param->maxCUSize);
>
> if (!row)
> {
> diff -r 4436e1ca6f39 -r 68b27c44790d source/encoder/framefilter.h
> --- a/source/encoder/framefilter.h Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/encoder/framefilter.h Wed Jun 21 11:05:02 2017 +0530
> @@ -123,7 +123,7 @@
>
> uint32_t getCUWidth(int colNum) const
> {
> - return (colNum == (int)m_numCols - 1) ? m_lastWidth : g_maxCUSize;
> + return (colNum == (int)m_numCols - 1) ? m_lastWidth :
> m_param->maxCUSize;
> }
>
> void init(Encoder *top, FrameEncoder *frame, int numRows, uint32_t
> numCols);
> diff -r 4436e1ca6f39 -r 68b27c44790d source/encoder/reference.cpp
> --- a/source/encoder/reference.cpp Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/encoder/reference.cpp Wed Jun 21 11:05:02 2017 +0530
> @@ -72,12 +72,12 @@
>
> if (wp)
> {
> - uint32_t numCUinHeight = (reconPic->m_picHeight + g_maxCUSize -
> 1) / g_maxCUSize;
> + uint32_t numCUinHeight = (reconPic->m_picHeight + p.maxCUSize -
> 1) / p.maxCUSize;
>
> int marginX = reconPic->m_lumaMarginX;
> int marginY = reconPic->m_lumaMarginY;
> intptr_t stride = reconPic->m_stride;
> - int cuHeight = g_maxCUSize;
> + int cuHeight = p.maxCUSize;
>
> for (int c = 0; c < (p.internalCsp != X265_CSP_I400 &&
> recPic->m_picCsp != X265_CSP_I400 ? numInterpPlanes : 1); c++)
> {
> @@ -127,15 +127,15 @@
> int marginY = reconPic->m_lumaMarginY;
> intptr_t stride = reconPic->m_stride;
> int width = reconPic->m_picWidth;
> - int height = (finishedRows - numWeightedRows) * g_maxCUSize;
> + int height = (finishedRows - numWeightedRows) *
> reconPic->m_param->maxCUSize;
> /* the last row may be partial height */
> if (finishedRows == maxNumRows - 1)
> {
> - const int leftRows = (reconPic->m_picHeight & (g_maxCUSize - 1));
> + const int leftRows = (reconPic->m_picHeight &
> (reconPic->m_param->maxCUSize - 1));
>
> - height += leftRows ? leftRows : g_maxCUSize;
> + height += leftRows ? leftRows : reconPic->m_param->maxCUSize;
> }
> - int cuHeight = g_maxCUSize;
> + int cuHeight = reconPic->m_param->maxCUSize;
>
> for (int c = 0; c < numInterpPlanes; c++)
> {
> diff -r 4436e1ca6f39 -r 68b27c44790d source/encoder/sao.cpp
> --- a/source/encoder/sao.cpp Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/encoder/sao.cpp Wed Jun 21 11:05:02 2017 +0530
> @@ -98,8 +98,8 @@
> m_hChromaShift = CHROMA_H_SHIFT(param->internalCsp);
> m_vChromaShift = CHROMA_V_SHIFT(param->internalCsp);
>
> - m_numCuInWidth = (m_param->sourceWidth + g_maxCUSize - 1) /
> g_maxCUSize;
> - m_numCuInHeight = (m_param->sourceHeight + g_maxCUSize - 1) /
> g_maxCUSize;
> + m_numCuInWidth = (m_param->sourceWidth + m_param->maxCUSize - 1) /
> m_param->maxCUSize;
> + m_numCuInHeight = (m_param->sourceHeight + m_param->maxCUSize - 1) /
> m_param->maxCUSize;
>
> const pixel maxY = (1 << X265_DEPTH) - 1;
> const pixel rangeExt = maxY >> 1;
> @@ -107,12 +107,12 @@
>
> for (int i = 0; i < (param->internalCsp != X265_CSP_I400 ? 3 : 1);
> i++)
> {
> - CHECKED_MALLOC(m_tmpL1[i], pixel, g_maxCUSize + 1);
> - CHECKED_MALLOC(m_tmpL2[i], pixel, g_maxCUSize + 1);
> + CHECKED_MALLOC(m_tmpL1[i], pixel, m_param->maxCUSize + 1);
> + CHECKED_MALLOC(m_tmpL2[i], pixel, m_param->maxCUSize + 1);
>
> // SAO asm code will read 1 pixel before and after, so pad by 2
> // NOTE: m_param->sourceWidth+2 enough, to avoid condition check
> in copySaoAboveRef(), I alloc more up to 63 bytes in here
> - CHECKED_MALLOC(m_tmpU[i], pixel, m_numCuInWidth * g_maxCUSize + 2
> + 32);
> + CHECKED_MALLOC(m_tmpU[i], pixel, m_numCuInWidth *
> m_param->maxCUSize + 2 + 32);
> m_tmpU[i] += 1;
> }
>
> @@ -279,8 +279,8 @@
> uint32_t picWidth = m_param->sourceWidth;
> uint32_t picHeight = m_param->sourceHeight;
> const CUData* cu = m_frame->m_encData->getPicCTU(addr);
> - int ctuWidth = g_maxCUSize;
> - int ctuHeight = g_maxCUSize;
> + int ctuWidth = m_param->maxCUSize;
> + int ctuHeight = m_param->maxCUSize;
> uint32_t lpelx = cu->m_cuPelX;
> uint32_t tpely = cu->m_cuPelY;
> const uint32_t firstRowInSlice = cu->m_bFirstRowInSlice;
> @@ -573,8 +573,8 @@
> {
> PicYuv* reconPic = m_frame->m_reconPic;
> intptr_t stride = reconPic->m_stride;
> - int ctuWidth = g_maxCUSize;
> - int ctuHeight = g_maxCUSize;
> + int ctuWidth = m_param->maxCUSize;
> + int ctuHeight = m_param->maxCUSize;
>
> int addr = idxY * m_numCuInWidth + idxX;
> pixel* rec = reconPic->getLumaAddr(addr);
> @@ -633,8 +633,8 @@
> {
> PicYuv* reconPic = m_frame->m_reconPic;
> intptr_t stride = reconPic->m_strideC;
> - int ctuWidth = g_maxCUSize;
> - int ctuHeight = g_maxCUSize;
> + int ctuWidth = m_param->maxCUSize;
> + int ctuHeight = m_param->maxCUSize;
>
> {
> ctuWidth >>= m_hChromaShift;
> @@ -744,8 +744,8 @@
> intptr_t stride = plane ? reconPic->m_strideC : reconPic->m_stride;
> uint32_t picWidth = m_param->sourceWidth;
> uint32_t picHeight = m_param->sourceHeight;
> - int ctuWidth = g_maxCUSize;
> - int ctuHeight = g_maxCUSize;
> + int ctuWidth = m_param->maxCUSize;
> + int ctuHeight = m_param->maxCUSize;
> uint32_t lpelx = cu->m_cuPelX;
> uint32_t tpely = cu->m_cuPelY;
> const uint32_t firstRowInSlice = cu->m_bFirstRowInSlice;
> @@ -928,8 +928,8 @@
> intptr_t stride = reconPic->m_stride;
> uint32_t picWidth = m_param->sourceWidth;
> uint32_t picHeight = m_param->sourceHeight;
> - int ctuWidth = g_maxCUSize;
> - int ctuHeight = g_maxCUSize;
> + int ctuWidth = m_param->maxCUSize;
> + int ctuHeight = m_param->maxCUSize;
> uint32_t lpelx = cu->m_cuPelX;
> uint32_t tpely = cu->m_cuPelY;
> const uint32_t firstRowInSlice = cu->m_bFirstRowInSlice;
> diff -r 4436e1ca6f39 -r 68b27c44790d source/encoder/search.cpp
> --- a/source/encoder/search.cpp Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/encoder/search.cpp Wed Jun 21 11:05:02 2017 +0530
> @@ -120,8 +120,8 @@
> CHECKED_MALLOC(m_rqt[i].coeffRQT[0], coeff_t, sizeL + sizeC
> * 2);
> m_rqt[i].coeffRQT[1] = m_rqt[i].coeffRQT[0] + sizeL;
> m_rqt[i].coeffRQT[2] = m_rqt[i].coeffRQT[0] + sizeL + sizeC;
> - ok &= m_rqt[i].reconQtYuv.create(g_maxCUSize,
> param.internalCsp);
> - ok &= m_rqt[i].resiQtYuv.create(g_maxCUSize,
> param.internalCsp);
> + ok &= m_rqt[i].reconQtYuv.create(param.maxCUSize,
> param.internalCsp);
> + ok &= m_rqt[i].resiQtYuv.create(param.maxCUSize,
> param.internalCsp);
> }
> }
> else
> @@ -130,15 +130,15 @@
> {
> CHECKED_MALLOC(m_rqt[i].coeffRQT[0], coeff_t, sizeL);
> m_rqt[i].coeffRQT[1] = m_rqt[i].coeffRQT[2] = NULL;
> - ok &= m_rqt[i].reconQtYuv.create(g_maxCUSize,
> param.internalCsp);
> - ok &= m_rqt[i].resiQtYuv.create(g_maxCUSize,
> param.internalCsp);
> + ok &= m_rqt[i].reconQtYuv.create(param.maxCUSize,
> param.internalCsp);
> + ok &= m_rqt[i].resiQtYuv.create(param.maxCUSize,
> param.internalCsp);
> }
> }
>
> /* the rest of these buffers are indexed per-depth */
> for (uint32_t i = 0; i <= g_maxCUDepth; i++)
> {
> - int cuSize = g_maxCUSize >> i;
> + int cuSize = param.maxCUSize >> i;
> ok &= m_rqt[i].tmpResiYuv.create(cuSize, param.internalCsp);
> ok &= m_rqt[i].tmpPredYuv.create(cuSize, param.internalCsp);
> ok &= m_rqt[i].bidirPredYuv[0].create(cuSize, param.internalCsp);
> @@ -2593,11 +2593,11 @@
> cu.clipMv(mvmax);
>
> if (cu.m_encData->m_param->bIntraRefresh && m_slice->m_sliceType ==
> P_SLICE &&
> - cu.m_cuPelX / g_maxCUSize < m_frame->m_encData->m_pir.pirStartCol
> &&
> + cu.m_cuPelX / m_param->maxCUSize < m_frame->m_encData->m_pir.pirStartCol
> &&
> m_slice->m_refFrameList[0][0]->m_encData->m_pir.pirEndCol <
> m_slice->m_sps->numCuInWidth)
> {
> int safeX, maxSafeMv;
> - safeX = m_slice->m_refFrameList[0][0]->m_encData->m_pir.pirEndCol
> * g_maxCUSize - 3;
> + safeX = m_slice->m_refFrameList[0][0]->m_encData->m_pir.pirEndCol
> * m_param->maxCUSize - 3;
> maxSafeMv = (safeX - cu.m_cuPelX) * 4;
> mvmax.x = X265_MIN(mvmax.x, maxSafeMv);
> mvmin.x = X265_MIN(mvmin.x, maxSafeMv);
> diff -r 4436e1ca6f39 -r 68b27c44790d source/encoder/slicetype.cpp
> --- a/source/encoder/slicetype.cpp Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/encoder/slicetype.cpp Wed Jun 21 11:05:02 2017 +0530
> @@ -907,7 +907,7 @@
> curFrame->m_lowres.lowresCostForRc = curFrame->m_lowres.lowresCosts[b
> - p0][p1 - b];
> uint32_t lowresRow = 0, lowresCol = 0, lowresCuIdx = 0, sum = 0,
> intraSum = 0;
> uint32_t scale = m_param->maxCUSize / (2 * X265_LOWRES_CU_SIZE);
> - uint32_t numCuInHeight = (m_param->sourceHeight + g_maxCUSize -
> 1) / g_maxCUSize;
> + uint32_t numCuInHeight = (m_param->sourceHeight +
> m_param->maxCUSize - 1) / m_param->maxCUSize;
> uint32_t widthInLowresCu = (uint32_t)m_8x8Width, heightInLowresCu
> = (uint32_t)m_8x8Height;
> double *qp_offset = 0;
> /* Factor in qpoffsets based on Aq/Cutree in CU costs */
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20170622/44410e7d/attachment-0001.html>
More information about the x265-devel mailing list