[x265] [PATCH 1 of 7] replace global g_maxCUSize with param->maxCUSize

Thu Jun 22 07:25:27 CEST 2017

On Wed, Jun 21, 2017 at 12:14 PM, <kavitha at multicorewareinc.com> wrote:

> # HG changeset patch
> # User Kavitha Sampath <kavitha at multicorewareinc.com>
> # Date 1498023302 -19800
> #      Wed Jun 21 11:05:02 2017 +0530
> # Node ID 68b27c44790d200ceb95b26962a84c8230d29eba
> # Parent  4436e1ca6f3987292dea608c7ecb2780fdcfc4df
> replace global g_maxCUSize with param->maxCUSize
>

This will enable running multiple instances of x265 in the same application
with different maxCUSize values. Thanks!
Pushed the full set of 7 patches into the default branch.

>
> diff -r 4436e1ca6f39 -r 68b27c44790d source/common/cudata.cpp
> --- a/source/common/cudata.cpp  Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/common/cudata.cpp  Wed Jun 21 11:05:02 2017 +0530
> @@ -119,8 +119,9 @@
>      memset(this, 0, sizeof(*this));
>  }
>
> -void CUData::initialize(const CUDataMemPool& dataPool, uint32_t depth,
> int csp, int instance)
> +void CUData::initialize(const CUDataMemPool& dataPool, uint32_t depth,
> const x265_param& param, int instance)
>  {
> +    int csp = param.internalCsp;
>      m_chromaFormat  = csp;
>      m_hChromaShift  = CHROMA_H_SHIFT(csp);
>      m_vChromaShift  = CHROMA_V_SHIFT(csp);
> @@ -221,7 +222,7 @@
>
>          m_distortion = dataPool.distortionMemBlock + instance *
> m_numPartitions;
>
> -        uint32_t cuSize = g_maxCUSize >> depth;
> +        uint32_t cuSize = param.maxCUSize >> depth;
>          m_trCoeff[0] = dataPool.trCoeffMemBlock + instance * (cuSize *
> cuSize);
>          m_trCoeff[1] = m_trCoeff[2] = 0;
>          m_transformSkip[1] = m_transformSkip[2] = m_cbf[1] = m_cbf[2] = 0;
> @@ -263,7 +264,7 @@
>
>          m_distortion = dataPool.distortionMemBlock + instance *
> m_numPartitions;
>
> -        uint32_t cuSize = g_maxCUSize >> depth;
> +        uint32_t cuSize = param.maxCUSize >> depth;
>          uint32_t sizeL = cuSize * cuSize;
>          uint32_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift); //
> block chroma part
>          m_trCoeff[0] = dataPool.trCoeffMemBlock + instance * (sizeL +
> sizeC * 2);
> @@ -1917,10 +1918,10 @@
>      uint32_t offset = 8;
>
>      int16_t xmax = (int16_t)((m_slice->m_sps->picWidthInLumaSamples +
> offset - m_cuPelX - 1) << mvshift);
> -    int16_t xmin = -(int16_t)((g_maxCUSize + offset + m_cuPelX - 1) <<
> mvshift);
> +    int16_t xmin = -(int16_t)((m_encData->m_param->maxCUSize + offset +
> m_cuPelX - 1) << mvshift);
>
>      int16_t ymax = (int16_t)((m_slice->m_sps->picHeightInLumaSamples +
> offset - m_cuPelY - 1) << mvshift);
> -    int16_t ymin = -(int16_t)((g_maxCUSize + offset + m_cuPelY - 1) <<
> mvshift);
> +    int16_t ymin = -(int16_t)((m_encData->m_param->maxCUSize + offset +
> m_cuPelY - 1) << mvshift);
>
>      outMV.x = X265_MIN(xmax, X265_MAX(xmin, outMV.x));
>      outMV.y = X265_MIN(ymax, X265_MAX(ymin, outMV.y));
> diff -r 4436e1ca6f39 -r 68b27c44790d source/common/cudata.h
> --- a/source/common/cudata.h    Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/common/cudata.h    Wed Jun 21 11:05:02 2017 +0530
> @@ -225,7 +225,7 @@
>
>      CUData();
>
> -    void     initialize(const CUDataMemPool& dataPool, uint32_t depth,
> int csp, int instance);
> +    void     initialize(const CUDataMemPool& dataPool, uint32_t depth,
> const x265_param& param, int instance);
>      static void calcCTUGeoms(uint32_t ctuWidth, uint32_t ctuHeight,
> uint32_t maxCUSize, uint32_t minCUSize, CUGeom
> cuDataArray[CUGeom::MAX_GEOMS]);
>
>      void     initCTU(const Frame& frame, uint32_t cuAddr, int qp,
> uint32_t firstRowInSlice, uint32_t lastRowInSlice, uint32_t lastCUInSlice);
> @@ -350,10 +350,10 @@
>
>      CUDataMemPool() { charMemBlock = NULL; trCoeffMemBlock = NULL;
> mvMemBlock = NULL; distortionMemBlock = NULL; }
>
> -    bool create(uint32_t depth, uint32_t csp, uint32_t numInstances)
> +    bool create(uint32_t depth, uint32_t csp, uint32_t numInstances,
> const x265_param& param)
>      {
>          uint32_t numPartition = NUM_4x4_PARTITIONS >> (depth * 2);
> -        uint32_t cuSize = g_maxCUSize >> depth;
> +        uint32_t cuSize = param.maxCUSize >> depth;
>          uint32_t sizeL = cuSize * cuSize;
>          if (csp == X265_CSP_I400)
>          {
> diff -r 4436e1ca6f39 -r 68b27c44790d source/common/frame.cpp
> --- a/source/common/frame.cpp   Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/common/frame.cpp   Wed Jun 21 11:05:02 2017 +0530
> @@ -63,8 +63,8 @@
>
>      if (param->bCTUInfo)
>      {
> -        uint32_t widthInCTU = (m_param->sourceWidth + g_maxCUSize - 1) >>
> g_maxLog2CUSize;
> -        uint32_t heightInCTU = (m_param->sourceHeight + g_maxCUSize - 1)
> >> g_maxLog2CUSize;
> +        uint32_t widthInCTU = (m_param->sourceWidth + param->maxCUSize -
> 1) >> g_maxLog2CUSize;
> +        uint32_t heightInCTU = (m_param->sourceHeight +  param->maxCUSize
> - 1) >> g_maxLog2CUSize;
>          uint32_t numCTUsInFrame = widthInCTU * heightInCTU;
>          CHECKED_MALLOC_ZERO(m_addOnDepth, uint8_t *, numCTUsInFrame);
>          CHECKED_MALLOC_ZERO(m_addOnCtuInfo, uint8_t *, numCTUsInFrame);
> @@ -77,11 +77,10 @@
>          }
>      }
>
> -    if (m_fencPic->create(param->sourceWidth, param->sourceHeight,
> param->internalCsp) &&
> -        m_lowres.create(m_fencPic, param->bframes, !!param->rc.aqMode ||
> !!param->bAQMotion, param->rc.qgSize))
> +    if (m_fencPic->create(param) && m_lowres.create(m_fencPic,
> param->bframes, !!param->rc.aqMode || !!param->bAQMotion, param->rc.qgSize))
>      {
>          X265_CHECK((m_reconColCount == NULL), "m_reconColCount was
> initialized");
> -        m_numRows = (m_fencPic->m_picHeight + g_maxCUSize - 1)  /
> g_maxCUSize;
> +        m_numRows = (m_fencPic->m_picHeight + param->maxCUSize - 1)  /
> param->maxCUSize;
>          m_reconRowFlag = new ThreadSafeInteger[m_numRows];
>          m_reconColCount = new ThreadSafeInteger[m_numRows];
>
> @@ -107,12 +106,12 @@
>      m_reconPic = new PicYuv;
>      m_param = param;
>      m_encData->m_reconPic = m_reconPic;
> -    bool ok = m_encData->create(*param, sps, m_fencPic->m_picCsp) &&
> m_reconPic->create(param->sourceWidth, param->sourceHeight,
> param->internalCsp);
> +    bool ok = m_encData->create(*param, sps, m_fencPic->m_picCsp) &&
> m_reconPic->create(param);
>      if (ok)
>      {
>          /* initialize right border of m_reconpicYuv as SAO may read
> beyond the
>           * end of the picture accessing uninitialized pixels */
> -        int maxHeight = sps.numCuInHeight * g_maxCUSize;
> +        int maxHeight = sps.numCuInHeight * param->maxCUSize;
>          memset(m_reconPic->m_picOrg[0], 0, sizeof(pixel)*
> m_reconPic->m_stride * maxHeight);
>
>          /* use pre-calculated cu/pu offsets cached in the SPS structure */
> @@ -189,8 +188,8 @@
>
>      if (m_ctuInfo)
>      {
> -        uint32_t widthInCU = (m_param->sourceWidth + g_maxCUSize - 1) >>
> g_maxLog2CUSize;
> -        uint32_t heightInCU = (m_param->sourceHeight + g_maxCUSize - 1)
> >> g_maxLog2CUSize;
> +        uint32_t widthInCU = (m_param->sourceWidth + m_param->maxCUSize -
> 1) >> g_maxLog2CUSize;
> +        uint32_t heightInCU = (m_param->sourceHeight + m_param->maxCUSize
> - 1) >> g_maxLog2CUSize;
>          uint32_t numCUsInFrame = widthInCU * heightInCU;
>          for (uint32_t i = 0; i < numCUsInFrame; i++)
>          {
> diff -r 4436e1ca6f39 -r 68b27c44790d source/common/framedata.cpp
> --- a/source/common/framedata.cpp       Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/common/framedata.cpp       Wed Jun 21 11:05:02 2017 +0530
> @@ -41,9 +41,9 @@
>      if (param.rc.bStatWrite)
>          m_spsrps = const_cast<RPS*>(sps.spsrps);
>
> -    m_cuMemPool.create(0, param.internalCsp, sps.numCUsInFrame);
> +    m_cuMemPool.create(0, param.internalCsp, sps.numCUsInFrame, param);
>      for (uint32_t ctuAddr = 0; ctuAddr < sps.numCUsInFrame; ctuAddr++)
> -        m_picCTU[ctuAddr].initialize(m_cuMemPool, 0, param.internalCsp,
> ctuAddr);
> +        m_picCTU[ctuAddr].initialize(m_cuMemPool, 0, param, ctuAddr);
>
>      CHECKED_MALLOC_ZERO(m_cuStat, RCStatCU, sps.numCUsInFrame);
>      CHECKED_MALLOC(m_rowStat, RCStatRow, sps.numCuInHeight);
> diff -r 4436e1ca6f39 -r 68b27c44790d source/common/picyuv.cpp
> --- a/source/common/picyuv.cpp  Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/common/picyuv.cpp  Wed Jun 21 11:05:02 2017 +0530
> @@ -69,22 +69,26 @@
>      m_vChromaShift = 0;
>  }
>
> -bool PicYuv::create(uint32_t picWidth, uint32_t picHeight, uint32_t
> picCsp, pixel *pixelbuf)
> +bool PicYuv::create(x265_param* param, pixel *pixelbuf)
>  {
> +    m_param = param;
> +    uint32_t picWidth = m_param->sourceWidth;
> +    uint32_t picHeight = m_param->sourceHeight;
> +    uint32_t picCsp = m_param->internalCsp;
>      m_picWidth  = picWidth;
>      m_picHeight = picHeight;
>      m_hChromaShift = CHROMA_H_SHIFT(picCsp);
>      m_vChromaShift = CHROMA_V_SHIFT(picCsp);
>      m_picCsp = picCsp;
>
> -    uint32_t numCuInWidth = (m_picWidth + g_maxCUSize - 1)  / g_maxCUSize;
> -    uint32_t numCuInHeight = (m_picHeight + g_maxCUSize - 1) /
> g_maxCUSize;
> +    uint32_t numCuInWidth = (m_picWidth + param->maxCUSize - 1)  /
> param->maxCUSize;
> +    uint32_t numCuInHeight = (m_picHeight + param->maxCUSize - 1) /
> param->maxCUSize;
>
> -    m_lumaMarginX = g_maxCUSize + 32; // search margin and 8-tap filter
> half-length, padded for 32-byte alignment
> -    m_lumaMarginY = g_maxCUSize + 16; // margin for 8-tap filter and
> infinite padding
> -    m_stride = (numCuInWidth * g_maxCUSize) + (m_lumaMarginX << 1);
> +    m_lumaMarginX = param->maxCUSize + 32; // search margin and 8-tap
> filter half-length, padded for 32-byte alignment
> +    m_lumaMarginY = param->maxCUSize + 16; // margin for 8-tap filter and
> infinite padding
> +    m_stride = (numCuInWidth * param->maxCUSize) + (m_lumaMarginX << 1);
>
> -    int maxHeight = numCuInHeight * g_maxCUSize;
> +    int maxHeight = numCuInHeight * param->maxCUSize;
>      if (pixelbuf)
>          m_picOrg[0] = pixelbuf;
>      else
> @@ -97,7 +101,7 @@
>      {
>          m_chromaMarginX = m_lumaMarginX;  // keep 16-byte alignment for
> chroma CTUs
>          m_chromaMarginY = m_lumaMarginY >> m_vChromaShift;
> -        m_strideC = ((numCuInWidth * g_maxCUSize) >> m_hChromaShift) +
> (m_chromaMarginX * 2);
> +        m_strideC = ((numCuInWidth * m_param->maxCUSize) >>
> m_hChromaShift) + (m_chromaMarginX * 2);
>
>          CHECKED_MALLOC(m_picBuf[1], pixel, m_strideC * ((maxHeight >>
> m_vChromaShift) + (m_chromaMarginY * 2)));
>          CHECKED_MALLOC(m_picBuf[2], pixel, m_strideC * ((maxHeight >>
> m_vChromaShift) + (m_chromaMarginY * 2)));
> @@ -124,14 +128,14 @@
>      m_vChromaShift = CHROMA_V_SHIFT(picCsp);
>      m_picCsp = picCsp;
>
> -    uint32_t numCuInWidth = (m_picWidth + g_maxCUSize - 1) / g_maxCUSize;
> -    uint32_t numCuInHeight = (m_picHeight + g_maxCUSize - 1) /
> g_maxCUSize;
> +    uint32_t numCuInWidth = (m_picWidth + m_param->maxCUSize - 1) /
> m_param->maxCUSize;
> +    uint32_t numCuInHeight = (m_picHeight + m_param->maxCUSize - 1) /
> m_param->maxCUSize;
>
> -    m_lumaMarginX = g_maxCUSize + 32; // search margin and 8-tap filter
> half-length, padded for 32-byte alignment
> -    m_lumaMarginY = g_maxCUSize + 16; // margin for 8-tap filter and
> infinite padding
> -    m_stride = (numCuInWidth * g_maxCUSize) + (m_lumaMarginX << 1);
> +    m_lumaMarginX = m_param->maxCUSize + 32; // search margin and 8-tap
> filter half-length, padded for 32-byte alignment
> +    m_lumaMarginY = m_param->maxCUSize + 16; // margin for 8-tap filter
> and infinite padding
> +    m_stride = (numCuInWidth * m_param->maxCUSize) + (m_lumaMarginX << 1);
>
> -    int maxHeight = numCuInHeight * g_maxCUSize;
> +    int maxHeight = numCuInHeight * m_param->maxCUSize;
>      int bufLen = (int)(m_stride * (maxHeight + (m_lumaMarginY * 2)));
>
>      return bufLen;
> @@ -152,8 +156,8 @@
>          {
>              for (uint32_t cuCol = 0; cuCol < sps.numCuInWidth; cuCol++)
>              {
> -                m_cuOffsetY[cuRow * sps.numCuInWidth + cuCol] = m_stride
> * cuRow * g_maxCUSize + cuCol * g_maxCUSize;
> -                m_cuOffsetC[cuRow * sps.numCuInWidth + cuCol] = m_strideC
> * cuRow * (g_maxCUSize >> m_vChromaShift) + cuCol * (g_maxCUSize >>
> m_hChromaShift);
> +                m_cuOffsetY[cuRow * sps.numCuInWidth + cuCol] = m_stride
> * cuRow * m_param->maxCUSize + cuCol * m_param->maxCUSize;
> +                m_cuOffsetC[cuRow * sps.numCuInWidth + cuCol] = m_strideC
> * cuRow * (m_param->maxCUSize >> m_vChromaShift) + cuCol *
> (m_param->maxCUSize >> m_hChromaShift);
>              }
>          }
>
> @@ -172,7 +176,7 @@
>          CHECKED_MALLOC(m_cuOffsetY, intptr_t, sps.numCuInWidth *
> sps.numCuInHeight);
>          for (uint32_t cuRow = 0; cuRow < sps.numCuInHeight; cuRow++)
>          for (uint32_t cuCol = 0; cuCol < sps.numCuInWidth; cuCol++)
> -            m_cuOffsetY[cuRow * sps.numCuInWidth + cuCol] = m_stride *
> cuRow * g_maxCUSize + cuCol * g_maxCUSize;
> +            m_cuOffsetY[cuRow * sps.numCuInWidth + cuCol] = m_stride *
> cuRow * m_param->maxCUSize + cuCol * m_param->maxCUSize;
>
>          CHECKED_MALLOC(m_buOffsetY, intptr_t, (size_t)numPartitions);
>          for (uint32_t idx = 0; idx < numPartitions; ++idx)
> diff -r 4436e1ca6f39 -r 68b27c44790d source/common/picyuv.h
> --- a/source/common/picyuv.h    Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/common/picyuv.h    Wed Jun 21 11:05:02 2017 +0530
> @@ -71,10 +71,11 @@
>      pixel   m_maxChromaVLevel;
>      pixel   m_minChromaVLevel;
>      double  m_avgChromaVLevel;
> +    x265_param *m_param;
>
>      PicYuv();
>
> -    bool  create(uint32_t picWidth, uint32_t picHeight, uint32_t csp,
> pixel *pixelbuf = NULL);
> +    bool  create(x265_param* param, pixel *pixelbuf = NULL);
>      bool  createOffsets(const SPS& sps);
>      void  destroy();
>      int   getLumaBufLen(uint32_t picWidth, uint32_t picHeight, uint32_t
> picCsp);
> diff -r 4436e1ca6f39 -r 68b27c44790d source/common/slice.cpp
> --- a/source/common/slice.cpp   Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/common/slice.cpp   Wed Jun 21 11:05:02 2017 +0530
> @@ -187,8 +187,8 @@
>      // Calculate end address
>      uint32_t internalAddress = (endCUAddr - 1) % NUM_4x4_PARTITIONS;
>      uint32_t externalAddress = (endCUAddr - 1) / NUM_4x4_PARTITIONS;
> -    uint32_t xmax = m_sps->picWidthInLumaSamples - (externalAddress %
> m_sps->numCuInWidth) * g_maxCUSize;
> -    uint32_t ymax = m_sps->picHeightInLumaSamples - (externalAddress /
> m_sps->numCuInWidth) * g_maxCUSize;
> +    uint32_t xmax = m_sps->picWidthInLumaSamples - (externalAddress %
> m_sps->numCuInWidth) * m_param->maxCUSize;
> +    uint32_t ymax = m_sps->picHeightInLumaSamples - (externalAddress /
> m_sps->numCuInWidth) * m_param->maxCUSize;
>
>      while (g_zscanToPelX[internalAddress] >= xmax ||
> g_zscanToPelY[internalAddress] >= ymax)
>          internalAddress--;
> diff -r 4436e1ca6f39 -r 68b27c44790d source/common/slice.h
> --- a/source/common/slice.h     Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/common/slice.h     Wed Jun 21 11:05:02 2017 +0530
> @@ -360,6 +360,7 @@
>      int         m_iPPSQpMinus26;
>      int         numRefIdxDefault[2];
>      int         m_iNumRPSInSPS;
> +    const x265_param *m_param;
>
>      Slice()
>      {
> diff -r 4436e1ca6f39 -r 68b27c44790d source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp       Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/encoder/analysis.cpp       Wed Jun 21 11:05:02 2017 +0530
> @@ -90,19 +90,19 @@
>      cacheCost = X265_MALLOC(uint64_t, costArrSize);
>
>      int csp = m_param->internalCsp;
> -    uint32_t cuSize = g_maxCUSize;
> +    uint32_t cuSize = m_param->maxCUSize;
>
>      bool ok = true;
>      for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++, cuSize >>= 1)
>      {
>          ModeDepth &md = m_modeDepth[depth];
>
> -        md.cuMemPool.create(depth, csp, MAX_PRED_TYPES);
> +        md.cuMemPool.create(depth, csp, MAX_PRED_TYPES, *m_param);
>          ok &= md.fencYuv.create(cuSize, csp);
>
>          for (int j = 0; j < MAX_PRED_TYPES; j++)
>          {
> -            md.pred[j].cu.initialize(md.cuMemPool, depth, csp, j);
> +            md.pred[j].cu.initialize(md.cuMemPool, depth, *m_param, j);
>              ok &= md.pred[j].predYuv.create(cuSize, csp);
>              ok &= md.pred[j].reconYuv.create(cuSize, csp);
>              md.pred[j].fencYuv = &md.fencYuv;
> @@ -236,8 +236,8 @@
>      else
>      {
>          if (m_param->bIntraRefresh && m_slice->m_sliceType == P_SLICE &&
> -            ctu.m_cuPelX / g_maxCUSize >= frame.m_encData->m_pir.
> pirStartCol
> -            && ctu.m_cuPelX / g_maxCUSize < frame.m_encData->m_pir.
> pirEndCol)
> +            ctu.m_cuPelX / m_param->maxCUSize >= frame.m_encData->m_pir.
> pirStartCol
> +            && ctu.m_cuPelX / m_param->maxCUSize < frame.m_encData->m_pir.
> pirEndCol)
>              compressIntraCU(ctu, cuGeom, qp);
>          else if (!m_param->rdLevel)
>          {
> @@ -2440,7 +2440,7 @@
>      int safeX, maxSafeMv;
>      if (m_param->bIntraRefresh && m_slice->m_sliceType == P_SLICE)
>      {
> -        safeX = m_slice->m_refFrameList[0][0]->m_encData->m_pir.pirEndCol
> * g_maxCUSize - 3;
> +        safeX = m_slice->m_refFrameList[0][0]->m_encData->m_pir.pirEndCol
> * m_param->maxCUSize - 3;
>          maxSafeMv = (safeX - tempPred->cu.m_cuPelX) * 4;
>      }
>      for (uint32_t i = 0; i < numMergeCand; ++i)
> @@ -2466,7 +2466,7 @@
>          }
>
>          if (m_param->bIntraRefresh && m_slice->m_sliceType == P_SLICE &&
> -            tempPred->cu.m_cuPelX / g_maxCUSize <
> m_frame->m_encData->m_pir.pirEndCol &&
> +            tempPred->cu.m_cuPelX / m_param->maxCUSize <
> m_frame->m_encData->m_pir.pirEndCol &&
>              candMvField[i][0].mv.x > maxSafeMv)
>              // skip merge candidates which reference beyond safe
> reference area
>              continue;
> @@ -2570,7 +2570,7 @@
>      int safeX, maxSafeMv;
>      if (m_param->bIntraRefresh && m_slice->m_sliceType == P_SLICE)
>      {
> -        safeX = m_slice->m_refFrameList[0][0]->m_encData->m_pir.pirEndCol
> * g_maxCUSize - 3;
> +        safeX = m_slice->m_refFrameList[0][0]->m_encData->m_pir.pirEndCol
> * m_param->maxCUSize - 3;
>          maxSafeMv = (safeX - tempPred->cu.m_cuPelX) * 4;
>      }
>      for (uint32_t i = 0; i < numMergeCand; i++)
> @@ -2611,7 +2611,7 @@
>              triedBZero = true;
>          }
>          if (m_param->bIntraRefresh && m_slice->m_sliceType == P_SLICE &&
> -            tempPred->cu.m_cuPelX / g_maxCUSize <
> m_frame->m_encData->m_pir.pirEndCol &&
> +            tempPred->cu.m_cuPelX / m_param->maxCUSize <
> m_frame->m_encData->m_pir.pirEndCol &&
>              candMvField[i][0].mv.x > maxSafeMv)
>              // skip merge candidates which reference beyond safe
> reference area
>              continue;
> @@ -3236,7 +3236,7 @@
>          uint32_t block_x = ctu.m_cuPelX + g_zscanToPelX[cuGeom.
> absPartIdx];
>          uint32_t block_y = ctu.m_cuPelY + g_zscanToPelY[cuGeom.
> absPartIdx];
>          uint32_t maxCols = (m_frame->m_fencPic->m_picWidth + (loopIncr -
> 1)) / loopIncr;
> -        uint32_t blockSize = g_maxCUSize >> cuGeom.depth;
> +        uint32_t blockSize = m_param->maxCUSize >> cuGeom.depth;
>          double qp_offset = 0;
>          uint32_t cnt = 0;
>          uint32_t idx;
> diff -r 4436e1ca6f39 -r 68b27c44790d source/encoder/api.cpp
> --- a/source/encoder/api.cpp    Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/encoder/api.cpp    Wed Jun 21 11:05:02 2017 +0530
> @@ -361,8 +361,8 @@
>
>      if (param->analysisMode)
>      {
> -        uint32_t widthInCU       = (param->sourceWidth  + g_maxCUSize -
> 1) >> g_maxLog2CUSize;
> -        uint32_t heightInCU      = (param->sourceHeight + g_maxCUSize -
> 1) >> g_maxLog2CUSize;
> +        uint32_t widthInCU = (param->sourceWidth + param->maxCUSize - 1)
> >> g_maxLog2CUSize;
> +        uint32_t heightInCU = (param->sourceHeight + param->maxCUSize -
> 1) >> g_maxLog2CUSize;
>
>          uint32_t numCUsInFrame   = widthInCU * heightInCU;
>          pic->analysisData.numCUsInFrame = numCUsInFrame;
> diff -r 4436e1ca6f39 -r 68b27c44790d source/encoder/dpb.cpp
> --- a/source/encoder/dpb.cpp    Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/encoder/dpb.cpp    Wed Jun 21 11:05:02 2017 +0530
> @@ -107,8 +107,8 @@
>
>              if (curFrame->m_ctuInfo != NULL)
>              {
> -                uint32_t widthInCU = (curFrame->m_param->sourceWidth +
> g_maxCUSize - 1) >> g_maxLog2CUSize;
> -                uint32_t heightInCU = (curFrame->m_param->sourceHeight +
> g_maxCUSize - 1) >> g_maxLog2CUSize;
> +                uint32_t widthInCU = (curFrame->m_param->sourceWidth +
> curFrame->m_param->maxCUSize - 1) >> g_maxLog2CUSize;
> +                uint32_t heightInCU = (curFrame->m_param->sourceHeight +
> curFrame->m_param->maxCUSize - 1) >> g_maxLog2CUSize;
>                  uint32_t numCUsInFrame = widthInCU * heightInCU;
>                  for (uint32_t i = 0; i < numCUsInFrame; i++)
>                  {
> diff -r 4436e1ca6f39 -r 68b27c44790d source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp        Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/encoder/encoder.cpp        Wed Jun 21 11:05:02 2017 +0530
> @@ -321,8 +321,8 @@
>      else
>          m_scalingList.setupQuantMatrices(m_sps.chromaFormatIdc);
>
> -    int numRows = (m_param->sourceHeight + g_maxCUSize - 1) / g_maxCUSize;
> -    int numCols = (m_param->sourceWidth  + g_maxCUSize - 1) / g_maxCUSize;
> +    int numRows = (m_param->sourceHeight + m_param->maxCUSize - 1) /
> m_param->maxCUSize;
> +    int numCols = (m_param->sourceWidth  + m_param->maxCUSize - 1) /
> m_param->maxCUSize;
>      for (int i = 0; i < m_param->frameNumThreads; i++)
>      {
>          if (!m_frameEncoder[i]->init(this, numRows, numCols))
> @@ -1035,16 +1035,17 @@
>                  Slice* slice = frameEnc->m_encData->m_slice;
>                  slice->m_sps = &m_sps;
>                  slice->m_pps = &m_pps;
> +                slice->m_param = m_param;
>                  slice->m_maxNumMergeCand = m_param->maxNumMergeCand;
>                  slice->m_endCUAddr = slice->realEndAddress(m_sps.numCUsInFrame
> * NUM_4x4_PARTITIONS);
>              }
>
>              if (m_param->searchMethod == X265_SEA &&
> frameEnc->m_lowres.sliceType != X265_TYPE_B)
>              {
> -                int padX = g_maxCUSize + 32;
> -                int padY = g_maxCUSize + 16;
> -                uint32_t numCuInHeight = (frameEnc->m_encData->m_reconPic->m_picHeight
> + g_maxCUSize - 1) / g_maxCUSize;
> -                int maxHeight = numCuInHeight * g_maxCUSize;
> +                int padX = m_param->maxCUSize + 32;
> +                int padY = m_param->maxCUSize + 16;
> +                uint32_t numCuInHeight = (frameEnc->m_encData->m_reconPic->m_picHeight
> + m_param->maxCUSize - 1) / m_param->maxCUSize;
> +                int maxHeight = numCuInHeight * m_param->maxCUSize;
>                  for (int i = 0; i < INTEGRAL_PLANE_NUM; i++)
>                  {
>                      frameEnc->m_encData->m_meBuffer[i] =
> X265_MALLOC(uint32_t, frameEnc->m_reconPic->m_stride * (maxHeight + (2 *
> padY)));
> @@ -1108,8 +1109,8 @@
>                  x265_analysis_data* analysis = &frameEnc->m_analysisData;
>                  analysis->poc = frameEnc->m_poc;
>                  analysis->sliceType = frameEnc->m_lowres.sliceType;
> -                uint32_t widthInCU       = (m_param->sourceWidth  +
> g_maxCUSize - 1) >> g_maxLog2CUSize;
> -                uint32_t heightInCU      = (m_param->sourceHeight +
> g_maxCUSize - 1) >> g_maxLog2CUSize;
> +                uint32_t widthInCU       = (m_param->sourceWidth  +
> m_param->maxCUSize - 1) >> g_maxLog2CUSize;
> +                uint32_t heightInCU      = (m_param->sourceHeight +
> m_param->maxCUSize - 1) >> g_maxLog2CUSize;
>
>                  uint32_t numCUsInFrame   = widthInCU * heightInCU;
>                  analysis->numCUsInFrame  = numCUsInFrame;
> @@ -1182,8 +1183,8 @@
>
>  void Encoder::copyCtuInfo(x265_ctu_info_t** frameCtuInfo, int poc)
>  {
> -    uint32_t widthInCU = (m_param->sourceWidth + g_maxCUSize - 1) >>
> g_maxLog2CUSize;
> -    uint32_t heightInCU = (m_param->sourceHeight + g_maxCUSize - 1) >>
> g_maxLog2CUSize;
> +    uint32_t widthInCU = (m_param->sourceWidth + m_param->maxCUSize - 1)
> >> g_maxLog2CUSize;
> +    uint32_t heightInCU = (m_param->sourceHeight + m_param->maxCUSize -
> 1) >> g_maxLog2CUSize;
>      Frame* curFrame;
>      Frame* prevFrame = NULL;
>      int32_t* frameCTU;
> @@ -1554,7 +1555,7 @@
>      }
>
>      x265_log(m_param, X265_LOG_INFO, "CU: " X265_LL " %dX%d CTUs
> compressed in %.3lf seconds, %.3lf CTUs per worker-second\n",
> -             cuStats.totalCTUs, g_maxCUSize, g_maxCUSize,
> +             cuStats.totalCTUs, m_param->maxCUSize, m_param->maxCUSize,
>               ELAPSED_SEC(totalWorkerTime),
>               cuStats.totalCTUs / ELAPSED_SEC(totalWorkerTime));
>
> @@ -1981,8 +1982,8 @@
>      sps->chromaFormatIdc = m_param->internalCsp;
>      sps->picWidthInLumaSamples = m_param->sourceWidth;
>      sps->picHeightInLumaSamples = m_param->sourceHeight;
> -    sps->numCuInWidth = (m_param->sourceWidth + g_maxCUSize - 1) /
> g_maxCUSize;
> -    sps->numCuInHeight = (m_param->sourceHeight + g_maxCUSize - 1) /
> g_maxCUSize;
> +    sps->numCuInWidth = (m_param->sourceWidth + m_param->maxCUSize - 1) /
> m_param->maxCUSize;
> +    sps->numCuInHeight = (m_param->sourceHeight + m_param->maxCUSize - 1)
> / m_param->maxCUSize;
>      sps->numCUsInFrame = sps->numCuInWidth * sps->numCuInHeight;
>      sps->numPartitions = NUM_4x4_PARTITIONS;
>      sps->numPartInCUSize = 1 << g_unitSizeDepth;
> @@ -2212,7 +2213,7 @@
>          p->lookaheadDepth = p->totalFrames;
>      if (p->bIntraRefresh)
>      {
> -        int numCuInWidth = (m_param->sourceWidth + g_maxCUSize - 1) /
> g_maxCUSize;
> +        int numCuInWidth = (m_param->sourceWidth + m_param->maxCUSize -
> 1) / m_param->maxCUSize;
>          if (p->maxNumReferences > 1)
>          {
>              x265_log(p,  X265_LOG_WARNING, "Max References > 1 +
> intra-refresh is not supported , setting max num references = 1\n");
> @@ -2772,8 +2773,8 @@
>  {
>      analysis->analysisFramedata = NULL;
>      analysis2PassFrameData *analysisFrameData = (analysis2PassFrameData*)
> analysis->analysisFramedata;
> -    uint32_t widthInCU = (m_param->sourceWidth + g_maxCUSize - 1) >>
> g_maxLog2CUSize;
> -    uint32_t heightInCU = (m_param->sourceHeight + g_maxCUSize - 1) >>
> g_maxLog2CUSize;
> +    uint32_t widthInCU = (m_param->sourceWidth + m_param->maxCUSize - 1)
> >> g_maxLog2CUSize;
> +    uint32_t heightInCU = (m_param->sourceHeight + m_param->maxCUSize -
> 1) >> g_maxLog2CUSize;
>
>      uint32_t numCUsInFrame = widthInCU * heightInCU;
>      CHECKED_MALLOC_ZERO(analysisFrameData, analysis2PassFrameData, 1);
> @@ -3074,8 +3075,8 @@
>  }\
>
>      uint32_t depthBytes = 0;
> -    uint32_t widthInCU = (m_param->sourceWidth + g_maxCUSize - 1) >>
> g_maxLog2CUSize;
> -    uint32_t heightInCU = (m_param->sourceHeight + g_maxCUSize - 1) >>
> g_maxLog2CUSize;
> +    uint32_t widthInCU = (m_param->sourceWidth + m_param->maxCUSize - 1)
> >> g_maxLog2CUSize;
> +    uint32_t heightInCU = (m_param->sourceHeight + m_param->maxCUSize -
> 1) >> g_maxLog2CUSize;
>      uint32_t numCUsInFrame = widthInCU * heightInCU;
>
>      int poc; uint32_t frameRecordSize;
> @@ -3384,8 +3385,8 @@
>  }\
>
>      uint32_t depthBytes = 0;
> -    uint32_t widthInCU = (m_param->sourceWidth + g_maxCUSize - 1) >>
> g_maxLog2CUSize;
> -    uint32_t heightInCU = (m_param->sourceHeight + g_maxCUSize - 1) >>
> g_maxLog2CUSize;
> +    uint32_t widthInCU = (m_param->sourceWidth + m_param->maxCUSize - 1)
> >> g_maxLog2CUSize;
> +    uint32_t heightInCU = (m_param->sourceHeight + m_param->maxCUSize -
> 1) >> g_maxLog2CUSize;
>      uint32_t numCUsInFrame = widthInCU * heightInCU;
>      analysis2PassFrameData* analysisFrameData = (analysis2PassFrameData*)
> analysis2Pass->analysisFramedata;
>
> diff -r 4436e1ca6f39 -r 68b27c44790d source/encoder/entropy.cpp
> --- a/source/encoder/entropy.cpp        Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/encoder/entropy.cpp        Wed Jun 21 11:05:02 2017 +0530
> @@ -888,7 +888,7 @@
>      uint32_t cuAddr = ctu.getSCUAddr() + absPartIdx;
>      X265_CHECK(realEndAddress == slice->realEndAddress(slice->m_endCUAddr),
> "real end address expected\n");
>
> -    uint32_t granularityMask = g_maxCUSize - 1;
> +    uint32_t granularityMask = ctu.m_encData->m_param->maxCUSize - 1;
>      uint32_t cuSize = 1 << ctu.m_log2CUSize[absPartIdx];
>      uint32_t rpelx = ctu.m_cuPelX + g_zscanToPelX[absPartIdx] + cuSize;
>      uint32_t bpely = ctu.m_cuPelY + g_zscanToPelY[absPartIdx] + cuSize;
> diff -r 4436e1ca6f39 -r 68b27c44790d source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp   Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/encoder/frameencoder.cpp   Wed Jun 21 11:05:02 2017 +0530
> @@ -124,7 +124,7 @@
>      range += !!(m_param->searchMethod < 2);  /* diamond/hex range check
> lag */
>      range += NTAPS_LUMA / 2;                 /* subpel filter half-length
> */
>      range += 2 + (MotionEstimate::hpelIterationCount(m_param->subpelRefine)
> + 1) / 2; /* subpel refine steps */
> -    m_refLagRows = /*(m_param->maxSlices > 1 ? 1 : 0) +*/ 1 + ((range +
> g_maxCUSize - 1) / g_maxCUSize);
> +    m_refLagRows = /*(m_param->maxSlices > 1 ? 1 : 0) +*/ 1 + ((range +
> m_param->maxCUSize - 1) / m_param->maxCUSize);
>
>      // NOTE: 2 times of numRows because both Encoder and Filter in same
> queue
>      if (!WaveFront::init(m_numRows * 2))
> @@ -837,7 +837,7 @@
>          }
>          else if (m_param->decodedPictureHashSEI == 3)
>          {
> -            uint32_t cuHeight = g_maxCUSize;
> +            uint32_t cuHeight = m_param->maxCUSize;
>
>              m_checksum[0] = 0;
>
> @@ -1246,7 +1246,7 @@
>
>      uint32_t maxBlockCols = (m_frame->m_fencPic->m_picWidth + (16 - 1))
> / 16;
>      uint32_t maxBlockRows = (m_frame->m_fencPic->m_picHeight + (16 - 1))
> / 16;
> -    uint32_t noOfBlocks = g_maxCUSize / 16;
> +    uint32_t noOfBlocks = m_param->maxCUSize / 16;
>      const uint32_t bFirstRowInSlice = ((row == 0) || (m_rows[row -
> 1].sliceId != curRow.sliceId)) ? 1 : 0;
>      const uint32_t bLastRowInSlice = ((row == m_numRows - 1) ||
> (m_rows[row + 1].sliceId != curRow.sliceId)) ? 1 : 0;
>      const uint32_t sliceId = curRow.sliceId;
> @@ -1325,8 +1325,8 @@
>      // TODO: specially case handle on first and last row
>
>      // Initialize restrict on MV range in slices
> -    tld.analysis.m_sliceMinY = -(int16_t)(rowInSlice * g_maxCUSize * 4) +
> 3 * 4;
> -    tld.analysis.m_sliceMaxY = (int16_t)((endRowInSlicePlus1 - 1 - row) *
> (g_maxCUSize * 4) - 4 * 4);
> +    tld.analysis.m_sliceMinY = -(int16_t)(rowInSlice * m_param->maxCUSize
> * 4) + 3 * 4;
> +    tld.analysis.m_sliceMaxY = (int16_t)((endRowInSlicePlus1 - 1 - row) *
> (m_param->maxCUSize * 4) - 4 * 4);
>
>      // Handle single row slice
>      if (tld.analysis.m_sliceMaxY < tld.analysis.m_sliceMinY)
> @@ -1482,7 +1482,7 @@
>              {
>                  /* 1 << shift == number of 8x8 blocks at current depth */
>                  int shift = 2 * (g_maxCUDepth - depth);
> -                int cuSize = g_maxCUSize >> depth;
> +                int cuSize = m_param->maxCUSize >> depth;
>
>                  if (cuSize == 8)
>                      curRow.rowStats.intra8x8Cnt +=
> (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN);
> diff -r 4436e1ca6f39 -r 68b27c44790d source/encoder/framefilter.cpp
> --- a/source/encoder/framefilter.cpp    Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/encoder/framefilter.cpp    Wed Jun 21 11:05:02 2017 +0530
> @@ -185,8 +185,8 @@
>      m_pad[0] = top->m_sps.conformanceWindow.rightOffset;
>      m_pad[1] = top->m_sps.conformanceWindow.bottomOffset;
>      m_saoRowDelay = m_param->bEnableLoopFilter ? 1 : 0;
> -    m_lastHeight = (m_param->sourceHeight % g_maxCUSize) ?
> (m_param->sourceHeight % g_maxCUSize) : g_maxCUSize;
> -    m_lastWidth = (m_param->sourceWidth % g_maxCUSize) ?
> (m_param->sourceWidth % g_maxCUSize) : g_maxCUSize;
> +    m_lastHeight = (m_param->sourceHeight % m_param->maxCUSize) ?
> (m_param->sourceHeight % m_param->maxCUSize) : m_param->maxCUSize;
> +    m_lastWidth = (m_param->sourceWidth % m_param->maxCUSize) ?
> (m_param->sourceWidth % m_param->maxCUSize) : m_param->maxCUSize;
>      integralCompleted.set(0);
>
>      if (m_param->bEnableSsim)
> @@ -214,7 +214,7 @@
>          for(int row = 0; row < numRows; row++)
>          {
>              // Setting maximum bound information
> -            m_parallelFilter[row].m_rowHeight = (row == numRows - 1) ?
> m_lastHeight : g_maxCUSize;
> +            m_parallelFilter[row].m_rowHeight = (row == numRows - 1) ?
> m_lastHeight : m_param->maxCUSize;
>              m_parallelFilter[row].m_row = row;
>              m_parallelFilter[row].m_rowAddr = row * numCols;
>              m_parallelFilter[row].m_frameFilter = this;
> @@ -300,7 +300,7 @@
>  void FrameFilter::ParallelFilter::copySaoAboveRef(const CUData *ctu,
> PicYuv* reconPic, uint32_t cuAddr, int col)
>  {
>      // Copy SAO Top Reference Pixels
> -    int ctuWidth  = g_maxCUSize;
> +    int ctuWidth  = ctu->m_encData->m_param->maxCUSize;
>      const pixel* recY = reconPic->getPlaneAddr(0, cuAddr) -
> (ctu->m_bFirstRowInSlice ? 0 : reconPic->m_stride);
>
>      // Luma
> @@ -701,8 +701,8 @@
>          intptr_t stride2 = m_frame->m_fencPic->m_stride;
>          uint32_t bEnd = ((row) == (this->m_numRows - 1));
>          uint32_t bStart = (row == 0);
> -        uint32_t minPixY = row * g_maxCUSize - 4 * !bStart;
> -        uint32_t maxPixY = X265_MIN((row + 1) * g_maxCUSize - 4 * !bEnd,
> (uint32_t)m_param->sourceHeight);
> +        uint32_t minPixY = row * m_param->maxCUSize - 4 * !bStart;
> +        uint32_t maxPixY = X265_MIN((row + 1) * m_param->maxCUSize - 4 *
> !bEnd, (uint32_t)m_param->sourceHeight);
>          uint32_t ssim_cnt;
>          x265_emms();
>
> @@ -768,7 +768,7 @@
>              uint32_t width = reconPic->m_picWidth;
>              uint32_t height = m_parallelFilter[row].getCUHeight();
>              intptr_t stride = reconPic->m_stride;
> -            uint32_t cuHeight = g_maxCUSize;
> +            uint32_t cuHeight = m_param->maxCUSize;
>
>              if (!row)
>                  m_frameEncoder->m_checksum[0] = 0;
> @@ -812,18 +812,18 @@
>          }
>
>          int stride = (int)m_frame->m_reconPic->m_stride;
> -        int padX = g_maxCUSize + 32;
> -        int padY = g_maxCUSize + 16;
> +        int padX = m_param->maxCUSize + 32;
> +        int padY = m_param->maxCUSize + 16;
>          int numCuInHeight = m_frame->m_encData->m_slice->m_sps->numCuInHeight;
> -        int maxHeight = numCuInHeight * g_maxCUSize;
> +        int maxHeight = numCuInHeight * m_param->maxCUSize;
>          int startRow = 0;
>
>          if (m_param->interlaceMode)
> -            startRow = (row * g_maxCUSize >> 1);
> +            startRow = (row * m_param->maxCUSize >> 1);
>          else
> -            startRow = row * g_maxCUSize;
> +            startRow = row * m_param->maxCUSize;
>
> -        int height = lastRow ? (maxHeight + g_maxCUSize *
> m_param->interlaceMode) : (((row + m_param->interlaceMode) * g_maxCUSize) +
> g_maxCUSize);
> +        int height = lastRow ? (maxHeight + m_param->maxCUSize *
> m_param->interlaceMode) : (((row + m_param->interlaceMode) *
> m_param->maxCUSize) + m_param->maxCUSize);
>
>          if (!row)
>          {
> diff -r 4436e1ca6f39 -r 68b27c44790d source/encoder/framefilter.h
> --- a/source/encoder/framefilter.h      Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/encoder/framefilter.h      Wed Jun 21 11:05:02 2017 +0530
> @@ -123,7 +123,7 @@
>
>      uint32_t getCUWidth(int colNum) const
>      {
> -        return (colNum == (int)m_numCols - 1) ? m_lastWidth : g_maxCUSize;
> +        return (colNum == (int)m_numCols - 1) ? m_lastWidth :
> m_param->maxCUSize;
>      }
>
>      void init(Encoder *top, FrameEncoder *frame, int numRows, uint32_t
> numCols);
> diff -r 4436e1ca6f39 -r 68b27c44790d source/encoder/reference.cpp
> --- a/source/encoder/reference.cpp      Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/encoder/reference.cpp      Wed Jun 21 11:05:02 2017 +0530
> @@ -72,12 +72,12 @@
>
>      if (wp)
>      {
> -        uint32_t numCUinHeight = (reconPic->m_picHeight + g_maxCUSize -
> 1) / g_maxCUSize;
> +        uint32_t numCUinHeight = (reconPic->m_picHeight + p.maxCUSize -
> 1) / p.maxCUSize;
>
>          int marginX = reconPic->m_lumaMarginX;
>          int marginY = reconPic->m_lumaMarginY;
>          intptr_t stride = reconPic->m_stride;
> -        int cuHeight = g_maxCUSize;
> +        int cuHeight = p.maxCUSize;
>
>          for (int c = 0; c < (p.internalCsp != X265_CSP_I400 &&
> recPic->m_picCsp != X265_CSP_I400 ? numInterpPlanes : 1); c++)
>          {
> @@ -127,15 +127,15 @@
>      int marginY = reconPic->m_lumaMarginY;
>      intptr_t stride = reconPic->m_stride;
>      int width   = reconPic->m_picWidth;
> -    int height  = (finishedRows - numWeightedRows) * g_maxCUSize;
> +    int height  = (finishedRows - numWeightedRows) *
> reconPic->m_param->maxCUSize;
>      /* the last row may be partial height */
>      if (finishedRows == maxNumRows - 1)
>      {
> -        const int leftRows = (reconPic->m_picHeight & (g_maxCUSize - 1));
> +        const int leftRows = (reconPic->m_picHeight &
> (reconPic->m_param->maxCUSize - 1));
>
> -        height += leftRows ? leftRows : g_maxCUSize;
> +        height += leftRows ? leftRows : reconPic->m_param->maxCUSize;
>      }
> -    int cuHeight = g_maxCUSize;
> +    int cuHeight = reconPic->m_param->maxCUSize;
>
>      for (int c = 0; c < numInterpPlanes; c++)
>      {
> diff -r 4436e1ca6f39 -r 68b27c44790d source/encoder/sao.cpp
> --- a/source/encoder/sao.cpp    Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/encoder/sao.cpp    Wed Jun 21 11:05:02 2017 +0530
> @@ -98,8 +98,8 @@
>      m_hChromaShift = CHROMA_H_SHIFT(param->internalCsp);
>      m_vChromaShift = CHROMA_V_SHIFT(param->internalCsp);
>
> -    m_numCuInWidth =  (m_param->sourceWidth + g_maxCUSize - 1) /
> g_maxCUSize;
> -    m_numCuInHeight = (m_param->sourceHeight + g_maxCUSize - 1) /
> g_maxCUSize;
> +    m_numCuInWidth =  (m_param->sourceWidth + m_param->maxCUSize - 1) /
> m_param->maxCUSize;
> +    m_numCuInHeight = (m_param->sourceHeight + m_param->maxCUSize - 1) /
> m_param->maxCUSize;
>
>      const pixel maxY = (1 << X265_DEPTH) - 1;
>      const pixel rangeExt = maxY >> 1;
> @@ -107,12 +107,12 @@
>
>      for (int i = 0; i < (param->internalCsp != X265_CSP_I400 ? 3 : 1);
> i++)
>      {
> -        CHECKED_MALLOC(m_tmpL1[i], pixel, g_maxCUSize + 1);
> -        CHECKED_MALLOC(m_tmpL2[i], pixel, g_maxCUSize + 1);
> +        CHECKED_MALLOC(m_tmpL1[i], pixel, m_param->maxCUSize + 1);
> +        CHECKED_MALLOC(m_tmpL2[i], pixel, m_param->maxCUSize + 1);
>
>          // SAO asm code will read 1 pixel before and after, so pad by 2
>          // NOTE: m_param->sourceWidth+2 enough, to avoid condition check in copySaoAboveRef(), I alloc more up to 63 bytes in here
> -        CHECKED_MALLOC(m_tmpU[i], pixel, m_numCuInWidth * g_maxCUSize + 2
> + 32);
> +        CHECKED_MALLOC(m_tmpU[i], pixel, m_numCuInWidth *
> m_param->maxCUSize + 2 + 32);
>          m_tmpU[i] += 1;
>      }
>
> @@ -279,8 +279,8 @@
>      uint32_t picWidth  = m_param->sourceWidth;
>      uint32_t picHeight = m_param->sourceHeight;
>      const CUData* cu = m_frame->m_encData->getPicCTU(addr);
> -    int ctuWidth = g_maxCUSize;
> -    int ctuHeight = g_maxCUSize;
> +    int ctuWidth = m_param->maxCUSize;
> +    int ctuHeight = m_param->maxCUSize;
>      uint32_t lpelx = cu->m_cuPelX;
>      uint32_t tpely = cu->m_cuPelY;
>      const uint32_t firstRowInSlice = cu->m_bFirstRowInSlice;
> @@ -573,8 +573,8 @@
>  {
>      PicYuv* reconPic = m_frame->m_reconPic;
>      intptr_t stride = reconPic->m_stride;
> -    int ctuWidth  = g_maxCUSize;
> -    int ctuHeight = g_maxCUSize;
> +    int ctuWidth = m_param->maxCUSize;
> +    int ctuHeight = m_param->maxCUSize;
>
>      int addr = idxY * m_numCuInWidth + idxX;
>      pixel* rec = reconPic->getLumaAddr(addr);
> @@ -633,8 +633,8 @@
>  {
>      PicYuv* reconPic = m_frame->m_reconPic;
>      intptr_t stride = reconPic->m_strideC;
> -    int ctuWidth  = g_maxCUSize;
> -    int ctuHeight = g_maxCUSize;
> +    int ctuWidth  = m_param->maxCUSize;
> +    int ctuHeight = m_param->maxCUSize;
>
>      {
>          ctuWidth  >>= m_hChromaShift;
> @@ -744,8 +744,8 @@
>      intptr_t stride = plane ? reconPic->m_strideC : reconPic->m_stride;
>      uint32_t picWidth  = m_param->sourceWidth;
>      uint32_t picHeight = m_param->sourceHeight;
> -    int ctuWidth  = g_maxCUSize;
> -    int ctuHeight = g_maxCUSize;
> +    int ctuWidth  = m_param->maxCUSize;
> +    int ctuHeight = m_param->maxCUSize;
>      uint32_t lpelx = cu->m_cuPelX;
>      uint32_t tpely = cu->m_cuPelY;
>      const uint32_t firstRowInSlice = cu->m_bFirstRowInSlice;
> @@ -928,8 +928,8 @@
>      intptr_t stride = reconPic->m_stride;
>      uint32_t picWidth  = m_param->sourceWidth;
>      uint32_t picHeight = m_param->sourceHeight;
> -    int ctuWidth  = g_maxCUSize;
> -    int ctuHeight = g_maxCUSize;
> +    int ctuWidth  = m_param->maxCUSize;
> +    int ctuHeight = m_param->maxCUSize;
>      uint32_t lpelx = cu->m_cuPelX;
>      uint32_t tpely = cu->m_cuPelY;
>      const uint32_t firstRowInSlice = cu->m_bFirstRowInSlice;
> diff -r 4436e1ca6f39 -r 68b27c44790d source/encoder/search.cpp
> --- a/source/encoder/search.cpp Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/encoder/search.cpp Wed Jun 21 11:05:02 2017 +0530
> @@ -120,8 +120,8 @@
>              CHECKED_MALLOC(m_rqt[i].coeffRQT[0], coeff_t, sizeL + sizeC
> * 2);
>              m_rqt[i].coeffRQT[1] = m_rqt[i].coeffRQT[0] + sizeL;
>              m_rqt[i].coeffRQT[2] = m_rqt[i].coeffRQT[0] + sizeL + sizeC;
> -            ok &= m_rqt[i].reconQtYuv.create(g_maxCUSize,
> param.internalCsp);
> -            ok &= m_rqt[i].resiQtYuv.create(g_maxCUSize,
> param.internalCsp);
> +            ok &= m_rqt[i].reconQtYuv.create(param.maxCUSize,
> param.internalCsp);
> +            ok &= m_rqt[i].resiQtYuv.create(param.maxCUSize,
> param.internalCsp);
>          }
>      }
>      else
> @@ -130,15 +130,15 @@
>          {
>              CHECKED_MALLOC(m_rqt[i].coeffRQT[0], coeff_t, sizeL);
>              m_rqt[i].coeffRQT[1] = m_rqt[i].coeffRQT[2] = NULL;
> -            ok &= m_rqt[i].reconQtYuv.create(g_maxCUSize,
> param.internalCsp);
> -            ok &= m_rqt[i].resiQtYuv.create(g_maxCUSize,
> param.internalCsp);
> +            ok &= m_rqt[i].reconQtYuv.create(param.maxCUSize,
> param.internalCsp);
> +            ok &= m_rqt[i].resiQtYuv.create(param.maxCUSize,
> param.internalCsp);
>          }
>      }
>
>      /* the rest of these buffers are indexed per-depth */
>      for (uint32_t i = 0; i <= g_maxCUDepth; i++)
>      {
> -        int cuSize = g_maxCUSize >> i;
> +        int cuSize = param.maxCUSize >> i;
>          ok &= m_rqt[i].tmpResiYuv.create(cuSize, param.internalCsp);
>          ok &= m_rqt[i].tmpPredYuv.create(cuSize, param.internalCsp);
>          ok &= m_rqt[i].bidirPredYuv[0].create(cuSize, param.internalCsp);
> @@ -2593,11 +2593,11 @@
>      cu.clipMv(mvmax);
>
>      if (cu.m_encData->m_param->bIntraRefresh && m_slice->m_sliceType == P_SLICE &&
> -          cu.m_cuPelX / g_maxCUSize < m_frame->m_encData->m_pir.pirStartCol &&
> +          cu.m_cuPelX / m_param->maxCUSize < m_frame->m_encData->m_pir.pirStartCol &&
>            m_slice->m_refFrameList[0][0]->m_encData->m_pir.pirEndCol < m_slice->m_sps->numCuInWidth)
>      {
>          int safeX, maxSafeMv;
> -        safeX = m_slice->m_refFrameList[0][0]->m_encData->m_pir.pirEndCol
> * g_maxCUSize - 3;
> +        safeX = m_slice->m_refFrameList[0][0]->m_encData->m_pir.pirEndCol
> * m_param->maxCUSize - 3;
>          maxSafeMv = (safeX - cu.m_cuPelX) * 4;
>          mvmax.x = X265_MIN(mvmax.x, maxSafeMv);
>          mvmin.x = X265_MIN(mvmin.x, maxSafeMv);
> diff -r 4436e1ca6f39 -r 68b27c44790d source/encoder/slicetype.cpp
> --- a/source/encoder/slicetype.cpp      Tue Jun 20 16:38:20 2017 +0530
> +++ b/source/encoder/slicetype.cpp      Wed Jun 21 11:05:02 2017 +0530
> @@ -907,7 +907,7 @@
>          curFrame->m_lowres.lowresCostForRc = curFrame->m_lowres.lowresCosts[b - p0][p1 - b];
>          uint32_t lowresRow = 0, lowresCol = 0, lowresCuIdx = 0, sum = 0, intraSum = 0;
>          uint32_t scale = m_param->maxCUSize / (2 * X265_LOWRES_CU_SIZE);
> -        uint32_t numCuInHeight = (m_param->sourceHeight + g_maxCUSize -
> 1) / g_maxCUSize;
> +        uint32_t numCuInHeight = (m_param->sourceHeight +
> m_param->maxCUSize - 1) / m_param->maxCUSize;
>          uint32_t widthInLowresCu = (uint32_t)m_8x8Width, heightInLowresCu
> = (uint32_t)m_8x8Height;
>          double *qp_offset = 0;
>          /* Factor in qpoffsets based on Aq/Cutree in CU costs */
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20170622/44410e7d/attachment-0001.html>