[x265] primitives: clarify constness

Steve Borho steve at borho.org
Mon Nov 17 21:32:36 CET 2014


On 11/16, Satoshi Nakagawa wrote:
> # HG changeset patch
> # User Satoshi Nakagawa <nakagawa424 at oki.com>
> # Date 1416115937 -32400
> #      Sun Nov 16 14:32:17 2014 +0900
> # Node ID 9578af829f2af0aa354fb60b46359e16c4a5b954
> # Parent  8191e0d0245501441e8d21769e55929a3dbcb260
> primitives: clarify constness

Nice! This was also on my TODO list. This should allow us to get rid of
all of the const_casts in the encoder.
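
For example, predict.cpp below goes from

    pixel* src = const_cast<PicYuv&>(refPic).getLumaAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + srcOffset;

to

    const pixel* src = refPic.getLumaAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + srcOffset;

which is exactly the shape of cast we want gone everywhere.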

We have a refactor of the transform/quant code pending that uses int16_t
consistently for coefficients. Once that refactor makes it into the tree,
we'll rebase this patch atop those changes.

> diff -r 8191e0d02455 -r 9578af829f2a source/common/dct.cpp
> --- a/source/common/dct.cpp	Fri Nov 14 16:14:39 2014 -0600
> +++ b/source/common/dct.cpp	Sun Nov 16 14:32:17 2014 +0900
> @@ -41,7 +41,7 @@
>  
>  // Fast DST Algorithm. Full matrix multiplication for DST and Fast DST algorithm
>  // give identical results
> -void fastForwardDst(int16_t *block, int16_t *coeff, int shift)  // input block, output coeff
> +void fastForwardDst(const int16_t* block, int16_t* coeff, int shift)  // input block, output coeff
>  {
>      int c[4];
>      int rnd_factor = 1 << (shift - 1);
> @@ -61,7 +61,7 @@
>      }
>  }
>  
> -void inversedst(int16_t *tmp, int16_t *block, int shift)  // input tmp, output block
> +void inversedst(const int16_t* tmp, int16_t* block, int shift)  // input tmp, output block
>  {
>      int i, c[4];
>      int rnd_factor = 1 << (shift - 1);
> @@ -81,7 +81,7 @@
>      }
>  }
>  
> -void partialButterfly16(int16_t *src, int16_t *dst, int shift, int line)
> +void partialButterfly16(const int16_t* src, int16_t* dst, int shift, int line)
>  {
>      int j, k;
>      int E[8], O[8];
> @@ -134,7 +134,7 @@
>      }
>  }
>  
> -void partialButterfly32(int16_t *src, int16_t *dst, int shift, int line)
> +void partialButterfly32(const int16_t* src, int16_t* dst, int shift, int line)
>  {
>      int j, k;
>      int E[16], O[16];
> @@ -203,7 +203,7 @@
>      }
>  }
>  
> -void partialButterfly8(int16_t *src, int16_t *dst, int shift, int line)
> +void partialButterfly8(const int16_t* src, int16_t* dst, int shift, int line)
>  {
>      int j, k;
>      int E[4], O[4];
> @@ -240,7 +240,7 @@
>      }
>  }
>  
> -void partialButterflyInverse4(int16_t *src, int16_t *dst, int shift, int line)
> +void partialButterflyInverse4(const int16_t* src, int16_t* dst, int shift, int line)
>  {
>      int j;
>      int E[2], O[2];
> @@ -265,7 +265,7 @@
>      }
>  }
>  
> -void partialButterflyInverse8(int16_t *src, int16_t *dst, int shift, int line)
> +void partialButterflyInverse8(const int16_t* src, int16_t* dst, int shift, int line)
>  {
>      int j, k;
>      int E[4], O[4];
> @@ -301,7 +301,7 @@
>      }
>  }
>  
> -void partialButterflyInverse16(int16_t *src, int16_t *dst, int shift, int line)
> +void partialButterflyInverse16(const int16_t* src, int16_t* dst, int shift, int line)
>  {
>      int j, k;
>      int E[8], O[8];
> @@ -352,7 +352,7 @@
>      }
>  }
>  
> -void partialButterflyInverse32(int16_t *src, int16_t *dst, int shift, int line)
> +void partialButterflyInverse32(const int16_t* src, int16_t* dst, int shift, int line)
>  {
>      int j, k;
>      int E[16], O[16];
> @@ -416,7 +416,7 @@
>      }
>  }
>  
> -void partialButterfly4(int16_t *src, int16_t *dst, int shift, int line)
> +void partialButterfly4(const int16_t* src, int16_t* dst, int shift, int line)
>  {
>      int j;
>      int E[2], O[2];
> @@ -440,7 +440,7 @@
>      }
>  }
>  
> -void dst4_c(int16_t *src, int32_t *dst, intptr_t stride)
> +void dst4_c(const int16_t* src, int32_t* dst, intptr_t stride)
>  {
>      const int shift_1st = 1 + X265_DEPTH - 8;
>      const int shift_2nd = 8;
> @@ -468,7 +468,7 @@
>  #undef N
>  }
>  
> -void dct4_c(int16_t *src, int32_t *dst, intptr_t stride)
> +void dct4_c(const int16_t* src, int32_t* dst, intptr_t stride)
>  {
>      const int shift_1st = 1 + X265_DEPTH - 8;
>      const int shift_2nd = 8;
> @@ -495,7 +495,7 @@
>  #undef N
>  }
>  
> -void dct8_c(int16_t *src, int32_t *dst, intptr_t stride)
> +void dct8_c(const int16_t* src, int32_t* dst, intptr_t stride)
>  {
>      const int shift_1st = 2 + X265_DEPTH - 8;
>      const int shift_2nd = 9;
> @@ -523,7 +523,7 @@
>  #undef N
>  }
>  
> -void dct16_c(int16_t *src, int32_t *dst, intptr_t stride)
> +void dct16_c(const int16_t* src, int32_t* dst, intptr_t stride)
>  {
>      const int shift_1st = 3 + X265_DEPTH - 8;
>      const int shift_2nd = 10;
> @@ -551,7 +551,7 @@
>  #undef N
>  }
>  
> -void dct32_c(int16_t *src, int32_t *dst, intptr_t stride)
> +void dct32_c(const int16_t* src, int32_t* dst, intptr_t stride)
>  {
>      const int shift_1st = 4 + X265_DEPTH - 8;
>      const int shift_2nd = 11;
> @@ -579,7 +579,7 @@
>  #undef N
>  }
>  
> -void idst4_c(int32_t *src, int16_t *dst, intptr_t stride)
> +void idst4_c(const int32_t* src, int16_t* dst, intptr_t stride)
>  {
>      const int shift_1st = 7;
>      const int shift_2nd = 12 - (X265_DEPTH - 8);
> @@ -607,7 +607,7 @@
>      }
>  }
>  
> -void idct4_c(int32_t *src, int16_t *dst, intptr_t stride)
> +void idct4_c(const int32_t* src, int16_t* dst, intptr_t stride)
>  {
>      const int shift_1st = 7;
>      const int shift_2nd = 12 - (X265_DEPTH - 8);
> @@ -635,7 +635,7 @@
>      }
>  }
>  
> -void idct8_c(int32_t *src, int16_t *dst, intptr_t stride)
> +void idct8_c(const int32_t* src, int16_t* dst, intptr_t stride)
>  {
>      const int shift_1st = 7;
>      const int shift_2nd = 12 - (X265_DEPTH - 8);
> @@ -662,7 +662,7 @@
>      }
>  }
>  
> -void idct16_c(int32_t *src, int16_t *dst, intptr_t stride)
> +void idct16_c(const int32_t* src, int16_t* dst, intptr_t stride)
>  {
>      const int shift_1st = 7;
>      const int shift_2nd = 12 - (X265_DEPTH - 8);
> @@ -689,7 +689,7 @@
>      }
>  }
>  
> -void idct32_c(int32_t *src, int16_t *dst, intptr_t stride)
> +void idct32_c(const int32_t* src, int16_t* dst, intptr_t stride)
>  {
>      const int shift_1st = 7;
>      const int shift_2nd = 12 - (X265_DEPTH - 8);
> @@ -769,7 +769,7 @@
>      }
>  }
>  
> -uint32_t quant_c(int32_t* coef, int32_t* quantCoeff, int32_t* deltaU, int16_t* qCoef, int qBits, int add, int numCoeff)
> +uint32_t quant_c(const int32_t* coef, const int32_t* quantCoeff, int32_t* deltaU, int16_t* qCoef, int qBits, int add, int numCoeff)
>  {
>      X265_CHECK(qBits >= 8, "qBits less than 8\n");
>      X265_CHECK((numCoeff % 16) == 0, "numCoeff must be multiple of 16\n");
> @@ -793,7 +793,7 @@
>      return numSig;
>  }
>  
> -uint32_t nquant_c(int32_t* coef, int32_t* quantCoeff, int16_t* qCoef, int qBits, int add, int numCoeff)
> +uint32_t nquant_c(const int32_t* coef, const int32_t* quantCoeff, int16_t* qCoef, int qBits, int add, int numCoeff)
>  {
>      X265_CHECK((numCoeff % 16) == 0, "number of quant coeff is not multiple of 4x4\n");
>      X265_CHECK((uint32_t)add < ((uint32_t)1 << qBits), "2 ^ qBits less than add\n");
> @@ -817,7 +817,7 @@
>      return numSig;
>  }
>  
> -int  count_nonzero_c(const int16_t *quantCoeff, int numCoeff)
> +int  count_nonzero_c(const int16_t* quantCoeff, int numCoeff)
>  {
>      X265_CHECK(((intptr_t)quantCoeff & 15) == 0, "quant buffer not aligned\n");
>      X265_CHECK(numCoeff > 0 && (numCoeff & 15) == 0, "numCoeff invalid %d\n", numCoeff);
> @@ -833,7 +833,7 @@
>  }
>  
>  template<int trSize>
> -uint32_t copy_count(int16_t* coeff, int16_t* residual, intptr_t stride)
> +uint32_t copy_count(int16_t* coeff, const int16_t* residual, intptr_t stride)
>  {
>      uint32_t numSig = 0;
>      for (int k = 0; k < trSize; k++)
> @@ -848,7 +848,7 @@
>      return numSig;
>  }
>  
> -void denoiseDct_c(int32_t* dctCoef, uint32_t* resSum, uint16_t* offset, int numCoeff)
> +void denoiseDct_c(int32_t* dctCoef, uint32_t* resSum, const uint16_t* offset, int numCoeff)
>  {
>      for (int i = 0; i < numCoeff; i++)
>      {
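
The pattern through dct.cpp is consistent: pure inputs gain const while
in/out buffers stay mutable. quant_c is a good example:

    uint32_t quant_c(const int32_t* coef, const int32_t* quantCoeff,
                     int32_t* deltaU, int16_t* qCoef, int qBits, int add, int numCoeff);

coef and quantCoeff are read-only, deltaU and qCoef are outputs.
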
> diff -r 8191e0d02455 -r 9578af829f2a source/common/ipfilter.cpp
> --- a/source/common/ipfilter.cpp	Fri Nov 14 16:14:39 2014 -0600
> +++ b/source/common/ipfilter.cpp	Sun Nov 16 14:32:17 2014 +0900
> @@ -35,7 +35,7 @@
>  
>  namespace {
>  template<int dstStride>
> -void filterConvertPelToShort_c(pixel *src, intptr_t srcStride, int16_t *dst, int width, int height)
> +void filterConvertPelToShort_c(const pixel* src, intptr_t srcStride, int16_t* dst, int width, int height)
>  {
>      int shift = IF_INTERNAL_PREC - X265_DEPTH;
>      int row, col;
> @@ -74,9 +74,9 @@
>  }
>  
>  template<int N, int width, int height>
> -void interp_horiz_pp_c(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
> +void interp_horiz_pp_c(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx)
>  {
> -    int16_t const * coeff = (N == 4) ? g_chromaFilter[coeffIdx] : g_lumaFilter[coeffIdx];
> +    const int16_t* coeff = (N == 4) ? g_chromaFilter[coeffIdx] : g_lumaFilter[coeffIdx];
>      int headRoom = IF_FILTER_PREC;
>      int offset =  (1 << (headRoom - 1));
>      uint16_t maxVal = (1 << X265_DEPTH) - 1;
> @@ -115,9 +115,9 @@
>  }
>  
>  template<int N, int width, int height>
> -void interp_horiz_ps_c(pixel *src, intptr_t srcStride, int16_t *dst, intptr_t dstStride, int coeffIdx, int isRowExt)
> +void interp_horiz_ps_c(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt)
>  {
> -    int16_t const * coeff = (N == 4) ? g_chromaFilter[coeffIdx] : g_lumaFilter[coeffIdx];
> +    const int16_t* coeff = (N == 4) ? g_chromaFilter[coeffIdx] : g_lumaFilter[coeffIdx];
>      int headRoom = IF_INTERNAL_PREC - X265_DEPTH;
>      int shift = IF_FILTER_PREC - headRoom;
>      int offset = -IF_INTERNAL_OFFS << shift;
> @@ -160,9 +160,9 @@
>  }
>  
>  template<int N, int width, int height>
> -void interp_vert_pp_c(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
> +void interp_vert_pp_c(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx)
>  {
> -    int16_t const * c = (N == 4) ? g_chromaFilter[coeffIdx] : g_lumaFilter[coeffIdx];
> +    const int16_t* c = (N == 4) ? g_chromaFilter[coeffIdx] : g_lumaFilter[coeffIdx];
>      int shift = IF_FILTER_PREC;
>      int offset = 1 << (shift - 1);
>      uint16_t maxVal = (1 << X265_DEPTH) - 1;
> @@ -201,9 +201,9 @@
>  }
>  
>  template<int N, int width, int height>
> -void interp_vert_ps_c(pixel *src, intptr_t srcStride, int16_t *dst, intptr_t dstStride, int coeffIdx)
> +void interp_vert_ps_c(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx)
>  {
> -    int16_t const * c = (N == 4) ? g_chromaFilter[coeffIdx] : g_lumaFilter[coeffIdx];
> +    const int16_t* c = (N == 4) ? g_chromaFilter[coeffIdx] : g_lumaFilter[coeffIdx];
>      int headRoom = IF_INTERNAL_PREC - X265_DEPTH;
>      int shift = IF_FILTER_PREC - headRoom;
>      int offset = -IF_INTERNAL_OFFS << shift;
> @@ -239,13 +239,13 @@
>  }
>  
>  template<int N, int width, int height>
> -void interp_vert_sp_c(int16_t *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
> +void interp_vert_sp_c(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx)
>  {
>      int headRoom = IF_INTERNAL_PREC - X265_DEPTH;
>      int shift = IF_FILTER_PREC + headRoom;
>      int offset = (1 << (shift - 1)) + (IF_INTERNAL_OFFS << IF_FILTER_PREC);
>      uint16_t maxVal = (1 << X265_DEPTH) - 1;
> -    const int16_t *coeff = (N == 8 ? g_lumaFilter[coeffIdx] : g_chromaFilter[coeffIdx]);
> +    const int16_t* coeff = (N == 8 ? g_lumaFilter[coeffIdx] : g_chromaFilter[coeffIdx]);
>  
>      src -= (N / 2 - 1) * srcStride;
>  
> @@ -282,9 +282,9 @@
>  }
>  
>  template<int N, int width, int height>
> -void interp_vert_ss_c(int16_t *src, intptr_t srcStride, int16_t *dst, intptr_t dstStride, int coeffIdx)
> +void interp_vert_ss_c(const int16_t* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx)
>  {
> -    const int16_t *const c = (N == 8 ? g_lumaFilter[coeffIdx] : g_chromaFilter[coeffIdx]);
> +    const int16_t* c = (N == 8 ? g_lumaFilter[coeffIdx] : g_chromaFilter[coeffIdx]);
>      int shift = IF_FILTER_PREC;
>      int row, col;
>  
> @@ -317,13 +317,13 @@
>  }
>  
>  template<int N>
> -void filterVertical_sp_c(int16_t *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int width, int height, int coeffIdx)
> +void filterVertical_sp_c(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int coeffIdx)
>  {
>      int headRoom = IF_INTERNAL_PREC - X265_DEPTH;
>      int shift = IF_FILTER_PREC + headRoom;
>      int offset = (1 << (shift - 1)) + (IF_INTERNAL_OFFS << IF_FILTER_PREC);
>      uint16_t maxVal = (1 << X265_DEPTH) - 1;
> -    const int16_t *coeff = (N == 8 ? g_lumaFilter[coeffIdx] : g_chromaFilter[coeffIdx]);
> +    const int16_t* coeff = (N == 8 ? g_lumaFilter[coeffIdx] : g_chromaFilter[coeffIdx]);
>  
>      src -= (N / 2 - 1) * srcStride;
>  
> @@ -360,7 +360,7 @@
>  }
>  
>  template<int N, int width, int height>
> -void interp_hv_pp_c(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int idxX, int idxY)
> +void interp_hv_pp_c(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int idxX, int idxY)
>  {
>      short immedVals[(64 + 8) * (64 + 8)];
>  
> diff -r 8191e0d02455 -r 9578af829f2a source/common/picyuv.h
> --- a/source/common/picyuv.h	Fri Nov 14 16:14:39 2014 -0600
> +++ b/source/common/picyuv.h	Sun Nov 16 14:32:17 2014 +0900
> @@ -76,12 +76,21 @@
>      pixel*  getCrAddr(uint32_t ctuAddr)                        { return m_picOrg[2] + m_cuOffsetC[ctuAddr]; }
>      pixel*  getChromaAddr(uint32_t chromaId, uint32_t ctuAddr) { return m_picOrg[chromaId] + m_cuOffsetC[ctuAddr]; }
>      pixel*  getPlaneAddr(uint32_t plane, uint32_t ctuAddr)     { return m_picOrg[plane] + (plane ? m_cuOffsetC[ctuAddr] : m_cuOffsetY[ctuAddr]); }
> +    const pixel* getLumaAddr(uint32_t ctuAddr) const           { return m_picOrg[0] + m_cuOffsetY[ctuAddr]; }
> +    const pixel* getCbAddr(uint32_t ctuAddr) const             { return m_picOrg[1] + m_cuOffsetC[ctuAddr]; }
> +    const pixel* getCrAddr(uint32_t ctuAddr) const             { return m_picOrg[2] + m_cuOffsetC[ctuAddr]; }
> +    const pixel* getChromaAddr(uint32_t chromaId, uint32_t ctuAddr) const { return m_picOrg[chromaId] + m_cuOffsetC[ctuAddr]; }
> +    const pixel* getPlaneAddr(uint32_t plane, uint32_t ctuAddr) const     { return m_picOrg[plane] + (plane ? m_cuOffsetC[ctuAddr] : m_cuOffsetY[ctuAddr]); }
>  
>      /* get pointer to CU start address */
>      pixel*  getLumaAddr(uint32_t ctuAddr, uint32_t absPartIdx) { return m_picOrg[0] + m_cuOffsetY[ctuAddr] + m_buOffsetY[absPartIdx]; }
>      pixel*  getCbAddr(uint32_t ctuAddr, uint32_t absPartIdx)   { return m_picOrg[1] + m_cuOffsetC[ctuAddr] + m_buOffsetC[absPartIdx]; }
>      pixel*  getCrAddr(uint32_t ctuAddr, uint32_t absPartIdx)   { return m_picOrg[2] + m_cuOffsetC[ctuAddr] + m_buOffsetC[absPartIdx]; }
>      pixel*  getChromaAddr(uint32_t chromaId, uint32_t ctuAddr, uint32_t absPartIdx) { return m_picOrg[chromaId] + m_cuOffsetC[ctuAddr] + m_buOffsetC[absPartIdx]; }
> +    const pixel* getLumaAddr(uint32_t ctuAddr, uint32_t absPartIdx) const { return m_picOrg[0] + m_cuOffsetY[ctuAddr] + m_buOffsetY[absPartIdx]; }
> +    const pixel* getCbAddr(uint32_t ctuAddr, uint32_t absPartIdx) const   { return m_picOrg[1] + m_cuOffsetC[ctuAddr] + m_buOffsetC[absPartIdx]; }
> +    const pixel* getCrAddr(uint32_t ctuAddr, uint32_t absPartIdx) const   { return m_picOrg[2] + m_cuOffsetC[ctuAddr] + m_buOffsetC[absPartIdx]; }
> +    const pixel* getChromaAddr(uint32_t chromaId, uint32_t ctuAddr, uint32_t absPartIdx) const { return m_picOrg[chromaId] + m_cuOffsetC[ctuAddr] + m_buOffsetC[absPartIdx]; }
>  };
>  
>  void updateChecksum(const pixel* plane, uint32_t& checksumVal, uint32_t height, uint32_t width, intptr_t stride, int row, uint32_t cuHeight);
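
For anyone following along, this is the standard const-overload idiom:
member functions can be overloaded on the constness of 'this', so calls
through a const PicYuv& resolve to the accessors returning const pixel*
while mutable callers keep the existing ones. A minimal sketch (Plane is
a made-up class, just for illustration):

    #include <stdint.h>

    typedef uint8_t pixel; // 8bpp build, for the sketch

    struct Plane
    {
        pixel* m_org;
        pixel*       addr(uint32_t off)       { return m_org + off; } // mutable access
        const pixel* addr(uint32_t off) const { return m_org + off; } // read-only access
    };

These overloads are what let predict.cpp drop its const_casts further down.
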
> diff -r 8191e0d02455 -r 9578af829f2a source/common/pixel.cpp
> --- a/source/common/pixel.cpp	Fri Nov 14 16:14:39 2014 -0600
> +++ b/source/common/pixel.cpp	Sun Nov 16 14:32:17 2014 +0900
> @@ -90,7 +90,7 @@
>  // place functions in anonymous namespace (file static)
>  
>  template<int lx, int ly>
> -int sad(pixel *pix1, intptr_t stride_pix1, pixel *pix2, intptr_t stride_pix2)
> +int sad(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
>  {
>      int sum = 0;
>  
> @@ -109,7 +109,7 @@
>  }
>  
>  template<int lx, int ly>
> -int sad(int16_t *pix1, intptr_t stride_pix1, int16_t *pix2, intptr_t stride_pix2)
> +int sad(const int16_t* pix1, intptr_t stride_pix1, const int16_t* pix2, intptr_t stride_pix2)
>  {
>      int sum = 0;
>  
> @@ -128,7 +128,7 @@
>  }
>  
>  template<int lx, int ly>
> -void sad_x3(pixel *pix1, pixel *pix2, pixel *pix3, pixel *pix4, intptr_t frefstride, int32_t *res)
> +void sad_x3(const pixel* pix1, const pixel* pix2, const pixel* pix3, const pixel* pix4, intptr_t frefstride, int32_t* res)
>  {
>      res[0] = 0;
>      res[1] = 0;
> @@ -150,7 +150,7 @@
>  }
>  
>  template<int lx, int ly>
> -void sad_x4(pixel *pix1, pixel *pix2, pixel *pix3, pixel *pix4, pixel *pix5, intptr_t frefstride, int32_t *res)
> +void sad_x4(const pixel* pix1, const pixel* pix2, const pixel* pix3, const pixel* pix4, const pixel* pix5, intptr_t frefstride, int32_t* res)
>  {
>      res[0] = 0;
>      res[1] = 0;
> @@ -175,7 +175,7 @@
>  }
>  
>  template<int lx, int ly, class T1, class T2>
> -int sse(T1 *pix1, intptr_t stride_pix1, T2 *pix2, intptr_t stride_pix2)
> +int sse(T1* pix1, intptr_t stride_pix1, T2* pix2, intptr_t stride_pix2)
>  {
>      int sum = 0;
>      int iTemp;
> @@ -217,7 +217,7 @@
>      return (a + s) ^ s;
>  }
>  
> -int satd_4x4(pixel *pix1, intptr_t stride_pix1, pixel *pix2, intptr_t stride_pix2)
> +int satd_4x4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
>  {
>      sum2_t tmp[4][2];
>      sum2_t a0, a1, a2, a3, b0, b1;
> @@ -245,7 +245,7 @@
>      return (int)(sum >> 1);
>  }
>  
> -int satd_4x4(int16_t *pix1, intptr_t stride_pix1, int16_t *pix2, intptr_t stride_pix2)
> +int satd_4x4(const int16_t* pix1, intptr_t stride_pix1, const int16_t* pix2, intptr_t stride_pix2)
>  {
>      ssum2_t tmp[4][2];
>      ssum2_t a0, a1, a2, a3, b0, b1;
> @@ -274,7 +274,7 @@
>  }
>  
>  // x264's SWAR version of satd 8x4, performs two 4x4 SATDs at once
> -int satd_8x4(pixel *pix1, intptr_t stride_pix1, pixel *pix2, intptr_t stride_pix2)
> +int satd_8x4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
>  {
>      sum2_t tmp[4][4];
>      sum2_t a0, a1, a2, a3;
> @@ -300,7 +300,7 @@
>  
>  template<int w, int h>
>  // calculate satd in blocks of 4x4
> -int satd4(pixel *pix1, intptr_t stride_pix1, pixel *pix2, intptr_t stride_pix2)
> +int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
>  {
>      int satd = 0;
>  
> @@ -318,7 +318,7 @@
>  
>  template<int w, int h>
>  // calculate satd in blocks of 8x4
> -int satd8(pixel *pix1, intptr_t stride_pix1, pixel *pix2, intptr_t stride_pix2)
> +int satd8(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
>  {
>      int satd = 0;
>  
> @@ -334,7 +334,7 @@
>      return satd;
>  }
>  
> -inline int _sa8d_8x8(pixel *pix1, intptr_t i_pix1, pixel *pix2, intptr_t i_pix2)
> +inline int _sa8d_8x8(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2)
>  {
>      sum2_t tmp[8][4];
>      sum2_t a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3;
> @@ -371,12 +371,12 @@
>      return (int)sum;
>  }
>  
> -int sa8d_8x8(pixel *pix1, intptr_t i_pix1, pixel *pix2, intptr_t i_pix2)
> +int sa8d_8x8(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2)
>  {
>      return (int)((_sa8d_8x8(pix1, i_pix1, pix2, i_pix2) + 2) >> 2);
>  }
>  
> -inline int _sa8d_8x8(int16_t *pix1, intptr_t i_pix1, int16_t *pix2, intptr_t i_pix2)
> +inline int _sa8d_8x8(const int16_t* pix1, intptr_t i_pix1, const int16_t* pix2, intptr_t i_pix2)
>  {
>      ssum2_t tmp[8][4];
>      ssum2_t a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3;
> @@ -413,12 +413,12 @@
>      return (int)sum;
>  }
>  
> -int sa8d_8x8(int16_t *pix1, intptr_t i_pix1, int16_t *pix2, intptr_t i_pix2)
> +int sa8d_8x8(const int16_t* pix1, intptr_t i_pix1, const int16_t* pix2, intptr_t i_pix2)
>  {
>      return (int)((_sa8d_8x8(pix1, i_pix1, pix2, i_pix2) + 2) >> 2);
>  }
>  
> -int sa8d_16x16(pixel *pix1, intptr_t i_pix1, pixel *pix2, intptr_t i_pix2)
> +int sa8d_16x16(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2)
>  {
>      int sum = _sa8d_8x8(pix1, i_pix1, pix2, i_pix2)
>          + _sa8d_8x8(pix1 + 8, i_pix1, pix2 + 8, i_pix2)
> @@ -432,7 +432,7 @@
>  
>  template<int w, int h>
>  // Calculate sa8d in blocks of 8x8
> -int sa8d8(pixel *pix1, intptr_t i_pix1, pixel *pix2, intptr_t i_pix2)
> +int sa8d8(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2)
>  {
>      int cost = 0;
>  
> @@ -449,7 +449,7 @@
>  
>  template<int w, int h>
>  // Calculate sa8d in blocks of 16x16
> -int sa8d16(pixel *pix1, intptr_t i_pix1, pixel *pix2, intptr_t i_pix2)
> +int sa8d16(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2)
>  {
>      int cost = 0;
>  
> @@ -465,7 +465,7 @@
>  }
>  
>  template<int size>
> -int pixel_ssd_s_c(short *a, intptr_t dstride)
> +int pixel_ssd_s_c(const int16_t* a, intptr_t dstride)
>  {
>      int sum = 0;
>      for (int y = 0; y < size; y++)
> @@ -480,7 +480,7 @@
>  }
>  
>  template<int size>
> -void blockfil_s_c(int16_t *dst, intptr_t dstride, int16_t val)
> +void blockfil_s_c(int16_t* dst, intptr_t dstride, int16_t val)
>  {
>      for (int y = 0; y < size; y++)
>      {
> @@ -491,7 +491,7 @@
>      }
>  }
>  
> -void convert16to32_shl(int32_t *dst, int16_t *src, intptr_t stride, int shift, int size)
> +void convert16to32_shl(int32_t* dst, const int16_t* src, intptr_t stride, int shift, int size)
>  {
>      for (int i = 0; i < size; i++)
>      {
> @@ -503,7 +503,7 @@
>  }
>  
>  template<int size>
> -void convert16to32_shr(int32_t *dst, int16_t *src, intptr_t stride, int shift, int offset)
> +void convert16to32_shr(int32_t* dst, const int16_t* src, intptr_t stride, int shift, int offset)
>  {
>      for (int i = 0; i < size; i++)
>      {
> @@ -514,7 +514,7 @@
>      }
>  }
>  
> -void convert32to16_shr(int16_t *dst, int32_t *src, intptr_t stride, int shift, int size)
> +void convert32to16_shr(int16_t* dst, const int32_t* src, intptr_t stride, int shift, int size)
>  {
>      int round = 1 << (shift - 1);
>  
> @@ -530,7 +530,7 @@
>      }
>  }
>  
> -void copy_shr(int16_t *dst, int16_t *src, intptr_t stride, int shift, int size)
> +void copy_shr(int16_t* dst, const int16_t* src, intptr_t stride, int shift, int size)
>  {
>      int round = 1 << (shift - 1);
>  
> @@ -547,7 +547,7 @@
>  }
>  
>  template<int size>
> -void convert32to16_shl(int16_t *dst, int32_t *src, intptr_t stride, int shift)
> +void convert32to16_shl(int16_t* dst, const int32_t* src, intptr_t stride, int shift)
>  {
>      for (int i = 0; i < size; i++)
>      {
> @@ -562,7 +562,7 @@
>  }
>  
>  template<int size>
> -void copy_shl(int16_t *dst, int16_t *src, intptr_t stride, int shift)
> +void copy_shl(int16_t* dst, const int16_t* src, intptr_t stride, int shift)
>  {
>      for (int i = 0; i < size; i++)
>      {
> @@ -577,7 +577,7 @@
>  }
>  
>  template<int blockSize>
> -void getResidual(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride)
> +void getResidual(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride)
>  {
>      for (int y = 0; y < blockSize; y++)
>      {
> @@ -593,7 +593,7 @@
>  }
>  
>  template<int blockSize>
> -void transpose(pixel* dst, pixel* src, intptr_t stride)
> +void transpose(pixel* dst, const pixel* src, intptr_t stride)
>  {
>      for (int k = 0; k < blockSize; k++)
>      {
> @@ -604,7 +604,7 @@
>      }
>  }
>  
> -void weight_sp_c(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)
> +void weight_sp_c(const int16_t* src, pixel* dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)
>  {
>      int x, y;
>  
> @@ -622,7 +622,7 @@
>      }
>  }
>  
> -void weight_pp_c(pixel *src, pixel *dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset)
> +void weight_pp_c(const pixel* src, pixel* dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset)
>  {
>      int x, y;
>  
> @@ -646,7 +646,7 @@
>  }
>  
>  template<int lx, int ly>
> -void pixelavg_pp(pixel* dst, intptr_t dstride, pixel* src0, intptr_t sstride0, pixel* src1, intptr_t sstride1, int)
> +void pixelavg_pp(pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int)
>  {
>      for (int y = 0; y < ly; y++)
>      {
> @@ -661,7 +661,7 @@
>      }
>  }
>  
> -void scale1D_128to64(pixel *dst, pixel *src, intptr_t /*stride*/)
> +void scale1D_128to64(pixel* dst, const pixel* src, intptr_t /*stride*/)
>  {
>      int x;
>  
> @@ -675,9 +675,9 @@
>      }
>  }
>  
> -void scale2D_64to32(pixel *dst, pixel *src, intptr_t stride)
> +void scale2D_64to32(pixel* dst, const pixel* src, intptr_t stride)
>  {
> -    int x, y;
> +    uint32_t x, y;
>  
>      for (y = 0; y < 64; y += 2)
>      {
> @@ -694,13 +694,13 @@
>      }
>  }
>  
> -void frame_init_lowres_core(pixel *src0, pixel *dst0, pixel *dsth, pixel *dstv, pixel *dstc,
> +void frame_init_lowres_core(const pixel* src0, pixel* dst0, pixel* dsth, pixel* dstv, pixel* dstc,
>                              intptr_t src_stride, intptr_t dst_stride, int width, int height)
>  {
>      for (int y = 0; y < height; y++)
>      {
> -        pixel *src1 = src0 + src_stride;
> -        pixel *src2 = src1 + src_stride;
> +        const pixel* src1 = src0 + src_stride;
> +        const pixel* src2 = src1 + src_stride;
>          for (int x = 0; x < width; x++)
>          {
>              // slower than naive bilinear, but matches asm
> @@ -720,7 +720,7 @@
>  }
>  
>  /* structural similarity metric */
> -void ssim_4x4x2_core(const pixel *pix1, intptr_t stride1, const pixel *pix2, intptr_t stride2, int sums[2][4])
> +void ssim_4x4x2_core(const pixel* pix1, intptr_t stride1, const pixel* pix2, intptr_t stride2, int sums[2][4])
>  {
>      for (int z = 0; z < 2; z++)
>      {
> @@ -794,7 +794,7 @@
>  }
>  
>  template<int size>
> -uint64_t pixel_var(pixel *pix, intptr_t i_stride)
> +uint64_t pixel_var(const pixel* pix, intptr_t i_stride)
>  {
>      uint32_t sum = 0, sqr = 0;
>  
> @@ -817,7 +817,7 @@
>  #endif
>  
>  template<int size>
> -int psyCost_pp(pixel *source, intptr_t sstride, pixel *recon, intptr_t rstride)
> +int psyCost_pp(const pixel* source, intptr_t sstride, const pixel* recon, intptr_t rstride)
>  {
>      static pixel zeroBuf[8] /* = { 0 } */;
>  
> @@ -850,7 +850,7 @@
>  }
>  
>  template<int size>
> -int psyCost_ss(int16_t *source, intptr_t sstride, int16_t *recon, intptr_t rstride)
> +int psyCost_ss(const int16_t* source, intptr_t sstride, const int16_t* recon, intptr_t rstride)
>  {
>      static int16_t zeroBuf[8] /* = { 0 } */;
>  
> @@ -882,8 +882,8 @@
>      }
>  }
>  
> -void plane_copy_deinterleave_chroma(pixel *dstu, intptr_t dstuStride, pixel *dstv, intptr_t dstvStride,
> -                                    pixel *src,  intptr_t srcStride, int w, int h)
> +void plane_copy_deinterleave_chroma(pixel* dstu, intptr_t dstuStride, pixel* dstv, intptr_t dstvStride,
> +                                    const pixel* src,  intptr_t srcStride, int w, int h)
>  {
>      for (int y = 0; y < h; y++, dstu += dstuStride, dstv += dstvStride, src += srcStride)
>      {
> @@ -896,7 +896,7 @@
>  }
>  
>  template<int bx, int by>
> -void blockcopy_pp_c(pixel *a, intptr_t stridea, pixel *b, intptr_t strideb)
> +void blockcopy_pp_c(pixel* a, intptr_t stridea, const pixel* b, intptr_t strideb)
>  {
>      for (int y = 0; y < by; y++)
>      {
> @@ -911,7 +911,7 @@
>  }
>  
>  template<int bx, int by>
> -void blockcopy_ss_c(int16_t *a, intptr_t stridea, int16_t *b, intptr_t strideb)
> +void blockcopy_ss_c(int16_t* a, intptr_t stridea, const int16_t* b, intptr_t strideb)
>  {
>      for (int y = 0; y < by; y++)
>      {
> @@ -926,7 +926,7 @@
>  }
>  
>  template<int bx, int by>
> -void blockcopy_sp_c(pixel *a, intptr_t stridea, int16_t *b, intptr_t strideb)
> +void blockcopy_sp_c(pixel* a, intptr_t stridea, const int16_t* b, intptr_t strideb)
>  {
>      for (int y = 0; y < by; y++)
>      {
> @@ -942,7 +942,7 @@
>  }
>  
>  template<int bx, int by>
> -void blockcopy_ps_c(int16_t *a, intptr_t stridea, pixel *b, intptr_t strideb)
> +void blockcopy_ps_c(int16_t* a, intptr_t stridea, const pixel* b, intptr_t strideb)
>  {
>      for (int y = 0; y < by; y++)
>      {
> @@ -957,7 +957,7 @@
>  }
>  
>  template<int bx, int by>
> -void pixel_sub_ps_c(int16_t *a, intptr_t dstride, pixel *b0, pixel *b1, intptr_t sstride0, intptr_t sstride1)
> +void pixel_sub_ps_c(int16_t* a, intptr_t dstride, const pixel* b0, const pixel* b1, intptr_t sstride0, intptr_t sstride1)
>  {
>      for (int y = 0; y < by; y++)
>      {
> @@ -973,7 +973,7 @@
>  }
>  
>  template<int bx, int by>
> -void pixel_add_ps_c(pixel *a, intptr_t dstride, pixel *b0, int16_t *b1, intptr_t sstride0, intptr_t sstride1)
> +void pixel_add_ps_c(pixel* a, intptr_t dstride, const pixel* b0, const int16_t* b1, intptr_t sstride0, intptr_t sstride1)
>  {
>      for (int y = 0; y < by; y++)
>      {
> @@ -989,7 +989,7 @@
>  }
>  
>  template<int bx, int by>
> -void addAvg(int16_t* src0, int16_t* src1, pixel* dst, intptr_t src0Stride, intptr_t src1Stride, intptr_t dstStride)
> +void addAvg(const int16_t* src0, const int16_t* src1, pixel* dst, intptr_t src0Stride, intptr_t src1Stride, intptr_t dstStride)
>  {
>      int shiftNum, offset;
>  
> @@ -1010,7 +1010,7 @@
>      }
>  }
>  
> -void planecopy_cp_c(uint8_t *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int width, int height, int shift)
> +void planecopy_cp_c(const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift)
>  {
>      for (int r = 0; r < height; r++)
>      {
> @@ -1024,7 +1024,7 @@
>      }
>  }
>  
> -void planecopy_sp_c(uint16_t *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask)
> +void planecopy_sp_c(const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask)
>  {
>      for (int r = 0; r < height; r++)
>      {
> @@ -1040,8 +1040,8 @@
>  
>  /* Estimate the total amount of influence on future quality that could be had if we
>   * were to improve the reference samples used to inter predict any given CU. */
> -void estimateCUPropagateCost(int *dst, uint16_t *propagateIn, int32_t *intraCosts, uint16_t *interCosts,
> -                             int32_t *invQscales, double *fpsFactor, int len)
> +void estimateCUPropagateCost(int* dst, const uint16_t* propagateIn, const int32_t* intraCosts, const uint16_t* interCosts,
> +                             const int32_t* invQscales, const double* fpsFactor, int len)
>  {
>      double fps = *fpsFactor / 256;
>  
> @@ -1068,12 +1068,12 @@
>      primitives.extendRowBorder(pic, stride, width, height, marginX);
>  
>      /* copy top row to create above margin */
> -    pixel *top = pic - marginX;
> +    pixel* top = pic - marginX;
>      for (int y = 0; y < marginY; y++)
>          memcpy(top - (y + 1) * stride, top, stride * sizeof(pixel));
>  
>      /* copy bottom row to create below margin */
> -    pixel *bot = pic - marginX + (height - 1) * stride;
> +    pixel* bot = pic - marginX + (height - 1) * stride;
>      for (int y = 0; y < marginY; y++)
>          memcpy(bot + (y + 1) * stride, bot, stride * sizeof(pixel));
>  }
> diff -r 8191e0d02455 -r 9578af829f2a source/common/predict.cpp
> --- a/source/common/predict.cpp	Fri Nov 14 16:14:39 2014 -0600
> +++ b/source/common/predict.cpp	Sun Nov 16 14:32:17 2014 +0900
> @@ -83,7 +83,8 @@
>  {
>      int tuSize = 1 << log2TrSize;
>  
> -    pixel *refLft, *refAbv;
> +    pixel* refLft;
> +    pixel* refAbv;
>  
>      if (!(g_intraFilterFlags[dirMode] & tuSize))
>      {
> @@ -321,13 +322,13 @@
>  
>  void Predict::predInterLumaPixel(Yuv& dstYuv, const PicYuv& refPic, const MV& mv) const
>  {
> -    pixel *dst = dstYuv.getLumaAddr(m_puAbsPartIdx);
> +    pixel* dst = dstYuv.getLumaAddr(m_puAbsPartIdx);
>      intptr_t dstStride = dstYuv.m_size;
>  
>      intptr_t srcStride = refPic.m_stride;
>      intptr_t srcOffset = (mv.x >> 2) + (mv.y >> 2) * srcStride;
>      int partEnum = partitionFromSizes(m_puWidth, m_puHeight);
> -    pixel* src = const_cast<PicYuv&>(refPic).getLumaAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + srcOffset;
> +    const pixel* src = refPic.getLumaAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + srcOffset;
>  
>      int xFrac = mv.x & 0x3;
>      int yFrac = mv.y & 0x3;
> @@ -350,12 +351,12 @@
>  
>  void Predict::predInterLumaShort(ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const
>  {
> -    int16_t *dst = dstSYuv.getLumaAddr(m_puAbsPartIdx);
> +    int16_t* dst = dstSYuv.getLumaAddr(m_puAbsPartIdx);
>      int dstStride = dstSYuv.m_size;
>  
>      intptr_t srcStride = refPic.m_stride;
>      intptr_t srcOffset = (mv.x >> 2) + (mv.y >> 2) * srcStride;
> -    pixel *src = const_cast<PicYuv&>(refPic).getLumaAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + srcOffset;
> +    const pixel* src = refPic.getLumaAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + srcOffset;
>  
>      int xFrac = mv.x & 0x3;
>      int yFrac = mv.y & 0x3;
> @@ -391,8 +392,8 @@
>  
>      intptr_t refOffset = (mv.x >> shiftHor) + (mv.y >> shiftVer) * refStride;
>  
> -    pixel* refCb = const_cast<PicYuv&>(refPic).getCbAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset;
> -    pixel* refCr = const_cast<PicYuv&>(refPic).getCrAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset;
> +    const pixel* refCb = refPic.getCbAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset;
> +    const pixel* refCr = refPic.getCrAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset;
>  
>      pixel* dstCb = dstYuv.getCbAddr(m_puAbsPartIdx);
>      pixel* dstCr = dstYuv.getCrAddr(m_puAbsPartIdx);
> @@ -441,8 +442,8 @@
>  
>      intptr_t refOffset = (mv.x >> shiftHor) + (mv.y >> shiftVer) * refStride;
>  
> -    pixel* refCb = const_cast<PicYuv&>(refPic).getCbAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset;
> -    pixel* refCr = const_cast<PicYuv&>(refPic).getCrAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset;
> +    const pixel* refCb = refPic.getCbAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset;
> +    const pixel* refCr = refPic.getCrAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset;
>  
>      int16_t* dstCb = dstSYuv.getCbAddr(m_puAbsPartIdx);
>      int16_t* dstCr = dstSYuv.getCrAddr(m_puAbsPartIdx);
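
Since refPic comes in as a const PicYuv&, overload resolution now picks
the new const accessors automatically and the casts just fall away. A
sketch with simplified argument names (the real calls pass m_ctuAddr and
m_cuAbsPartIdx + m_puAbsPartIdx, as in the hunks above):

    void f(const PicYuv& refPic, uint32_t ctuAddr, uint32_t partIdx)
    {
        const pixel* p = refPic.getCbAddr(ctuAddr, partIdx); // const overload selected
    }
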
> @@ -492,20 +493,12 @@
>      int w0, w1, offset, shiftNum, shift, round;
>      uint32_t src0Stride, src1Stride, dststride;
>  
> -    pixel* dstY = predYuv.getLumaAddr(m_puAbsPartIdx);
> -    pixel* dstU = predYuv.getCbAddr(m_puAbsPartIdx);
> -    pixel* dstV = predYuv.getCrAddr(m_puAbsPartIdx);
> -
> -    const int16_t* srcY0 = srcYuv0.getLumaAddr(m_puAbsPartIdx);
> -    const int16_t* srcU0 = srcYuv0.getCbAddr(m_puAbsPartIdx);
> -    const int16_t* srcV0 = srcYuv0.getCrAddr(m_puAbsPartIdx);
> -
> -    const int16_t* srcY1 = srcYuv1.getLumaAddr(m_puAbsPartIdx);
> -    const int16_t* srcU1 = srcYuv1.getCbAddr(m_puAbsPartIdx);
> -    const int16_t* srcV1 = srcYuv1.getCrAddr(m_puAbsPartIdx);
> -
>      if (bLuma)
>      {
> +        pixel* dstY = predYuv.getLumaAddr(m_puAbsPartIdx);
> +        const int16_t* srcY0 = srcYuv0.getLumaAddr(m_puAbsPartIdx);
> +        const int16_t* srcY1 = srcYuv1.getLumaAddr(m_puAbsPartIdx);
> +
>          // Luma
>          w0      = wp0[0].w;
>          offset  = wp0[0].o + wp1[0].o;
> @@ -542,6 +535,13 @@
>  
>      if (bChroma)
>      {
> +        pixel* dstU = predYuv.getCbAddr(m_puAbsPartIdx);
> +        pixel* dstV = predYuv.getCrAddr(m_puAbsPartIdx);
> +        const int16_t* srcU0 = srcYuv0.getCbAddr(m_puAbsPartIdx);
> +        const int16_t* srcV0 = srcYuv0.getCrAddr(m_puAbsPartIdx);
> +        const int16_t* srcU1 = srcYuv1.getCbAddr(m_puAbsPartIdx);
> +        const int16_t* srcV1 = srcYuv1.getCrAddr(m_puAbsPartIdx);
> +
>          // Chroma U
>          w0      = wp0[1].w;
>          offset  = wp0[1].o + wp1[1].o;
> @@ -602,19 +602,14 @@
>  /* weighted averaging for uni-pred */
>  void Predict::addWeightUni(Yuv& predYuv, const ShortYuv& srcYuv, const WeightValues wp[3], bool bLuma, bool bChroma) const
>  {
> -    pixel* dstY = predYuv.getLumaAddr(m_puAbsPartIdx);
> -    pixel* dstU = predYuv.getCbAddr(m_puAbsPartIdx);
> -    pixel* dstV = predYuv.getCrAddr(m_puAbsPartIdx);
> -
> -    const int16_t* srcY0 = srcYuv.getLumaAddr(m_puAbsPartIdx);
> -    const int16_t* srcU0 = srcYuv.getCbAddr(m_puAbsPartIdx);
> -    const int16_t* srcV0 = srcYuv.getCrAddr(m_puAbsPartIdx);
> -
>      int w0, offset, shiftNum, shift, round;
>      uint32_t srcStride, dstStride;
>  
>      if (bLuma)
>      {
> +        pixel* dstY = predYuv.getLumaAddr(m_puAbsPartIdx);
> +        const int16_t* srcY0 = srcYuv.getLumaAddr(m_puAbsPartIdx);
> +
>          // Luma
>          w0      = wp[0].w;
>          offset  = wp[0].offset;
> @@ -624,11 +619,16 @@
>          srcStride = srcYuv.m_size;
>          dstStride = predYuv.m_size;
>  
> -        primitives.weight_sp(const_cast<int16_t*>(srcY0), dstY, srcStride, dstStride, m_puWidth, m_puHeight, w0, round, shift, offset);
> +        primitives.weight_sp(srcY0, dstY, srcStride, dstStride, m_puWidth, m_puHeight, w0, round, shift, offset);
>      }
>  
>      if (bChroma)
>      {
> +        pixel* dstU = predYuv.getCbAddr(m_puAbsPartIdx);
> +        pixel* dstV = predYuv.getCrAddr(m_puAbsPartIdx);
> +        const int16_t* srcU0 = srcYuv.getCbAddr(m_puAbsPartIdx);
> +        const int16_t* srcV0 = srcYuv.getCrAddr(m_puAbsPartIdx);
> +
>          // Chroma U
>          w0      = wp[1].w;
>          offset  = wp[1].offset;
> @@ -642,7 +642,7 @@
>          uint32_t cwidth = m_puWidth >> srcYuv.m_hChromaShift;
>          uint32_t cheight = m_puHeight >> srcYuv.m_vChromaShift;
>  
> -        primitives.weight_sp(const_cast<int16_t*>(srcU0), dstU, srcStride, dstStride, cwidth, cheight, w0, round, shift, offset);
> +        primitives.weight_sp(srcU0, dstU, srcStride, dstStride, cwidth, cheight, w0, round, shift, offset);
>  
>          // Chroma V
>          w0     = wp[2].w;
> @@ -650,7 +650,7 @@
>          shift  = wp[2].shift + shiftNum;
>          round  = shift ? (1 << (shift - 1)) : 0;
>  
> -        primitives.weight_sp(const_cast<int16_t*>(srcV0), dstV, srcStride, dstStride, cwidth, cheight, w0, round, shift, offset);
> +        primitives.weight_sp(srcV0, dstV, srcStride, dstStride, cwidth, cheight, w0, round, shift, offset);
>      }
>  }
>  
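
The weight_sp call sites clean up the same way; that works because the
weightp_sp_t typedef itself changes in primitives.h (quoted further down):

    // before
    typedef void (*weightp_sp_t)(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
    // after
    typedef void (*weightp_sp_t)(const int16_t* src, pixel* dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
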
> @@ -765,7 +765,7 @@
>      }
>  
>      int   numIntraNeighbor = 0;
> -    bool *bNeighborFlags = intraNeighbors->bNeighborFlags;
> +    bool* bNeighborFlags = intraNeighbors->bNeighborFlags;
>  
>      uint32_t partIdxLT, partIdxRT, partIdxLB;
>  
> @@ -829,15 +829,15 @@
>      }
>      else // reference samples are partially available
>      {
> -        const bool *bNeighborFlags = intraNeighbors.bNeighborFlags;
> -        const bool *pNeighborFlags;
> +        const bool* bNeighborFlags = intraNeighbors.bNeighborFlags;
> +        const bool* pNeighborFlags;
>          int aboveUnits = intraNeighbors.aboveUnits;
>          int leftUnits = intraNeighbors.leftUnits;
>          int unitWidth = intraNeighbors.unitWidth;
>          int unitHeight = intraNeighbors.unitHeight;
>          int totalSamples = (leftUnits * unitHeight) + ((aboveUnits + 1) * unitWidth);
>          pixel adiLineBuffer[5 * MAX_CU_SIZE];
> -        pixel *adi;
> +        pixel* adi;
>  
>          // Initialize
>          for (int i = 0; i < totalSamples; i++)
> @@ -893,7 +893,7 @@
>              while (next < totalUnits && !bNeighborFlags[next])
>                  next++;
>  
> -            pixel *pAdiLineNext = adiLineBuffer + ((next < leftUnits) ? (next * unitHeight) : (pAdiLineTopRowOffset + (next * unitWidth)));
> +            pixel* pAdiLineNext = adiLineBuffer + ((next < leftUnits) ? (next * unitHeight) : (pAdiLineTopRowOffset + (next * unitWidth)));
>              const pixel refSample = *pAdiLineNext;
>              // Pad unavailable samples with new value
>              int nextOrTop = X265_MIN(next, leftUnits);
> @@ -959,12 +959,12 @@
>          return cuAboveLeft && cuAboveLeft->isIntra(partAboveLeft);
>  }
>  
> -int Predict::isAboveAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxRT, bool *bValidFlags)
> +int Predict::isAboveAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxRT, bool* bValidFlags)
>  {
>      const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT];
>      const uint32_t rasterPartEnd = g_zscanToRaster[partIdxRT] + 1;
>      const uint32_t idxStep = 1;
> -    bool *validFlagPtr = bValidFlags;
> +    bool* validFlagPtr = bValidFlags;
>      int numIntra = 0;
>  
>      for (uint32_t rasterPart = rasterPartBegin; rasterPart < rasterPartEnd; rasterPart += idxStep)
> @@ -985,12 +985,12 @@
>      return numIntra;
>  }
>  
> -int Predict::isLeftAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxLB, bool *bValidFlags)
> +int Predict::isLeftAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxLB, bool* bValidFlags)
>  {
>      const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT];
>      const uint32_t rasterPartEnd = g_zscanToRaster[partIdxLB] + 1;
>      const uint32_t idxStep = cu.m_slice->m_sps->numPartInCUSize;
> -    bool *validFlagPtr = bValidFlags;
> +    bool* validFlagPtr = bValidFlags;
>      int numIntra = 0;
>  
>      for (uint32_t rasterPart = rasterPartBegin; rasterPart < rasterPartEnd; rasterPart += idxStep)
> @@ -1011,10 +1011,10 @@
>      return numIntra;
>  }
>  
> -int Predict::isAboveRightAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxRT, bool *bValidFlags)
> +int Predict::isAboveRightAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxRT, bool* bValidFlags)
>  {
>      const uint32_t numUnitsInPU = g_zscanToRaster[partIdxRT] - g_zscanToRaster[partIdxLT] + 1;
> -    bool *validFlagPtr = bValidFlags;
> +    bool* validFlagPtr = bValidFlags;
>      int numIntra = 0;
>  
>      for (uint32_t offset = 1; offset <= numUnitsInPU; offset++)
> @@ -1035,10 +1035,10 @@
>      return numIntra;
>  }
>  
> -int Predict::isBelowLeftAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxLB, bool *bValidFlags)
> +int Predict::isBelowLeftAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxLB, bool* bValidFlags)
>  {
>      const uint32_t numUnitsInPU = (g_zscanToRaster[partIdxLB] - g_zscanToRaster[partIdxLT]) / cu.m_slice->m_sps->numPartInCUSize + 1;
> -    bool *validFlagPtr = bValidFlags;
> +    bool* validFlagPtr = bValidFlags;
>      int numIntra = 0;
>  
>      for (uint32_t offset = 1; offset <= numUnitsInPU; offset++)
> diff -r 8191e0d02455 -r 9578af829f2a source/common/primitives.h
> --- a/source/common/primitives.h	Fri Nov 14 16:14:39 2014 -0600
> +++ b/source/common/primitives.h	Sun Nov 16 14:32:17 2014 +0900
> @@ -132,74 +132,74 @@
>      return log2Size - 2;
>  }
>  
> -typedef int  (*pixelcmp_t)(pixel *fenc, intptr_t fencstride, pixel *fref, intptr_t frefstride); // fenc is aligned
> -typedef int  (*pixelcmp_ss_t)(int16_t *fenc, intptr_t fencstride, int16_t *fref, intptr_t frefstride);
> -typedef int  (*pixelcmp_sp_t)(int16_t *fenc, intptr_t fencstride, pixel *fref, intptr_t frefstride);
> -typedef int  (*pixel_ssd_s_t)(int16_t *fenc, intptr_t fencstride);
> -typedef void (*pixelcmp_x4_t)(pixel *fenc, pixel *fref0, pixel *fref1, pixel *fref2, pixel *fref3, intptr_t frefstride, int32_t *res);
> -typedef void (*pixelcmp_x3_t)(pixel *fenc, pixel *fref0, pixel *fref1, pixel *fref2, intptr_t frefstride, int32_t *res);
> -typedef void (*blockcpy_sp_t)(int bx, int by, int16_t *dst, intptr_t dstride, pixel *src, intptr_t sstride); // dst is aligned
> -typedef void (*blockcpy_sc_t)(int bx, int by, int16_t *dst, intptr_t dstride, uint8_t *src, intptr_t sstride); // dst is aligned
> -typedef void (*pixelsub_ps_t)(int bx, int by, int16_t *dst, intptr_t dstride, pixel *src0, pixel *src1, intptr_t sstride0, intptr_t sstride1);
> -typedef void (*pixelavg_pp_t)(pixel *dst, intptr_t dstride, pixel *src0, intptr_t sstride0, pixel *src1, intptr_t sstride1, int weight);
> -typedef void (*blockfill_s_t)(int16_t *dst, intptr_t dstride, int16_t val);
> +typedef int  (*pixelcmp_t)(const pixel* fenc, intptr_t fencstride, const pixel* fref, intptr_t frefstride); // fenc is aligned
> +typedef int  (*pixelcmp_ss_t)(const int16_t* fenc, intptr_t fencstride, const int16_t* fref, intptr_t frefstride);
> +typedef int  (*pixelcmp_sp_t)(const int16_t* fenc, intptr_t fencstride, const pixel* fref, intptr_t frefstride);
> +typedef int  (*pixel_ssd_s_t)(const int16_t* fenc, intptr_t fencstride);
> +typedef void (*pixelcmp_x4_t)(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
> +typedef void (*pixelcmp_x3_t)(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
> +typedef void (*blockcpy_sp_t)(int bx, int by, int16_t* dst, intptr_t dstride, const pixel* src, intptr_t sstride); // dst is aligned
> +typedef void (*blockcpy_sc_t)(int bx, int by, int16_t* dst, intptr_t dstride, const uint8_t* src, intptr_t sstride); // dst is aligned
> +typedef void (*pixelsub_ps_t)(int bx, int by, int16_t* dst, intptr_t dstride, const pixel* src0, const pixel* src1, intptr_t sstride0, intptr_t sstride1);
> +typedef void (*pixelavg_pp_t)(pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int weight);
> +typedef void (*blockfill_s_t)(int16_t* dst, intptr_t dstride, int16_t val);
>  
> -typedef void (*intra_pred_t)(pixel* dst, intptr_t dstStride, pixel *refLeft, pixel *refAbove, int dirMode, int bFilter);
> -typedef void (*intra_allangs_t)(pixel *dst, pixel *above0, pixel *left0, pixel *above1, pixel *left1, int bLuma);
> +typedef void (*intra_pred_t)(pixel* dst, intptr_t dstStride, pixel* refLeft, pixel* refAbove, int dirMode, int bFilter);
> +typedef void (*intra_allangs_t)(pixel* dst, pixel* above0, pixel* left0, pixel* above1, pixel* left1, int bLuma);
>  
> -typedef void (*cvt16to32_shl_t)(int32_t *dst, int16_t *src, intptr_t, int, int);
> -typedef void (*cvt16to32_shr_t)(int32_t *dst, int16_t *src, intptr_t, int, int);
> -typedef void (*cvt32to16_shr_t)(int16_t *dst, int32_t *src, intptr_t, int, int);
> -typedef void (*cvt32to16_shl_t)(int16_t *dst, int32_t *src, intptr_t, int);
> -typedef uint32_t (*copy_cnt_t)(int16_t* coeff, int16_t* residual, intptr_t stride);
> -typedef void (*copy_shr_t)(int16_t *dst, int16_t *src, intptr_t stride, int shift, int size);
> -typedef void (*copy_shl_t)(int16_t *dst, int16_t *src, intptr_t stride, int shift);
> +typedef void (*cvt16to32_shl_t)(int32_t* dst, const int16_t* src, intptr_t, int, int);
> +typedef void (*cvt16to32_shr_t)(int32_t* dst, const int16_t* src, intptr_t, int, int);
> +typedef void (*cvt32to16_shr_t)(int16_t* dst, const int32_t* src, intptr_t, int, int);
> +typedef void (*cvt32to16_shl_t)(int16_t* dst, const int32_t* src, intptr_t, int);
> +typedef uint32_t (*copy_cnt_t)(int16_t* coeff, const int16_t* residual, intptr_t stride);
> +typedef void (*copy_shr_t)(int16_t* dst, const int16_t* src, intptr_t stride, int shift, int size);
> +typedef void (*copy_shl_t)(int16_t* dst, const int16_t* src, intptr_t stride, int shift);
>  
> -typedef void (*dct_t)(int16_t *src, int32_t *dst, intptr_t stride);
> -typedef void (*idct_t)(int32_t *src, int16_t *dst, intptr_t stride);
> -typedef void (*denoiseDct_t)(int32_t* dctCoef, uint32_t* resSum, uint16_t* offset, int numCoeff);
> +typedef void (*dct_t)(const int16_t* src, int32_t* dst, intptr_t stride);
> +typedef void (*idct_t)(const int32_t* src, int16_t* dst, intptr_t stride);
> +typedef void (*denoiseDct_t)(int32_t* dctCoef, uint32_t* resSum, const uint16_t* offset, int numCoeff);
>  
> -typedef void (*calcresidual_t)(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
> -typedef void (*transpose_t)(pixel* dst, pixel* src, intptr_t stride);
> -typedef uint32_t (*quant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int16_t *qCoef, int qBits, int add, int numCoeff);
> -typedef uint32_t (*nquant_t)(int32_t *coef, int32_t *quantCoeff, int16_t *qCoef, int qBits, int add, int numCoeff);
> -typedef void (*dequant_scaling_t)(const int16_t* src, const int32_t *dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift);
> +typedef void (*calcresidual_t)(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
> +typedef void (*transpose_t)(pixel* dst, const pixel* src, intptr_t stride);
> +typedef uint32_t (*quant_t)(const int32_t* coef, const int32_t* quantCoeff, int32_t* deltaU, int16_t* qCoef, int qBits, int add, int numCoeff);
> +typedef uint32_t (*nquant_t)(const int32_t* coef, const int32_t* quantCoeff, int16_t* qCoef, int qBits, int add, int numCoeff);
> +typedef void (*dequant_scaling_t)(const int16_t* src, const int32_t* dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift);
>  typedef void (*dequant_normal_t)(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift);
> -typedef int  (*count_nonzero_t)(const int16_t *quantCoeff, int numCoeff);
> +typedef int  (*count_nonzero_t)(const int16_t* quantCoeff, int numCoeff);
>  
> -typedef void (*weightp_pp_t)(pixel *src, pixel *dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset);
> -typedef void (*weightp_sp_t)(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
> -typedef void (*scale_t)(pixel *dst, pixel *src, intptr_t stride);
> -typedef void (*downscale_t)(pixel *src0, pixel *dstf, pixel *dsth, pixel *dstv, pixel *dstc,
> +typedef void (*weightp_pp_t)(const pixel* src, pixel* dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset);
> +typedef void (*weightp_sp_t)(const int16_t* src, pixel* dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
> +typedef void (*scale_t)(pixel* dst, const pixel* src, intptr_t stride);
> +typedef void (*downscale_t)(const pixel* src0, pixel* dstf, pixel* dsth, pixel* dstv, pixel* dstc,
>                              intptr_t src_stride, intptr_t dst_stride, int width, int height);
>  typedef void (*extendCURowBorder_t)(pixel* txt, intptr_t stride, int width, int height, int marginX);
> -typedef void (*ssim_4x4x2_core_t)(const pixel *pix1, intptr_t stride1, const pixel *pix2, intptr_t stride2, int sums[2][4]);
> +typedef void (*ssim_4x4x2_core_t)(const pixel* pix1, intptr_t stride1, const pixel* pix2, intptr_t stride2, int sums[2][4]);
>  typedef float (*ssim_end4_t)(int sum0[5][4], int sum1[5][4], int width);
> -typedef uint64_t (*var_t)(pixel *pix, intptr_t stride);
> -typedef void (*plane_copy_deinterleave_t)(pixel *dstu, intptr_t dstuStride, pixel *dstv, intptr_t dstvStride, pixel *src, intptr_t srcStride, int w, int h);
> +typedef uint64_t (*var_t)(const pixel* pix, intptr_t stride);
> +typedef void (*plane_copy_deinterleave_t)(pixel* dstu, intptr_t dstuStride, pixel* dstv, intptr_t dstvStride, const pixel* src, intptr_t srcStride, int w, int h);
>  
> -typedef void (*filter_pp_t) (pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
> -typedef void (*filter_hps_t) (pixel *src, intptr_t srcStride, int16_t *dst, intptr_t dstStride, int coeffIdx, int isRowExt);
> -typedef void (*filter_ps_t) (pixel *src, intptr_t srcStride, int16_t *dst, intptr_t dstStride, int coeffIdx);
> -typedef void (*filter_sp_t) (int16_t *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx);
> -typedef void (*filter_ss_t) (int16_t *src, intptr_t srcStride, int16_t *dst, intptr_t dstStride, int coeffIdx);
> -typedef void (*filter_hv_pp_t) (pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int idxX, int idxY);
> -typedef void (*filter_p2s_t)(pixel *src, intptr_t srcStride, int16_t *dst, int width, int height);
> +typedef void (*filter_pp_t) (const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
> +typedef void (*filter_hps_t) (const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
> +typedef void (*filter_ps_t) (const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
> +typedef void (*filter_sp_t) (const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
> +typedef void (*filter_ss_t) (const int16_t* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
> +typedef void (*filter_hv_pp_t) (const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int idxX, int idxY);
> +typedef void (*filter_p2s_t)(const pixel* src, intptr_t srcStride, int16_t* dst, int width, int height);
>  
> -typedef void (*copy_pp_t)(pixel *dst, intptr_t dstride, pixel *src, intptr_t sstride); // dst is aligned
> -typedef void (*copy_sp_t)(pixel *dst, intptr_t dstStride, int16_t *src, intptr_t srcStride);
> -typedef void (*copy_ps_t)(int16_t *dst, intptr_t dstStride, pixel *src, intptr_t srcStride);
> -typedef void (*copy_ss_t)(int16_t *dst, intptr_t dstStride, int16_t *src, intptr_t srcStride);
> +typedef void (*copy_pp_t)(pixel* dst, intptr_t dstride, const pixel* src, intptr_t sstride); // dst is aligned
> +typedef void (*copy_sp_t)(pixel* dst, intptr_t dstStride, const int16_t* src, intptr_t srcStride);
> +typedef void (*copy_ps_t)(int16_t* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride);
> +typedef void (*copy_ss_t)(int16_t* dst, intptr_t dstStride, const int16_t* src, intptr_t srcStride);
>  
> -typedef void (*pixel_sub_ps_t)(int16_t *dst, intptr_t dstride, pixel *src0, pixel *src1, intptr_t sstride0, intptr_t sstride1);
> -typedef void (*pixel_add_ps_t)(pixel *a, intptr_t dstride, pixel *b0, int16_t *b1, intptr_t sstride0, intptr_t sstride1);
> -typedef void (*addAvg_t)(int16_t* src0, int16_t* src1, pixel* dst, intptr_t src0Stride, intptr_t src1Stride, intptr_t dstStride);
> +typedef void (*pixel_sub_ps_t)(int16_t* dst, intptr_t dstride, const pixel* src0, const pixel* src1, intptr_t sstride0, intptr_t sstride1);
> +typedef void (*pixel_add_ps_t)(pixel* a, intptr_t dstride, const pixel* b0, const int16_t* b1, intptr_t sstride0, intptr_t sstride1);
> +typedef void (*addAvg_t)(const int16_t* src0, const int16_t* src1, pixel* dst, intptr_t src0Stride, intptr_t src1Stride, intptr_t dstStride);
>  
> -typedef void (*saoCuOrgE0_t)(pixel * rec, int8_t * offsetEo, int width, int8_t signLeft);
> -typedef void (*planecopy_cp_t) (uint8_t *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int width, int height, int shift);
> -typedef void (*planecopy_sp_t) (uint16_t *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask);
> +typedef void (*saoCuOrgE0_t)(pixel* rec, int8_t*  offsetEo, int width, int8_t signLeft);
> +typedef void (*planecopy_cp_t) (const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift);
> +typedef void (*planecopy_sp_t) (const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask);
>  
> -typedef void (*cutree_propagate_cost) (int *dst, uint16_t *propagateIn, int32_t *intraCosts, uint16_t *interCosts, int32_t *invQscales, double *fpsFactor, int len);
> +typedef void (*cutree_propagate_cost) (int* dst, const uint16_t* propagateIn, const int32_t* intraCosts, const uint16_t* interCosts, const int32_t* invQscales, const double* fpsFactor, int len);
>  
>  /* Define a structure containing function pointers to optimized encoder
>   * primitives.  Each pointer can reference either an assembly routine,
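
One caveat: these typedefs are the contract for both the C reference
functions and the assembly prototypes, and function pointer assignment
requires an exact signature match (parameter types get no implicit
qualification conversion), so every implementation has to gain the const
qualifiers in the same change. Illustrative sketch, dct4_old being
hypothetical:

    #include <stdint.h>

    typedef void (*dct_t)(const int16_t* src, int32_t* dst, intptr_t stride);

    void dct4_c(const int16_t* src, int32_t* dst, intptr_t stride);
    void dct4_old(int16_t* src, int32_t* dst, intptr_t stride);

    dct_t f = dct4_c;   // OK: exact match
    dct_t g = dct4_old; // compile error: incompatible function pointer types
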
> diff -r 8191e0d02455 -r 9578af829f2a source/common/quant.cpp
> --- a/source/common/quant.cpp	Fri Nov 14 16:14:39 2014 -0600
> +++ b/source/common/quant.cpp	Sun Nov 16 14:32:17 2014 +0900
> @@ -50,7 +50,7 @@
>      return y + ((x - y) & ((x - y) >> (sizeof(int) * CHAR_BIT - 1))); // min(x, y)
>  }
>  
> -inline int getICRate(uint32_t absLevel, int32_t diffLevel, const int *greaterOneBits, const int *levelAbsBits, uint32_t absGoRice, uint32_t c1c2Idx)
> +inline int getICRate(uint32_t absLevel, int32_t diffLevel, const int* greaterOneBits, const int* levelAbsBits, uint32_t absGoRice, uint32_t c1c2Idx)
>  {
>      X265_CHECK(c1c2Idx <= 3, "c1c2Idx check failure\n");
>      X265_CHECK(absGoRice <= 4, "absGoRice check failure\n");
> @@ -106,7 +106,7 @@
>  }
>  
>  /* Calculates the cost for specific absolute transform level */
> -inline uint32_t getICRateCost(uint32_t absLevel, int32_t diffLevel, const int *greaterOneBits, const int *levelAbsBits, uint32_t absGoRice, uint32_t c1c2Idx)
> +inline uint32_t getICRateCost(uint32_t absLevel, int32_t diffLevel, const int* greaterOneBits, const int* levelAbsBits, uint32_t absGoRice, uint32_t c1c2Idx)
>  {
>      X265_CHECK(absLevel, "absLevel should not be zero\n");
>  
> @@ -216,7 +216,7 @@
>  uint32_t Quant::signBitHidingHDQ(int16_t* coeff, int32_t* deltaU, uint32_t numSig, const TUEntropyCodingParameters &codeParams)
>  {
>      const uint32_t log2TrSizeCG = codeParams.log2TrSizeCG;
> -    const uint16_t *scan = codeParams.scan;
> +    const uint16_t* scan = codeParams.scan;
>      bool lastCG = true;
>  
>      for (int cg = (1 << (log2TrSizeCG * 2)) - 1; cg >= 0; cg--)
> @@ -322,7 +322,7 @@
>      return numSig;
>  }
>  
> -uint32_t Quant::transformNxN(CUData& cu, pixel* fenc, uint32_t fencStride, int16_t* residual, uint32_t stride,
> +uint32_t Quant::transformNxN(const CUData& cu, const pixel* fenc, uint32_t fencStride, const int16_t* residual, uint32_t stride,
>                               coeff_t* coeff, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool useTransformSkip)
>  {
>      if (cu.m_tqBypass[absPartIdx])
> @@ -389,7 +389,7 @@
>          int scalingListType = ttype + (isLuma ? 3 : 0);
>          int rem = m_qpParam[ttype].rem;
>          int per = m_qpParam[ttype].per;
> -        int32_t *quantCoeff = m_scalingList->m_quantCoef[log2TrSize - 2][scalingListType][rem];
> +        const int32_t* quantCoeff = m_scalingList->m_quantCoef[log2TrSize - 2][scalingListType][rem];
>  
>          int qbits = QUANT_SHIFT + per + transformShift;
>          int add = (cu.m_slice->m_sliceType == I_SLICE ? 171 : 85) << (qbits - 9);
> @@ -408,7 +408,7 @@
>      }
>  }
>  
> -void Quant::invtransformNxN(bool transQuantBypass, int16_t* residual, uint32_t stride, coeff_t* coeff,
> +void Quant::invtransformNxN(bool transQuantBypass, int16_t* residual, uint32_t stride, const coeff_t* coeff,
>                              uint32_t log2TrSize, TextType ttype, bool bIntra, bool useTransformSkip, uint32_t numSig)
>  {
>      if (transQuantBypass)
> @@ -427,7 +427,7 @@
>      if (m_scalingList->m_bEnabled)
>      {
>          int scalingListType = (bIntra ? 0 : 3) + ttype;
> -        int32_t *dequantCoef = m_scalingList->m_dequantCoef[log2TrSize - 2][scalingListType][rem];
> +        const int32_t* dequantCoef = m_scalingList->m_dequantCoef[log2TrSize - 2][scalingListType][rem];
>          primitives.dequant_scaling(coeff, dequantCoef, m_resiDctCoeff, numCoeff, per, shift);
>      }
>      else
> @@ -475,7 +475,7 @@
>  
>  /* Rate distortion optimized quantization for entropy coding engines using
>   * probability models like CABAC */
> -uint32_t Quant::rdoQuant(CUData& cu, int16_t* dstCoeff, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool usePsy)
> +uint32_t Quant::rdoQuant(const CUData& cu, int16_t* dstCoeff, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool usePsy)
>  {
>      int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; /* Represents scaling through forward transform */
>      int scalingListType = (cu.isIntra(absPartIdx) ? 0 : 3) + ttype;
> @@ -486,7 +486,7 @@
>      int per = m_qpParam[ttype].per;
>      int qbits = QUANT_SHIFT + per + transformShift; /* Right shift of non-RDOQ quantizer level = (coeff*Q + offset)>>q_bits */
>      int add = (1 << (qbits - 1));
> -    int32_t *qCoef = m_scalingList->m_quantCoef[log2TrSize - 2][scalingListType][rem];
> +    const int32_t* qCoef = m_scalingList->m_quantCoef[log2TrSize - 2][scalingListType][rem];
>  
>      int numCoeff = 1 << (log2TrSize * 2);
>  
> @@ -503,7 +503,7 @@
>      /* unquant constants for measuring distortion. Scaling list quant coefficients have a (1 << 4)
>       * scale applied that must be removed during unquant. Note that in real dequant there is clipping
>       * at several stages. We skip the clipping for simplicity when measuring RD cost */
> -    int32_t *unquantScale = m_scalingList->m_dequantCoef[log2TrSize - 2][scalingListType][rem];
> +    const int32_t* unquantScale = m_scalingList->m_dequantCoef[log2TrSize - 2][scalingListType][rem];
>      int unquantShift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - transformShift + (m_scalingList->m_bEnabled ? 4 : 0);
>      int unquantRound = (unquantShift > per) ? 1 << (unquantShift - per - 1) : 0;
>      int scaleBits = SCALE_BITS - 2 * transformShift;
> @@ -616,8 +616,8 @@
>                  // coefficient level estimation
>                  const uint32_t oneCtx = 4 * ctxSet + c1;
>                  const uint32_t absCtx = ctxSet + c2;
> -                const int *greaterOneBits = estBitsSbac.greaterOneBits[oneCtx];
> -                const int *levelAbsBits = estBitsSbac.levelAbsBits[absCtx];
> +                const int* greaterOneBits = estBitsSbac.greaterOneBits[oneCtx];
> +                const int* levelAbsBits = estBitsSbac.levelAbsBits[absCtx];
>  
>                  uint16_t level = 0;
>                  uint32_t sigCoefBits = 0;
> diff -r 8191e0d02455 -r 9578af829f2a source/common/quant.h
> --- a/source/common/quant.h	Fri Nov 14 16:14:39 2014 -0600
> +++ b/source/common/quant.h	Sun Nov 16 14:32:17 2014 +0900
> @@ -104,10 +104,10 @@
>      /* CU setup */
>      void setQPforQuant(const CUData& ctu);
>  
> -    uint32_t transformNxN(CUData& cu, pixel *fenc, uint32_t fencstride, int16_t* residual, uint32_t stride, coeff_t* coeff,
> +    uint32_t transformNxN(const CUData& cu, const pixel* fenc, uint32_t fencstride, const int16_t* residual, uint32_t stride, coeff_t* coeff,
>                            uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool useTransformSkip);
>  
> -    void invtransformNxN(bool transQuantBypass, int16_t* residual, uint32_t stride, coeff_t* coeff,
> +    void invtransformNxN(bool transQuantBypass, int16_t* residual, uint32_t stride, const coeff_t* coeff,
>                           uint32_t log2TrSize, TextType ttype, bool bIntra, bool useTransformSkip, uint32_t numSig);
>  
>      /* static methods shared with entropy.cpp */
> @@ -121,7 +121,7 @@
>  
>      uint32_t signBitHidingHDQ(int16_t* qcoeff, int32_t* deltaU, uint32_t numSig, const TUEntropyCodingParameters &codingParameters);
>  
> -    uint32_t rdoQuant(CUData& cu, int16_t* dstCoeff, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool usePsy);
> +    uint32_t rdoQuant(const CUData& cu, int16_t* dstCoeff, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool usePsy);
>      inline uint32_t getRateLast(uint32_t posx, uint32_t posy) const;
>  };
>  
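
The const CUData& on transformNxN()/rdoQuant() is the part most likely to
ripple: it only compiles if every CUData accessor the quantizer touches is
itself const-qualified. Roughly (hypothetical stand-in, not the real class):

    struct CUDataSketch
    {
        uint8_t m_tqBypass[256];
        bool isIntra(uint32_t absPartIdx) const; // must be a const member
    };

    // inside transformNxN(const CUDataSketch& cu, ...):
    //   if (cu.m_tqBypass[absPartIdx]) { ... }  // reads still fine
    //   cu.m_tqBypass[absPartIdx] = 1;          // now a compile error

Happily that also documents that the quantizer never mutates the CU.
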
> diff -r 8191e0d02455 -r 9578af829f2a source/common/shortyuv.cpp
> --- a/source/common/shortyuv.cpp	Fri Nov 14 16:14:39 2014 -0600
> +++ b/source/common/shortyuv.cpp	Sun Nov 16 14:32:17 2014 +0900
> @@ -84,7 +84,7 @@
>      const int16_t* src = getLumaAddr(absPartIdx);
>      int16_t* dst = dstYuv.getLumaAddr(absPartIdx);
>  
> -    primitives.square_copy_ss[log2Size - 2](dst, dstYuv.m_size, const_cast<int16_t*>(src), m_size);
> +    primitives.square_copy_ss[log2Size - 2](dst, dstYuv.m_size, src, m_size);
>  }
>  
>  void ShortYuv::copyPartToPartLuma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2Size) const
> @@ -92,7 +92,7 @@
>      const int16_t* src = getLumaAddr(absPartIdx);
>      pixel* dst = dstYuv.getLumaAddr(absPartIdx);
>  
> -    primitives.square_copy_sp[log2Size - 2](dst, dstYuv.m_size, const_cast<int16_t*>(src), m_size);
> +    primitives.square_copy_sp[log2Size - 2](dst, dstYuv.m_size, src, m_size);
>  }
>  
>  void ShortYuv::copyPartToPartChroma(ShortYuv& dstYuv, uint32_t absPartIdx, uint32_t log2SizeL) const
> @@ -103,8 +103,8 @@
>      int16_t* dstU = dstYuv.getCbAddr(absPartIdx);
>      int16_t* dstV = dstYuv.getCrAddr(absPartIdx);
>  
> -    primitives.chroma[m_csp].copy_ss[part](dstU, dstYuv.m_csize, const_cast<int16_t*>(srcU), m_csize);
> -    primitives.chroma[m_csp].copy_ss[part](dstV, dstYuv.m_csize, const_cast<int16_t*>(srcV), m_csize);
> +    primitives.chroma[m_csp].copy_ss[part](dstU, dstYuv.m_csize, srcU, m_csize);
> +    primitives.chroma[m_csp].copy_ss[part](dstV, dstYuv.m_csize, srcV, m_csize);
>  }
>  
>  void ShortYuv::copyPartToPartChroma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2SizeL) const
> @@ -115,6 +115,6 @@
>      pixel* dstU = dstYuv.getCbAddr(absPartIdx);
>      pixel* dstV = dstYuv.getCrAddr(absPartIdx);
>  
> -    primitives.chroma[m_csp].copy_sp[part](dstU, dstYuv.m_csize, const_cast<int16_t*>(srcU), m_csize);
> -    primitives.chroma[m_csp].copy_sp[part](dstV, dstYuv.m_csize, const_cast<int16_t*>(srcV), m_csize);
> +    primitives.chroma[m_csp].copy_sp[part](dstU, dstYuv.m_csize, srcU, m_csize);
> +    primitives.chroma[m_csp].copy_sp[part](dstV, dstYuv.m_csize, srcV, m_csize);
>  }
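
Nice to see the ShortYuv const_casts go. For anyone wondering why this just
works: the const accessor overload now lines up with the const-aware copy
primitives. Paraphrasing shortyuv.h from memory (offset math simplified):

    struct ShortYuvSketch
    {
        int16_t* m_buf[3];
        intptr_t m_size;
        int16_t*       getLumaAddr(uint32_t idx)       { return m_buf[0] + idx; }
        const int16_t* getLumaAddr(uint32_t idx) const { return m_buf[0] + idx; }
    };

Inside a const member like copyPartToPartLuma(...) const, overload resolution
picks the const version, and the resulting const int16_t* feeds copy_ss
directly, no cast needed.
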
> diff -r 8191e0d02455 -r 9578af829f2a source/common/vec/dct-sse3.cpp
> --- a/source/common/vec/dct-sse3.cpp	Fri Nov 14 16:14:39 2014 -0600
> +++ b/source/common/vec/dct-sse3.cpp	Sun Nov 16 14:32:17 2014 +0900
> @@ -52,7 +52,7 @@
>      {  83,  36,  83,  36, 83,  36, 83,  36 },
>      {  36, -83,  36, -83, 36, -83, 36, -83 }
>  };
> -void idct8(int32_t *src, int16_t *dst, intptr_t stride)
> +void idct8(const int32_t* src, int16_t* dst, intptr_t stride)
>  {
>      __m128i m128iS0, m128iS1, m128iS2, m128iS3, m128iS4, m128iS5, m128iS6, m128iS7, m128iAdd, m128Tmp0, m128Tmp1, m128Tmp2, m128Tmp3, E0h, E1h, E2h, E3h, E0l, E1l, E2l, E3l, O0h, O1h, O2h, O3h, O0l, O1l, O2l, O3l, EE0l, EE1l, E00l, E01l, EE0h, EE1h, E00h, E01h;
>      __m128i T00, T01, T02, T03, T04, T05, T06, T07;
> @@ -305,7 +305,7 @@
>      _mm_storeh_pi((__m64*)&dst[7 * stride +  4], _mm_castsi128_ps(T11));
>  }
>  
> -void idct16(int32_t *src, int16_t *dst, intptr_t stride)
> +void idct16(const int32_t* src, int16_t* dst, intptr_t stride)
>  {
>      const __m128i c16_p87_p90   = _mm_set1_epi32(0x0057005A); //row0 87high - 90low address
>      const __m128i c16_p70_p80   = _mm_set1_epi32(0x00460050);
> @@ -716,7 +716,7 @@
>      _mm_store_si128((__m128i*)&dst[15 * stride + 8], in15[1]);
>  }
>  
> -void idct32(int32_t *src, int16_t *dst, intptr_t stride)
> +void idct32(const int32_t* src, int16_t* dst, intptr_t stride)
>  {
>      //Odd
>      const __m128i c16_p90_p90   = _mm_set1_epi32(0x005A005A); //column 0
> diff -r 8191e0d02455 -r 9578af829f2a source/common/vec/dct-ssse3.cpp
> --- a/source/common/vec/dct-ssse3.cpp	Fri Nov 14 16:14:39 2014 -0600
> +++ b/source/common/vec/dct-ssse3.cpp	Sun Nov 16 14:32:17 2014 +0900
> @@ -100,7 +100,7 @@
>  #undef MAKE_COEF
>  };
>  
> -void dct16(int16_t *src, int32_t *dst, intptr_t stride)
> +void dct16(const int16_t* src, int32_t* dst, intptr_t stride)
>  {
>      // Const
>      __m128i c_4     = _mm_set1_epi32(4);
> @@ -657,7 +657,7 @@
>  #undef MAKE_COEF16
>  };
>  
> -void dct32(int16_t *src, int32_t *dst, intptr_t stride)
> +void dct32(const int16_t* src, int32_t* dst, intptr_t stride)
>  {
>      // Const
>      __m128i c_8     = _mm_set1_epi32(8);
> diff -r 8191e0d02455 -r 9578af829f2a source/common/x86/blockcopy8.h
> --- a/source/common/x86/blockcopy8.h	Fri Nov 14 16:14:39 2014 -0600
> +++ b/source/common/x86/blockcopy8.h	Sun Nov 16 14:32:17 2014 +0900
> @@ -24,48 +24,48 @@
>  #ifndef X265_BLOCKCOPY8_H
>  #define X265_BLOCKCOPY8_H
>  
> -void x265_cvt32to16_shr_sse2(int16_t * dst, int *src, intptr_t, int, int);
> -void x265_cvt32to16_shl_4_sse2(int16_t * dst, int *src, intptr_t, int);
> -void x265_cvt32to16_shl_8_sse2(int16_t * dst, int *src, intptr_t, int);
> -void x265_cvt32to16_shl_16_sse2(int16_t * dst, int *src, intptr_t, int);
> -void x265_cvt32to16_shl_32_sse2(int16_t * dst, int *src, intptr_t, int);
> -void x265_cvt32to16_shl_4_avx2(int16_t * dst, int *src, intptr_t, int);
> -void x265_cvt32to16_shl_8_avx2(int16_t * dst, int *src, intptr_t, int);
> -void x265_cvt32to16_shl_16_avx2(int16_t * dst, int *src, intptr_t, int);
> -void x265_cvt32to16_shl_32_avx2(int16_t * dst, int *src, intptr_t, int);
> -void x265_cvt16to32_shl_sse4(int32_t * dst, int16_t * src, intptr_t, int32_t, int32_t);
> -void x265_cvt16to32_shr_4_sse4(int32_t * dst, int16_t * src, intptr_t, int32_t, int32_t);
> -void x265_cvt16to32_shr_8_sse4(int32_t * dst, int16_t * src, intptr_t, int32_t, int32_t);
> -void x265_cvt16to32_shr_16_sse4(int32_t * dst, int16_t * src, intptr_t, int32_t, int32_t);
> -void x265_cvt16to32_shr_32_sse4(int32_t * dst, int16_t * src, intptr_t, int32_t, int32_t);
> -void x265_copy_shr_sse4(int16_t * dst, int16_t *src, intptr_t, int, int);
> -void x265_copy_shl_4_sse2(int16_t * dst, int16_t *src, intptr_t, int);
> -void x265_copy_shl_8_sse2(int16_t * dst, int16_t *src, intptr_t, int);
> -void x265_copy_shl_16_sse2(int16_t * dst, int16_t *src, intptr_t, int);
> -void x265_copy_shl_32_sse2(int16_t * dst, int16_t *src, intptr_t, int);
> -uint32_t x265_copy_cnt_4_sse4(int16_t * dst, int16_t * src, intptr_t);
> -uint32_t x265_copy_cnt_8_sse4(int16_t * dst, int16_t * src, intptr_t);
> -uint32_t x265_copy_cnt_16_sse4(int16_t * dst, int16_t * src, intptr_t);
> -uint32_t x265_copy_cnt_32_sse4(int16_t * dst, int16_t * src, intptr_t);
> -uint32_t x265_copy_cnt_4_avx2(int16_t * dst, int16_t * src, intptr_t);
> -uint32_t x265_copy_cnt_8_avx2(int16_t * dst, int16_t * src, intptr_t);
> -uint32_t x265_copy_cnt_16_avx2(int16_t * dst, int16_t * src, intptr_t);
> -uint32_t x265_copy_cnt_32_avx2(int16_t * dst, int16_t * src, intptr_t);
> +void x265_cvt32to16_shr_sse2(int16_t* dst, const int32_t* src, intptr_t, int, int);
> +void x265_cvt32to16_shl_4_sse2(int16_t* dst, const int32_t* src, intptr_t, int);
> +void x265_cvt32to16_shl_8_sse2(int16_t* dst, const int32_t* src, intptr_t, int);
> +void x265_cvt32to16_shl_16_sse2(int16_t* dst, const int32_t* src, intptr_t, int);
> +void x265_cvt32to16_shl_32_sse2(int16_t* dst, const int32_t* src, intptr_t, int);
> +void x265_cvt32to16_shl_4_avx2(int16_t* dst, const int32_t* src, intptr_t, int);
> +void x265_cvt32to16_shl_8_avx2(int16_t* dst, const int32_t* src, intptr_t, int);
> +void x265_cvt32to16_shl_16_avx2(int16_t* dst, const int32_t* src, intptr_t, int);
> +void x265_cvt32to16_shl_32_avx2(int16_t* dst, const int32_t* src, intptr_t, int);
> +void x265_cvt16to32_shl_sse4(int32_t* dst, const int16_t* src, intptr_t, int32_t, int32_t);
> +void x265_cvt16to32_shr_4_sse4(int32_t* dst, const int16_t* src, intptr_t, int32_t, int32_t);
> +void x265_cvt16to32_shr_8_sse4(int32_t* dst, const int16_t* src, intptr_t, int32_t, int32_t);
> +void x265_cvt16to32_shr_16_sse4(int32_t* dst, const int16_t* src, intptr_t, int32_t, int32_t);
> +void x265_cvt16to32_shr_32_sse4(int32_t* dst, const int16_t* src, intptr_t, int32_t, int32_t);
> +void x265_copy_shr_sse4(int16_t* dst, const int16_t* src, intptr_t, int, int);
> +void x265_copy_shl_4_sse2(int16_t* dst, const int16_t* src, intptr_t, int);
> +void x265_copy_shl_8_sse2(int16_t* dst, const int16_t* src, intptr_t, int);
> +void x265_copy_shl_16_sse2(int16_t* dst, const int16_t* src, intptr_t, int);
> +void x265_copy_shl_32_sse2(int16_t* dst, const int16_t* src, intptr_t, int);
> +uint32_t x265_copy_cnt_4_sse4(int16_t* dst, const int16_t* src, intptr_t);
> +uint32_t x265_copy_cnt_8_sse4(int16_t* dst, const int16_t* src, intptr_t);
> +uint32_t x265_copy_cnt_16_sse4(int16_t* dst, const int16_t* src, intptr_t);
> +uint32_t x265_copy_cnt_32_sse4(int16_t* dst, const int16_t* src, intptr_t);
> +uint32_t x265_copy_cnt_4_avx2(int16_t* dst, const int16_t* src, intptr_t);
> +uint32_t x265_copy_cnt_8_avx2(int16_t* dst, const int16_t* src, intptr_t);
> +uint32_t x265_copy_cnt_16_avx2(int16_t* dst, const int16_t* src, intptr_t);
> +uint32_t x265_copy_cnt_32_avx2(int16_t* dst, const int16_t* src, intptr_t);
>  
>  #define SETUP_BLOCKCOPY_FUNC(W, H, cpu) \
> -    void x265_blockcopy_pp_ ## W ## x ## H ## cpu(pixel * a, intptr_t stridea, pixel * b, intptr_t strideb); \
> -    void x265_blockcopy_sp_ ## W ## x ## H ## cpu(pixel * a, intptr_t stridea, int16_t * b, intptr_t strideb); \
> -    void x265_blockcopy_ss_ ## W ## x ## H ## cpu(int16_t * a, intptr_t stridea, int16_t * b, intptr_t strideb);
> +    void x265_blockcopy_pp_ ## W ## x ## H ## cpu(pixel* a, intptr_t stridea, const pixel* b, intptr_t strideb); \
> +    void x265_blockcopy_sp_ ## W ## x ## H ## cpu(pixel* a, intptr_t stridea, const int16_t* b, intptr_t strideb); \
> +    void x265_blockcopy_ss_ ## W ## x ## H ## cpu(int16_t* a, intptr_t stridea, const int16_t* b, intptr_t strideb);
>  
>  #define SETUP_BLOCKCOPY_PS(W, H, cpu) \
> -    void x265_blockcopy_ps_ ## W ## x ## H ## cpu(int16_t * dst, intptr_t dstStride, pixel * src, intptr_t srcStride);
> +    void x265_blockcopy_ps_ ## W ## x ## H ## cpu(int16_t* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride);
>  
>  #define SETUP_BLOCKCOPY_SP(W, H, cpu) \
> -    void x265_blockcopy_sp_ ## W ## x ## H ## cpu(pixel * a, intptr_t stridea, int16_t * b, intptr_t strideb);
> +    void x265_blockcopy_sp_ ## W ## x ## H ## cpu(pixel* a, intptr_t stridea, const int16_t* b, intptr_t strideb);
>  
>  #define SETUP_BLOCKCOPY_SS_PP(W, H, cpu) \
> -    void x265_blockcopy_pp_ ## W ## x ## H ## cpu(pixel * a, intptr_t stridea, pixel * b, intptr_t strideb); \
> -    void x265_blockcopy_ss_ ## W ## x ## H ## cpu(int16_t * a, intptr_t stridea, int16_t * b, intptr_t strideb);
> +    void x265_blockcopy_pp_ ## W ## x ## H ## cpu(pixel* a, intptr_t stridea, const pixel* b, intptr_t strideb); \
> +    void x265_blockcopy_ss_ ## W ## x ## H ## cpu(int16_t* a, intptr_t stridea, const int16_t* b, intptr_t strideb);
>  
>  #define BLOCKCOPY_COMMON(cpu) \
>      SETUP_BLOCKCOPY_FUNC(4, 4, cpu); \
> @@ -178,31 +178,31 @@
>  
>  BLOCKCOPY_SP(_sse2);
>  
> -void x265_blockfill_s_4x4_sse2(int16_t *dst, intptr_t dstride, int16_t val);
> -void x265_blockfill_s_8x8_sse2(int16_t *dst, intptr_t dstride, int16_t val);
> -void x265_blockfill_s_16x16_sse2(int16_t *dst, intptr_t dstride, int16_t val);
> -void x265_blockfill_s_32x32_sse2(int16_t *dst, intptr_t dstride, int16_t val);
> -void x265_blockcopy_ss_16x4_avx(int16_t *dest, intptr_t deststride, int16_t *src, intptr_t srcstride);
> -void x265_blockcopy_ss_16x8_avx(int16_t *dest, intptr_t deststride, int16_t *src, intptr_t srcstride);
> -void x265_blockcopy_ss_16x12_avx(int16_t *dest, intptr_t deststride, int16_t *src, intptr_t srcstride);
> -void x265_blockcopy_ss_16x16_avx(int16_t *dest, intptr_t deststride, int16_t *src, intptr_t srcstride);
> -void x265_blockcopy_ss_16x24_avx(int16_t *dest, intptr_t deststride, int16_t *src, intptr_t srcstride);
> -void x265_blockcopy_ss_16x32_avx(int16_t *dest, intptr_t deststride, int16_t *src, intptr_t srcstride);
> -void x265_blockcopy_ss_16x64_avx(int16_t *dest, intptr_t deststride, int16_t *src, intptr_t srcstride);
> -void x265_blockcopy_ss_64x16_avx(int16_t *dest, intptr_t deststride, int16_t *src, intptr_t srcstride);
> -void x265_blockcopy_ss_64x32_avx(int16_t *dest, intptr_t deststride, int16_t *src, intptr_t srcstride);
> -void x265_blockcopy_ss_64x48_avx(int16_t *dest, intptr_t deststride, int16_t *src, intptr_t srcstride);
> -void x265_blockcopy_ss_64x64_avx(int16_t *dest, intptr_t deststride, int16_t *src, intptr_t srcstride);
> +void x265_blockfill_s_4x4_sse2(int16_t* dst, intptr_t dstride, int16_t val);
> +void x265_blockfill_s_8x8_sse2(int16_t* dst, intptr_t dstride, int16_t val);
> +void x265_blockfill_s_16x16_sse2(int16_t* dst, intptr_t dstride, int16_t val);
> +void x265_blockfill_s_32x32_sse2(int16_t* dst, intptr_t dstride, int16_t val);
> +void x265_blockcopy_ss_16x4_avx(int16_t* dest, intptr_t deststride, const int16_t* src, intptr_t srcstride);
> +void x265_blockcopy_ss_16x8_avx(int16_t* dest, intptr_t deststride, const int16_t* src, intptr_t srcstride);
> +void x265_blockcopy_ss_16x12_avx(int16_t* dest, intptr_t deststride, const int16_t* src, intptr_t srcstride);
> +void x265_blockcopy_ss_16x16_avx(int16_t* dest, intptr_t deststride, const int16_t* src, intptr_t srcstride);
> +void x265_blockcopy_ss_16x24_avx(int16_t* dest, intptr_t deststride, const int16_t* src, intptr_t srcstride);
> +void x265_blockcopy_ss_16x32_avx(int16_t* dest, intptr_t deststride, const int16_t* src, intptr_t srcstride);
> +void x265_blockcopy_ss_16x64_avx(int16_t* dest, intptr_t deststride, const int16_t* src, intptr_t srcstride);
> +void x265_blockcopy_ss_64x16_avx(int16_t* dest, intptr_t deststride, const int16_t* src, intptr_t srcstride);
> +void x265_blockcopy_ss_64x32_avx(int16_t* dest, intptr_t deststride, const int16_t* src, intptr_t srcstride);
> +void x265_blockcopy_ss_64x48_avx(int16_t* dest, intptr_t deststride, const int16_t* src, intptr_t srcstride);
> +void x265_blockcopy_ss_64x64_avx(int16_t* dest, intptr_t deststride, const int16_t* src, intptr_t srcstride);
>  
> -void x265_blockcopy_pp_32x8_avx(pixel * a, intptr_t stridea, pixel * b, intptr_t strideb);
> -void x265_blockcopy_pp_32x16_avx(pixel * a, intptr_t stridea, pixel * b, intptr_t strideb);
> -void x265_blockcopy_pp_32x24_avx(pixel * a, intptr_t stridea, pixel * b, intptr_t strideb);
> -void x265_blockcopy_pp_32x32_avx(pixel * a, intptr_t stridea, pixel * b, intptr_t strideb);
> -void x265_blockcopy_pp_32x48_avx(pixel * a, intptr_t stridea, pixel * b, intptr_t strideb);
> -void x265_blockcopy_pp_32x64_avx(pixel * a, intptr_t stridea, pixel * b, intptr_t strideb);
> +void x265_blockcopy_pp_32x8_avx(pixel* a, intptr_t stridea, const pixel* b, intptr_t strideb);
> +void x265_blockcopy_pp_32x16_avx(pixel* a, intptr_t stridea, const pixel* b, intptr_t strideb);
> +void x265_blockcopy_pp_32x24_avx(pixel* a, intptr_t stridea, const pixel* b, intptr_t strideb);
> +void x265_blockcopy_pp_32x32_avx(pixel* a, intptr_t stridea, const pixel* b, intptr_t strideb);
> +void x265_blockcopy_pp_32x48_avx(pixel* a, intptr_t stridea, const pixel* b, intptr_t strideb);
> +void x265_blockcopy_pp_32x64_avx(pixel* a, intptr_t stridea, const pixel* b, intptr_t strideb);
>  
> -void x265_blockfill_s_16x16_avx2(int16_t *dst, intptr_t dstride, int16_t val);
> -void x265_blockfill_s_32x32_avx2(int16_t *dst, intptr_t dstride, int16_t val);
> +void x265_blockfill_s_16x16_avx2(int16_t* dst, intptr_t dstride, int16_t val);
> +void x265_blockfill_s_32x32_avx2(int16_t* dst, intptr_t dstride, int16_t val);
>  
>  #undef BLOCKCOPY_COMMON
>  #undef BLOCKCOPY_SS_PP
> diff -r 8191e0d02455 -r 9578af829f2a source/common/x86/dct8.h
> --- a/source/common/x86/dct8.h	Fri Nov 14 16:14:39 2014 -0600
> +++ b/source/common/x86/dct8.h	Sun Nov 16 14:32:17 2014 +0900
> @@ -23,23 +23,23 @@
>  
>  #ifndef X265_DCT8_H
>  #define X265_DCT8_H
> -void x265_dct4_sse2(int16_t *src, int32_t *dst, intptr_t stride);
> -void x265_dst4_ssse3(int16_t *src, int32_t *dst, intptr_t stride);
> -void x265_dct8_sse4(int16_t *src, int32_t *dst, intptr_t stride);
> -void x265_dct4_avx2(int16_t *src, int32_t *dst, intptr_t stride);
> -void x265_dct8_avx2(int16_t *src, int32_t *dst, intptr_t stride);
> -void x265_dct16_avx2(int16_t *src, int32_t *dst, intptr_t stride);
> -void x265_dct32_avx2(int16_t *src, int32_t *dst, intptr_t stride);
> -void x265_idct32_avx2(int32_t *src, int16_t *dst, intptr_t stride);
> +void x265_dct4_sse2(const int16_t* src, int32_t* dst, intptr_t stride);
> +void x265_dst4_ssse3(const int16_t* src, int32_t* dst, intptr_t stride);
> +void x265_dct8_sse4(const int16_t* src, int32_t* dst, intptr_t stride);
> +void x265_dct4_avx2(const int16_t* src, int32_t* dst, intptr_t stride);
> +void x265_dct8_avx2(const int16_t* src, int32_t* dst, intptr_t stride);
> +void x265_dct16_avx2(const int16_t* src, int32_t* dst, intptr_t stride);
> +void x265_dct32_avx2(const int16_t* src, int32_t* dst, intptr_t stride);
>  
> -void x265_idst4_sse2(int32_t *src, int16_t *dst, intptr_t stride);
> -void x265_idct4_sse2(int32_t *src, int16_t *dst, intptr_t stride);
> -void x265_idct4_avx2(int32_t *src, int16_t *dst, intptr_t stride);
> -void x265_idct8_ssse3(int32_t *src, int16_t *dst, intptr_t stride);
> -void x265_idct8_avx2(int32_t *src, int16_t *dst, intptr_t stride);
> -void x265_idct16_avx2(int32_t *src, int16_t *dst, intptr_t stride);
> +void x265_idst4_sse2(const int32_t* src, int16_t* dst, intptr_t stride);
> +void x265_idct4_sse2(const int32_t* src, int16_t* dst, intptr_t stride);
> +void x265_idct4_avx2(const int32_t* src, int16_t* dst, intptr_t stride);
> +void x265_idct8_ssse3(const int32_t* src, int16_t* dst, intptr_t stride);
> +void x265_idct8_avx2(const int32_t* src, int16_t* dst, intptr_t stride);
> +void x265_idct16_avx2(const int32_t* src, int16_t* dst, intptr_t stride);
> +void x265_idct32_avx2(const int32_t* src, int16_t* dst, intptr_t stride);
>  
> -void x265_denoise_dct_sse4(int32_t *dct, uint32_t *sum, uint16_t *offset, int size);
> -void x265_denoise_dct_avx2(int32_t *dct, uint32_t *sum, uint16_t *offset, int size);
> +void x265_denoise_dct_sse4(int32_t* dct, uint32_t* sum, const uint16_t* offset, int size);
> +void x265_denoise_dct_avx2(int32_t* dct, uint32_t* sum, const uint16_t* offset, int size);
>  
>  #endif // ifndef X265_DCT8_H
> diff -r 8191e0d02455 -r 9578af829f2a source/common/x86/ipfilter8.h
> --- a/source/common/x86/ipfilter8.h	Fri Nov 14 16:14:39 2014 -0600
> +++ b/source/common/x86/ipfilter8.h	Sun Nov 16 14:32:17 2014 +0900
> @@ -25,10 +25,10 @@
>  #define X265_IPFILTER8_H
>  
>  #define SETUP_LUMA_FUNC_DEF(W, H, cpu) \
> -    void x265_interp_8tap_horiz_pp_ ## W ## x ## H ## cpu(pixel * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int coeffIdx); \
> -    void x265_interp_8tap_horiz_ps_ ## W ## x ## H ## cpu(pixel * src, intptr_t srcStride, int16_t * dst, intptr_t dstStride, int coeffIdx, int isRowExt); \
> -    void x265_interp_8tap_vert_pp_ ## W ## x ## H ## cpu(pixel * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int coeffIdx); \
> -    void x265_interp_8tap_vert_ps_ ## W ## x ## H ## cpu(pixel * src, intptr_t srcStride, int16_t * dst, intptr_t dstStride, int coeffIdx);
> +    void x265_interp_8tap_horiz_pp_ ## W ## x ## H ## cpu(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx); \
> +    void x265_interp_8tap_horiz_ps_ ## W ## x ## H ## cpu(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); \
> +    void x265_interp_8tap_vert_pp_ ## W ## x ## H ## cpu(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx); \
> +    void x265_interp_8tap_vert_ps_ ## W ## x ## H ## cpu(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
>  
>  #define LUMA_FILTERS(cpu) \
>      SETUP_LUMA_FUNC_DEF(4,   4, cpu); \
> @@ -58,7 +58,7 @@
>      SETUP_LUMA_FUNC_DEF(16, 64, cpu)
>  
>  #define SETUP_LUMA_SP_FUNC_DEF(W, H, cpu) \
> -    void x265_interp_8tap_vert_sp_ ## W ## x ## H ## cpu(int16_t * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int coeffIdx);
> +    void x265_interp_8tap_vert_sp_ ## W ## x ## H ## cpu(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
>  
>  #define LUMA_SP_FILTERS(cpu) \
>      SETUP_LUMA_SP_FUNC_DEF(4,   4, cpu); \
> @@ -88,7 +88,7 @@
>      SETUP_LUMA_SP_FUNC_DEF(16, 64, cpu);
>  
>  #define SETUP_LUMA_SS_FUNC_DEF(W, H, cpu) \
> -    void x265_interp_8tap_vert_ss_ ## W ## x ## H ## cpu(int16_t * src, intptr_t srcStride, int16_t * dst, intptr_t dstStride, int coeffIdx);
> +    void x265_interp_8tap_vert_ss_ ## W ## x ## H ## cpu(const int16_t* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
>  
>  #define LUMA_SS_FILTERS(cpu) \
>      SETUP_LUMA_SS_FUNC_DEF(4,   4, cpu); \
> @@ -120,10 +120,10 @@
>  #if HIGH_BIT_DEPTH
>  
>  #define SETUP_CHROMA_VERT_FUNC_DEF(W, H, cpu) \
> -    void x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu(int16_t * src, intptr_t srcStride, int16_t * dst, intptr_t dstStride, int coeffIdx); \
> -    void x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu(int16_t * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int coeffIdx); \
> -    void x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu(pixel * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int coeffIdx); \
> -    void x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu(pixel * src, intptr_t srcStride, int16_t * dst, intptr_t dstStride, int coeffIdx);
> +    void x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu(const int16_t* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx); \
> +    void x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx); \
> +    void x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx); \
> +    void x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
>  
>  #define CHROMA_VERT_FILTERS(cpu) \
>      SETUP_CHROMA_VERT_FUNC_DEF(4, 4, cpu); \
> @@ -208,8 +208,8 @@
>      SETUP_CHROMA_VERT_FUNC_DEF(16, 64, cpu)
>  
>  #define SETUP_CHROMA_HORIZ_FUNC_DEF(W, H, cpu) \
> -    void x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu(pixel * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int coeffIdx); \
> -    void x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu(pixel * src, intptr_t srcStride, int16_t * dst, intptr_t dstStride, int coeffIdx, int isRowExt);
> +    void x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx); \
> +    void x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
>  
>  #define CHROMA_HORIZ_FILTERS(cpu) \
>      SETUP_CHROMA_HORIZ_FUNC_DEF(4, 4, cpu); \
> @@ -289,8 +289,8 @@
>      SETUP_CHROMA_HORIZ_FUNC_DEF(64, 16, cpu); \
>      SETUP_CHROMA_HORIZ_FUNC_DEF(16, 64, cpu)
>  
> -void x265_chroma_p2s_sse2(pixel *src, intptr_t srcStride, int16_t *dst, int width, int height);
> -void x265_luma_p2s_sse2(pixel *src, intptr_t srcStride, int16_t *dst, int width, int height);
> +void x265_chroma_p2s_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, int width, int height);
> +void x265_luma_p2s_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, int width, int height);
>  
>  CHROMA_VERT_FILTERS(_sse2);
>  CHROMA_HORIZ_FILTERS(_sse4);
> @@ -319,10 +319,10 @@
>  #else // if HIGH_BIT_DEPTH
>  
>  #define SETUP_CHROMA_FUNC_DEF(W, H, cpu) \
> -    void x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu(pixel * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int coeffIdx); \
> -    void x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu(pixel * src, intptr_t srcStride, int16_t * dst, intptr_t dstStride, int coeffIdx, int isRowExt); \
> -    void x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu(pixel * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int coeffIdx); \
> -    void x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu(pixel * src, intptr_t srcStride, int16_t * dst, intptr_t dstStride, int coeffIdx);
> +    void x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx); \
> +    void x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); \
> +    void x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx); \
> +    void x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
>  
>  #define CHROMA_FILTERS(cpu) \
>      SETUP_CHROMA_FUNC_DEF(4, 4, cpu); \
> @@ -403,7 +403,7 @@
>      SETUP_CHROMA_FUNC_DEF(16, 64, cpu);
>  
>  #define SETUP_CHROMA_SP_FUNC_DEF(W, H, cpu) \
> -    void x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu(int16_t * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int coeffIdx);
> +    void x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
>  
>  #define CHROMA_SP_FILTERS(cpu) \
>      SETUP_CHROMA_SP_FUNC_DEF(8, 2, cpu); \
> @@ -488,7 +488,7 @@
>      SETUP_CHROMA_SP_FUNC_DEF(16, 64, cpu);
>  
>  #define SETUP_CHROMA_SS_FUNC_DEF(W, H, cpu) \
> -    void x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu(int16_t * src, intptr_t srcStride, int16_t * dst, intptr_t dstStride, int coeffIdx);
> +    void x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu(const int16_t* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
>  
>  #define CHROMA_SS_FILTERS(cpu) \
>      SETUP_CHROMA_SS_FUNC_DEF(4, 4, cpu); \
> @@ -588,7 +588,7 @@
>  CHROMA_SP_FILTERS_444(_sse4);
>  CHROMA_SS_FILTERS_444(_sse2);
>  
> -void x265_chroma_p2s_ssse3(pixel *src, intptr_t srcStride, int16_t *dst, int width, int height);
> +void x265_chroma_p2s_ssse3(const pixel* src, intptr_t srcStride, int16_t* dst, int width, int height);
>  
>  #undef SETUP_CHROMA_FUNC_DEF
>  #undef SETUP_CHROMA_SP_FUNC_DEF
> @@ -616,8 +616,8 @@
>  LUMA_SS_FILTERS(_sse2);
>  LUMA_FILTERS(_avx2);
>  
> -void x265_interp_8tap_hv_pp_8x8_ssse3(pixel * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int idxX, int idxY);
> -void x265_luma_p2s_ssse3(pixel *src, intptr_t srcStride, int16_t *dst, int width, int height);
> +void x265_interp_8tap_hv_pp_8x8_ssse3(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int idxX, int idxY);
> +void x265_luma_p2s_ssse3(const pixel* src, intptr_t srcStride, int16_t* dst, int width, int height);
>  
>  #undef LUMA_FILTERS
>  #undef LUMA_SP_FILTERS
> diff -r 8191e0d02455 -r 9578af829f2a source/common/x86/mc.h
> --- a/source/common/x86/mc.h	Fri Nov 14 16:14:39 2014 -0600
> +++ b/source/common/x86/mc.h	Sun Nov 16 14:32:17 2014 +0900
> @@ -25,7 +25,7 @@
>  #define X265_MC_H
>  
>  #define LOWRES(cpu) \
> -    void x265_frame_init_lowres_core_ ## cpu(pixel * src0, pixel * dst0, pixel * dsth, pixel * dstv, pixel * dstc, \
> +    void x265_frame_init_lowres_core_ ## cpu(const pixel* src0, pixel* dst0, pixel* dsth, pixel* dstv, pixel* dstc, \
>                                               intptr_t src_stride, intptr_t dst_stride, int width, int height);
>  LOWRES(mmx2)
>  LOWRES(sse2)
> @@ -37,31 +37,31 @@
>      void func ## _mmx2 args; \
>      void func ## _sse2 args; \
>      void func ## _ssse3 args;
> -DECL_SUF(x265_pixel_avg_64x64, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> -DECL_SUF(x265_pixel_avg_64x48, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> -DECL_SUF(x265_pixel_avg_64x32, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> -DECL_SUF(x265_pixel_avg_64x16, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> -DECL_SUF(x265_pixel_avg_48x64, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> -DECL_SUF(x265_pixel_avg_32x64, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> -DECL_SUF(x265_pixel_avg_32x32, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> -DECL_SUF(x265_pixel_avg_32x24, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> -DECL_SUF(x265_pixel_avg_32x16, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> -DECL_SUF(x265_pixel_avg_32x8,  (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> -DECL_SUF(x265_pixel_avg_24x32, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> -DECL_SUF(x265_pixel_avg_16x64, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> -DECL_SUF(x265_pixel_avg_16x32, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> -DECL_SUF(x265_pixel_avg_16x16, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> -DECL_SUF(x265_pixel_avg_16x12, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> -DECL_SUF(x265_pixel_avg_16x8,  (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> -DECL_SUF(x265_pixel_avg_16x4,  (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> -DECL_SUF(x265_pixel_avg_12x16, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> -DECL_SUF(x265_pixel_avg_8x32,  (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> -DECL_SUF(x265_pixel_avg_8x16,  (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> -DECL_SUF(x265_pixel_avg_8x8,   (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> -DECL_SUF(x265_pixel_avg_8x4,   (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> -DECL_SUF(x265_pixel_avg_4x16,  (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> -DECL_SUF(x265_pixel_avg_4x8,   (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> -DECL_SUF(x265_pixel_avg_4x4,   (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> +DECL_SUF(x265_pixel_avg_64x64, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
> +DECL_SUF(x265_pixel_avg_64x48, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
> +DECL_SUF(x265_pixel_avg_64x32, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
> +DECL_SUF(x265_pixel_avg_64x16, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
> +DECL_SUF(x265_pixel_avg_48x64, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
> +DECL_SUF(x265_pixel_avg_32x64, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
> +DECL_SUF(x265_pixel_avg_32x32, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
> +DECL_SUF(x265_pixel_avg_32x24, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
> +DECL_SUF(x265_pixel_avg_32x16, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
> +DECL_SUF(x265_pixel_avg_32x8,  (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
> +DECL_SUF(x265_pixel_avg_24x32, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
> +DECL_SUF(x265_pixel_avg_16x64, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
> +DECL_SUF(x265_pixel_avg_16x32, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
> +DECL_SUF(x265_pixel_avg_16x16, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
> +DECL_SUF(x265_pixel_avg_16x12, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
> +DECL_SUF(x265_pixel_avg_16x8,  (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
> +DECL_SUF(x265_pixel_avg_16x4,  (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
> +DECL_SUF(x265_pixel_avg_12x16, (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
> +DECL_SUF(x265_pixel_avg_8x32,  (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
> +DECL_SUF(x265_pixel_avg_8x16,  (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
> +DECL_SUF(x265_pixel_avg_8x8,   (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
> +DECL_SUF(x265_pixel_avg_8x4,   (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
> +DECL_SUF(x265_pixel_avg_4x16,  (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
> +DECL_SUF(x265_pixel_avg_4x8,   (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
> +DECL_SUF(x265_pixel_avg_4x4,   (pixel*, intptr_t, const pixel*, intptr_t, const pixel*, intptr_t, int))
>  
>  #undef LOWRES
>  #undef DECL_SUF
> diff -r 8191e0d02455 -r 9578af829f2a source/common/x86/pixel-util.h
> --- a/source/common/x86/pixel-util.h	Fri Nov 14 16:14:39 2014 -0600
> +++ b/source/common/x86/pixel-util.h	Sun Nov 16 14:32:17 2014 +0900
> @@ -24,52 +24,52 @@
>  #ifndef X265_PIXEL_UTIL_H
>  #define X265_PIXEL_UTIL_H
>  
> -void x265_getResidual4_sse2(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
> -void x265_getResidual8_sse2(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
> -void x265_getResidual16_sse2(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
> -void x265_getResidual16_sse4(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
> -void x265_getResidual32_sse2(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
> -void x265_getResidual32_sse4(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
> +void x265_getResidual4_sse2(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
> +void x265_getResidual8_sse2(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
> +void x265_getResidual16_sse2(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
> +void x265_getResidual16_sse4(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
> +void x265_getResidual32_sse2(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
> +void x265_getResidual32_sse4(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
>  
> -void x265_transpose4_sse2(pixel *dest, pixel *src, intptr_t stride);
> -void x265_transpose8_sse2(pixel *dest, pixel *src, intptr_t stride);
> -void x265_transpose16_sse2(pixel *dest, pixel *src, intptr_t stride);
> -void x265_transpose32_sse2(pixel *dest, pixel *src, intptr_t stride);
> -void x265_transpose64_sse2(pixel *dest, pixel *src, intptr_t stride);
> +void x265_transpose4_sse2(pixel* dest, const pixel* src, intptr_t stride);
> +void x265_transpose8_sse2(pixel* dest, const pixel* src, intptr_t stride);
> +void x265_transpose16_sse2(pixel* dest, const pixel* src, intptr_t stride);
> +void x265_transpose32_sse2(pixel* dest, const pixel* src, intptr_t stride);
> +void x265_transpose64_sse2(pixel* dest, const pixel* src, intptr_t stride);
>  
> -void x265_transpose8_avx2(pixel *dest, pixel *src, intptr_t stride);
> -void x265_transpose16_avx2(pixel *dest, pixel *src, intptr_t stride);
> -void x265_transpose32_avx2(pixel *dest, pixel *src, intptr_t stride);
> -void x265_transpose64_avx2(pixel *dest, pixel *src, intptr_t stride);
> +void x265_transpose8_avx2(pixel* dest, const pixel* src, intptr_t stride);
> +void x265_transpose16_avx2(pixel* dest, const pixel* src, intptr_t stride);
> +void x265_transpose32_avx2(pixel* dest, const pixel* src, intptr_t stride);
> +void x265_transpose64_avx2(pixel* dest, const pixel* src, intptr_t stride);
>  
> -uint32_t x265_quant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int16_t *qCoef, int qBits, int add, int numCoeff);
> -uint32_t x265_quant_avx2(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int16_t *qCoef, int qBits, int add, int numCoeff);
> -uint32_t x265_nquant_sse4(int32_t *coef, int32_t *quantCoeff, int16_t *qCoef, int qBits, int add, int numCoeff);
> -uint32_t x265_nquant_avx2(int32_t *coef, int32_t *quantCoeff, int16_t *qCoef, int qBits, int add, int numCoeff);
> +uint32_t x265_quant_sse4(const int32_t* coef, const int32_t* quantCoeff, int32_t* deltaU, int16_t* qCoef, int qBits, int add, int numCoeff);
> +uint32_t x265_quant_avx2(const int32_t* coef, const int32_t* quantCoeff, int32_t* deltaU, int16_t* qCoef, int qBits, int add, int numCoeff);
> +uint32_t x265_nquant_sse4(const int32_t* coef, const int32_t* quantCoeff, int16_t* qCoef, int qBits, int add, int numCoeff);
> +uint32_t x265_nquant_avx2(const int32_t* coef, const int32_t* quantCoeff, int16_t* qCoef, int qBits, int add, int numCoeff);
>  void x265_dequant_normal_sse4(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift);
>  void x265_dequant_normal_avx2(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift);
> -int x265_count_nonzero_ssse3(const int16_t *quantCoeff, int numCoeff);
> +int x265_count_nonzero_ssse3(const int16_t* quantCoeff, int numCoeff);
>  
> -void x265_weight_pp_sse4(pixel *src, pixel *dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset);
> -void x265_weight_pp_avx2(pixel *src, pixel *dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset);
> -void x265_weight_sp_sse4(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
> +void x265_weight_pp_sse4(const pixel* src, pixel* dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset);
> +void x265_weight_pp_avx2(const pixel* src, pixel* dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset);
> +void x265_weight_sp_sse4(const int16_t* src, pixel* dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
>  
> -void x265_pixel_ssim_4x4x2_core_mmx2(const uint8_t * pix1, intptr_t stride1,
> -                                     const uint8_t * pix2, intptr_t stride2, int sums[2][4]);
> -void x265_pixel_ssim_4x4x2_core_sse2(const pixel * pix1, intptr_t stride1,
> -                                     const pixel * pix2, intptr_t stride2, int sums[2][4]);
> -void x265_pixel_ssim_4x4x2_core_avx(const pixel * pix1, intptr_t stride1,
> -                                    const pixel * pix2, intptr_t stride2, int sums[2][4]);
> +void x265_pixel_ssim_4x4x2_core_mmx2(const uint8_t* pix1, intptr_t stride1,
> +                                     const uint8_t* pix2, intptr_t stride2, int sums[2][4]);
> +void x265_pixel_ssim_4x4x2_core_sse2(const pixel* pix1, intptr_t stride1,
> +                                     const pixel* pix2, intptr_t stride2, int sums[2][4]);
> +void x265_pixel_ssim_4x4x2_core_avx(const pixel* pix1, intptr_t stride1,
> +                                    const pixel* pix2, intptr_t stride2, int sums[2][4]);
>  float x265_pixel_ssim_end4_sse2(int sum0[5][4], int sum1[5][4], int width);
>  float x265_pixel_ssim_end4_avx(int sum0[5][4], int sum1[5][4], int width);
>  
> -void x265_scale1D_128to64_ssse3(pixel *, pixel *, intptr_t);
> -void x265_scale1D_128to64_avx2(pixel *, pixel *, intptr_t);
> -void x265_scale2D_64to32_ssse3(pixel *, pixel *, intptr_t);
> +void x265_scale1D_128to64_ssse3(pixel*, const pixel*, intptr_t);
> +void x265_scale1D_128to64_avx2(pixel*, const pixel*, intptr_t);
> +void x265_scale2D_64to32_ssse3(pixel*, const pixel*, intptr_t);
>  
>  #define SETUP_CHROMA_PIXELSUB_PS_FUNC(W, H, cpu) \
> -    void x265_pixel_sub_ps_ ## W ## x ## H ## cpu(int16_t * dest, intptr_t destride, pixel * src0, pixel * src1, intptr_t srcstride0, intptr_t srcstride1); \
> -    void x265_pixel_add_ps_ ## W ## x ## H ## cpu(pixel * dest, intptr_t destride, pixel * src0, int16_t * scr1, intptr_t srcStride0, intptr_t srcStride1);
> +    void x265_pixel_sub_ps_ ## W ## x ## H ## cpu(int16_t* dest, intptr_t destride, const pixel* src0, const pixel* src1, intptr_t srcstride0, intptr_t srcstride1); \
> +    void x265_pixel_add_ps_ ## W ## x ## H ## cpu(pixel* dest, intptr_t destride, const pixel* src0, const int16_t* src1, intptr_t srcStride0, intptr_t srcStride1);
>  
>  #define CHROMA_PIXELSUB_DEF(cpu) \
>      SETUP_CHROMA_PIXELSUB_PS_FUNC(4, 4, cpu); \
> @@ -84,8 +84,8 @@
>      SETUP_CHROMA_PIXELSUB_PS_FUNC(32, 64, cpu);
>  
>  #define SETUP_LUMA_PIXELSUB_PS_FUNC(W, H, cpu) \
> -    void x265_pixel_sub_ps_ ## W ## x ## H ## cpu(int16_t * dest, intptr_t destride, pixel * src0, pixel * src1, intptr_t srcstride0, intptr_t srcstride1); \
> -    void x265_pixel_add_ps_ ## W ## x ## H ## cpu(pixel * dest, intptr_t destride, pixel * src0, int16_t * scr1, intptr_t srcStride0, intptr_t srcStride1);
> +    void x265_pixel_sub_ps_ ## W ## x ## H ## cpu(int16_t* dest, intptr_t destride, const pixel* src0, const pixel* src1, intptr_t srcstride0, intptr_t srcstride1); \
> +    void x265_pixel_add_ps_ ## W ## x ## H ## cpu(pixel* dest, intptr_t destride, const pixel* src0, const int16_t* src1, intptr_t srcStride0, intptr_t srcStride1);
>  
>  #define LUMA_PIXELSUB_DEF(cpu) \
>      SETUP_LUMA_PIXELSUB_PS_FUNC(8,   8, cpu); \
> @@ -102,7 +102,7 @@
>  CHROMA_PIXELSUB_DEF_422(_sse2);
>  
>  #define SETUP_LUMA_PIXELVAR_FUNC(W, H, cpu) \
> -    uint64_t x265_pixel_var_ ## W ## x ## H ## cpu(pixel * pix, intptr_t pixstride);
> +    uint64_t x265_pixel_var_ ## W ## x ## H ## cpu(const pixel* pix, intptr_t pixstride);
>  
>  #define LUMA_PIXELVAR_DEF(cpu) \
>      SETUP_LUMA_PIXELVAR_FUNC(8,   8, cpu); \
> diff -r 8191e0d02455 -r 9578af829f2a source/common/x86/pixel.h
> --- a/source/common/x86/pixel.h	Fri Nov 14 16:14:39 2014 -0600
> +++ b/source/common/x86/pixel.h	Sun Nov 16 14:32:17 2014 +0900
> @@ -57,17 +57,17 @@
>      ret x265_pixel_ ## name ## _12x16_ ## suffix args; \
>  
>  #define DECL_X1(name, suffix) \
> -    DECL_PIXELS(int, name, suffix, (pixel *, intptr_t, pixel *, intptr_t))
> +    DECL_PIXELS(int, name, suffix, (const pixel*, intptr_t, const pixel*, intptr_t))
>  
>  #define DECL_X1_SS(name, suffix) \
> -    DECL_PIXELS(int, name, suffix, (int16_t *, intptr_t, int16_t *, intptr_t))
> +    DECL_PIXELS(int, name, suffix, (const int16_t*, intptr_t, const int16_t*, intptr_t))
>  
>  #define DECL_X1_SP(name, suffix) \
> -    DECL_PIXELS(int, name, suffix, (int16_t *, intptr_t, pixel *, intptr_t))
> +    DECL_PIXELS(int, name, suffix, (const int16_t*, intptr_t, const pixel*, intptr_t))
>  
>  #define DECL_X4(name, suffix) \
> -    DECL_PIXELS(void, name ## _x3, suffix, (pixel *, pixel *, pixel *, pixel *, intptr_t, int *)) \
> -    DECL_PIXELS(void, name ## _x4, suffix, (pixel *, pixel *, pixel *, pixel *, pixel *, intptr_t, int *))
> +    DECL_PIXELS(void, name ## _x3, suffix, (const pixel*, const pixel*, const pixel*, const pixel*, intptr_t, int32_t*)) \
> +    DECL_PIXELS(void, name ## _x4, suffix, (const pixel*, const pixel*, const pixel*, const pixel*, const pixel*, intptr_t, int32_t*))
>  
>  /* sad-a.asm */
>  DECL_X1(sad, mmx2)
> @@ -103,11 +103,11 @@
>  DECL_X1(satd, avx)
>  DECL_X1(satd, xop)
>  DECL_X1(satd, avx2)
> -int x265_pixel_satd_8x32_sse2(pixel *, intptr_t, pixel *, intptr_t);
> -int x265_pixel_satd_16x4_sse2(pixel *, intptr_t, pixel *, intptr_t);
> -int x265_pixel_satd_16x12_sse2(pixel *, intptr_t, pixel *, intptr_t);
> -int x265_pixel_satd_16x32_sse2(pixel *, intptr_t, pixel *, intptr_t);
> -int x265_pixel_satd_16x64_sse2(pixel *, intptr_t, pixel *, intptr_t);
> +int x265_pixel_satd_8x32_sse2(const pixel*, intptr_t, const pixel*, intptr_t);
> +int x265_pixel_satd_16x4_sse2(const pixel*, intptr_t, const pixel*, intptr_t);
> +int x265_pixel_satd_16x12_sse2(const pixel*, intptr_t, const pixel*, intptr_t);
> +int x265_pixel_satd_16x32_sse2(const pixel*, intptr_t, const pixel*, intptr_t);
> +int x265_pixel_satd_16x64_sse2(const pixel*, intptr_t, const pixel*, intptr_t);
>  
>  DECL_X1(sa8d, mmx2)
>  DECL_X1(sa8d, sse2)
> @@ -138,42 +138,42 @@
>  DECL_X1_SS(ssd_ss, avx2)
>  DECL_X1_SP(ssd_sp, sse4)
>  #define DECL_HEVC_SSD(suffix) \
> -    int x265_pixel_ssd_32x64_ ## suffix(pixel *, intptr_t, pixel *, intptr_t); \
> -    int x265_pixel_ssd_16x64_ ## suffix(pixel *, intptr_t, pixel *, intptr_t); \
> -    int x265_pixel_ssd_32x32_ ## suffix(pixel *, intptr_t, pixel *, intptr_t); \
> -    int x265_pixel_ssd_32x16_ ## suffix(pixel *, intptr_t, pixel *, intptr_t); \
> -    int x265_pixel_ssd_16x32_ ## suffix(pixel *, intptr_t, pixel *, intptr_t); \
> -    int x265_pixel_ssd_32x24_ ## suffix(pixel *, intptr_t, pixel *, intptr_t); \
> -    int x265_pixel_ssd_24x32_ ## suffix(pixel *, intptr_t, pixel *, intptr_t); \
> -    int x265_pixel_ssd_32x8_ ## suffix(pixel *, intptr_t, pixel *, intptr_t); \
> -    int x265_pixel_ssd_8x32_ ## suffix(pixel *, intptr_t, pixel *, intptr_t); \
> -    int x265_pixel_ssd_16x16_ ## suffix(pixel *, intptr_t, pixel *, intptr_t); \
> -    int x265_pixel_ssd_16x8_ ## suffix(pixel *, intptr_t, pixel *, intptr_t); \
> -    int x265_pixel_ssd_8x16_ ## suffix(pixel *, intptr_t, pixel *, intptr_t); \
> -    int x265_pixel_ssd_16x12_ ## suffix(pixel *, intptr_t, pixel *, intptr_t); \
> -    int x265_pixel_ssd_16x4_ ## suffix(pixel *, intptr_t, pixel *, intptr_t); \
> -    int x265_pixel_ssd_8x8_ ## suffix(pixel *, intptr_t, pixel *, intptr_t); \
> -    int x265_pixel_ssd_8x4_ ## suffix(pixel *, intptr_t, pixel *, intptr_t);
> +    int x265_pixel_ssd_32x64_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
> +    int x265_pixel_ssd_16x64_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
> +    int x265_pixel_ssd_32x32_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
> +    int x265_pixel_ssd_32x16_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
> +    int x265_pixel_ssd_16x32_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
> +    int x265_pixel_ssd_32x24_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
> +    int x265_pixel_ssd_24x32_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
> +    int x265_pixel_ssd_32x8_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
> +    int x265_pixel_ssd_8x32_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
> +    int x265_pixel_ssd_16x16_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
> +    int x265_pixel_ssd_16x8_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
> +    int x265_pixel_ssd_8x16_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
> +    int x265_pixel_ssd_16x12_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
> +    int x265_pixel_ssd_16x4_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
> +    int x265_pixel_ssd_8x8_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \
> +    int x265_pixel_ssd_8x4_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t);
>  DECL_HEVC_SSD(sse2)
>  DECL_HEVC_SSD(ssse3)
>  DECL_HEVC_SSD(avx)
>  
> -int x265_pixel_ssd_12x16_sse4(pixel *, intptr_t, pixel *, intptr_t);
> -int x265_pixel_ssd_24x32_sse4(pixel *, intptr_t, pixel *, intptr_t);
> -int x265_pixel_ssd_48x64_sse4(pixel *, intptr_t, pixel *, intptr_t);
> -int x265_pixel_ssd_64x16_sse4(pixel *, intptr_t, pixel *, intptr_t);
> -int x265_pixel_ssd_64x32_sse4(pixel *, intptr_t, pixel *, intptr_t);
> -int x265_pixel_ssd_64x48_sse4(pixel *, intptr_t, pixel *, intptr_t);
> -int x265_pixel_ssd_64x64_sse4(pixel *, intptr_t, pixel *, intptr_t);
> +int x265_pixel_ssd_12x16_sse4(const pixel*, intptr_t, const pixel*, intptr_t);
> +int x265_pixel_ssd_24x32_sse4(const pixel*, intptr_t, const pixel*, intptr_t);
> +int x265_pixel_ssd_48x64_sse4(const pixel*, intptr_t, const pixel*, intptr_t);
> +int x265_pixel_ssd_64x16_sse4(const pixel*, intptr_t, const pixel*, intptr_t);
> +int x265_pixel_ssd_64x32_sse4(const pixel*, intptr_t, const pixel*, intptr_t);
> +int x265_pixel_ssd_64x48_sse4(const pixel*, intptr_t, const pixel*, intptr_t);
> +int x265_pixel_ssd_64x64_sse4(const pixel*, intptr_t, const pixel*, intptr_t);
>  
> -int x265_pixel_ssd_s_4_sse2(int16_t *, intptr_t);
> -int x265_pixel_ssd_s_8_sse2(int16_t *, intptr_t);
> -int x265_pixel_ssd_s_16_sse2(int16_t *, intptr_t);
> -int x265_pixel_ssd_s_32_sse2(int16_t *, intptr_t);
> -int x265_pixel_ssd_s_32_avx2(int16_t *, intptr_t);
> +int x265_pixel_ssd_s_4_sse2(const int16_t*, intptr_t);
> +int x265_pixel_ssd_s_8_sse2(const int16_t*, intptr_t);
> +int x265_pixel_ssd_s_16_sse2(const int16_t*, intptr_t);
> +int x265_pixel_ssd_s_32_sse2(const int16_t*, intptr_t);
> +int x265_pixel_ssd_s_32_avx2(const int16_t*, intptr_t);
>  
>  #define ADDAVG(func)  \
> -    void x265_ ## func ## _sse4(int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t);
> +    void x265_ ## func ## _sse4(const int16_t*, const int16_t*, pixel*, intptr_t, intptr_t, intptr_t);
>  ADDAVG(addAvg_2x4)
>  ADDAVG(addAvg_2x8)
>  ADDAVG(addAvg_4x2);
> @@ -216,8 +216,8 @@
>  ADDAVG(addAvg_24x64)
>  ADDAVG(addAvg_32x48)
>  
> -void x265_downShift_16_sse2(uint16_t *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask);
> -void x265_upShift_8_sse4(uint8_t *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int width, int height, int shift);
> +void x265_downShift_16_sse2(const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask);
> +void x265_upShift_8_sse4(const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift);
>  
>  #undef DECL_PIXELS
>  #undef DECL_HEVC_SSD
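
For reference, this also lets the function-pointer types in the primitives
table become const-correct, so assigning e.g. x265_pixel_ssd_16x16_sse2 into
the table needs no cast. A minimal sketch of the pointer type (the typedef
name and the 8bpp pixel width are my assumptions, not from this hunk):

    #include <stdint.h>                 /* intptr_t, uint8_t */
    typedef uint8_t pixel;              /* 8bpp build; 16bpp uses uint16_t */

    /* both blocks are read-only inputs, so both pointers can be const */
    typedef int (*pixel_ssd_t)(const pixel* fenc, intptr_t fencStride,
                               const pixel* fref, intptr_t frefStride);
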
> diff -r 8191e0d02455 -r 9578af829f2a source/common/yuv.cpp
> --- a/source/common/yuv.cpp	Fri Nov 14 16:14:39 2014 -0600
> +++ b/source/common/yuv.cpp	Sun Nov 16 14:32:17 2014 +0900
> @@ -71,7 +71,6 @@
>  void Yuv::copyToPicYuv(PicYuv& dstPic, uint32_t cuAddr, uint32_t absPartIdx) const
>  {
>      pixel* dstY = dstPic.getLumaAddr(cuAddr, absPartIdx);
> -
>      primitives.luma_copy_pp[m_part](dstY, dstPic.m_stride, m_buf[0], m_size);
>  
>      pixel* dstU = dstPic.getCbAddr(cuAddr, absPartIdx);
> @@ -82,18 +81,13 @@
>  
>  void Yuv::copyFromPicYuv(const PicYuv& srcPic, uint32_t cuAddr, uint32_t absPartIdx)
>  {
> -    /* We cheat with const_cast internally because the get methods are not capable of
> -     * returning const buffers and the primitives are not const aware, but we know
> -     * this function does not modify srcPic */
> -    PicYuv& srcPicSafe = const_cast<PicYuv&>(srcPic);
> -    pixel* srcY = srcPicSafe.getLumaAddr(cuAddr, absPartIdx);
> -
> +    const pixel* srcY = srcPic.getLumaAddr(cuAddr, absPartIdx);
>      primitives.luma_copy_pp[m_part](m_buf[0], m_size, srcY, srcPic.m_stride);
>  
> -    pixel* srcU = srcPicSafe.getCbAddr(cuAddr, absPartIdx);
> -    pixel* srcV = srcPicSafe.getCrAddr(cuAddr, absPartIdx);
> -    primitives.chroma[m_csp].copy_pp[m_part](m_buf[1], m_csize, srcU, srcPicSafe.m_strideC);
> -    primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], m_csize, srcV, srcPicSafe.m_strideC);
> +    const pixel* srcU = srcPic.getCbAddr(cuAddr, absPartIdx);
> +    const pixel* srcV = srcPic.getCrAddr(cuAddr, absPartIdx);
> +    primitives.chroma[m_csp].copy_pp[m_part](m_buf[1], m_csize, srcU, srcPic.m_strideC);
> +    primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], m_csize, srcV, srcPic.m_strideC);
>  }
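
This hunk is the real payoff: with const-aware primitives, copyFromPicYuv can
take its source pointers straight from the const reference. I assume the rest
of the patch pairs the PicYuv address getters with const overloads along
these lines (a sketch using the pixel typedef above, not the literal tree
code):

    struct PicYuv
    {
        /* const callers resolve to the const overload, so no const_cast */
        pixel*       getLumaAddr(uint32_t cuAddr, uint32_t absPartIdx);
        const pixel* getLumaAddr(uint32_t cuAddr, uint32_t absPartIdx) const;
        /* ...and likewise for getCbAddr() / getCrAddr() */
    };
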
>  
>  void Yuv::copyFromYuv(const Yuv& srcYuv)
> @@ -120,7 +114,6 @@
>  {
>      pixel* srcY = m_buf[0] + getAddrOffset(absPartIdx, m_size);
>      pixel* dstY = dstYuv.m_buf[0];
> -
>      primitives.luma_copy_pp[dstYuv.m_part](dstY, dstYuv.m_size, srcY, m_size);
>  
>      pixel* srcU = m_buf[1] + getChromaAddrOffset(absPartIdx);
> @@ -144,21 +137,19 @@
>  
>      if (bLuma)
>      {
> -        int16_t* srcY0 = const_cast<ShortYuv&>(srcYuv0).getLumaAddr(absPartIdx);
> -        int16_t* srcY1 = const_cast<ShortYuv&>(srcYuv1).getLumaAddr(absPartIdx);
> +        const int16_t* srcY0 = srcYuv0.getLumaAddr(absPartIdx);
> +        const int16_t* srcY1 = srcYuv1.getLumaAddr(absPartIdx);
>          pixel* dstY = getLumaAddr(absPartIdx);
> -
>          primitives.luma_addAvg[part](srcY0, srcY1, dstY, srcYuv0.m_size, srcYuv1.m_size, m_size);
>      }
>      if (bChroma)
>      {
> -        int16_t* srcU0 = const_cast<ShortYuv&>(srcYuv0).getCbAddr(absPartIdx);
> -        int16_t* srcV0 = const_cast<ShortYuv&>(srcYuv0).getCrAddr(absPartIdx);
> -        int16_t* srcU1 = const_cast<ShortYuv&>(srcYuv1).getCbAddr(absPartIdx);
> -        int16_t* srcV1 = const_cast<ShortYuv&>(srcYuv1).getCrAddr(absPartIdx);
> +        const int16_t* srcU0 = srcYuv0.getCbAddr(absPartIdx);
> +        const int16_t* srcV0 = srcYuv0.getCrAddr(absPartIdx);
> +        const int16_t* srcU1 = srcYuv1.getCbAddr(absPartIdx);
> +        const int16_t* srcV1 = srcYuv1.getCrAddr(absPartIdx);
>          pixel* dstU = getCbAddr(absPartIdx);
>          pixel* dstV = getCrAddr(absPartIdx);
> -
>          primitives.chroma[m_csp].addAvg[part](srcU0, srcU1, dstU, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
>          primitives.chroma[m_csp].addAvg[part](srcV0, srcV1, dstV, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
>      }
> @@ -168,7 +159,7 @@
>  {
>      const pixel* src = getLumaAddr(absPartIdx);
>      pixel* dst = dstYuv.getLumaAddr(absPartIdx);
> -    primitives.square_copy_pp[log2Size - 2](dst, dstYuv.m_size, const_cast<pixel*>(src), m_size);
> +    primitives.square_copy_pp[log2Size - 2](dst, dstYuv.m_size, src, m_size);
>  }
>  
>  void Yuv::copyPartToPartChroma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2SizeL) const
> @@ -178,7 +169,6 @@
>      const pixel* srcV = getCrAddr(absPartIdx);
>      pixel* dstU = dstYuv.getCbAddr(absPartIdx);
>      pixel* dstV = dstYuv.getCrAddr(absPartIdx);
> -
> -    primitives.chroma[m_csp].copy_pp[part](dstU, dstYuv.m_csize, const_cast<pixel*>(srcU), m_csize);
> -    primitives.chroma[m_csp].copy_pp[part](dstV, dstYuv.m_csize, const_cast<pixel*>(srcV), m_csize);
> +    primitives.chroma[m_csp].copy_pp[part](dstU, dstYuv.m_csize, srcU, m_csize);
> +    primitives.chroma[m_csp].copy_pp[part](dstV, dstYuv.m_csize, srcV, m_csize);
>  }
> diff -r 8191e0d02455 -r 9578af829f2a source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp	Fri Nov 14 16:14:39 2014 -0600
> +++ b/source/encoder/analysis.cpp	Sun Nov 16 14:32:17 2014 +0900
> @@ -768,7 +768,7 @@
>      if (mightNotSplit)
>      {
>          /* early-out statistics */
> -        FrameData& curEncData = const_cast<FrameData&>(*m_frame->m_encData);
> +        FrameData& curEncData = *m_frame->m_encData;
>          FrameData::RCStatCU& cuStat = curEncData.m_cuStat[parentCTU.m_cuAddr];
>          uint64_t temp = cuStat.avgCost[depth] * cuStat.count[depth];
>          cuStat.count[depth] += 1;
> @@ -1050,7 +1050,7 @@
>      if (mightNotSplit)
>      {
>          /* early-out statistics */
> -        FrameData& curEncData = const_cast<FrameData&>(*m_frame->m_encData);
> +        FrameData& curEncData = *m_frame->m_encData;
>          FrameData::RCStatCU& cuStat = curEncData.m_cuStat[parentCTU.m_cuAddr];
>          uint64_t temp = cuStat.avgCost[depth] * cuStat.count[depth];
>          cuStat.count[depth] += 1;
> @@ -1861,7 +1861,7 @@
>       * each quantity */
>  
>      uint32_t depth = cuGeom.depth;
> -    FrameData& curEncData = const_cast<FrameData&>(*m_frame->m_encData);
> +    FrameData& curEncData = *m_frame->m_encData;
>      FrameData::RCStatCU& cuStat = curEncData.m_cuStat[parentCTU.m_cuAddr];
>      uint64_t cuCost = cuStat.avgCost[depth] * cuStat.count[depth];
>      uint64_t cuCount = cuStat.count[depth];
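
These three casts were never buying anything in the first place; as far as I
can tell m_frame->m_encData is a plain non-const pointer, so the dereference
already yields a mutable FrameData:

    /* assuming Frame declares:  FrameData* m_encData; */
    FrameData& curEncData = *m_frame->m_encData;   /* binds with no cast */
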
> diff -r 8191e0d02455 -r 9578af829f2a source/encoder/rdcost.h
> --- a/source/encoder/rdcost.h	Fri Nov 14 16:14:39 2014 -0600
> +++ b/source/encoder/rdcost.h	Sun Nov 16 14:32:17 2014 +0900
> @@ -82,13 +82,13 @@
>      }
>  
>      /* return the difference in energy between the source block and the recon block */
> -    inline int psyCost(int size, pixel *source, intptr_t sstride, pixel *recon, intptr_t rstride) const
> +    inline int psyCost(int size, const pixel* source, intptr_t sstride, const pixel* recon, intptr_t rstride) const
>      {
>          return primitives.psy_cost_pp[size](source, sstride, recon, rstride);
>      }
>  
>      /* return the difference in energy between the source block and the recon block */
> -    inline int psyCost(int size, int16_t *source, intptr_t sstride, int16_t *recon, intptr_t rstride) const
> +    inline int psyCost(int size, const int16_t* source, intptr_t sstride, const int16_t* recon, intptr_t rstride) const
>      {
>          return primitives.psy_cost_ss[size](source, sstride, recon, rstride);
>      }
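
With both psyCost overloads const-correct, RD call sites can feed buffers
obtained from const Yuv references directly; roughly (an illustrative call,
the variable names are mine):

    const pixel* fenc  = fencYuv->getLumaAddr(absPartIdx);
    const pixel* recon = reconYuv.getLumaAddr(absPartIdx);
    int energy = m_rdCost.psyCost(sizeIdx, fenc, fencYuv->m_size,
                                  recon, reconYuv.m_size);
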
> diff -r 8191e0d02455 -r 9578af829f2a source/encoder/search.cpp
> --- a/source/encoder/search.cpp	Fri Nov 14 16:14:39 2014 -0600
> +++ b/source/encoder/search.cpp	Sun Nov 16 14:32:17 2014 +0900
> @@ -282,7 +282,7 @@
>          if (mightSplit)
>              m_entropyCoder.store(m_rqt[fullDepth].rqtRoot);
>  
> -        pixel*   fenc     = const_cast<pixel*>(mode.fencYuv->getLumaAddr(absPartIdx));
> +        const pixel* fenc = mode.fencYuv->getLumaAddr(absPartIdx);
>          pixel*   pred     = mode.predYuv.getLumaAddr(absPartIdx);
>          int16_t* residual = m_rqt[cuGeom.depth].tmpResiYuv.getLumaAddr(absPartIdx);
>          uint32_t stride   = mode.fencYuv->m_size;
> @@ -464,7 +464,7 @@
>      int      bTSkip = 0;
>      uint32_t bCBF = 0;
>  
> -    pixel*   fenc = const_cast<pixel*>(fencYuv->getLumaAddr(absPartIdx));
> +    const pixel* fenc = fencYuv->getLumaAddr(absPartIdx);
>      pixel*   pred = predYuv->getLumaAddr(absPartIdx);
>      int16_t* residual = m_rqt[cuGeom.depth].tmpResiYuv.getLumaAddr(absPartIdx);
>      uint32_t stride = fencYuv->m_size;
> @@ -631,7 +631,7 @@
>  
>      if (bCheckFull)
>      {
> -        pixel*   fenc      = const_cast<pixel*>(mode.fencYuv->getLumaAddr(absPartIdx));
> +        const pixel* fenc  = mode.fencYuv->getLumaAddr(absPartIdx);
>          pixel*   pred      = mode.predYuv.getLumaAddr(absPartIdx);
>          int16_t* residual  = m_rqt[cuGeom.depth].tmpResiYuv.getLumaAddr(absPartIdx);
>          pixel*   picReconY = m_frame->m_reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx + absPartIdx);
> @@ -808,7 +808,7 @@
>          {
>              uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU;
>  
> -            pixel*   fenc     = const_cast<Yuv*>(mode.fencYuv)->getChromaAddr(chromaId, absPartIdxC);
> +            const pixel* fenc = mode.fencYuv->getChromaAddr(chromaId, absPartIdxC);
>              pixel*   pred     = mode.predYuv.getChromaAddr(chromaId, absPartIdxC);
>              int16_t* residual = m_rqt[cuGeom.depth].tmpResiYuv.getChromaAddr(chromaId, absPartIdxC);
>              uint32_t stride   = mode.fencYuv->m_csize;
> @@ -901,7 +901,7 @@
>          {
>              uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU;
>  
> -            pixel*   fenc = const_cast<Yuv*>(mode.fencYuv)->getChromaAddr(chromaId, absPartIdxC);
> +            const pixel* fenc = mode.fencYuv->getChromaAddr(chromaId, absPartIdxC);
>              pixel*   pred = mode.predYuv.getChromaAddr(chromaId, absPartIdxC);
>              int16_t* residual = m_rqt[cuGeom.depth].tmpResiYuv.getChromaAddr(chromaId, absPartIdxC);
>              uint32_t stride = mode.fencYuv->m_csize;
> @@ -1100,7 +1100,7 @@
>              {
>                  uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU;
>  
> -                pixel*   fenc         = const_cast<pixel*>(mode.fencYuv->getChromaAddr(chromaId, absPartIdxC));
> +                const pixel*   fenc   = mode.fencYuv->getChromaAddr(chromaId, absPartIdxC);
>                  pixel*   pred         = mode.predYuv.getChromaAddr(chromaId, absPartIdxC);
>                  int16_t* residual     = resiYuv.getChromaAddr(chromaId, absPartIdxC);
>                  pixel*   recon        = mode.reconYuv.getChromaAddr(chromaId, absPartIdxC); // TODO: needed?
> @@ -1218,13 +1218,13 @@
>      // Reference sample smoothing
>      initAdiPattern(cu, cuGeom, absPartIdx, initTrDepth, ALL_IDX);
>  
> -    pixel* fenc = intraMode.fencYuv->m_buf[0];
> +    const pixel* fenc = intraMode.fencYuv->m_buf[0];
>      uint32_t stride = intraMode.fencYuv->m_size;
>  
> -    pixel *above = m_refAbove + tuSize - 1;
> -    pixel *aboveFiltered = m_refAboveFlt + tuSize - 1;
> -    pixel *left = m_refLeft + tuSize - 1;
> -    pixel *leftFiltered = m_refLeftFlt + tuSize - 1;
> +    pixel* above = m_refAbove + tuSize - 1;
> +    pixel* aboveFiltered = m_refAboveFlt + tuSize - 1;
> +    pixel* left = m_refLeft + tuSize - 1;
> +    pixel* leftFiltered = m_refLeftFlt + tuSize - 1;
>      int sad, bsad;
>      uint32_t bits, bbits, mode, bmode;
>      uint64_t cost, bcost;
> @@ -1248,8 +1248,8 @@
>          // or left buffers
>          pixel _above[4 * 32 + 1];
>          pixel _left[4 * 32 + 1];
> -        pixel *aboveScale = _above + 2 * 32;
> -        pixel *leftScale = _left + 2 * 32;
> +        pixel* aboveScale = _above + 2 * 32;
> +        pixel* leftScale = _left + 2 * 32;
>          aboveScale[0] = leftScale[0] = above[0];
>          primitives.scale1D_128to64(aboveScale + 1, above + 1, 0);
>          primitives.scale1D_128to64(leftScale + 1, left + 1, 0);
> @@ -1286,8 +1286,8 @@
>      bbits = (mpms & ((uint64_t)1 << mode)) ? m_entropyCoder.bitsIntraModeMPM(preds, mode) : rbits;
>      bcost = m_rdCost.calcRdSADCost(bsad, bbits);
>  
> -    pixel *abovePlanar = above;
> -    pixel *leftPlanar = left;
> +    pixel* abovePlanar = above;
> +    pixel* leftPlanar = left;
>  
>      if (tuSize & (8 | 16 | 32))
>      {
> @@ -1309,7 +1309,7 @@
>      primitives.intra_pred_allangs[sizeIdx](tmp, above, left, aboveFiltered, leftFiltered, (scaleTuSize <= 16));
>  
>      bool modeHor;
> -    pixel *cmp;
> +    const pixel* cmp;
>      intptr_t srcStride;
>  
>  #define TRY_ANGLE(angle) \
> @@ -1449,13 +1449,13 @@
>              initAdiPattern(cu, cuGeom, absPartIdx, initTrDepth, ALL_IDX);
>  
>              // determine set of modes to be tested (using prediction signal only)
> -            pixel*   fenc = const_cast<pixel*>(fencYuv->getLumaAddr(absPartIdx));
> +            const pixel* fenc = fencYuv->getLumaAddr(absPartIdx);
>              uint32_t stride = predYuv->m_size;
>  
> -            pixel *above = m_refAbove + tuSize - 1;
> -            pixel *aboveFiltered = m_refAboveFlt + tuSize - 1;
> -            pixel *left = m_refLeft + tuSize - 1;
> -            pixel *leftFiltered = m_refLeftFlt + tuSize - 1;
> +            pixel* above = m_refAbove + tuSize - 1;
> +            pixel* aboveFiltered = m_refAboveFlt + tuSize - 1;
> +            pixel* left = m_refLeft + tuSize - 1;
> +            pixel* leftFiltered = m_refLeftFlt + tuSize - 1;
>  
>              // 33 Angle modes once
>              ALIGN_VAR_32(pixel, buf_trans[32 * 32]);
> @@ -1469,8 +1469,8 @@
>  
>              if (tuSize > 32)
>              {
> -                pixel *aboveScale = _above + 2 * 32;
> -                pixel *leftScale = _left + 2 * 32;
> +                pixel* aboveScale = _above + 2 * 32;
> +                pixel* leftScale = _left + 2 * 32;
>  
>                  // origin is 64x64, we scale to 32x32 and setup required parameters
>                  primitives.scale2D_64to32(bufScale, fenc, stride);
> @@ -1515,8 +1515,8 @@
>              modeCosts[DC_IDX] = bcost = m_rdCost.calcRdSADCost(sad, bits);
>  
>              // PLANAR
> -            pixel *abovePlanar = above;
> -            pixel *leftPlanar = left;
> +            pixel* abovePlanar = above;
> +            pixel* leftPlanar = left;
>              if (tuSize >= 8 && tuSize <= 32)
>              {
>                  abovePlanar = aboveFiltered;
> @@ -1535,7 +1535,7 @@
>              for (int mode = 2; mode < 35; mode++)
>              {
>                  bool modeHor = (mode < 18);
> -                pixel *cmp = (modeHor ? buf_trans : fenc);
> +                const pixel* cmp = (modeHor ? buf_trans : fenc);
>                  intptr_t srcStride = (modeHor ? scaleTuSize : scaleStride);
>                  bits = (mpms & ((uint64_t)1 << mode)) ? m_entropyCoder.bitsIntraModeMPM(preds, mode) : rbits;
>                  sad = sa8d(cmp, srcStride, &tmp[(mode - 2) * (scaleTuSize * scaleTuSize)], scaleTuSize) << costShift;
> @@ -1598,7 +1598,7 @@
>               * that the contexts should be tracked through each PU */
>              pixel*   dst         = m_frame->m_reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx + absPartIdx);
>              uint32_t dststride   = m_frame->m_reconPic->m_stride;
> -            pixel*   src         = reconYuv->getLumaAddr(absPartIdx);
> +            const pixel*   src   = reconYuv->getLumaAddr(absPartIdx);
>              uint32_t srcstride   = reconYuv->m_size;
>              primitives.square_copy_pp[log2TrSize - 2](dst, dststride, src, srcstride);
>          }
> @@ -1659,7 +1659,7 @@
>          uint64_t cost = 0;
>          for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
>          {
> -            pixel* fenc = fencYuv->m_buf[chromaId];
> +            const pixel* fenc = fencYuv->m_buf[chromaId];
>              pixel* pred = predYuv->m_buf[chromaId];
>              pixel* chromaPred = getAdiChromaBuf(chromaId, scaleTuSize);
>  
> @@ -1759,7 +1759,8 @@
>          {
>              uint32_t zorder    = cuGeom.encodeIdx + absPartIdxC;
>              uint32_t dststride = m_frame->m_reconPic->m_strideC;
> -            pixel *src, *dst;
> +            const pixel* src;
> +            pixel* dst;
>  
>              dst = m_frame->m_reconPic->getCbAddr(cu.m_cuAddr, zorder);
>              src = reconYuv.getCbAddr(absPartIdxC);
> @@ -2179,8 +2180,8 @@
>              predInterLumaPixel(bidirYuv[0], *refPic0, bestME[0].mv);
>              predInterLumaPixel(bidirYuv[1], *refPic1, bestME[1].mv);
>  
> -            pixel *pred0 = bidirYuv[0].getLumaAddr(m_puAbsPartIdx);
> -            pixel *pred1 = bidirYuv[1].getLumaAddr(m_puAbsPartIdx);
> +            pixel* pred0 = bidirYuv[0].getLumaAddr(m_puAbsPartIdx);
> +            pixel* pred1 = bidirYuv[1].getLumaAddr(m_puAbsPartIdx);
>  
>              int partEnum = partitionFromSizes(m_puWidth, m_puHeight);
>              primitives.pixelavg_pp[partEnum](tmpPredYuv.m_buf[0], tmpPredYuv.m_size, pred0, bidirYuv[0].m_size, pred1, bidirYuv[1].m_size, 32);
> @@ -2207,8 +2208,8 @@
>              if (bTryZero)
>              {
>                  /* coincident blocks of the two reference pictures */
> -                pixel *ref0 = m_slice->m_mref[0][bestME[0].ref].getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx + m_puAbsPartIdx);
> -                pixel *ref1 = m_slice->m_mref[1][bestME[1].ref].getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx + m_puAbsPartIdx);
> +                const pixel* ref0 = m_slice->m_mref[0][bestME[0].ref].getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx + m_puAbsPartIdx);
> +                const pixel* ref1 = m_slice->m_mref[1][bestME[1].ref].getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx + m_puAbsPartIdx);
>                  intptr_t refStride = slice->m_mref[0][0].lumaStride;
>  
>                  primitives.pixelavg_pp[partEnum](tmpPredYuv.m_buf[0], tmpPredYuv.m_size, ref0, refStride, ref1, refStride, 32);
> @@ -2616,10 +2617,10 @@
>          ShortYuv& resiYuv = m_rqt[cuGeom.depth].tmpResiYuv;
>          const Yuv* fencYuv = mode.fencYuv;
>  
> -        int16_t *curResiY = resiYuv.getLumaAddr(absPartIdx);
> +        int16_t* curResiY = resiYuv.getLumaAddr(absPartIdx);
>          uint32_t strideResiY = resiYuv.m_size;
>  
> -        pixel *fenc = const_cast<pixel*>(fencYuv->getLumaAddr(absPartIdx));
> +        const pixel* fenc = fencYuv->getLumaAddr(absPartIdx);
>          uint32_t numSigY = m_quant.transformNxN(cu, fenc, fencYuv->m_size, curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, absPartIdx, false);
>  
>          if (numSigY)
> @@ -2653,7 +2654,7 @@
>                  cu.setTransformSkipPartRange(0, TEXT_CHROMA_V, absPartIdxC, tuIterator.absPartIdxStep);
>  
>                  int16_t* curResiU = resiYuv.getCbAddr(absPartIdxC);
> -                pixel*   fencCb = const_cast<pixel*>(fencYuv->getCbAddr(absPartIdxC));
> +                const pixel* fencCb = fencYuv->getCbAddr(absPartIdxC);
>                  uint32_t numSigU = m_quant.transformNxN(cu, fencCb, fencYuv->m_csize, curResiU, strideResiC, coeffCurU + subTUOffset, log2TrSizeC, TEXT_CHROMA_U, absPartIdxC, false);
>                  if (numSigU)
>                  {
> @@ -2667,7 +2668,7 @@
>                  }
>  
>                  int16_t* curResiV = resiYuv.getCrAddr(absPartIdxC);
> -                pixel*   fencCr = const_cast<pixel*>(fencYuv->getCrAddr(absPartIdxC));
> +                const pixel* fencCr = fencYuv->getCrAddr(absPartIdxC);
>                  uint32_t numSigV = m_quant.transformNxN(cu, fencCr, fencYuv->m_csize, curResiV, strideResiC, coeffCurV + subTUOffset, log2TrSizeC, TEXT_CHROMA_V, absPartIdxC, false);
>                  if (numSigV)
>                  {
> @@ -2787,8 +2788,8 @@
>          if (m_bEnableRDOQ)
>              m_entropyCoder.estBit(m_entropyCoder.m_estBitsSbac, log2TrSize, true);
>  
> -        pixel *fenc = const_cast<pixel*>(fencYuv->getLumaAddr(absPartIdx));
> -        int16_t *resi = resiYuv.getLumaAddr(absPartIdx);
> +        const pixel* fenc = fencYuv->getLumaAddr(absPartIdx);
> +        int16_t* resi = resiYuv.getLumaAddr(absPartIdx);
>          numSig[TEXT_LUMA][0] = m_quant.transformNxN(cu, fenc, fencYuv->m_size, resi, resiYuv.m_size, coeffCurY, log2TrSize, TEXT_LUMA, absPartIdx, false);
>          cbfFlag[TEXT_LUMA][0] = !!numSig[TEXT_LUMA][0];
>  
> @@ -2813,7 +2814,7 @@
>          if (m_rdCost.m_psyRd)
>              psyEnergyY = m_rdCost.psyCost(partSize, resiYuv.getLumaAddr(absPartIdx), resiYuv.m_size, (int16_t*)zeroShort, 0);
>  
> -        int16_t *curResiY    = m_rqt[qtLayer].resiQtYuv.getLumaAddr(absPartIdx);
> +        int16_t* curResiY    = m_rqt[qtLayer].resiQtYuv.getLumaAddr(absPartIdx);
>          uint32_t strideResiY = m_rqt[qtLayer].resiQtYuv.m_size;
>  
>          if (cbfFlag[TEXT_LUMA][0])
> @@ -2899,7 +2900,7 @@
>                      if (m_bEnableRDOQ && (chromaId != TEXT_CHROMA_V))
>                          m_entropyCoder.estBit(m_entropyCoder.m_estBitsSbac, log2TrSizeC, false);
>  
> -                    fenc = const_cast<pixel*>(fencYuv->getChromaAddr(chromaId, absPartIdxC));
> +                    fenc = fencYuv->getChromaAddr(chromaId, absPartIdxC);
>                      resi = resiYuv.getChromaAddr(chromaId, absPartIdxC);
>                      numSig[chromaId][tuIterator.section] = m_quant.transformNxN(cu, fenc, fencYuv->m_csize, resi, resiYuv.m_csize, coeffCurC + subTUOffset, log2TrSizeC, (TextType)chromaId, absPartIdxC, false);
>                      cbfFlag[chromaId][tuIterator.section] = !!numSig[chromaId][tuIterator.section];
> @@ -2912,7 +2913,7 @@
>                      singleBits[chromaId][tuIterator.section] = newBits - singleBitsPrev;
>                      singleBitsPrev = newBits;
>  
> -                    int16_t *curResiC = m_rqt[qtLayer].resiQtYuv.getChromaAddr(chromaId, absPartIdxC);
> +                    int16_t* curResiC = m_rqt[qtLayer].resiQtYuv.getChromaAddr(chromaId, absPartIdxC);
>                      distC = m_rdCost.scaleChromaDistCb(primitives.ssd_s[log2TrSizeC - 2](resiYuv.getChromaAddr(chromaId, absPartIdxC), resiYuv.m_csize));
>  
>                      if (cbfFlag[chromaId][tuIterator.section])
> @@ -2998,7 +2999,7 @@
>              if (m_bEnableRDOQ)
>                  m_entropyCoder.estBit(m_entropyCoder.m_estBitsSbac, log2TrSize, true);
>  
> -            fenc = const_cast<pixel*>(fencYuv->getLumaAddr(absPartIdx));
> +            fenc = fencYuv->getLumaAddr(absPartIdx);
>              resi = resiYuv.getLumaAddr(absPartIdx);
>              uint32_t numSigTSkipY = m_quant.transformNxN(cu, fenc, fencYuv->m_size, resi, resiYuv.m_size, tsCoeffY, log2TrSize, TEXT_LUMA, absPartIdx, true);
>  
> @@ -3057,7 +3058,7 @@
>                      uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU;
>                      uint32_t subTUOffset = tuIterator.section << (log2TrSizeC * 2);
>  
> -                    int16_t *curResiC = m_rqt[qtLayer].resiQtYuv.getChromaAddr(chromaId, absPartIdxC);
> +                    int16_t* curResiC = m_rqt[qtLayer].resiQtYuv.getChromaAddr(chromaId, absPartIdxC);
>  
>                      ALIGN_VAR_32(coeff_t, tsCoeffC[MAX_TS_SIZE * MAX_TS_SIZE]);
>                      ALIGN_VAR_32(int16_t, tsResiC[MAX_TS_SIZE * MAX_TS_SIZE]);
> @@ -3067,7 +3068,7 @@
>                      if (m_bEnableRDOQ && (chromaId != TEXT_CHROMA_V))
>                          m_entropyCoder.estBit(m_entropyCoder.m_estBitsSbac, log2TrSizeC, false);
>  
> -                    fenc = const_cast<pixel*>(fencYuv->getChromaAddr(chromaId, absPartIdxC));
> +                    fenc = fencYuv->getChromaAddr(chromaId, absPartIdxC);
>                      resi = resiYuv.getChromaAddr(chromaId, absPartIdxC);
>                      uint32_t numSigTSkipC = m_quant.transformNxN(cu, fenc, fencYuv->m_csize, resi, resiYuv.m_csize, tsCoeffC, log2TrSizeC, (TextType)chromaId, absPartIdxC, true);
>  
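
All of these call sites handing transformNxN a const fenc imply the
Quant::transformNxN prototype itself picks up the const in a part of the
patch not quoted here; presumably something like:

    uint32_t transformNxN(CUData& cu, const pixel* fenc, uint32_t fencStride,
                          int16_t* residual, uint32_t resiStride,
                          coeff_t* coeff, uint32_t log2TrSize, TextType ttype,
                          uint32_t absPartIdx, bool useTransformSkip);

That matches the argument order at every call site above, so it should drop
in cleanly.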
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho


