[x265-commits] [x265] asm: re-enable sad_x3 following alignment workaround

Tue Nov 5 01:48:44 CET 2013

details:   http://hg.videolan.org/x265/rev/814b4639d6a6
branches:  
changeset: 4823:814b4639d6a6
user:      Steve Borho <steve at borho.org>
date:      Mon Nov 04 01:02:00 2013 -0600
description:
asm: re-enable sad_x3 following alignment workaround
Subject: [x265] Dropping the 'm_' prefix from names used for members of struct type Lowres

details:   http://hg.videolan.org/x265/rev/37903c6fd1f9
branches:  
changeset: 4824:37903c6fd1f9
user:      Shazeb Nawaz Khan <shazeb at multicorewareinc.com>
date:      Mon Nov 04 12:09:06 2013 +0530
description:
Dropping the 'm_' prefix from names used for members of struct type Lowres
Subject: [x265] RDOQ : pass a parameter to temporarily disable RDOQ.

details:   http://hg.videolan.org/x265/rev/91d96a6038e2
branches:  
changeset: 4825:91d96a6038e2
user:      Deepthi Devaki <deepthidevaki at multicorewareinc.com>
date:      Mon Nov 04 15:23:10 2013 +0530
description:
RDOQ : pass a parameter to temporarily disable RDOQ.

The default value of the parameter is true, which means RDOQ is not disabled. Outputs must not change at any rd level.
Subject: [x265] update authors header

details:   http://hg.videolan.org/x265/rev/6e211f980d98
branches:  
changeset: 4826:6e211f980d98
user:      Min Chen <chenm003 at 163.com>
date:      Mon Nov 04 19:04:43 2013 +0800
description:
update authors header
Subject: [x265] fix bug in chroma_p2s and testbench

details:   http://hg.videolan.org/x265/rev/c83157a8b616
branches:  
changeset: 4827:c83157a8b616
user:      Min Chen <chenm003 at 163.com>
date:      Mon Nov 04 19:05:18 2013 +0800
description:
fix bug in chroma_p2s and testbench
Subject: [x265] asm code and test bench integration code for blockcopy_pp_c partitions

details:   http://hg.videolan.org/x265/rev/7898c58d9cbc
branches:  
changeset: 4828:7898c58d9cbc
user:      Praveen Tiwari
date:      Mon Nov 04 17:30:15 2013 +0530
description:
asm code and test bench integration code for blockcopy_pp_c partitions
Subject: [x265] added C primitive for blockcopy_p_s and function pointer creation for new type

details:   http://hg.videolan.org/x265/rev/e61a0b1c035b
branches:  
changeset: 4829:e61a0b1c035b
user:      Praveen Tiwari
date:      Mon Nov 04 18:14:38 2013 +0530
description:
added C primitive for blockcopy_p_s and function pointer creation for new type
Subject: [x265] pixel.cpp, initialization of function pointer table for blockcopy_ps_c partitions

details:   http://hg.videolan.org/x265/rev/64f25611bcb2
branches:  
changeset: 4830:64f25611bcb2
user:      Praveen Tiwari
date:      Mon Nov 04 18:45:30 2013 +0530
description:
pixel.cpp, initialization of function pointer table for blockcopy_ps_c partitions
Subject: [x265] pixel.h, added asm function declaration for blockcopy_ps_c

details:   http://hg.videolan.org/x265/rev/4cd16b86488c
branches:  
changeset: 4831:4cd16b86488c
user:      Praveen Tiwari
date:      Mon Nov 04 18:57:06 2013 +0530
description:
pixel.h, added asm function declaration for blockcopy_ps_c
Subject: [x265] corrected buffer name for chroma_copy_pp

details:   http://hg.videolan.org/x265/rev/35989e4e0b46
branches:  
changeset: 4832:35989e4e0b46
user:      Praveen Tiwari
date:      Mon Nov 04 20:37:42 2013 +0530
description:
corrected buffer name for chroma_copy_pp
Subject: [x265] pixelharness: shorten copy primitive names for consistency

details:   http://hg.videolan.org/x265/rev/f3106abb88b2
branches:  
changeset: 4833:f3106abb88b2
user:      Steve Borho <steve at borho.org>
date:      Mon Nov 04 18:35:58 2013 -0600
description:
pixelharness: shorten copy primitive names for consistency

diffstat:

 source/Lib/TLibCommon/TComDataCU.cpp  |    2 +-
 source/Lib/TLibCommon/TComTrQuant.cpp |   25 +-
 source/Lib/TLibCommon/TComTrQuant.h   |    4 +-
 source/Lib/TLibEncoder/TEncSearch.cpp |   39 +-
 source/Lib/TLibEncoder/TEncSearch.h   |    4 +-
 source/common/CMakeLists.txt          |    2 +-
 source/common/lowres.cpp              |   12 +-
 source/common/lowres.h                |    8 +-
 source/common/pixel.cpp               |   21 +-
 source/common/primitives.h            |    3 +
 source/common/x86/asm-primitives.cpp  |   74 ++-
 source/common/x86/blockcopy8.asm      |  798 ++++++++++++++++++++++++++++++++++
 source/common/x86/ipfilter8.asm       |    7 +-
 source/common/x86/pixel.h             |   70 ++
 source/encoder/frameencoder.cpp       |    2 +-
 source/encoder/ratecontrol.cpp        |   14 +-
 source/encoder/slicetype.cpp          |    8 +-
 source/test/ipfilterharness.cpp       |   12 +-
 source/test/pixelharness.cpp          |    6 +-
 19 files changed, 1029 insertions(+), 82 deletions(-)

diffs (truncated from 1550 to 300 lines):

diff -r 2ab39c2dd50f -r f3106abb88b2 source/Lib/TLibCommon/TComDataCU.cpp

--- a/source/Lib/TLibCommon/TComDataCU.cpp	Mon Nov 04 11:40:49 2013 +0530
+++ b/source/Lib/TLibCommon/TComDataCU.cpp	Mon Nov 04 18:35:58 2013 -0600
@@ -246,7 +246,7 @@ void TComDataCU::initCU(TComPic* pic, ui
     m_totalDistortion  = 0;
     m_totalBits        = 0;
     m_numPartitions    = pic->getNumPartInCU();
-    int qp             = pic->m_lowres.m_invQscaleFactor ? pic->getCU(getAddr())->getQP(0) : m_slice->getSliceQp();
+    int qp             = pic->m_lowres.invQscaleFactor ? pic->getCU(getAddr())->getQP(0) : m_slice->getSliceQp();
     for (int i = 0; i < 4; i++)
     {
         m_avgCost[i] = 0;
diff -r 2ab39c2dd50f -r f3106abb88b2 source/Lib/TLibCommon/TComTrQuant.cpp
--- a/source/Lib/TLibCommon/TComTrQuant.cpp	Mon Nov 04 11:40:49 2013 +0530
+++ b/source/Lib/TLibCommon/TComTrQuant.cpp	Mon Nov 04 18:35:58 2013 -0600
@@ -253,11 +253,11 @@ void TComTrQuant::signBitHidingHDQ(TCoef
 }
 
 uint32_t TComTrQuant::xQuant(TComDataCU* cu, int32_t* coef, TCoeff* qCoef, int width, int height,
-                         TextType ttype, uint32_t absPartIdx, int32_t *lastPos)
+                             TextType ttype, uint32_t absPartIdx, int32_t *lastPos, bool curUseRDOQ)
 {
     uint32_t acSum = 0;
     int add = 0;
-    bool useRDOQ = cu->getTransformSkip(absPartIdx, ttype) ? m_useRDOQTS : m_useRDOQ;
+    bool useRDOQ = (cu->getTransformSkip(absPartIdx, ttype) ? m_useRDOQTS : m_useRDOQ) && curUseRDOQ;
 
     assert(width == height);
 
@@ -339,15 +339,16 @@ void TComTrQuant::init(uint32_t maxTrSiz
 }
 
 uint32_t TComTrQuant::transformNxN(TComDataCU* cu,
-                               int16_t*      residual,
-                               uint32_t        stride,
-                               TCoeff*     coeff,
-                               uint32_t        width,
-                               uint32_t        height,
-                               TextType    ttype,
-                               uint32_t        absPartIdx,
-                               int32_t*        lastPos,
-                               bool        useTransformSkip)
+                                   int16_t*    residual,
+                                   uint32_t    stride,
+                                   TCoeff*     coeff,
+                                   uint32_t    width,
+                                   uint32_t    height,
+                                   TextType    ttype,
+                                   uint32_t    absPartIdx,
+                                   int32_t*    lastPos,
+                                   bool        useTransformSkip,
+                                   bool        curUseRDOQ)
 {
     if (cu->getCUTransquantBypass(absPartIdx))
     {
@@ -385,7 +386,7 @@ uint32_t TComTrQuant::transformNxN(TComD
         const uint32_t log2BlockSize = g_convertToBit[width];
         primitives.dct[DCT_4x4 + log2BlockSize - ((width == 4) && (mode != REG_DCT))](residual, m_tmpCoeff, stride);
     }
-    return xQuant(cu, m_tmpCoeff, coeff, width, height, ttype, absPartIdx, lastPos);
+    return xQuant(cu, m_tmpCoeff, coeff, width, height, ttype, absPartIdx, lastPos, curUseRDOQ);
 }
 
 void TComTrQuant::invtransformNxN(bool transQuantBypass, uint32_t mode, int16_t* residual, uint32_t stride, TCoeff* coeff, uint32_t width, uint32_t height, int scalingListType, bool useTransformSkip, int lastPos)
diff -r 2ab39c2dd50f -r f3106abb88b2 source/Lib/TLibCommon/TComTrQuant.h
--- a/source/Lib/TLibCommon/TComTrQuant.h	Mon Nov 04 11:40:49 2013 +0530
+++ b/source/Lib/TLibCommon/TComTrQuant.h	Mon Nov 04 18:35:58 2013 -0600
@@ -128,7 +128,7 @@ public:
 
     // transform & inverse transform functions
     uint32_t transformNxN(TComDataCU* cu, int16_t* residual, uint32_t stride, TCoeff* coeff, uint32_t width, uint32_t height,
-                      TextType ttype, uint32_t absPartIdx, int32_t* lastPos, bool useTransformSkip = false);
+                          TextType ttype, uint32_t absPartIdx, int32_t* lastPos, bool useTransformSkip = false, bool curUseRDOQ = true);
 
     void invtransformNxN(bool transQuantBypass, uint32_t mode, int16_t* residual, uint32_t stride, TCoeff* coeff, uint32_t width, uint32_t height, int scalingListType, bool useTransformSkip = false, int lastPos = MAX_INT);
 
@@ -194,7 +194,7 @@ private:
 
     void signBitHidingHDQ(TCoeff* qcoeff, TCoeff* coeff, const uint32_t* scan, int32_t* deltaU, int width, int height);
 
-    uint32_t xQuant(TComDataCU* cu, int32_t* src, TCoeff* dst, int width, int height, TextType ttype, uint32_t absPartIdx, int32_t *lastPos);
+    uint32_t xQuant(TComDataCU* cu, int32_t* src, TCoeff* dst, int width, int height, TextType ttype, uint32_t absPartIdx, int32_t *lastPos, bool curUseRDOQ = true);
 
     // RDOQ functions
     uint32_t xRateDistOptQuant(TComDataCU* cu, int32_t* srcCoeff, TCoeff* dstCoeff, uint32_t width, uint32_t height, TextType ttype, uint32_t absPartIdx, int32_t *lastPos);
diff -r 2ab39c2dd50f -r f3106abb88b2 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp	Mon Nov 04 11:40:49 2013 +0530
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp	Mon Nov 04 18:35:58 2013 -0600
@@ -2773,7 +2773,7 @@ void TEncSearch::xSetSearchRange(TComDat
  * \returns void
  */
 void TEncSearch::encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, TShortYUV* outResiYuv,
-                                           TShortYUV* outBestResiYuv, TComYuv* outReconYuv, bool bSkipRes)
+                                           TShortYUV* outBestResiYuv, TComYuv* outReconYuv, bool bSkipRes, bool curUseRDOQ)
 {
     if (cu->isIntra(0))
     {
@@ -2854,7 +2854,7 @@ void TEncSearch::encodeResAndCalcRdInter
     m_rdGoOnSbacCoder->load(m_rdSbacCoders[cu->getDepth(0)][CI_CURR_BEST]);
 
     uint32_t zeroDistortion = 0;
-    xEstimateResidualQT(cu, 0, 0, outResiYuv, cu->getDepth(0), cost, bits, distortion, &zeroDistortion);
+    xEstimateResidualQT(cu, 0, 0, outResiYuv, cu->getDepth(0), cost, bits, distortion, &zeroDistortion, curUseRDOQ);
 
     m_entropyCoder->resetBits();
     m_entropyCoder->encodeQtRootCbfZero(cu);
@@ -2939,15 +2939,16 @@ void TEncSearch::encodeResAndCalcRdInter
 #pragma warning(disable: 4701) // potentially uninitialized local variable
 #endif
 
-void TEncSearch::xEstimateResidualQT(TComDataCU* cu,
-                                     uint32_t        absPartIdx,
-                                     uint32_t        absTUPartIdx,
-                                     TShortYUV*  resiYuv,
-                                     const uint32_t  depth,
-                                     UInt64 &    rdCost,
-                                     uint32_t &      outBits,
-                                     uint32_t &      outDist,
-                                     uint32_t *      outZeroDist)
+void TEncSearch::xEstimateResidualQT(TComDataCU*    cu,
+                                     uint32_t       absPartIdx,
+                                     uint32_t       absTUPartIdx,
+                                     TShortYUV*     resiYuv,
+                                     const uint32_t depth,
+                                     UInt64 &       rdCost,
+                                     uint32_t &     outBits,
+                                     uint32_t &     outDist,
+                                     uint32_t *     outZeroDist,
+                                     bool           curuseRDOQ)
 {
     assert(cu->getDepth(0) == cu->getDepth(absPartIdx));
     const uint32_t trMode = depth - cu->getDepth(0);
@@ -3014,7 +3015,7 @@ void TEncSearch::xEstimateResidualQT(TCo
             cu->setTransformSkipSubParts(0, TEXT_CHROMA_V, absPartIdx, cu->getDepth(0) + trModeC);
         }
 
-        if (m_cfg->param.bEnableRDOQ)
+        if (m_cfg->param.bEnableRDOQ && curuseRDOQ)
         {
             m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac, trWidth, trHeight, TEXT_LUMA);
         }
@@ -3023,13 +3024,13 @@ void TEncSearch::xEstimateResidualQT(TCo
         m_trQuant->selectLambda(TEXT_LUMA);
 
         absSumY = m_trQuant->transformNxN(cu, resiYuv->getLumaAddr(absTUPartIdx), resiYuv->m_width, coeffCurY,
-                                          trWidth, trHeight, TEXT_LUMA, absPartIdx, &lastPosY);
+                                          trWidth, trHeight, TEXT_LUMA, absPartIdx, &lastPosY, false, curuseRDOQ);
 
         cu->setCbfSubParts(absSumY ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
 
         if (bCodeChroma)
         {
-            if (m_cfg->param.bEnableRDOQ)
+            if (m_cfg->param.bEnableRDOQ && curuseRDOQ)
             {
                 m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac, trWidthC, trHeightC, TEXT_CHROMA);
             }
@@ -3040,12 +3041,12 @@ void TEncSearch::xEstimateResidualQT(TCo
             m_trQuant->selectLambda(TEXT_CHROMA);
 
             absSumU = m_trQuant->transformNxN(cu, resiYuv->getCbAddr(absTUPartIdxC), resiYuv->m_cwidth, coeffCurU,
-                                              trWidthC, trHeightC, TEXT_CHROMA_U, absPartIdx, &lastPosU);
+                                              trWidthC, trHeightC, TEXT_CHROMA_U, absPartIdx, &lastPosU, false, curuseRDOQ);
 
             curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
             m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
             absSumV = m_trQuant->transformNxN(cu, resiYuv->getCrAddr(absTUPartIdxC), resiYuv->m_cwidth, coeffCurV,
-                                              trWidthC, trHeightC, TEXT_CHROMA_V, absPartIdx, &lastPosV);
+                                              trWidthC, trHeightC, TEXT_CHROMA_V, absPartIdx, &lastPosV, false, curuseRDOQ);
 
             cu->setCbfSubParts(absSumU ? setCbf : 0, TEXT_CHROMA_U, absPartIdx, cu->getDepth(0) + trModeC);
             cu->setCbfSubParts(absSumV ? setCbf : 0, TEXT_CHROMA_V, absPartIdx, cu->getDepth(0) + trModeC);
@@ -3318,7 +3319,7 @@ void TEncSearch::xEstimateResidualQT(TCo
 
             m_trQuant->selectLambda(TEXT_LUMA);
             absSumTransformSkipY = m_trQuant->transformNxN(cu, resiYuv->getLumaAddr(absTUPartIdx), resiYuv->m_width, coeffCurY,
-                                                           trWidth, trHeight, TEXT_LUMA, absPartIdx, &lastPosTransformSkipY, true);
+                                                           trWidth, trHeight, TEXT_LUMA, absPartIdx, &lastPosTransformSkipY, true, curuseRDOQ);
             cu->setCbfSubParts(absSumTransformSkipY ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
 
             if (absSumTransformSkipY != 0)
@@ -3398,11 +3399,11 @@ void TEncSearch::xEstimateResidualQT(TCo
             m_trQuant->selectLambda(TEXT_CHROMA);
 
             absSumTransformSkipU = m_trQuant->transformNxN(cu, resiYuv->getCbAddr(absTUPartIdxC), resiYuv->m_cwidth, coeffCurU,
-                                                           trWidthC, trHeightC, TEXT_CHROMA_U, absPartIdx, &lastPosTransformSkipU, true);
+                                                           trWidthC, trHeightC, TEXT_CHROMA_U, absPartIdx, &lastPosTransformSkipU, true, curuseRDOQ);
             curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
             m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
             absSumTransformSkipV = m_trQuant->transformNxN(cu, resiYuv->getCrAddr(absTUPartIdxC), resiYuv->m_cwidth, coeffCurV,
-                                                           trWidthC, trHeightC, TEXT_CHROMA_V, absPartIdx, &lastPosTransformSkipV, true);
+                                                           trWidthC, trHeightC, TEXT_CHROMA_V, absPartIdx, &lastPosTransformSkipV, true, curuseRDOQ);
 
             cu->setCbfSubParts(absSumTransformSkipU ? setCbf : 0, TEXT_CHROMA_U, absPartIdx, cu->getDepth(0) + trModeC);
             cu->setCbfSubParts(absSumTransformSkipV ? setCbf : 0, TEXT_CHROMA_V, absPartIdx, cu->getDepth(0) + trModeC);
diff -r 2ab39c2dd50f -r f3106abb88b2 source/Lib/TLibEncoder/TEncSearch.h
--- a/source/Lib/TLibEncoder/TEncSearch.h	Mon Nov 04 11:40:49 2013 +0530
+++ b/source/Lib/TLibEncoder/TEncSearch.h	Mon Nov 04 18:35:58 2013 -0600
@@ -147,7 +147,7 @@ public:
 
     /// encode residual and compute rd-cost for inter mode
     void encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, TShortYUV* resiYuv, TShortYUV* bestResiYuv,
-                                   TComYuv* reconYuv, bool bSkipRes);
+                                   TComYuv* reconYuv, bool bSkipRes, bool curUseRDOQ = true);
 
     /// set ME search range
     void setAdaptiveSearchRange(int dir, int refIdx, int merange) { m_adaptiveRange[dir][refIdx] = merange; }
@@ -235,7 +235,7 @@ protected:
 
     void xEncodeResidualQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool bSubdivAndCbf, TextType ttype);
     void xEstimateResidualQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t absTUPartIdx, TShortYUV* resiYuv, uint32_t depth,
-                             UInt64 &rdCost, uint32_t &outBits, uint32_t &outDist, uint32_t *puiZeroDist);
+                             UInt64 &rdCost, uint32_t &outBits, uint32_t &outDist, uint32_t *puiZeroDist, bool curUseRDOQ = true);
     void xSetResidualQTData(TComDataCU* cu, uint32_t absPartIdx, uint32_t absTUPartIdx, TShortYUV* resiYuv, uint32_t depth, bool bSpatial);
 
     void setWpScalingDistParam(TComDataCU* cu, int refIdx, int picList);
diff -r 2ab39c2dd50f -r f3106abb88b2 source/common/CMakeLists.txt
--- a/source/common/CMakeLists.txt	Mon Nov 04 11:40:49 2013 +0530
+++ b/source/common/CMakeLists.txt	Mon Nov 04 18:35:58 2013 -0600
@@ -122,7 +122,7 @@ endif(ENABLE_PRIMITIVES_VEC)
 
 if(ENABLE_PRIMITIVES_ASM)
     set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h)
-    set(A_SRCS pixel-a.asm const-a.asm cpu-a.asm sad-a.asm mc-a.asm mc-a2.asm ipfilter8.asm pixel-util.asm)
+    set(A_SRCS pixel-a.asm const-a.asm cpu-a.asm sad-a.asm mc-a.asm mc-a2.asm ipfilter8.asm pixel-util.asm blockcopy8.asm)
     if (NOT X64)
         set(A_SRCS ${A_SRCS} pixel-32.asm)
     endif()
diff -r 2ab39c2dd50f -r f3106abb88b2 source/common/lowres.cpp
--- a/source/common/lowres.cpp	Mon Nov 04 11:40:49 2013 +0530
+++ b/source/common/lowres.cpp	Mon Nov 04 18:35:58 2013 -0600
@@ -47,9 +47,9 @@ void Lowres::create(TComPic *pic, int bf
 
     if (*aqMode)
     {
-        m_qpAqOffset = (double*)x265_malloc(sizeof(double) * cuCount);
-        m_invQscaleFactor = (int*)x265_malloc(sizeof(int) * cuCount);
-        if (!m_qpAqOffset || !m_invQscaleFactor)
+        qpAqOffset = (double*)x265_malloc(sizeof(double) * cuCount);
+        invQscaleFactor = (int*)x265_malloc(sizeof(int) * cuCount);
+        if (!qpAqOffset || !invQscaleFactor)
             *aqMode = 0;
     }
 
@@ -110,8 +110,8 @@ void Lowres::destroy(int bframes)
         X265_FREE(lowresMvCosts[0][i]);
         X265_FREE(lowresMvCosts[1][i]);
     }
-    X265_FREE(m_qpAqOffset);
-    X265_FREE(m_invQscaleFactor);
+    X265_FREE(qpAqOffset);
+    X265_FREE(invQscaleFactor);
 }
 
 // (re) initialize lowres state
@@ -127,7 +127,7 @@ void Lowres::init(TComPicYuv *orig, int 
     satdCost = -1;
     memset(costEst, -1, sizeof(costEst));
 
-    if (m_qpAqOffset && m_invQscaleFactor)
+    if (qpAqOffset && invQscaleFactor)
         memset(costEstAq, -1, sizeof(costEstAq));
 
     for (int y = 0; y < bframes + 2; y++)
diff -r 2ab39c2dd50f -r f3106abb88b2 source/common/lowres.h
--- a/source/common/lowres.h	Mon Nov 04 11:40:49 2013 +0530
+++ b/source/common/lowres.h	Mon Nov 04 18:35:58 2013 -0600
@@ -35,15 +35,15 @@ struct Lowres : public ReferencePlanes
 {
     /* lowres buffers, sizes and strides */
     pixel *buffer[4];
-    double *m_qpAqOffset; // qp Aq offset values for each Cu
-    int*   m_invQscaleFactor; // qScale values for qp Aq Offsets 
+    double *qpAqOffset; // qp Aq offset values for each Cu
+    int    *invQscaleFactor; // qScale values for qp Aq Offsets 
     int    width;     // width of lowres frame in pixels
     int    lines;     // height of lowres frame in pixel lines
     int    frameNum;  // Presentation frame number
     int    sliceType; // Slice type decided by lookahead
     int    leadingBframes; // number of leading B frames for P or I
-    uint64_t m_wp_ssd[3];  // This is different than m_SSDY, this is sum(pixel^2) - sum(pixel)^2 for entire frame
-    uint64_t m_wp_sum[3];
+    uint64_t wp_ssd[3];  // This is different than m_SSDY, this is sum(pixel^2) - sum(pixel)^2 for entire frame
+    uint64_t wp_sum[3];
 
     bool   bIntraCalculated;
     bool   bScenecut; // Set to false if the frame cannot possibly be part of a real scenecut.
diff -r 2ab39c2dd50f -r f3106abb88b2 source/common/pixel.cpp
--- a/source/common/pixel.cpp	Mon Nov 04 11:40:49 2013 +0530
+++ b/source/common/pixel.cpp	Mon Nov 04 18:35:58 2013 -0600
@@ -773,6 +773,21 @@ void blockcopy_pp_c(pixel *a, intptr_t s
         b += strideb;
     }
 }
+
+template<int bx, int by>
+void blockcopy_ps_c(pixel *a, intptr_t stridea, int16_t *b, intptr_t strideb)
+{
+    for (int y = 0; y < by; y++)
+    {