[x265] remove m_immedVals

Satoshi Nakagawa nakagawa424 at oki.com
Thu May 12 13:37:05 CEST 2016


# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1463052561 -32400
#      Thu May 12 20:29:21 2016 +0900
# Node ID 3d6c4c1fcb9923e8215aefae62bfeeb118e173c0
# Parent  a5362b9533f6a5b77740b4e8f97dba2555b6f929
remove m_immedVals

diff -r a5362b9533f6 -r 3d6c4c1fcb99 source/common/ipfilter.cpp
--- a/source/common/ipfilter.cpp	Wed May 04 21:08:09 2016 +0000
+++ b/source/common/ipfilter.cpp	Thu May 12 20:29:21 2016 +0900
@@ -365,10 +365,10 @@
 template<int N, int width, int height>
 void interp_hv_pp_c(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int idxX, int idxY)
 {
-    short immedVals[(64 + 8) * (64 + 8)];
+    ALIGN_VAR_32(int16_t, immed[width * (height + N - 1)]);
 
-    interp_horiz_ps_c<N, width, height>(src, srcStride, immedVals, width, idxX, 1);
-    filterVertical_sp_c<N>(immedVals + 3 * width, width, dst, dstStride, width, height, idxY);
+    interp_horiz_ps_c<N, width, height>(src, srcStride, immed, width, idxX, 1);
+    filterVertical_sp_c<N>(immed + (N / 2 - 1) * width, width, dst, dstStride, width, height, idxY);
 }
 }
 
diff -r a5362b9533f6 -r 3d6c4c1fcb99 source/common/predict.cpp
--- a/source/common/predict.cpp	Wed May 04 21:08:09 2016 +0000
+++ b/source/common/predict.cpp	Thu May 12 20:29:21 2016 +0900
@@ -57,12 +57,10 @@
 
 Predict::Predict()
 {
-    m_immedVals = NULL;
 }
 
 Predict::~Predict()
 {
-    X265_FREE(m_immedVals);
     m_predShortYuv[0].destroy();
     m_predShortYuv[1].destroy();
 }
@@ -72,12 +70,8 @@
     m_csp = csp;
     m_hChromaShift = CHROMA_H_SHIFT(csp);
     m_vChromaShift = CHROMA_V_SHIFT(csp);
-    CHECKED_MALLOC(m_immedVals, int16_t, 64 * (64 + NTAPS_LUMA - 1));
 
     return m_predShortYuv[0].create(MAX_CU_SIZE, csp) && m_predShortYuv[1].create(MAX_CU_SIZE, csp);
-
-fail:
-    return false;
 }
 
 void Predict::motionCompensation(const CUData& cu, const PredictionUnit& pu, Yuv& predYuv, bool bLuma, bool bChroma)
@@ -258,8 +252,8 @@
     int partEnum = partitionFromSizes(pu.width, pu.height);
     const pixel* src = refPic.getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + srcOffset;
 
-    int xFrac = mv.x & 0x3;
-    int yFrac = mv.y & 0x3;
+    int xFrac = mv.x & 3;
+    int yFrac = mv.y & 3;
 
     if (!(yFrac | xFrac))
         primitives.pu[partEnum].copy_pp(dst, dstStride, src, srcStride);
@@ -280,14 +274,14 @@
     intptr_t srcOffset = (mv.x >> 2) + (mv.y >> 2) * srcStride;
     const pixel* src = refPic.getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + srcOffset;
 
-    int xFrac = mv.x & 0x3;
-    int yFrac = mv.y & 0x3;
-
     int partEnum = partitionFromSizes(pu.width, pu.height);
 
     X265_CHECK((pu.width % 4) + (pu.height % 4) == 0, "width or height not divisible by 4\n");
     X265_CHECK(dstStride == MAX_CU_SIZE, "stride expected to be max cu size\n");
 
+    int xFrac = mv.x & 3;
+    int yFrac = mv.y & 3;
+
     if (!(yFrac | xFrac))
         primitives.pu[partEnum].convert_p2s(src, srcStride, dst, dstStride);
     else if (!yFrac)
@@ -296,11 +290,12 @@
         primitives.pu[partEnum].luma_vps(src, srcStride, dst, dstStride, yFrac);
     else
     {
-        int tmpStride = pu.width;
-        int filterSize = NTAPS_LUMA;
-        int halfFilterSize = (filterSize >> 1);
-        primitives.pu[partEnum].luma_hps(src, srcStride, m_immedVals, tmpStride, xFrac, 1);
-        primitives.pu[partEnum].luma_vss(m_immedVals + (halfFilterSize - 1) * tmpStride, tmpStride, dst, dstStride, yFrac);
+        ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_LUMA - 1)]);
+        int immedStride = pu.width;
+        int halfFilterSize = NTAPS_LUMA >> 1;
+
+        primitives.pu[partEnum].luma_hps(src, srcStride, immed, immedStride, xFrac, 1);
+        primitives.pu[partEnum].luma_vss(immed + (halfFilterSize - 1) * immedStride, immedStride, dst, dstStride, yFrac);
     }
 }
 
@@ -309,10 +304,10 @@
     intptr_t dstStride = dstYuv.m_csize;
     intptr_t refStride = refPic.m_strideC;
 
-    int shiftHor = (2 + m_hChromaShift);
-    int shiftVer = (2 + m_vChromaShift);
+    int mvx = mv.x << (1 - m_hChromaShift);
+    int mvy = mv.y << (1 - m_vChromaShift);
 
-    intptr_t refOffset = (mv.x >> shiftHor) + (mv.y >> shiftVer) * refStride;
+    intptr_t refOffset = (mvx >> 3) + (mvy >> 3) * refStride;
 
     const pixel* refCb = refPic.getCbAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + refOffset;
     const pixel* refCr = refPic.getCrAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + refOffset;
@@ -320,11 +315,11 @@
     pixel* dstCb = dstYuv.getCbAddr(pu.puAbsPartIdx);
     pixel* dstCr = dstYuv.getCrAddr(pu.puAbsPartIdx);
 
-    int xFrac = mv.x & ((1 << shiftHor) - 1);
-    int yFrac = mv.y & ((1 << shiftVer) - 1);
+    int partEnum = partitionFromSizes(pu.width, pu.height);
 
-    int partEnum = partitionFromSizes(pu.width, pu.height);
-    
+    int xFrac = mvx & 7;
+    int yFrac = mvy & 7;
+
     if (!(yFrac | xFrac))
     {
         primitives.chroma[m_csp].pu[partEnum].copy_pp(dstCb, dstStride, refCb, refStride);
@@ -332,37 +327,36 @@
     }
     else if (!yFrac)
     {
-        primitives.chroma[m_csp].pu[partEnum].filter_hpp(refCb, refStride, dstCb, dstStride, xFrac << (1 - m_hChromaShift));
-        primitives.chroma[m_csp].pu[partEnum].filter_hpp(refCr, refStride, dstCr, dstStride, xFrac << (1 - m_hChromaShift));
+        primitives.chroma[m_csp].pu[partEnum].filter_hpp(refCb, refStride, dstCb, dstStride, xFrac);
+        primitives.chroma[m_csp].pu[partEnum].filter_hpp(refCr, refStride, dstCr, dstStride, xFrac);
     }
     else if (!xFrac)
     {
-        primitives.chroma[m_csp].pu[partEnum].filter_vpp(refCb, refStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
-        primitives.chroma[m_csp].pu[partEnum].filter_vpp(refCr, refStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
+        primitives.chroma[m_csp].pu[partEnum].filter_vpp(refCb, refStride, dstCb, dstStride, yFrac);
+        primitives.chroma[m_csp].pu[partEnum].filter_vpp(refCr, refStride, dstCr, dstStride, yFrac);
     }
     else
     {
-        int extStride = pu.width >> m_hChromaShift;
-        int filterSize = NTAPS_CHROMA;
-        int halfFilterSize = (filterSize >> 1);
+        ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_CHROMA - 1)]);
+        int immedStride = pu.width >> m_hChromaShift;
+        int halfFilterSize = NTAPS_CHROMA >> 1;
 
-        primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
-        primitives.chroma[m_csp].pu[partEnum].filter_vsp(m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
-
-        primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
-        primitives.chroma[m_csp].pu[partEnum].filter_vsp(m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
+        primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride, immed, immedStride, xFrac, 1);
+        primitives.chroma[m_csp].pu[partEnum].filter_vsp(immed + (halfFilterSize - 1) * immedStride, immedStride, dstCb, dstStride, yFrac);
+        primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride, immed, immedStride, xFrac, 1);
+        primitives.chroma[m_csp].pu[partEnum].filter_vsp(immed + (halfFilterSize - 1) * immedStride, immedStride, dstCr, dstStride, yFrac);
     }
 }
 
 void Predict::predInterChromaShort(const PredictionUnit& pu, ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const
 {
+    intptr_t dstStride = dstSYuv.m_csize;
     intptr_t refStride = refPic.m_strideC;
-    intptr_t dstStride = dstSYuv.m_csize;
 
-    int shiftHor = (2 + m_hChromaShift);
-    int shiftVer = (2 + m_vChromaShift);
+    int mvx = mv.x << (1 - m_hChromaShift);
+    int mvy = mv.y << (1 - m_vChromaShift);
 
-    intptr_t refOffset = (mv.x >> shiftHor) + (mv.y >> shiftVer) * refStride;
+    intptr_t refOffset = (mvx >> 3) + (mvy >> 3) * refStride;
 
     const pixel* refCb = refPic.getCbAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + refOffset;
     const pixel* refCr = refPic.getCrAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + refOffset;
@@ -370,15 +364,15 @@
     int16_t* dstCb = dstSYuv.getCbAddr(pu.puAbsPartIdx);
     int16_t* dstCr = dstSYuv.getCrAddr(pu.puAbsPartIdx);
 
-    int xFrac = mv.x & ((1 << shiftHor) - 1);
-    int yFrac = mv.y & ((1 << shiftVer) - 1);
-
     int partEnum = partitionFromSizes(pu.width, pu.height);
     
     uint32_t cxWidth  = pu.width >> m_hChromaShift;
 
     X265_CHECK(((cxWidth | (pu.height >> m_vChromaShift)) % 2) == 0, "chroma block size expected to be multiple of 2\n");
 
+    int xFrac = mvx & 7;
+    int yFrac = mvy & 7;
+
     if (!(yFrac | xFrac))
     {
         primitives.chroma[m_csp].pu[partEnum].p2s(refCb, refStride, dstCb, dstStride);
@@ -386,23 +380,24 @@
     }
     else if (!yFrac)
     {
-        primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride, dstCb, dstStride, xFrac << (1 - m_hChromaShift), 0);
-        primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride, dstCr, dstStride, xFrac << (1 - m_hChromaShift), 0);
+        primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride, dstCb, dstStride, xFrac, 0);
+        primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride, dstCr, dstStride, xFrac, 0);
     }
     else if (!xFrac)
     {
-        primitives.chroma[m_csp].pu[partEnum].filter_vps(refCb, refStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
-        primitives.chroma[m_csp].pu[partEnum].filter_vps(refCr, refStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
+        primitives.chroma[m_csp].pu[partEnum].filter_vps(refCb, refStride, dstCb, dstStride, yFrac);
+        primitives.chroma[m_csp].pu[partEnum].filter_vps(refCr, refStride, dstCr, dstStride, yFrac);
     }
     else
     {
-        int extStride = cxWidth;
-        int filterSize = NTAPS_CHROMA;
-        int halfFilterSize = (filterSize >> 1);
-        primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
-        primitives.chroma[m_csp].pu[partEnum].filter_vss(m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
-        primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
-        primitives.chroma[m_csp].pu[partEnum].filter_vss(m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
+        ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_CHROMA - 1)]);
+        int immedStride = cxWidth;
+        int halfFilterSize = NTAPS_CHROMA >> 1;
+
+        primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride, immed, immedStride, xFrac, 1);
+        primitives.chroma[m_csp].pu[partEnum].filter_vss(immed + (halfFilterSize - 1) * immedStride, immedStride, dstCb, dstStride, yFrac);
+        primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride, immed, immedStride, xFrac, 1);
+        primitives.chroma[m_csp].pu[partEnum].filter_vss(immed + (halfFilterSize - 1) * immedStride, immedStride, dstCr, dstStride, yFrac);
     }
 }
 
diff -r a5362b9533f6 -r 3d6c4c1fcb99 source/common/predict.h
--- a/source/common/predict.h	Wed May 04 21:08:09 2016 +0000
+++ b/source/common/predict.h	Thu May 12 20:29:21 2016 +0900
@@ -73,7 +73,6 @@
     };
 
     ShortYuv  m_predShortYuv[2]; /* temporary storage for weighted prediction */
-    int16_t*  m_immedVals;
 
     // Unfiltered/filtered neighbours of the current partition.
     pixel     intraNeighbourBuf[2][258];
diff -r a5362b9533f6 -r 3d6c4c1fcb99 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Wed May 04 21:08:09 2016 +0000
+++ b/source/common/x86/asm-primitives.cpp	Thu May 12 20:29:21 2016 +0900
@@ -861,12 +861,12 @@
 template<int size>
 void interp_8tap_hv_pp_cpu(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int idxX, int idxY)
 {
-    ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_LUMA)]);
-    const int filterSize = NTAPS_LUMA;
-    const int halfFilterSize = filterSize >> 1;
-
-    primitives.pu[size].luma_hps(src, srcStride, immed, MAX_CU_SIZE, idxX, 1);
-    primitives.pu[size].luma_vsp(immed + (halfFilterSize - 1) * MAX_CU_SIZE, MAX_CU_SIZE, dst, dstStride, idxY);
+    ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_LUMA - 1)]);
+    const int halfFilterSize = NTAPS_LUMA >> 1;
+    const int immedStride = MAX_CU_SIZE;
+
+    primitives.pu[size].luma_hps(src, srcStride, immed, immedStride, idxX, 1);
+    primitives.pu[size].luma_vsp(immed + (halfFilterSize - 1) * immedStride, immedStride, dst, dstStride, idxY);
 }
 
 #if HIGH_BIT_DEPTH
diff -r a5362b9533f6 -r 3d6c4c1fcb99 source/encoder/motion.cpp
--- a/source/encoder/motion.cpp	Wed May 04 21:08:09 2016 +0000
+++ b/source/encoder/motion.cpp	Thu May 12 20:29:21 2016 +0900
@@ -1180,15 +1180,17 @@
 int MotionEstimate::subpelCompare(ReferencePlanes *ref, const MV& qmv, pixelcmp_t cmp)
 {
     intptr_t refStride = ref->lumaStride;
-    pixel *fref = ref->fpelPlane[0] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * refStride;
+    const pixel* fref = ref->fpelPlane[0] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * refStride;
     int xFrac = qmv.x & 0x3;
     int yFrac = qmv.y & 0x3;
     int cost;
-    intptr_t lclStride = fencPUYuv.m_size;
-    X265_CHECK(lclStride == FENC_STRIDE, "fenc buffer is assumed to have FENC_STRIDE by sad_x3 and sad_x4\n");
+    const intptr_t fencStride = FENC_STRIDE;
+    X265_CHECK(fencPUYuv.m_size == FENC_STRIDE, "fenc buffer is assumed to have FENC_STRIDE by sad_x3 and sad_x4\n");
 
+    ALIGN_VAR_32(pixel, subpelbuf[MAX_CU_SIZE * MAX_CU_SIZE]);
+    
     if (!(yFrac | xFrac))
-        cost = cmp(fencPUYuv.m_buf[0], lclStride, fref, refStride);
+        cost = cmp(fencPUYuv.m_buf[0], fencStride, fref, refStride);
     else
     {
         /* we are taking a short-cut here if the reference is weighted. To be
@@ -1196,15 +1198,13 @@
          * the final 16bit values prior to rounding and down shifting. Instead we
          * are simply interpolating the weighted full-pel pixels. Not 100%
          * accurate but good enough for fast qpel ME */
-        ALIGN_VAR_32(pixel, subpelbuf[64 * 64]);
         if (!yFrac)
-            primitives.pu[partEnum].luma_hpp(fref, refStride, subpelbuf, lclStride, xFrac);
+            primitives.pu[partEnum].luma_hpp(fref, refStride, subpelbuf, blockwidth, xFrac);
         else if (!xFrac)
-            primitives.pu[partEnum].luma_vpp(fref, refStride, subpelbuf, lclStride, yFrac);
+            primitives.pu[partEnum].luma_vpp(fref, refStride, subpelbuf, blockwidth, yFrac);
         else
-            primitives.pu[partEnum].luma_hvpp(fref, refStride, subpelbuf, lclStride, xFrac, yFrac);
-
-        cost = cmp(fencPUYuv.m_buf[0], lclStride, subpelbuf, lclStride);
+            primitives.pu[partEnum].luma_hvpp(fref, refStride, subpelbuf, blockwidth, xFrac, yFrac);
+        cost = cmp(fencPUYuv.m_buf[0], fencStride, subpelbuf, blockwidth);
     }
 
     if (bChromaSATD)
@@ -1212,12 +1212,12 @@
         int csp    = fencPUYuv.m_csp;
         int hshift = fencPUYuv.m_hChromaShift;
         int vshift = fencPUYuv.m_vChromaShift;
-        int shiftHor = (2 + hshift);
-        int shiftVer = (2 + vshift);
-        lclStride = fencPUYuv.m_csize;
+        int mvx = qmv.x << (1 - hshift);
+        int mvy = qmv.y << (1 - vshift);
+        intptr_t fencStrideC = fencPUYuv.m_csize;
 
         intptr_t refStrideC = ref->reconPic->m_strideC;
-        intptr_t refOffset = (qmv.x >> shiftHor) + (qmv.y >> shiftVer) * refStrideC;
+        intptr_t refOffset = (mvx >> 3) + (mvy >> 3) * refStrideC;
 
         const pixel* refCb = ref->getCbAddr(ctuAddr, absPartIdx) + refOffset;
         const pixel* refCr = ref->getCrAddr(ctuAddr, absPartIdx) + refOffset;
@@ -1225,48 +1225,46 @@
         X265_CHECK((hshift == 0) || (hshift == 1), "hshift must be 0 or 1\n");
         X265_CHECK((vshift == 0) || (vshift == 1), "vshift must be 0 or 1\n");
 
-        xFrac = qmv.x & (hshift ? 7 : 3);
-        yFrac = qmv.y & (vshift ? 7 : 3);
+        xFrac = mvx & 7;
+        yFrac = mvy & 7;
 
         if (!(yFrac | xFrac))
         {
-            cost += chromaSatd(fencPUYuv.m_buf[1], lclStride, refCb, refStrideC);
-            cost += chromaSatd(fencPUYuv.m_buf[2], lclStride, refCr, refStrideC);
+            cost += chromaSatd(fencPUYuv.m_buf[1], fencStrideC, refCb, refStrideC);
+            cost += chromaSatd(fencPUYuv.m_buf[2], fencStrideC, refCr, refStrideC);
         }
         else
         {
-            ALIGN_VAR_32(pixel, subpelbuf[64 * 64]);
+            int blockwidthC = blockwidth >> hshift;
+
             if (!yFrac)
             {
-                primitives.chroma[csp].pu[partEnum].filter_hpp(refCb, refStrideC, subpelbuf, lclStride, xFrac << (1 - hshift));
-                cost += chromaSatd(fencPUYuv.m_buf[1], lclStride, subpelbuf, lclStride);
+                primitives.chroma[csp].pu[partEnum].filter_hpp(refCb, refStrideC, subpelbuf, blockwidthC, xFrac);
+                cost += chromaSatd(fencPUYuv.m_buf[1], fencStrideC, subpelbuf, blockwidthC);
 
-                primitives.chroma[csp].pu[partEnum].filter_hpp(refCr, refStrideC, subpelbuf, lclStride, xFrac << (1 - hshift));
-                cost += chromaSatd(fencPUYuv.m_buf[2], lclStride, subpelbuf, lclStride);
+                primitives.chroma[csp].pu[partEnum].filter_hpp(refCr, refStrideC, subpelbuf, blockwidthC, xFrac);
+                cost += chromaSatd(fencPUYuv.m_buf[2], fencStrideC, subpelbuf, blockwidthC);
             }
             else if (!xFrac)
             {
-                primitives.chroma[csp].pu[partEnum].filter_vpp(refCb, refStrideC, subpelbuf, lclStride, yFrac << (1 - vshift));
-                cost += chromaSatd(fencPUYuv.m_buf[1], lclStride, subpelbuf, lclStride);
+                primitives.chroma[csp].pu[partEnum].filter_vpp(refCb, refStrideC, subpelbuf, blockwidthC, yFrac);
+                cost += chromaSatd(fencPUYuv.m_buf[1], fencStrideC, subpelbuf, blockwidthC);
 
-                primitives.chroma[csp].pu[partEnum].filter_vpp(refCr, refStrideC, subpelbuf, lclStride, yFrac << (1 - vshift));
-                cost += chromaSatd(fencPUYuv.m_buf[2], lclStride, subpelbuf, lclStride);
+                primitives.chroma[csp].pu[partEnum].filter_vpp(refCr, refStrideC, subpelbuf, blockwidthC, yFrac);
+                cost += chromaSatd(fencPUYuv.m_buf[2], fencStrideC, subpelbuf, blockwidthC);
             }
             else
             {
-                ALIGN_VAR_32(int16_t, immed[64 * (64 + NTAPS_CHROMA)]);
-
-                int extStride = blockwidth >> hshift;
-                int filterSize = NTAPS_CHROMA;
-                int halfFilterSize = (filterSize >> 1);
+                ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_LUMA - 1)]);
+                const int halfFilterSize = (NTAPS_CHROMA >> 1);
 
-                primitives.chroma[csp].pu[partEnum].filter_hps(refCb, refStrideC, immed, extStride, xFrac << (1 - hshift), 1);
-                primitives.chroma[csp].pu[partEnum].filter_vsp(immed + (halfFilterSize - 1) * extStride, extStride, subpelbuf, lclStride, yFrac << (1 - vshift));
-                cost += chromaSatd(fencPUYuv.m_buf[1], lclStride, subpelbuf, lclStride);
+                primitives.chroma[csp].pu[partEnum].filter_hps(refCb, refStrideC, immed, blockwidthC, xFrac, 1);
+                primitives.chroma[csp].pu[partEnum].filter_vsp(immed + (halfFilterSize - 1) * blockwidthC, blockwidthC, subpelbuf, blockwidthC, yFrac);
+                cost += chromaSatd(fencPUYuv.m_buf[1], fencStrideC, subpelbuf, blockwidthC);
 
-                primitives.chroma[csp].pu[partEnum].filter_hps(refCr, refStrideC, immed, extStride, xFrac << (1 - hshift), 1);
-                primitives.chroma[csp].pu[partEnum].filter_vsp(immed + (halfFilterSize - 1) * extStride, extStride, subpelbuf, lclStride, yFrac << (1 - vshift));
-                cost += chromaSatd(fencPUYuv.m_buf[2], lclStride, subpelbuf, lclStride);
+                primitives.chroma[csp].pu[partEnum].filter_hps(refCr, refStrideC, immed, blockwidthC, xFrac, 1);
+                primitives.chroma[csp].pu[partEnum].filter_vsp(immed + (halfFilterSize - 1) * blockwidthC, blockwidthC, subpelbuf, blockwidthC, yFrac);
+                cost += chromaSatd(fencPUYuv.m_buf[2], fencStrideC, subpelbuf, blockwidthC);
             }
         }
     }
diff -r a5362b9533f6 -r 3d6c4c1fcb99 source/encoder/weightPrediction.cpp
--- a/source/encoder/weightPrediction.cpp	Wed May 04 21:08:09 2016 +0000
+++ b/source/encoder/weightPrediction.cpp	Thu May 12 20:29:21 2016 +0900
@@ -132,25 +132,25 @@
                 intptr_t fpeloffset = (mv.y >> 2) * stride + (mv.x >> 2);
                 pixel *temp = src + pixoff + fpeloffset;
 
-                int xFrac = mv.x & 0x7;
-                int yFrac = mv.y & 0x7;
-                if ((yFrac | xFrac) == 0)
+                int xFrac = mv.x & 7;
+                int yFrac = mv.y & 7;
+                if (!(yFrac | xFrac))
                 {
                     primitives.chroma[csp].pu[LUMA_16x16].copy_pp(mcout + pixoff, stride, temp, stride);
                 }
-                else if (yFrac == 0)
+                else if (!yFrac)
                 {
                     primitives.chroma[csp].pu[LUMA_16x16].filter_hpp(temp, stride, mcout + pixoff, stride, xFrac);
                 }
-                else if (xFrac == 0)
+                else if (!xFrac)
                 {
                     primitives.chroma[csp].pu[LUMA_16x16].filter_vpp(temp, stride, mcout + pixoff, stride, yFrac);
                 }
                 else
                 {
-                    ALIGN_VAR_16(int16_t, imm[16 * (16 + NTAPS_CHROMA)]);
-                    primitives.chroma[csp].pu[LUMA_16x16].filter_hps(temp, stride, imm, bw, xFrac, 1);
-                    primitives.chroma[csp].pu[LUMA_16x16].filter_vsp(imm + ((NTAPS_CHROMA >> 1) - 1) * bw, bw, mcout + pixoff, stride, yFrac);
+                    ALIGN_VAR_16(int16_t, immed[16 * (16 + NTAPS_CHROMA - 1)]);
+                    primitives.chroma[csp].pu[LUMA_16x16].filter_hps(temp, stride, immed, bw, xFrac, 1);
+                    primitives.chroma[csp].pu[LUMA_16x16].filter_vsp(immed + ((NTAPS_CHROMA >> 1) - 1) * bw, bw, mcout + pixoff, stride, yFrac);
                 }
             }
             else


More information about the x265-devel mailing list