[x265] [PATCH] primitives: rename luma_p2s to convert_p2s and move into PU

rajesh at multicorewareinc.com rajesh at multicorewareinc.com
Wed Apr 1 15:10:38 CEST 2015


# HG changeset patch
# User Rajesh Paulraj<rajesh at multicorewareinc.com>
# Date 1427889433 -19800
#      Wed Apr 01 17:27:13 2015 +0530
# Node ID c26756d8ced6fe69e58b2bb77419b5f975a54de9
# Parent  ac85c775620f1dcb0df056874633cbf916098bd2
primitives: rename luma_p2s to convert_p2s and move into PU

diff -r ac85c775620f -r c26756d8ced6 source/common/ipfilter.cpp
--- a/source/common/ipfilter.cpp	Tue Mar 31 20:04:28 2015 -0500
+++ b/source/common/ipfilter.cpp	Wed Apr 01 17:27:13 2015 +0530
@@ -34,27 +34,8 @@
 #endif
 
 namespace {
-template<int dstStride, int width, int height>
-void pixelToShort_c(const pixel* src, intptr_t srcStride, int16_t* dst)
-{
-    int shift = IF_INTERNAL_PREC - X265_DEPTH;
-    int row, col;
-
-    for (row = 0; row < height; row++)
-    {
-        for (col = 0; col < width; col++)
-        {
-            int16_t val = src[col] << shift;
-            dst[col] = val - (int16_t)IF_INTERNAL_OFFS;
-        }
-
-        src += srcStride;
-        dst += dstStride;
-    }
-}
-
-template<int dstStride>
-void filterPixelToShort_c(const pixel* src, intptr_t srcStride, int16_t* dst, int width, int height)
+template<int width, int height>
+void filterPixelToShort_c(const pixel* src, intptr_t srcStride, int16_t* dst, int16_t dstStride)
 {
     int shift = IF_INTERNAL_PREC - X265_DEPTH;
     int row, col;
@@ -398,7 +379,7 @@
     p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_vps = interp_vert_ps_c<4, W, H>;  \
     p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_vsp = interp_vert_sp_c<4, W, H>;  \
     p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_vss = interp_vert_ss_c<4, W, H>; \
-    p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].chroma_p2s = pixelToShort_c<MAX_CU_SIZE / 2, W, H>; 
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].chroma_p2s = filterPixelToShort_c<W, H>;
 
 #define CHROMA_422(W, H) \
     p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_hpp = interp_horiz_pp_c<4, W, H>; \
@@ -407,7 +388,7 @@
     p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_vps = interp_vert_ps_c<4, W, H>;  \
     p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_vsp = interp_vert_sp_c<4, W, H>;  \
     p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_vss = interp_vert_ss_c<4, W, H>; \
-    p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].chroma_p2s = pixelToShort_c<MAX_CU_SIZE / 2, W, H>; 
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].chroma_p2s = filterPixelToShort_c<W, H>;
 
 #define CHROMA_444(W, H) \
     p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_hpp = interp_horiz_pp_c<4, W, H>; \
@@ -416,7 +397,7 @@
     p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vps = interp_vert_ps_c<4, W, H>;  \
     p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vsp = interp_vert_sp_c<4, W, H>;  \
     p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vss = interp_vert_ss_c<4, W, H>; \
-    p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].chroma_p2s = pixelToShort_c<MAX_CU_SIZE, W, H>; 
+    p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].chroma_p2s = filterPixelToShort_c<W, H>;
 
 #define LUMA(W, H) \
     p.pu[LUMA_ ## W ## x ## H].luma_hpp     = interp_horiz_pp_c<8, W, H>; \
@@ -426,7 +407,7 @@
     p.pu[LUMA_ ## W ## x ## H].luma_vsp     = interp_vert_sp_c<8, W, H>;  \
     p.pu[LUMA_ ## W ## x ## H].luma_vss     = interp_vert_ss_c<8, W, H>;  \
     p.pu[LUMA_ ## W ## x ## H].luma_hvpp    = interp_hv_pp_c<8, W, H>; \
-    p.pu[LUMA_ ## W ## x ## H].filter_p2s = pixelToShort_c<MAX_CU_SIZE, W, H>
+    p.pu[LUMA_ ## W ## x ## H].convert_p2s = filterPixelToShort_c<W, H>;
 
 void setupFilterPrimitives_c(EncoderPrimitives& p)
 {
@@ -530,11 +511,82 @@
     CHROMA_444(48, 64);
     CHROMA_444(64, 16);
     CHROMA_444(16, 64);
-    p.luma_p2s = filterPixelToShort_c<MAX_CU_SIZE>;
 
-    p.chroma[X265_CSP_I444].p2s = filterPixelToShort_c<MAX_CU_SIZE>;
-    p.chroma[X265_CSP_I420].p2s = filterPixelToShort_c<MAX_CU_SIZE / 2>;
-    p.chroma[X265_CSP_I422].p2s = filterPixelToShort_c<MAX_CU_SIZE / 2>;
+    p.pu[LUMA_4x4].convert_p2s = filterPixelToShort_c<4, 4>;
+    p.pu[LUMA_4x8].convert_p2s = filterPixelToShort_c<4, 8>;
+    p.pu[LUMA_4x16].convert_p2s = filterPixelToShort_c<4, 16>;
+    p.pu[LUMA_8x4].convert_p2s = filterPixelToShort_c<8, 4>;
+    p.pu[LUMA_8x8].convert_p2s = filterPixelToShort_c<8, 8>;
+    p.pu[LUMA_8x16].convert_p2s = filterPixelToShort_c<8, 16>;
+    p.pu[LUMA_8x32].convert_p2s = filterPixelToShort_c<8, 32>;
+    p.pu[LUMA_16x4].convert_p2s = filterPixelToShort_c<16, 4>;
+    p.pu[LUMA_16x8].convert_p2s = filterPixelToShort_c<16, 8>;
+    p.pu[LUMA_16x12].convert_p2s = filterPixelToShort_c<16, 12>;
+    p.pu[LUMA_16x16].convert_p2s = filterPixelToShort_c<16, 16>;
+    p.pu[LUMA_16x32].convert_p2s = filterPixelToShort_c<16, 32>;
+    p.pu[LUMA_16x64].convert_p2s = filterPixelToShort_c<16, 64>;
+    p.pu[LUMA_32x8].convert_p2s = filterPixelToShort_c<32, 8>;
+    p.pu[LUMA_32x16].convert_p2s = filterPixelToShort_c<32, 16>;
+    p.pu[LUMA_32x24].convert_p2s = filterPixelToShort_c<32, 24>;
+    p.pu[LUMA_32x32].convert_p2s = filterPixelToShort_c<32, 32>;
+    p.pu[LUMA_32x64].convert_p2s = filterPixelToShort_c<32, 64>;
+    p.pu[LUMA_64x16].convert_p2s = filterPixelToShort_c<64, 16>;
+    p.pu[LUMA_64x32].convert_p2s = filterPixelToShort_c<64, 32>;
+    p.pu[LUMA_64x48].convert_p2s = filterPixelToShort_c<64, 48>;
+    p.pu[LUMA_64x64].convert_p2s = filterPixelToShort_c<64, 64>;
+    p.pu[LUMA_12x16].convert_p2s = filterPixelToShort_c<12, 16>;
+    p.pu[LUMA_24x32].convert_p2s = filterPixelToShort_c<24, 32>;
+    p.pu[LUMA_48x64].convert_p2s = filterPixelToShort_c<48, 64>;
+
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].chroma_p2s = filterPixelToShort_c<4, 4>;
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].chroma_p2s = filterPixelToShort_c<8, 8>;
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].chroma_p2s = filterPixelToShort_c<16, 16>;
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].chroma_p2s = filterPixelToShort_c<32, 32>;
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].chroma_p2s = filterPixelToShort_c<4, 2>;
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_2x4].chroma_p2s = filterPixelToShort_c<2, 4>;
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].chroma_p2s = filterPixelToShort_c<8, 4>;
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].chroma_p2s = filterPixelToShort_c<4, 8>;
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].chroma_p2s = filterPixelToShort_c<16, 8>;
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].chroma_p2s = filterPixelToShort_c<8, 16>;
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].chroma_p2s = filterPixelToShort_c<32, 16>;
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].chroma_p2s = filterPixelToShort_c<16, 32>;
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_8x6].chroma_p2s = filterPixelToShort_c<8, 6>;
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_6x8].chroma_p2s = filterPixelToShort_c<6, 8>;
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_8x2].chroma_p2s = filterPixelToShort_c<8, 2>;
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_2x8].chroma_p2s = filterPixelToShort_c<2, 8>;
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_16x12].chroma_p2s = filterPixelToShort_c<16, 12>;
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].chroma_p2s = filterPixelToShort_c<12, 16>;
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].chroma_p2s = filterPixelToShort_c<16, 4>;
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].chroma_p2s = filterPixelToShort_c<4, 16>;
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].chroma_p2s = filterPixelToShort_c<32, 24>;
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_24x32].chroma_p2s = filterPixelToShort_c<24, 32>;
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].chroma_p2s = filterPixelToShort_c<32, 8>;
+    p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].chroma_p2s = filterPixelToShort_c<8, 32>;
+
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].chroma_p2s = filterPixelToShort_c<4, 8>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].chroma_p2s = filterPixelToShort_c<8, 16>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].chroma_p2s = filterPixelToShort_c<16, 32>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].chroma_p2s = filterPixelToShort_c<32, 64>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_4x4].chroma_p2s = filterPixelToShort_c<4, 4>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_2x8].chroma_p2s = filterPixelToShort_c<2, 8>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].chroma_p2s = filterPixelToShort_c<8, 8>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].chroma_p2s = filterPixelToShort_c<4, 16>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].chroma_p2s = filterPixelToShort_c<16, 16>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].chroma_p2s = filterPixelToShort_c<8, 32>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].chroma_p2s = filterPixelToShort_c<32, 32>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].chroma_p2s = filterPixelToShort_c<16, 64>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].chroma_p2s = filterPixelToShort_c<8, 12>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].chroma_p2s = filterPixelToShort_c<6, 16>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].chroma_p2s = filterPixelToShort_c<8, 4>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_2x16].chroma_p2s = filterPixelToShort_c<2, 16>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].chroma_p2s = filterPixelToShort_c<16, 24>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].chroma_p2s = filterPixelToShort_c<12, 32>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].chroma_p2s = filterPixelToShort_c<16, 8>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].chroma_p2s = filterPixelToShort_c<4, 32>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].chroma_p2s = filterPixelToShort_c<32, 48>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].chroma_p2s = filterPixelToShort_c<24, 64>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].chroma_p2s = filterPixelToShort_c<32, 16>;
+    p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].chroma_p2s = filterPixelToShort_c<8, 64>;
 
     p.extendRowBorder = extendCURowColBorder;
 }
diff -r ac85c775620f -r c26756d8ced6 source/common/predict.cpp
--- a/source/common/predict.cpp	Tue Mar 31 20:04:28 2015 -0500
+++ b/source/common/predict.cpp	Wed Apr 01 17:27:13 2015 +0530
@@ -273,7 +273,8 @@
 void Predict::predInterLumaShort(const PredictionUnit& pu, ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const
 {
     int16_t* dst = dstSYuv.getLumaAddr(pu.puAbsPartIdx);
-    int dstStride = dstSYuv.m_size;
+    /* luma dstStride is fixed to MAX_CU_SIZE (verified by the X265_CHECK below) */
+    int16_t dstStride = MAX_CU_SIZE;
 
     intptr_t srcStride = refPic.m_stride;
     intptr_t srcOffset = (mv.x >> 2) + (mv.y >> 2) * srcStride;
@@ -288,7 +289,7 @@
     X265_CHECK(dstStride == MAX_CU_SIZE, "stride expected to be max cu size\n");
 
     if (!(yFrac | xFrac))
-        primitives.luma_p2s(src, srcStride, dst, pu.width, pu.height);
+        primitives.pu[partEnum].convert_p2s(src, srcStride, dst, dstStride);
     else if (!yFrac)
         primitives.pu[partEnum].luma_hps(src, srcStride, dst, dstStride, xFrac, 0);
     else if (!xFrac)
@@ -356,7 +357,8 @@
 void Predict::predInterChromaShort(const PredictionUnit& pu, ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const
 {
     intptr_t refStride = refPic.m_strideC;
-    intptr_t dstStride = dstSYuv.m_csize;
+    /* chroma dstStride is fixed to MAX_CU_SIZE / 2; the p2s calls below double it for 4:4:4 */
+    int16_t dstStride = MAX_CU_SIZE / 2;
 
     int shiftHor = (2 + m_hChromaShift);
     int shiftVer = (2 + m_vChromaShift);
@@ -373,16 +375,23 @@
     int yFrac = mv.y & ((1 << shiftVer) - 1);
 
     int partEnum = partitionFromSizes(pu.width, pu.height);
-    
+
     uint32_t cxWidth  = pu.width >> m_hChromaShift;
-    uint32_t cxHeight = pu.height >> m_vChromaShift;
 
-    X265_CHECK(((cxWidth | cxHeight) % 2) == 0, "chroma block size expected to be multiple of 2\n");
+    X265_CHECK(((cxWidth | (pu.height >> m_vChromaShift)) % 2) == 0, "chroma block size expected to be multiple of 2\n");
 
     if (!(yFrac | xFrac))
     {
-        primitives.chroma[m_csp].p2s(refCb, refStride, dstCb, cxWidth, cxHeight);
-        primitives.chroma[m_csp].p2s(refCr, refStride, dstCr, cxWidth, cxHeight);
+        if (m_csp != X265_CSP_I444)
+        {
+            primitives.chroma[m_csp].pu[partEnum].chroma_p2s(refCb, refStride, dstCb, dstStride);
+            primitives.chroma[m_csp].pu[partEnum].chroma_p2s(refCr, refStride, dstCr, dstStride);
+        }
+        else
+        {
+            primitives.chroma[m_csp].pu[partEnum].chroma_p2s(refCb, refStride, dstCb, dstStride << 1);
+            primitives.chroma[m_csp].pu[partEnum].chroma_p2s(refCr, refStride, dstCr, dstStride << 1);
+        }
     }
     else if (!yFrac)
     {
diff -r ac85c775620f -r c26756d8ced6 source/common/primitives.cpp
--- a/source/common/primitives.cpp	Tue Mar 31 20:04:28 2015 -0500
+++ b/source/common/primitives.cpp	Wed Apr 01 17:27:13 2015 +0530
@@ -90,7 +90,6 @@
 
     /* alias chroma 4:4:4 from luma primitives (all but chroma filters) */
 
-    p.chroma[X265_CSP_I444].p2s = p.luma_p2s;
     p.chroma[X265_CSP_I444].cu[BLOCK_4x4].sa8d = NULL;
 
     for (int i = 0; i < NUM_PU_SIZES; i++)
@@ -98,7 +97,7 @@
         p.chroma[X265_CSP_I444].pu[i].copy_pp = p.pu[i].copy_pp;
         p.chroma[X265_CSP_I444].pu[i].addAvg  = p.pu[i].addAvg;
         p.chroma[X265_CSP_I444].pu[i].satd    = p.pu[i].satd;
-        p.chroma[X265_CSP_I444].pu[i].chroma_p2s = p.pu[i].filter_p2s;
+        p.chroma[X265_CSP_I444].pu[i].chroma_p2s = p.pu[i].convert_p2s;
     }
 
     for (int i = 0; i < NUM_CU_SIZES; i++)
@@ -185,6 +184,34 @@
     p.chroma[X265_CSP_I420].cu[BLOCK_420_32x32].sse_pp = p.cu[BLOCK_32x32].sse_pp;
 
     p.chroma[X265_CSP_I422].cu[BLOCK_422_2x4].sse_pp = NULL;
+
+    /* Chroma PU can often use luma P2S primitives */
+    p.chroma[X265_CSP_I444].pu[LUMA_4x4].chroma_p2s = p.pu[LUMA_4x4].convert_p2s;
+    p.chroma[X265_CSP_I444].pu[LUMA_4x8].chroma_p2s = p.pu[LUMA_4x8].convert_p2s;
+    p.chroma[X265_CSP_I444].pu[LUMA_4x16].chroma_p2s = p.pu[LUMA_4x16].convert_p2s;
+    p.chroma[X265_CSP_I444].pu[LUMA_8x4].chroma_p2s = p.pu[LUMA_8x4].convert_p2s;
+    p.chroma[X265_CSP_I444].pu[LUMA_8x8].chroma_p2s = p.pu[LUMA_8x8].convert_p2s;
+    p.chroma[X265_CSP_I444].pu[LUMA_8x16].chroma_p2s = p.pu[LUMA_8x16].convert_p2s;
+    p.chroma[X265_CSP_I444].pu[LUMA_8x32].chroma_p2s = p.pu[LUMA_8x32].convert_p2s;
+    p.chroma[X265_CSP_I444].pu[LUMA_16x4].chroma_p2s = p.pu[LUMA_16x4].convert_p2s;
+    p.chroma[X265_CSP_I444].pu[LUMA_16x8].chroma_p2s = p.pu[LUMA_16x8].convert_p2s;
+    p.chroma[X265_CSP_I444].pu[LUMA_16x12].chroma_p2s = p.pu[LUMA_16x12].convert_p2s;
+    p.chroma[X265_CSP_I444].pu[LUMA_16x16].chroma_p2s = p.pu[LUMA_16x16].convert_p2s;
+    p.chroma[X265_CSP_I444].pu[LUMA_16x32].chroma_p2s = p.pu[LUMA_16x32].convert_p2s;
+    p.chroma[X265_CSP_I444].pu[LUMA_16x64].chroma_p2s = p.pu[LUMA_16x64].convert_p2s;
+    p.chroma[X265_CSP_I444].pu[LUMA_32x8].chroma_p2s = p.pu[LUMA_32x8].convert_p2s;
+    p.chroma[X265_CSP_I444].pu[LUMA_32x16].chroma_p2s = p.pu[LUMA_32x16].convert_p2s;
+    p.chroma[X265_CSP_I444].pu[LUMA_32x24].chroma_p2s = p.pu[LUMA_32x24].convert_p2s;
+    p.chroma[X265_CSP_I444].pu[LUMA_32x32].chroma_p2s = p.pu[LUMA_32x32].convert_p2s;
+    p.chroma[X265_CSP_I444].pu[LUMA_32x64].chroma_p2s = p.pu[LUMA_32x64].convert_p2s;
+    p.chroma[X265_CSP_I444].pu[LUMA_64x16].chroma_p2s = p.pu[LUMA_64x16].convert_p2s;
+    p.chroma[X265_CSP_I444].pu[LUMA_64x32].chroma_p2s = p.pu[LUMA_64x32].convert_p2s;
+    p.chroma[X265_CSP_I444].pu[LUMA_64x48].chroma_p2s = p.pu[LUMA_64x48].convert_p2s;
+    p.chroma[X265_CSP_I444].pu[LUMA_64x64].chroma_p2s = p.pu[LUMA_64x64].convert_p2s;
+    p.chroma[X265_CSP_I444].pu[LUMA_12x16].chroma_p2s = p.pu[LUMA_12x16].convert_p2s;
+    p.chroma[X265_CSP_I444].pu[LUMA_24x32].chroma_p2s = p.pu[LUMA_24x32].convert_p2s;
+    p.chroma[X265_CSP_I444].pu[LUMA_48x64].chroma_p2s = p.pu[LUMA_48x64].convert_p2s;
+
 }
 }
 using namespace x265;
diff -r ac85c775620f -r c26756d8ced6 source/common/primitives.h
--- a/source/common/primitives.h	Tue Mar 31 20:04:28 2015 -0500
+++ b/source/common/primitives.h	Wed Apr 01 17:27:13 2015 +0530
@@ -156,8 +156,7 @@
 typedef void (*filter_sp_t) (const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
 typedef void (*filter_ss_t) (const int16_t* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
 typedef void (*filter_hv_pp_t) (const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int idxX, int idxY);
-typedef void (*filter_p2s_wxh_t)(const pixel* src, intptr_t srcStride, int16_t* dst, int width, int height);
-typedef void (*filter_p2s_t)(const pixel* src, intptr_t srcStride, int16_t* dst);
+typedef void (*filter_p2s_t)(const pixel* src, intptr_t srcStride, int16_t* dst, int16_t dstStride);
 
 typedef void (*copy_pp_t)(pixel* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride); // dst is aligned
 typedef void (*copy_sp_t)(pixel* dst, intptr_t dstStride, const int16_t* src, intptr_t srcStride);
@@ -211,7 +210,7 @@
         addAvg_t       addAvg;      // bidir motion compensation, uses 16bit values
 
         copy_pp_t      copy_pp;
-        filter_p2s_t   filter_p2s;
+        filter_p2s_t   convert_p2s;
     }
     pu[NUM_PU_SIZES];
 
@@ -290,7 +289,6 @@
     weightp_sp_t          weight_sp;
     weightp_pp_t          weight_pp;
 
-    filter_p2s_wxh_t      luma_p2s;
 
     findPosLast_t         findPosLast;
 
@@ -337,7 +335,6 @@
         }
         cu[NUM_CU_SIZES];
 
-        filter_p2s_wxh_t p2s; // takes width/height as arguments
     }
     chroma[X265_CSP_COUNT];
 };
diff -r ac85c775620f -r c26756d8ced6 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Tue Mar 31 20:04:28 2015 -0500
+++ b/source/common/x86/asm-primitives.cpp	Wed Apr 01 17:27:13 2015 +0530
@@ -859,9 +859,6 @@
         PIXEL_AVG_W4(mmx2);
         LUMA_VAR(sse2);
 
-        p.luma_p2s = x265_luma_p2s_sse2;
-        p.chroma[X265_CSP_I420].p2s = x265_chroma_p2s_sse2;
-        p.chroma[X265_CSP_I422].p2s = x265_chroma_p2s_sse2;
 
         ALL_LUMA_TU(blockfill_s, blockfill_s, sse2);
         ALL_LUMA_TU_S(cpy1Dto2D_shr, cpy1Dto2D_shr_, sse2);
@@ -1249,31 +1246,7 @@
         ASSIGN_SSE_PP(ssse3);
         p.cu[BLOCK_4x4].sse_pp = x265_pixel_ssd_4x4_ssse3;
         p.chroma[X265_CSP_I422].cu[BLOCK_422_4x8].sse_pp = x265_pixel_ssd_4x8_ssse3;
-        p.pu[LUMA_4x4].filter_p2s = x265_pixelToShort_4x4_ssse3;
-        p.pu[LUMA_4x8].filter_p2s = x265_pixelToShort_4x8_ssse3;
-        p.pu[LUMA_4x16].filter_p2s = x265_pixelToShort_4x16_ssse3;
-        p.pu[LUMA_8x4].filter_p2s = x265_pixelToShort_8x4_ssse3;
-        p.pu[LUMA_8x8].filter_p2s = x265_pixelToShort_8x8_ssse3;
-        p.pu[LUMA_8x16].filter_p2s = x265_pixelToShort_8x16_ssse3;
-        p.pu[LUMA_8x32].filter_p2s = x265_pixelToShort_8x32_ssse3;
-        p.pu[LUMA_16x4].filter_p2s = x265_pixelToShort_16x4_ssse3;
-        p.pu[LUMA_16x8].filter_p2s = x265_pixelToShort_16x8_ssse3;
-        p.pu[LUMA_16x12].filter_p2s = x265_pixelToShort_16x12_ssse3;
-        p.pu[LUMA_16x16].filter_p2s = x265_pixelToShort_16x16_ssse3;
-        p.pu[LUMA_16x32].filter_p2s = x265_pixelToShort_16x32_ssse3;
-        p.pu[LUMA_16x64].filter_p2s = x265_pixelToShort_16x64_ssse3;
-        p.pu[LUMA_32x8].filter_p2s = x265_pixelToShort_32x8_ssse3;
-        p.pu[LUMA_32x16].filter_p2s = x265_pixelToShort_32x16_ssse3;
-        p.pu[LUMA_32x24].filter_p2s = x265_pixelToShort_32x24_ssse3;
-        p.pu[LUMA_32x32].filter_p2s = x265_pixelToShort_32x32_ssse3;
-        p.pu[LUMA_32x64].filter_p2s = x265_pixelToShort_32x64_ssse3;
-        p.pu[LUMA_64x16].filter_p2s = x265_pixelToShort_64x16_ssse3;
-        p.pu[LUMA_64x32].filter_p2s = x265_pixelToShort_64x32_ssse3;
-        p.pu[LUMA_64x48].filter_p2s = x265_pixelToShort_64x48_ssse3;
-        p.pu[LUMA_64x64].filter_p2s = x265_pixelToShort_64x64_ssse3;
 
-        p.chroma[X265_CSP_I420].p2s = x265_chroma_p2s_ssse3;
-        p.chroma[X265_CSP_I422].p2s = x265_chroma_p2s_ssse3;
 
         p.dst4x4 = x265_dst4_ssse3;
         p.cu[BLOCK_8x8].idct = x265_idct8_ssse3;
diff -r ac85c775620f -r c26756d8ced6 source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm	Tue Mar 31 20:04:28 2015 -0500
+++ b/source/common/x86/ipfilter8.asm	Wed Apr 01 17:27:13 2015 +0530
@@ -7740,320 +7740,6 @@
 FILTER_V4_W16n_H2 64, 48
 FILTER_V4_W16n_H2 48, 64
 FILTER_V4_W16n_H2 64, 16
-;-----------------------------------------------------------------------------
-; void pixelToShort(pixel *src, intptr_t srcStride, int16_t *dst, int width, int height)
-;-----------------------------------------------------------------------------
-%macro PIXEL_WH_4xN 2
-INIT_XMM ssse3
-cglobal pixelToShort_%1x%2, 3, 7, 6
-
-    ; load width and height
-    mov         r3d, %1
-    mov         r4d, %2
-    ; load constant
-    mova        m4, [pb_128]
-    mova        m5, [tab_c_64_n64]
-.loopH:
-    xor         r5d, r5d
-
-.loopW:
-    mov         r6, r0
-    movh        m0, [r6]
-    punpcklbw   m0, m4
-    pmaddubsw   m0, m5
-
-    movh        m1, [r6 + r1]
-    punpcklbw   m1, m4
-    pmaddubsw   m1, m5
-
-    movh        m2, [r6 + r1 * 2]
-    punpcklbw   m2, m4
-    pmaddubsw   m2, m5
-
-    lea         r6, [r6 + r1 * 2]
-    movh        m3, [r6 + r1]
-    punpcklbw   m3, m4
-    pmaddubsw   m3, m5
-
-    add         r5, 8
-    cmp         r5, r3
-    jg          .width4
-    movu        [r2 + r5 * 2 + FENC_STRIDE * 0 - 16], m0
-    movu        [r2 + r5 * 2 + FENC_STRIDE * 2 - 16], m1
-    movu        [r2 + r5 * 2 + FENC_STRIDE * 4 - 16], m2
-    movu        [r2 + r5 * 2 + FENC_STRIDE * 6 - 16], m3
-    je          .nextH
-    jmp         .loopW
-
-.width4:
-    movh        [r2 + r5 * 2 + FENC_STRIDE * 0 - 16], m0
-    movh        [r2 + r5 * 2 + FENC_STRIDE * 2 - 16], m1
-    movh        [r2 + r5 * 2 + FENC_STRIDE * 4 - 16], m2
-    movh        [r2 + r5 * 2 + FENC_STRIDE * 6 - 16], m3
-
-.nextH:
-    lea         r0, [r0 + r1 * 4]
-    add         r2, FENC_STRIDE * 8
-
-    sub         r4d, 4
-    jnz         .loopH
-    RET
-%endmacro
-PIXEL_WH_4xN 4, 4
-PIXEL_WH_4xN 4, 8
-PIXEL_WH_4xN 4, 16
-
-;-----------------------------------------------------------------------------
-; void pixelToShort(pixel *src, intptr_t srcStride, int16_t *dst, int width, int height)
-;-----------------------------------------------------------------------------
-%macro PIXEL_WH_8xN 2
-INIT_XMM ssse3
-cglobal pixelToShort_%1x%2, 3, 7, 6
-
-    ; load width and height
-    mov         r3d, %1
-    mov         r4d, %2
-
-    ; load constant
-    mova        m4, [pb_128]
-    mova        m5, [tab_c_64_n64]
-
-.loopH
-    xor         r5d, r5d
-.loopW
-    lea         r6, [r0 + r5]
-
-    movh        m0, [r6]
-    punpcklbw   m0, m4
-    pmaddubsw   m0, m5
-
-    movh        m1, [r6 + r1]
-    punpcklbw   m1, m4
-    pmaddubsw   m1, m5
-
-    movh        m2, [r6 + r1 * 2]
-    punpcklbw   m2, m4
-    pmaddubsw   m2, m5
-
-    lea         r6, [r6 + r1 * 2]
-    movh        m3, [r6 + r1]
-    punpcklbw   m3, m4
-    pmaddubsw   m3, m5
-
-    add         r5, 8
-    cmp         r5, r3
-
-    movu        [r2 + FENC_STRIDE * 0], m0
-    movu        [r2 + FENC_STRIDE * 2], m1
-    movu        [r2 + FENC_STRIDE * 4], m2
-    movu        [r2 + FENC_STRIDE * 6], m3
-
-    je          .nextH
-    jmp         .loopW
-
-
-.nextH:
-    lea         r0, [r0 + r1 * 4]
-    add         r2, FENC_STRIDE * 8
-
-    sub         r4d, 4
-    jnz         .loopH
-    RET
-%endmacro
-PIXEL_WH_8xN 8, 8
-PIXEL_WH_8xN 8, 4
-PIXEL_WH_8xN 8, 16
-PIXEL_WH_8xN 8, 32
-
-
-;-----------------------------------------------------------------------------
-; void pixelToShort(pixel *src, intptr_t srcStride, int16_t *dst, int width, int height)
-;-----------------------------------------------------------------------------
-%macro PIXEL_WH_16xN 2
-INIT_XMM ssse3
-cglobal pixelToShort_%1x%2, 3, 7, 6
-
-    ; load width and height
-    mov         r3d, %1
-    mov         r4d, %2
-
-    ; load constant
-    mova        m4, [pb_128]
-    mova        m5, [tab_c_64_n64]
-
-.loopH:
-    xor         r5d, r5d
-.loopW:
-    lea         r6, [r0 + r5]
-
-    movh        m0, [r6]
-    punpcklbw   m0, m4
-    pmaddubsw   m0, m5
-
-    movh        m1, [r6 + r1]
-    punpcklbw   m1, m4
-    pmaddubsw   m1, m5
-
-    movh        m2, [r6 + r1 * 2]
-    punpcklbw   m2, m4
-    pmaddubsw   m2, m5
-
-    lea         r6, [r6 + r1 * 2]
-    movh        m3, [r6 + r1]
-    punpcklbw   m3, m4
-    pmaddubsw   m3, m5
-
-    add         r5, 8
-    cmp         r5, r3
-
-    movu        [r2 + r5 * 2 + FENC_STRIDE * 0 - 16], m0
-    movu        [r2 + r5 * 2 + FENC_STRIDE * 2 - 16], m1
-    movu        [r2 + r5 * 2 + FENC_STRIDE * 4 - 16], m2
-    movu        [r2 + r5 * 2 + FENC_STRIDE * 6 - 16], m3
-    je          .nextH
-    jmp         .loopW
-
-
-.nextH:
-    lea         r0, [r0 + r1 * 4]
-    add         r2, FENC_STRIDE * 8
-
-    sub         r4d, 4
-    jnz         .loopH
-
-    RET
-%endmacro
-PIXEL_WH_16xN 16, 16
-PIXEL_WH_16xN 16, 8
-PIXEL_WH_16xN 16, 4
-PIXEL_WH_16xN 16, 12
-PIXEL_WH_16xN 16, 32
-PIXEL_WH_16xN 16, 64
-
-;-----------------------------------------------------------------------------
-; void pixelToShort(pixel *src, intptr_t srcStride, int16_t *dst, int width, int height)
-;-----------------------------------------------------------------------------
-%macro PIXEL_WH_32xN 2
-INIT_XMM ssse3
-cglobal pixelToShort_%1x%2, 3, 7, 6
-
-    ; load width and height
-    mov         r3d, %1
-    mov         r4d, %2
-
-    ; load constant
-    mova        m4, [pb_128]
-    mova        m5, [tab_c_64_n64]
-
-.loopH:
-    xor         r5d, r5d
-.loopW:
-    lea         r6, [r0 + r5]
-
-    movh        m0, [r6]
-    punpcklbw   m0, m4
-    pmaddubsw   m0, m5
-
-    movh        m1, [r6 + r1]
-    punpcklbw   m1, m4
-    pmaddubsw   m1, m5
-
-    movh        m2, [r6 + r1 * 2]
-    punpcklbw   m2, m4
-    pmaddubsw   m2, m5
-
-    lea         r6, [r6 + r1 * 2]
-    movh        m3, [r6 + r1]
-    punpcklbw   m3, m4
-    pmaddubsw   m3, m5
-
-    add         r5, 8
-    cmp         r5, r3
-
-    movu        [r2 + r5 * 2 + FENC_STRIDE * 0 - 16], m0
-    movu        [r2 + r5 * 2 + FENC_STRIDE * 2 - 16], m1
-    movu        [r2 + r5 * 2 + FENC_STRIDE * 4 - 16], m2
-    movu        [r2 + r5 * 2 + FENC_STRIDE * 6 - 16], m3
-    je          .nextH
-    jmp         .loopW
-
-
-.nextH:
-    lea         r0, [r0 + r1 * 4]
-    add         r2, FENC_STRIDE * 8
-
-    sub         r4d, 4
-    jnz         .loopH
-
-    RET
-%endmacro
-PIXEL_WH_32xN 32, 32
-PIXEL_WH_32xN 32, 8
-PIXEL_WH_32xN 32, 16
-PIXEL_WH_32xN 32, 24
-PIXEL_WH_32xN 32, 64
-
-;-----------------------------------------------------------------------------
-; void pixelToShort(pixel *src, intptr_t srcStride, int16_t *dst, int width, int height)
-;-----------------------------------------------------------------------------
-%macro PIXEL_WH_64xN 2
-INIT_XMM ssse3
-cglobal pixelToShort_%1x%2, 3, 7, 6
-
-    ; load width and height
-    mov         r3d, %1
-    mov         r4d, %2
-
-    ; load constant
-    mova        m4, [pb_128]
-    mova        m5, [tab_c_64_n64]
-
-.loopH:
-    xor         r5d, r5d
-.loopW:
-    lea         r6, [r0 + r5]
-
-    movh        m0, [r6]
-    punpcklbw   m0, m4
-    pmaddubsw   m0, m5
-
-    movh        m1, [r6 + r1]
-    punpcklbw   m1, m4
-    pmaddubsw   m1, m5
-
-    movh        m2, [r6 + r1 * 2]
-    punpcklbw   m2, m4
-    pmaddubsw   m2, m5
-
-    lea         r6, [r6 + r1 * 2]
-    movh        m3, [r6 + r1]
-    punpcklbw   m3, m4
-    pmaddubsw   m3, m5
-
-    add         r5, 8
-    cmp         r5, r3
-
-    movu        [r2 + r5 * 2 + FENC_STRIDE * 0 - 16], m0
-    movu        [r2 + r5 * 2 + FENC_STRIDE * 2 - 16], m1
-    movu        [r2 + r5 * 2 + FENC_STRIDE * 4 - 16], m2
-    movu        [r2 + r5 * 2 + FENC_STRIDE * 6 - 16], m3
-    je          .nextH
-    jmp         .loopW
-
-
-.nextH:
-    lea         r0, [r0 + r1 * 4]
-    add         r2, FENC_STRIDE * 8
-
-    sub         r4d, 4
-    jnz         .loopH
-
-    RET
-%endmacro
-PIXEL_WH_64xN 64, 64
-PIXEL_WH_64xN 64, 16
-PIXEL_WH_64xN 64, 32
-PIXEL_WH_64xN 64, 48
 
 %macro PROCESS_LUMA_W4_4R 0
     movd        m0, [r0]
diff -r ac85c775620f -r c26756d8ced6 source/common/x86/ipfilter8.h
--- a/source/common/x86/ipfilter8.h	Tue Mar 31 20:04:28 2015 -0500
+++ b/source/common/x86/ipfilter8.h	Wed Apr 01 17:27:13 2015 +0530
@@ -289,8 +289,6 @@
     SETUP_CHROMA_420_HORIZ_FUNC_DEF(64, 16, cpu); \
     SETUP_CHROMA_420_HORIZ_FUNC_DEF(16, 64, cpu)
 
-void x265_chroma_p2s_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, int width, int height);
-void x265_luma_p2s_sse2(const pixel* src, intptr_t srcStride, int16_t* dst, int width, int height);
 
 CHROMA_420_VERT_FILTERS(_sse2);
 CHROMA_420_HORIZ_FILTERS(_sse4);
@@ -624,28 +622,6 @@
 LUMA_SP_FILTERS(_avx2);
 LUMA_SS_FILTERS(_avx2);
 void x265_interp_8tap_hv_pp_8x8_sse4(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int idxX, int idxY);
-void x265_pixelToShort_4x4_ssse3(const pixel* src, intptr_t srcStride, int16_t* dst);
-void x265_pixelToShort_4x8_ssse3(const pixel* src, intptr_t srcStride, int16_t* dst);
-void x265_pixelToShort_4x16_ssse3(const pixel* src, intptr_t srcStride, int16_t* dst);
-void x265_pixelToShort_8x4_ssse3(const pixel* src, intptr_t srcStride, int16_t* dst);
-void x265_pixelToShort_8x8_ssse3(const pixel* src, intptr_t srcStride, int16_t* dst);
-void x265_pixelToShort_8x16_ssse3(const pixel* src, intptr_t srcStride, int16_t* dst);
-void x265_pixelToShort_8x32_ssse3(const pixel* src, intptr_t srcStride, int16_t* dst);
-void x265_pixelToShort_16x4_ssse3(const pixel* src, intptr_t srcStride, int16_t* dst);
-void x265_pixelToShort_16x8_ssse3(const pixel* src, intptr_t srcStride, int16_t* dst);
-void x265_pixelToShort_16x12_ssse3(const pixel* src, intptr_t srcStride, int16_t* dst);
-void x265_pixelToShort_16x16_ssse3(const pixel* src, intptr_t srcStride, int16_t* dst);
-void x265_pixelToShort_16x32_ssse3(const pixel* src, intptr_t srcStride, int16_t* dst);
-void x265_pixelToShort_16x64_ssse3(const pixel* src, intptr_t srcStride, int16_t* dst);
-void x265_pixelToShort_32x8_ssse3(const pixel* src, intptr_t srcStride, int16_t* dst);
-void x265_pixelToShort_32x16_ssse3(const pixel* src, intptr_t srcStride, int16_t* dst);
-void x265_pixelToShort_32x24_ssse3(const pixel* src, intptr_t srcStride, int16_t* dst);
-void x265_pixelToShort_32x32_ssse3(const pixel* src, intptr_t srcStride, int16_t* dst);
-void x265_pixelToShort_32x64_ssse3(const pixel* src, intptr_t srcStride, int16_t* dst);
-void x265_pixelToShort_64x16_ssse3(const pixel* src, intptr_t srcStride, int16_t* dst);
-void x265_pixelToShort_64x32_ssse3(const pixel* src, intptr_t srcStride, int16_t* dst);
-void x265_pixelToShort_64x48_ssse3(const pixel* src, intptr_t srcStride, int16_t* dst);
-void x265_pixelToShort_64x64_ssse3(const pixel* src, intptr_t srcStride, int16_t* dst);
 #undef LUMA_FILTERS
 #undef LUMA_SP_FILTERS
 #undef LUMA_SS_FILTERS
diff -r ac85c775620f -r c26756d8ced6 source/test/ipfilterharness.cpp
--- a/source/test/ipfilterharness.cpp	Tue Mar 31 20:04:28 2015 -0500
+++ b/source/test/ipfilterharness.cpp	Wed Apr 01 17:27:13 2015 +0530
@@ -61,55 +61,6 @@
     }
 }
 
-bool IPFilterHarness::check_IPFilter_primitive(filter_p2s_wxh_t ref, filter_p2s_wxh_t opt, int isChroma, int csp)
-{
-    intptr_t rand_srcStride;
-    int min_size = isChroma ? 2 : 4;
-    int max_size = isChroma ? (MAX_CU_SIZE >> 1) : MAX_CU_SIZE;
-
-    if (isChroma && (csp == X265_CSP_I444))
-    {
-        min_size = 4;
-        max_size = MAX_CU_SIZE;
-    }
-
-    for (int i = 0; i < ITERS; i++)
-    {
-        int index = i % TEST_CASES;
-        int rand_height = (int16_t)rand() % 100;
-        int rand_width = (int16_t)rand() % 100;
-
-        rand_srcStride = rand_width + rand() % 100;
-        if (rand_srcStride < rand_width)
-            rand_srcStride = rand_width;
-
-        rand_width &= ~(min_size - 1);
-        rand_width = x265_clip3(min_size, max_size, rand_width);
-
-        rand_height &= ~(min_size - 1);
-        rand_height = x265_clip3(min_size, max_size, rand_height);
-
-        ref(pixel_test_buff[index],
-            rand_srcStride,
-            IPF_C_output_s,
-            rand_width,
-            rand_height);
-
-        checked(opt, pixel_test_buff[index],
-                rand_srcStride,
-                IPF_vec_output_s,
-                rand_width,
-                rand_height);
-
-        if (memcmp(IPF_vec_output_s, IPF_C_output_s, TEST_BUF_SIZE * sizeof(int16_t)))
-            return false;
-
-        reportfail();
-    }
-
-    return true;
-}
-
 bool IPFilterHarness::check_IPFilterChroma_primitive(filter_pp_t ref, filter_pp_t opt)
 {
     intptr_t rand_srcStride, rand_dstStride;
@@ -518,12 +469,13 @@
     {
         intptr_t rand_srcStride = rand() % 100;
         int index = i % TEST_CASES;
+        int16_t dstStride = MAX_CU_SIZE;
 
-        ref(pixel_test_buff[index] + i, rand_srcStride, IPF_C_output_s);
+        ref(pixel_test_buff[index] + i, rand_srcStride, IPF_C_output_s, dstStride);
 
-        checked(opt, pixel_test_buff[index] + i, rand_srcStride, IPF_vec_output_s);
+        checked(opt, pixel_test_buff[index] + i, rand_srcStride, IPF_vec_output_s, dstStride);
 
-        if (memcmp(IPF_vec_output_s, IPF_C_output_s, TEST_BUF_SIZE * sizeof(pixel)))
+        if (memcmp(IPF_vec_output_s, IPF_C_output_s, TEST_BUF_SIZE * sizeof(int16_t)))
             return false;
 
         reportfail();
@@ -532,18 +484,24 @@
     return true;
 }
 
-bool IPFilterHarness::check_IPFilterChromaP2S_primitive(filter_p2s_t ref, filter_p2s_t opt)
+bool IPFilterHarness::check_IPFilterChromaP2S_primitive(filter_p2s_t ref, filter_p2s_t opt, int csp)
 {
     for (int i = 0; i < ITERS; i++)
     {
         intptr_t rand_srcStride = rand() % 100;
         int index = i % TEST_CASES;
+        int16_t dstStride;
 
-        ref(pixel_test_buff[index] + i, rand_srcStride, IPF_C_output_s);
+        if (csp == X265_CSP_I444)
+            dstStride = MAX_CU_SIZE;
+        else
+            dstStride = MAX_CU_SIZE >> (int16_t)(csp != X265_CSP_I444);
 
-        checked(opt, pixel_test_buff[index] + i, rand_srcStride, IPF_vec_output_s);
+        ref(pixel_test_buff[index] + i, rand_srcStride, IPF_C_output_s, dstStride);
 
-        if (memcmp(IPF_vec_output_s, IPF_C_output_s, TEST_BUF_SIZE * sizeof(pixel)))
+        checked(opt, pixel_test_buff[index] + i, rand_srcStride, IPF_vec_output_s, dstStride);
+
+        if (memcmp(IPF_vec_output_s, IPF_C_output_s, TEST_BUF_SIZE * sizeof(int16_t)))
             return false;
 
         reportfail();
@@ -554,15 +512,6 @@
 
 bool IPFilterHarness::testCorrectness(const EncoderPrimitives& ref, const EncoderPrimitives& opt)
 {
-    if (opt.luma_p2s)
-    {
-        // last parameter does not matter in case of luma
-        if (!check_IPFilter_primitive(ref.luma_p2s, opt.luma_p2s, 0, 1))
-        {
-            printf("luma_p2s failed\n");
-            return false;
-        }
-    }
 
     for (int value = 0; value < NUM_PU_SIZES; value++)
     {
@@ -622,11 +571,11 @@
                 return false;
             }
         }
-        if (opt.pu[value].filter_p2s)
+        if (opt.pu[value].convert_p2s)
         {
-            if (!check_IPFilterLumaP2S_primitive(ref.pu[value].filter_p2s, opt.pu[value].filter_p2s))
+            if (!check_IPFilterLumaP2S_primitive(ref.pu[value].convert_p2s, opt.pu[value].convert_p2s))
             {
-                printf("filter_p2s[%s]", lumaPartStr[value]);
+                printf("convert_p2s[%s]", lumaPartStr[value]);
                 return false;
             }
         }
@@ -634,14 +583,6 @@
 
     for (int csp = X265_CSP_I420; csp < X265_CSP_COUNT; csp++)
     {
-        if (opt.chroma[csp].p2s)
-        {
-            if (!check_IPFilter_primitive(ref.chroma[csp].p2s, opt.chroma[csp].p2s, 1, csp))
-            {
-                printf("chroma_p2s[%s]", x265_source_csp_names[csp]);
-                return false;
-            }
-        }
         for (int value = 0; value < NUM_PU_SIZES; value++)
         {
             if (opt.chroma[csp].pu[value].filter_hpp)
@@ -694,7 +635,7 @@
             }
             if (opt.chroma[csp].pu[value].chroma_p2s)
             {
-                if (!check_IPFilterChromaP2S_primitive(ref.chroma[csp].pu[value].chroma_p2s, opt.chroma[csp].pu[value].chroma_p2s))
+                if (!check_IPFilterChromaP2S_primitive(ref.chroma[csp].pu[value].chroma_p2s, opt.chroma[csp].pu[value].chroma_p2s, csp))
                 {
                     printf("chroma_p2s[%s]", chromaPartStr[csp][value]);
                     return false;
@@ -708,19 +649,10 @@
 
 void IPFilterHarness::measureSpeed(const EncoderPrimitives& ref, const EncoderPrimitives& opt)
 {
-    int height = 64;
-    int width = 64;
     int16_t srcStride = 96;
     int16_t dstStride = 96;
     int maxVerticalfilterHalfDistance = 3;
 
-    if (opt.luma_p2s)
-    {
-        printf("luma_p2s\t");
-        REPORT_SPEEDUP(opt.luma_p2s, ref.luma_p2s,
-                       pixel_buff, srcStride, IPF_vec_output_s, width, height);
-    }
-
     for (int value = 0; value < NUM_PU_SIZES; value++)
     {
         if (opt.pu[value].luma_hpp)
@@ -777,23 +709,18 @@
                            pixel_buff + 3 * srcStride, srcStride, IPF_vec_output_p, srcStride, 1, 3);
         }
 
-        if (opt.pu[value].filter_p2s)
+        if (opt.pu[value].convert_p2s)
         {
-            printf("filter_p2s [%s]\t", lumaPartStr[value]);
-            REPORT_SPEEDUP(opt.pu[value].filter_p2s, ref.pu[value].filter_p2s,
-                           pixel_buff, srcStride, IPF_vec_output_s);
+            printf("convert_p2s[%s]\t", lumaPartStr[value]);
+            REPORT_SPEEDUP(opt.pu[value].convert_p2s, ref.pu[value].convert_p2s,
+                               pixel_buff, srcStride,
+                               IPF_vec_output_s, dstStride);
         }
     }
 
     for (int csp = X265_CSP_I420; csp < X265_CSP_COUNT; csp++)
     {
         printf("= Color Space %s =\n", x265_source_csp_names[csp]);
-        if (opt.chroma[csp].p2s)
-        {
-            printf("chroma_p2s\t");
-            REPORT_SPEEDUP(opt.chroma[csp].p2s, ref.chroma[csp].p2s,
-                           pixel_buff, srcStride, IPF_vec_output_s, width, height);
-        }
         for (int value = 0; value < NUM_PU_SIZES; value++)
         {
             if (opt.chroma[csp].pu[value].filter_hpp)
@@ -836,13 +763,12 @@
                                short_buff + maxVerticalfilterHalfDistance * srcStride, srcStride,
                                IPF_vec_output_s, dstStride, 1);
             }
-
             if (opt.chroma[csp].pu[value].chroma_p2s)
             {
                 printf("chroma_p2s[%s]\t", chromaPartStr[csp][value]);
                 REPORT_SPEEDUP(opt.chroma[csp].pu[value].chroma_p2s, ref.chroma[csp].pu[value].chroma_p2s,
                                pixel_buff, srcStride,
-                               IPF_vec_output_s);
+                               IPF_vec_output_s, dstStride);
             }
         }
     }
diff -r ac85c775620f -r c26756d8ced6 source/test/ipfilterharness.h
--- a/source/test/ipfilterharness.h	Tue Mar 31 20:04:28 2015 -0500
+++ b/source/test/ipfilterharness.h	Wed Apr 01 17:27:13 2015 +0530
@@ -50,7 +50,6 @@
     pixel   pixel_test_buff[TEST_CASES][TEST_BUF_SIZE];
     int16_t short_test_buff[TEST_CASES][TEST_BUF_SIZE];
 
-    bool check_IPFilter_primitive(filter_p2s_wxh_t ref, filter_p2s_wxh_t opt, int isChroma, int csp);
     bool check_IPFilterChroma_primitive(filter_pp_t ref, filter_pp_t opt);
     bool check_IPFilterChroma_ps_primitive(filter_ps_t ref, filter_ps_t opt);
     bool check_IPFilterChroma_hps_primitive(filter_hps_t ref, filter_hps_t opt);
@@ -63,7 +62,7 @@
     bool check_IPFilterLuma_ss_primitive(filter_ss_t ref, filter_ss_t opt);
     bool check_IPFilterLumaHV_primitive(filter_hv_pp_t ref, filter_hv_pp_t opt);
     bool check_IPFilterLumaP2S_primitive(filter_p2s_t ref, filter_p2s_t opt);
-    bool check_IPFilterChromaP2S_primitive(filter_p2s_t ref, filter_p2s_t opt);
+    bool check_IPFilterChromaP2S_primitive(filter_p2s_t ref, filter_p2s_t opt, int csp);
 
 public:
 



More information about the x265-devel mailing list