[x265] [PATCH] primivites: remove filter_p2s and move luma_p2s into PU
Rajesh Paulraj
rajesh at multicorewareinc.com
Tue Mar 31 08:16:02 CEST 2015
*This looks highly suspect to me. Why would the destination stride
be different depending on the subpel offsets? You're using MAX_CU_SIZE here
but dstStride is used everywhere below.*
Luma and chroma now use the same C function, filterPixelToShort_c. The
dstStride is MAX_CU_SIZE for luma, MAX_CU_SIZE/2 for chroma i420 and i422,
and MAX_CU_SIZE for i444. With the earlier C function we passed the
dstStride (MAX_CU_SIZE) directly to the C function:
> - p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].chroma_p2s =
pixelToShort_c<MAX_CU_SIZE, W, H>;
Now only the height and width are passed from here.
This is all to make use of the same C/asm primitive for luma and chroma.
*the 'luma_' prefix is redundant, everything within pu[] is implied to
be luma since it is not in chroma[].pu[]*
I will change luma_p2s to convert_p2s everywhere
On Mon, Mar 30, 2015 at 10:59 PM, Steve Borho <steve at borho.org> wrote:
> On 03/30, rajesh at multicorewareinc.com wrote:
> > # HG changeset patch
> > # User Rajesh Paulraj<rajesh at multicorewareinc.com>
> > # Date 1427292575 -19800
> > # Wed Mar 25 19:39:35 2015 +0530
> > # Node ID 8797b42373debaab0ef1a2a1f4f8776a69916cfb
> > # Parent 22a312799bb033d40a66fc83a1ac7af192ce2420
> > primivites: remove filter_p2s and move luma_p2s into PU
> >
> > diff -r 22a312799bb0 -r 8797b42373de source/common/ipfilter.cpp
> > --- a/source/common/ipfilter.cpp Fri Mar 27 22:59:30 2015 -0500
> > +++ b/source/common/ipfilter.cpp Wed Mar 25 19:39:35 2015 +0530
> > @@ -34,27 +34,8 @@
> > #endif
> >
> > namespace {
> > -template<int dstStride, int width, int height>
> > -void pixelToShort_c(const pixel* src, intptr_t srcStride, int16_t* dst)
> > -{
> > - int shift = IF_INTERNAL_PREC - X265_DEPTH;
> > - int row, col;
> > -
> > - for (row = 0; row < height; row++)
> > - {
> > - for (col = 0; col < width; col++)
> > - {
> > - int16_t val = src[col] << shift;
> > - dst[col] = val - (int16_t)IF_INTERNAL_OFFS;
> > - }
> > -
> > - src += srcStride;
> > - dst += dstStride;
> > - }
> > -}
> > -
> > -template<int dstStride>
> > -void filterPixelToShort_c(const pixel* src, intptr_t srcStride,
> int16_t* dst, int width, int height)
> > +template<int width, int height>
> > +void filterPixelToShort_c(const pixel* src, intptr_t srcStride,
> int16_t* dst, int16_t dstStride)
> > {
> > int shift = IF_INTERNAL_PREC - X265_DEPTH;
> > int row, col;
> > @@ -398,7 +379,7 @@
> > p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_vps =
> interp_vert_ps_c<4, W, H>; \
> > p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_vsp =
> interp_vert_sp_c<4, W, H>; \
> > p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_vss =
> interp_vert_ss_c<4, W, H>; \
> > - p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].chroma_p2s =
> pixelToShort_c<MAX_CU_SIZE / 2, W, H>;
> > + p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].chroma_p2s =
> filterPixelToShort_c<W, H>;
> >
> > #define CHROMA_422(W, H) \
> > p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_hpp =
> interp_horiz_pp_c<4, W, H>; \
> > @@ -407,7 +388,7 @@
> > p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_vps =
> interp_vert_ps_c<4, W, H>; \
> > p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_vsp =
> interp_vert_sp_c<4, W, H>; \
> > p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_vss =
> interp_vert_ss_c<4, W, H>; \
> > - p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].chroma_p2s =
> pixelToShort_c<MAX_CU_SIZE / 2, W, H>;
> > + p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].chroma_p2s =
> filterPixelToShort_c<W, H>;
> >
> > #define CHROMA_444(W, H) \
> > p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_hpp =
> interp_horiz_pp_c<4, W, H>; \
> > @@ -416,7 +397,7 @@
> > p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vps =
> interp_vert_ps_c<4, W, H>; \
> > p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vsp =
> interp_vert_sp_c<4, W, H>; \
> > p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vss =
> interp_vert_ss_c<4, W, H>; \
> > - p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].chroma_p2s =
> pixelToShort_c<MAX_CU_SIZE, W, H>;
> > + p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].chroma_p2s =
> filterPixelToShort_c<W, H>;
> >
> > #define LUMA(W, H) \
> > p.pu[LUMA_ ## W ## x ## H].luma_hpp = interp_horiz_pp_c<8, W,
> H>; \
> > @@ -426,7 +407,7 @@
> > p.pu[LUMA_ ## W ## x ## H].luma_vsp = interp_vert_sp_c<8, W,
> H>; \
> > p.pu[LUMA_ ## W ## x ## H].luma_vss = interp_vert_ss_c<8, W,
> H>; \
> > p.pu[LUMA_ ## W ## x ## H].luma_hvpp = interp_hv_pp_c<8, W, H>; \
> > - p.pu[LUMA_ ## W ## x ## H].filter_p2s = pixelToShort_c<MAX_CU_SIZE,
> W, H>
> > + p.pu[LUMA_ ## W ## x ## H].luma_p2s = filterPixelToShort_c<W, H>;
> >
> > void setupFilterPrimitives_c(EncoderPrimitives& p)
> > {
> > @@ -530,11 +511,116 @@
> > CHROMA_444(48, 64);
> > CHROMA_444(64, 16);
> > CHROMA_444(16, 64);
> > - p.luma_p2s = filterPixelToShort_c<MAX_CU_SIZE>;
> >
> > - p.chroma[X265_CSP_I444].p2s = filterPixelToShort_c<MAX_CU_SIZE>;
> > - p.chroma[X265_CSP_I420].p2s = filterPixelToShort_c<MAX_CU_SIZE / 2>;
> > - p.chroma[X265_CSP_I422].p2s = filterPixelToShort_c<MAX_CU_SIZE / 2>;
> > + p.pu[LUMA_4x4].luma_p2s = filterPixelToShort_c<4, 4>;
> > + p.pu[LUMA_4x8].luma_p2s = filterPixelToShort_c<4, 8>;
> > + p.pu[LUMA_4x16].luma_p2s = filterPixelToShort_c<4, 16>;
> > + p.pu[LUMA_8x4].luma_p2s = filterPixelToShort_c<8, 4>;
> > + p.pu[LUMA_8x8].luma_p2s = filterPixelToShort_c<8, 8>;
> > + p.pu[LUMA_8x16].luma_p2s = filterPixelToShort_c<8, 16>;
> > + p.pu[LUMA_8x32].luma_p2s = filterPixelToShort_c<8, 32>;
> > + p.pu[LUMA_16x4].luma_p2s = filterPixelToShort_c<16, 4>;
> > + p.pu[LUMA_16x8].luma_p2s = filterPixelToShort_c<16, 8>;
> > + p.pu[LUMA_16x12].luma_p2s = filterPixelToShort_c<16, 12>;
> > + p.pu[LUMA_16x16].luma_p2s = filterPixelToShort_c<16, 16>;
> > + p.pu[LUMA_16x32].luma_p2s = filterPixelToShort_c<16, 32>;
> > + p.pu[LUMA_16x64].luma_p2s = filterPixelToShort_c<16, 64>;
> > + p.pu[LUMA_32x8].luma_p2s = filterPixelToShort_c<32, 8>;
> > + p.pu[LUMA_32x16].luma_p2s = filterPixelToShort_c<32, 16>;
> > + p.pu[LUMA_32x24].luma_p2s = filterPixelToShort_c<32, 24>;
> > + p.pu[LUMA_32x32].luma_p2s = filterPixelToShort_c<32, 32>;
> > + p.pu[LUMA_32x64].luma_p2s = filterPixelToShort_c<32, 64>;
> > + p.pu[LUMA_64x16].luma_p2s = filterPixelToShort_c<64, 16>;
> > + p.pu[LUMA_64x32].luma_p2s = filterPixelToShort_c<64, 32>;
> > + p.pu[LUMA_64x48].luma_p2s = filterPixelToShort_c<64, 48>;
> > + p.pu[LUMA_64x64].luma_p2s = filterPixelToShort_c<64, 64>;
> > + p.pu[LUMA_12x16].luma_p2s = filterPixelToShort_c<12, 16>;
> > + p.pu[LUMA_24x32].luma_p2s = filterPixelToShort_c<24, 32>;
> > + p.pu[LUMA_48x64].luma_p2s = filterPixelToShort_c<48, 64>;
> > +
> > + p.chroma[X265_CSP_I420].pu[CHROMA_420_2x2].chroma_p2s =
> filterPixelToShort_c<2, 2>;
> > + p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].chroma_p2s =
> filterPixelToShort_c<4, 4>;
> > + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].chroma_p2s =
> filterPixelToShort_c<8, 8>;
> > + p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].chroma_p2s =
> filterPixelToShort_c<16, 16>;
> > + p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].chroma_p2s =
> filterPixelToShort_c<32, 32>;
> > +
> > + p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].chroma_p2s =
> filterPixelToShort_c<4, 2>;
> > + p.chroma[X265_CSP_I420].pu[CHROMA_420_2x4].chroma_p2s =
> filterPixelToShort_c<2, 4>;
> > + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].chroma_p2s =
> filterPixelToShort_c<8, 4>;
> > + p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].chroma_p2s =
> filterPixelToShort_c<4, 8>;
> > + p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].chroma_p2s =
> filterPixelToShort_c<16, 8>;
> > + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].chroma_p2s =
> filterPixelToShort_c<8, 16>;
> > + p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].chroma_p2s =
> filterPixelToShort_c<32, 16>;
> > + p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].chroma_p2s =
> filterPixelToShort_c<16, 32>;
> > +
> > + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x6].chroma_p2s =
> filterPixelToShort_c<8, 6>;
> > + p.chroma[X265_CSP_I420].pu[CHROMA_420_6x8].chroma_p2s =
> filterPixelToShort_c<6, 8>;
> > + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x2].chroma_p2s =
> filterPixelToShort_c<8, 2>;
> > + p.chroma[X265_CSP_I420].pu[CHROMA_420_2x8].chroma_p2s =
> filterPixelToShort_c<2, 8>;
> > + p.chroma[X265_CSP_I420].pu[CHROMA_420_16x12].chroma_p2s =
> filterPixelToShort_c<16, 12>;
> > + p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].chroma_p2s =
> filterPixelToShort_c<12, 16>;
> > + p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].chroma_p2s =
> filterPixelToShort_c<16, 4>;
> > + p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].chroma_p2s =
> filterPixelToShort_c<4, 16>;
> > + p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].chroma_p2s =
> filterPixelToShort_c<32, 24>;
> > + p.chroma[X265_CSP_I420].pu[CHROMA_420_24x32].chroma_p2s =
> filterPixelToShort_c<24, 32>;
> > + p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].chroma_p2s =
> filterPixelToShort_c<32, 8>;
> > + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].chroma_p2s =
> filterPixelToShort_c<8, 32>;
> > +
> > + p.chroma[X265_CSP_I422].pu[CHROMA_422_2x4].chroma_p2s =
> filterPixelToShort_c<2, 4>;
> > + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].chroma_p2s =
> filterPixelToShort_c<4, 8>;
> > + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].chroma_p2s =
> filterPixelToShort_c<8, 16>;
> > + p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].chroma_p2s =
> filterPixelToShort_c<16, 32>;
> > + p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].chroma_p2s =
> filterPixelToShort_c<32, 64>;
> > +
> > + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x4].chroma_p2s =
> filterPixelToShort_c<4, 4>;
> > + p.chroma[X265_CSP_I422].pu[CHROMA_422_2x8].chroma_p2s =
> filterPixelToShort_c<2, 8>;
> > + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].chroma_p2s =
> filterPixelToShort_c<8, 8>;
> > + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].chroma_p2s =
> filterPixelToShort_c<4, 16>;
> > + p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].chroma_p2s =
> filterPixelToShort_c<16, 16>;
> > + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].chroma_p2s =
> filterPixelToShort_c<8, 32>;
> > + p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].chroma_p2s =
> filterPixelToShort_c<32, 32>;
> > + p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].chroma_p2s =
> filterPixelToShort_c<16, 64>;
> > +
> > + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].chroma_p2s =
> filterPixelToShort_c<8, 12>;
> > + p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].chroma_p2s =
> filterPixelToShort_c<6, 16>;
> > + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].chroma_p2s =
> filterPixelToShort_c<8, 4>;
> > + p.chroma[X265_CSP_I422].pu[CHROMA_422_2x16].chroma_p2s =
> filterPixelToShort_c<2, 16>;
> > + p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].chroma_p2s =
> filterPixelToShort_c<16, 24>;
> > + p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].chroma_p2s =
> filterPixelToShort_c<12, 32>;
> > + p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].chroma_p2s =
> filterPixelToShort_c<16, 8>;
> > + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].chroma_p2s =
> filterPixelToShort_c<4, 32>;
> > + p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].chroma_p2s =
> filterPixelToShort_c<32, 48>;
> > + p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].chroma_p2s =
> filterPixelToShort_c<24, 64>;
> > + p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].chroma_p2s =
> filterPixelToShort_c<32, 16>;
> > + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].chroma_p2s =
> filterPixelToShort_c<8, 64>;
> > +
> > + p.chroma[X265_CSP_I444].pu[CHROMA_420_2x2].chroma_p2s =
> filterPixelToShort_c<2, 2>;
> > + p.chroma[X265_CSP_I444].pu[CHROMA_420_4x4].chroma_p2s =
> filterPixelToShort_c<4, 4>;
> > + p.chroma[X265_CSP_I444].pu[CHROMA_420_8x8].chroma_p2s =
> filterPixelToShort_c<8, 8>;
> > + p.chroma[X265_CSP_I444].pu[CHROMA_420_16x16].chroma_p2s =
> filterPixelToShort_c<16, 16>;
> > + p.chroma[X265_CSP_I444].pu[CHROMA_420_32x32].chroma_p2s =
> filterPixelToShort_c<32, 32>;
> > +
> > + p.chroma[X265_CSP_I444].pu[CHROMA_420_4x2].chroma_p2s =
> filterPixelToShort_c<4, 2>;
> > + p.chroma[X265_CSP_I444].pu[CHROMA_420_2x4].chroma_p2s =
> filterPixelToShort_c<2, 4>;
> > + p.chroma[X265_CSP_I444].pu[CHROMA_420_8x4].chroma_p2s =
> filterPixelToShort_c<8, 4>;
> > + p.chroma[X265_CSP_I444].pu[CHROMA_420_4x8].chroma_p2s =
> filterPixelToShort_c<4, 8>;
> > + p.chroma[X265_CSP_I444].pu[CHROMA_420_16x8].chroma_p2s =
> filterPixelToShort_c<16, 8>;
> > + p.chroma[X265_CSP_I444].pu[CHROMA_420_8x16].chroma_p2s =
> filterPixelToShort_c<8, 16>;
> > + p.chroma[X265_CSP_I444].pu[CHROMA_420_32x16].chroma_p2s =
> filterPixelToShort_c<32, 16>;
> > + p.chroma[X265_CSP_I444].pu[CHROMA_420_16x32].chroma_p2s =
> filterPixelToShort_c<16, 32>;
> > +
> > + p.chroma[X265_CSP_I444].pu[CHROMA_420_8x6].chroma_p2s =
> filterPixelToShort_c<8, 6>;
> > + p.chroma[X265_CSP_I444].pu[CHROMA_420_6x8].chroma_p2s =
> filterPixelToShort_c<6, 8>;
> > + p.chroma[X265_CSP_I444].pu[CHROMA_420_8x2].chroma_p2s =
> filterPixelToShort_c<8, 2>;
> > + p.chroma[X265_CSP_I444].pu[CHROMA_420_2x8].chroma_p2s =
> filterPixelToShort_c<2, 8>;
> > + p.chroma[X265_CSP_I444].pu[CHROMA_420_16x12].chroma_p2s =
> filterPixelToShort_c<16, 12>;
> > + p.chroma[X265_CSP_I444].pu[CHROMA_420_12x16].chroma_p2s =
> filterPixelToShort_c<12, 16>;
> > + p.chroma[X265_CSP_I444].pu[CHROMA_420_16x4].chroma_p2s =
> filterPixelToShort_c<16, 4>;
> > + p.chroma[X265_CSP_I444].pu[CHROMA_420_4x16].chroma_p2s =
> filterPixelToShort_c<4, 16>;
> > + p.chroma[X265_CSP_I444].pu[CHROMA_420_32x24].chroma_p2s =
> filterPixelToShort_c<32, 24>;
> > + p.chroma[X265_CSP_I444].pu[CHROMA_420_24x32].chroma_p2s =
> filterPixelToShort_c<24, 32>;
> > + p.chroma[X265_CSP_I444].pu[CHROMA_420_32x8].chroma_p2s =
> filterPixelToShort_c<32, 8>;
> > + p.chroma[X265_CSP_I444].pu[CHROMA_420_8x32].chroma_p2s =
> filterPixelToShort_c<8, 32>;
> >
> > p.extendRowBorder = extendCURowColBorder;
> > }
> > diff -r 22a312799bb0 -r 8797b42373de source/common/predict.cpp
> > --- a/source/common/predict.cpp Fri Mar 27 22:59:30 2015 -0500
> > +++ b/source/common/predict.cpp Wed Mar 25 19:39:35 2015 +0530
> > @@ -288,7 +288,7 @@
> > X265_CHECK(dstStride == MAX_CU_SIZE, "stride expected to be max cu
> size\n");
> >
> > if (!(yFrac | xFrac))
> > - primitives.luma_p2s(src, srcStride, dst, pu.width, pu.height);
> > + primitives.pu[partEnum].luma_p2s(src, srcStride, dst,
> MAX_CU_SIZE);
>
> This looks highly suspect to me. Why would the destination stride be
> different depending on the subpel offsets? You're using MAX_CU_SIZE here
> but dstStride is used everywhere below.
>
> > else if (!yFrac)
> > primitives.pu[partEnum].luma_hps(src, srcStride, dst,
> dstStride, xFrac, 0);
> > else if (!xFrac)
> > @@ -373,16 +373,23 @@
> > int yFrac = mv.y & ((1 << shiftVer) - 1);
> >
> > int partEnum = partitionFromSizes(pu.width, pu.height);
> > -
> > +
> > uint32_t cxWidth = pu.width >> m_hChromaShift;
> > - uint32_t cxHeight = pu.height >> m_vChromaShift;
> >
> > - X265_CHECK(((cxWidth | cxHeight) % 2) == 0, "chroma block size
> expected to be multiple of 2\n");
> > + X265_CHECK(((cxWidth | (pu.height >> m_vChromaShift)) % 2) == 0,
> "chroma block size expected to be multiple of 2\n");
> >
> > if (!(yFrac | xFrac))
> > {
> > - primitives.chroma[m_csp].p2s(refCb, refStride, dstCb, cxWidth,
> cxHeight);
> > - primitives.chroma[m_csp].p2s(refCr, refStride, dstCr, cxWidth,
> cxHeight);
> > + if (m_csp != X265_CSP_I444)
> > + {
> > + primitives.chroma[m_csp].pu[partEnum].chroma_p2s(refCb,
> refStride, dstCb, MAX_CU_SIZE / 2);
> > + primitives.chroma[m_csp].pu[partEnum].chroma_p2s(refCr,
> refStride, dstCr, MAX_CU_SIZE / 2);
> > + }
> > + else
> > + {
> > + primitives.chroma[m_csp].pu[partEnum].chroma_p2s(refCb,
> refStride, dstCb, MAX_CU_SIZE);
> > + primitives.chroma[m_csp].pu[partEnum].chroma_p2s(refCr,
> refStride, dstCr, MAX_CU_SIZE);
> > + }
>
> Ditto for everything here
>
> > }
> > else if (!yFrac)
> > {
> > diff -r 22a312799bb0 -r 8797b42373de source/common/primitives.cpp
> > --- a/source/common/primitives.cpp Fri Mar 27 22:59:30 2015 -0500
> > +++ b/source/common/primitives.cpp Wed Mar 25 19:39:35 2015 +0530
> > @@ -90,7 +90,6 @@
> >
> > /* alias chroma 4:4:4 from luma primitives (all but chroma filters)
> */
> >
> > - p.chroma[X265_CSP_I444].p2s = p.luma_p2s;
> > p.chroma[X265_CSP_I444].cu[BLOCK_4x4].sa8d = NULL;
> >
> > for (int i = 0; i < NUM_PU_SIZES; i++)
> > @@ -98,7 +97,7 @@
> > p.chroma[X265_CSP_I444].pu[i].copy_pp = p.pu[i].copy_pp;
> > p.chroma[X265_CSP_I444].pu[i].addAvg = p.pu[i].addAvg;
> > p.chroma[X265_CSP_I444].pu[i].satd = p.pu[i].satd;
> > - p.chroma[X265_CSP_I444].pu[i].chroma_p2s = p.pu[i].filter_p2s;
> > + p.chroma[X265_CSP_I444].pu[i].chroma_p2s = p.pu[i].luma_p2s;
> > }
> >
> > for (int i = 0; i < NUM_CU_SIZES; i++)
> > diff -r 22a312799bb0 -r 8797b42373de source/common/primitives.h
> > --- a/source/common/primitives.h Fri Mar 27 22:59:30 2015 -0500
> > +++ b/source/common/primitives.h Wed Mar 25 19:39:35 2015 +0530
> > @@ -156,8 +156,7 @@
> > typedef void (*filter_sp_t) (const int16_t* src, intptr_t srcStride,
> pixel* dst, intptr_t dstStride, int coeffIdx);
> > typedef void (*filter_ss_t) (const int16_t* src, intptr_t srcStride,
> int16_t* dst, intptr_t dstStride, int coeffIdx);
> > typedef void (*filter_hv_pp_t) (const pixel* src, intptr_t srcStride,
> pixel* dst, intptr_t dstStride, int idxX, int idxY);
> > -typedef void (*filter_p2s_wxh_t)(const pixel* src, intptr_t srcStride,
> int16_t* dst, int width, int height);
> > -typedef void (*filter_p2s_t)(const pixel* src, intptr_t srcStride,
> int16_t* dst);
> > +typedef void (*filter_p2s_t)(const pixel* src, intptr_t srcStride,
> int16_t* dst, int16_t dstStride);
> >
> > typedef void (*copy_pp_t)(pixel* dst, intptr_t dstStride, const pixel*
> src, intptr_t srcStride); // dst is aligned
> > typedef void (*copy_sp_t)(pixel* dst, intptr_t dstStride, const
> int16_t* src, intptr_t srcStride);
> > @@ -211,7 +210,7 @@
> > addAvg_t addAvg; // bidir motion compensation, uses
> 16bit values
> >
> > copy_pp_t copy_pp;
> > - filter_p2s_t filter_p2s;
> > + filter_p2s_t luma_p2s;
> > }
> > pu[NUM_PU_SIZES];
>
> the 'luma_' prefix is redundant, everything within pu[] is implied to
> be luma since it is not in chroma[].pu[]
>
> >
> > @@ -290,7 +289,6 @@
> > weightp_sp_t weight_sp;
> > weightp_pp_t weight_pp;
> >
> > - filter_p2s_wxh_t luma_p2s;
> >
> > findPosLast_t findPosLast;
> >
> > @@ -337,7 +335,6 @@
> > }
> > cu[NUM_CU_SIZES];
> >
> > - filter_p2s_wxh_t p2s; // takes width/height as arguments
> > }
> > chroma[X265_CSP_COUNT];
> > };
> > diff -r 22a312799bb0 -r 8797b42373de source/common/x86/asm-primitives.cpp
> > --- a/source/common/x86/asm-primitives.cpp Fri Mar 27 22:59:30 2015
> -0500
> > +++ b/source/common/x86/asm-primitives.cpp Wed Mar 25 19:39:35 2015
> +0530
> > @@ -859,9 +859,6 @@
> > PIXEL_AVG_W4(mmx2);
> > LUMA_VAR(sse2);
> >
> > - p.luma_p2s = x265_luma_p2s_sse2;
> > - p.chroma[X265_CSP_I420].p2s = x265_chroma_p2s_sse2;
> > - p.chroma[X265_CSP_I422].p2s = x265_chroma_p2s_sse2;
> >
> > ALL_LUMA_TU(blockfill_s, blockfill_s, sse2);
> > ALL_LUMA_TU_S(cpy1Dto2D_shr, cpy1Dto2D_shr_, sse2);
> > @@ -1249,31 +1246,7 @@
> > ASSIGN_SSE_PP(ssse3);
> > p.cu[BLOCK_4x4].sse_pp = x265_pixel_ssd_4x4_ssse3;
> > p.chroma[X265_CSP_I422].cu[BLOCK_422_4x8].sse_pp =
> x265_pixel_ssd_4x8_ssse3;
> > - p.pu[LUMA_4x4].filter_p2s = x265_pixelToShort_4x4_ssse3;
> > - p.pu[LUMA_4x8].filter_p2s = x265_pixelToShort_4x8_ssse3;
> > - p.pu[LUMA_4x16].filter_p2s = x265_pixelToShort_4x16_ssse3;
> > - p.pu[LUMA_8x4].filter_p2s = x265_pixelToShort_8x4_ssse3;
> > - p.pu[LUMA_8x8].filter_p2s = x265_pixelToShort_8x8_ssse3;
> > - p.pu[LUMA_8x16].filter_p2s = x265_pixelToShort_8x16_ssse3;
> > - p.pu[LUMA_8x32].filter_p2s = x265_pixelToShort_8x32_ssse3;
> > - p.pu[LUMA_16x4].filter_p2s = x265_pixelToShort_16x4_ssse3;
> > - p.pu[LUMA_16x8].filter_p2s = x265_pixelToShort_16x8_ssse3;
> > - p.pu[LUMA_16x12].filter_p2s = x265_pixelToShort_16x12_ssse3;
> > - p.pu[LUMA_16x16].filter_p2s = x265_pixelToShort_16x16_ssse3;
> > - p.pu[LUMA_16x32].filter_p2s = x265_pixelToShort_16x32_ssse3;
> > - p.pu[LUMA_16x64].filter_p2s = x265_pixelToShort_16x64_ssse3;
> > - p.pu[LUMA_32x8].filter_p2s = x265_pixelToShort_32x8_ssse3;
> > - p.pu[LUMA_32x16].filter_p2s = x265_pixelToShort_32x16_ssse3;
> > - p.pu[LUMA_32x24].filter_p2s = x265_pixelToShort_32x24_ssse3;
> > - p.pu[LUMA_32x32].filter_p2s = x265_pixelToShort_32x32_ssse3;
> > - p.pu[LUMA_32x64].filter_p2s = x265_pixelToShort_32x64_ssse3;
> > - p.pu[LUMA_64x16].filter_p2s = x265_pixelToShort_64x16_ssse3;
> > - p.pu[LUMA_64x32].filter_p2s = x265_pixelToShort_64x32_ssse3;
> > - p.pu[LUMA_64x48].filter_p2s = x265_pixelToShort_64x48_ssse3;
> > - p.pu[LUMA_64x64].filter_p2s = x265_pixelToShort_64x64_ssse3;
> >
> > - p.chroma[X265_CSP_I420].p2s = x265_chroma_p2s_ssse3;
> > - p.chroma[X265_CSP_I422].p2s = x265_chroma_p2s_ssse3;
> >
> > p.dst4x4 = x265_dst4_ssse3;
> > p.cu[BLOCK_8x8].idct = x265_idct8_ssse3;
> > diff -r 22a312799bb0 -r 8797b42373de source/common/x86/ipfilter8.asm
> > --- a/source/common/x86/ipfilter8.asm Fri Mar 27 22:59:30 2015 -0500
> > +++ b/source/common/x86/ipfilter8.asm Wed Mar 25 19:39:35 2015 +0530
> > @@ -7740,320 +7740,6 @@
> > FILTER_V4_W16n_H2 64, 48
> > FILTER_V4_W16n_H2 48, 64
> > FILTER_V4_W16n_H2 64, 16
> >
> -;-----------------------------------------------------------------------------
> > -; void pixelToShort(pixel *src, intptr_t srcStride, int16_t *dst, int
> width, int height)
> >
> -;-----------------------------------------------------------------------------
> > -%macro PIXEL_WH_4xN 2
> > -INIT_XMM ssse3
> > -cglobal pixelToShort_%1x%2, 3, 7, 6
> > -
> > - ; load width and height
> > - mov r3d, %1
> > - mov r4d, %2
> > - ; load constant
> > - mova m4, [pb_128]
> > - mova m5, [tab_c_64_n64]
> > -.loopH:
> > - xor r5d, r5d
> > -
> > -.loopW:
> > - mov r6, r0
> > - movh m0, [r6]
> > - punpcklbw m0, m4
> > - pmaddubsw m0, m5
> > -
> > - movh m1, [r6 + r1]
> > - punpcklbw m1, m4
> > - pmaddubsw m1, m5
> > -
> > - movh m2, [r6 + r1 * 2]
> > - punpcklbw m2, m4
> > - pmaddubsw m2, m5
> > -
> > - lea r6, [r6 + r1 * 2]
> > - movh m3, [r6 + r1]
> > - punpcklbw m3, m4
> > - pmaddubsw m3, m5
> > -
> > - add r5, 8
> > - cmp r5, r3
> > - jg .width4
> > - movu [r2 + r5 * 2 + FENC_STRIDE * 0 - 16], m0
> > - movu [r2 + r5 * 2 + FENC_STRIDE * 2 - 16], m1
> > - movu [r2 + r5 * 2 + FENC_STRIDE * 4 - 16], m2
> > - movu [r2 + r5 * 2 + FENC_STRIDE * 6 - 16], m3
> > - je .nextH
> > - jmp .loopW
> > -
> > -.width4:
> > - movh [r2 + r5 * 2 + FENC_STRIDE * 0 - 16], m0
> > - movh [r2 + r5 * 2 + FENC_STRIDE * 2 - 16], m1
> > - movh [r2 + r5 * 2 + FENC_STRIDE * 4 - 16], m2
> > - movh [r2 + r5 * 2 + FENC_STRIDE * 6 - 16], m3
> > -
> > -.nextH:
> > - lea r0, [r0 + r1 * 4]
> > - add r2, FENC_STRIDE * 8
> > -
> > - sub r4d, 4
> > - jnz .loopH
> > - RET
> > -%endmacro
> > -PIXEL_WH_4xN 4, 4
> > -PIXEL_WH_4xN 4, 8
> > -PIXEL_WH_4xN 4, 16
> > -
> >
> -;-----------------------------------------------------------------------------
> > -; void pixelToShort(pixel *src, intptr_t srcStride, int16_t *dst, int
> width, int height)
> >
> -;-----------------------------------------------------------------------------
> > -%macro PIXEL_WH_8xN 2
> > -INIT_XMM ssse3
> > -cglobal pixelToShort_%1x%2, 3, 7, 6
> > -
> > - ; load width and height
> > - mov r3d, %1
> > - mov r4d, %2
> > -
> > - ; load constant
> > - mova m4, [pb_128]
> > - mova m5, [tab_c_64_n64]
> > -
> > -.loopH
> > - xor r5d, r5d
> > -.loopW
> > - lea r6, [r0 + r5]
> > -
> > - movh m0, [r6]
> > - punpcklbw m0, m4
> > - pmaddubsw m0, m5
> > -
> > - movh m1, [r6 + r1]
> > - punpcklbw m1, m4
> > - pmaddubsw m1, m5
> > -
> > - movh m2, [r6 + r1 * 2]
> > - punpcklbw m2, m4
> > - pmaddubsw m2, m5
> > -
> > - lea r6, [r6 + r1 * 2]
> > - movh m3, [r6 + r1]
> > - punpcklbw m3, m4
> > - pmaddubsw m3, m5
> > -
> > - add r5, 8
> > - cmp r5, r3
> > -
> > - movu [r2 + FENC_STRIDE * 0], m0
> > - movu [r2 + FENC_STRIDE * 2], m1
> > - movu [r2 + FENC_STRIDE * 4], m2
> > - movu [r2 + FENC_STRIDE * 6], m3
> > -
> > - je .nextH
> > - jmp .loopW
> > -
> > -
> > -.nextH:
> > - lea r0, [r0 + r1 * 4]
> > - add r2, FENC_STRIDE * 8
> > -
> > - sub r4d, 4
> > - jnz .loopH
> > - RET
> > -%endmacro
> > -PIXEL_WH_8xN 8, 8
> > -PIXEL_WH_8xN 8, 4
> > -PIXEL_WH_8xN 8, 16
> > -PIXEL_WH_8xN 8, 32
> > -
> > -
> >
> -;-----------------------------------------------------------------------------
> > -; void pixelToShort(pixel *src, intptr_t srcStride, int16_t *dst, int
> width, int height)
> >
> -;-----------------------------------------------------------------------------
> > -%macro PIXEL_WH_16xN 2
> > -INIT_XMM ssse3
> > -cglobal pixelToShort_%1x%2, 3, 7, 6
> > -
> > - ; load width and height
> > - mov r3d, %1
> > - mov r4d, %2
> > -
> > - ; load constant
> > - mova m4, [pb_128]
> > - mova m5, [tab_c_64_n64]
> > -
> > -.loopH:
> > - xor r5d, r5d
> > -.loopW:
> > - lea r6, [r0 + r5]
> > -
> > - movh m0, [r6]
> > - punpcklbw m0, m4
> > - pmaddubsw m0, m5
> > -
> > - movh m1, [r6 + r1]
> > - punpcklbw m1, m4
> > - pmaddubsw m1, m5
> > -
> > - movh m2, [r6 + r1 * 2]
> > - punpcklbw m2, m4
> > - pmaddubsw m2, m5
> > -
> > - lea r6, [r6 + r1 * 2]
> > - movh m3, [r6 + r1]
> > - punpcklbw m3, m4
> > - pmaddubsw m3, m5
> > -
> > - add r5, 8
> > - cmp r5, r3
> > -
> > - movu [r2 + r5 * 2 + FENC_STRIDE * 0 - 16], m0
> > - movu [r2 + r5 * 2 + FENC_STRIDE * 2 - 16], m1
> > - movu [r2 + r5 * 2 + FENC_STRIDE * 4 - 16], m2
> > - movu [r2 + r5 * 2 + FENC_STRIDE * 6 - 16], m3
> > - je .nextH
> > - jmp .loopW
> > -
> > -
> > -.nextH:
> > - lea r0, [r0 + r1 * 4]
> > - add r2, FENC_STRIDE * 8
> > -
> > - sub r4d, 4
> > - jnz .loopH
> > -
> > - RET
> > -%endmacro
> > -PIXEL_WH_16xN 16, 16
> > -PIXEL_WH_16xN 16, 8
> > -PIXEL_WH_16xN 16, 4
> > -PIXEL_WH_16xN 16, 12
> > -PIXEL_WH_16xN 16, 32
> > -PIXEL_WH_16xN 16, 64
> > -
> >
> -;-----------------------------------------------------------------------------
> > -; void pixelToShort(pixel *src, intptr_t srcStride, int16_t *dst, int
> width, int height)
> >
> -;-----------------------------------------------------------------------------
> > -%macro PIXEL_WH_32xN 2
> > -INIT_XMM ssse3
> > -cglobal pixelToShort_%1x%2, 3, 7, 6
> > -
> > - ; load width and height
> > - mov r3d, %1
> > - mov r4d, %2
> > -
> > - ; load constant
> > - mova m4, [pb_128]
> > - mova m5, [tab_c_64_n64]
> > -
> > -.loopH:
> > - xor r5d, r5d
> > -.loopW:
> > - lea r6, [r0 + r5]
> > -
> > - movh m0, [r6]
> > - punpcklbw m0, m4
> > - pmaddubsw m0, m5
> > -
> > - movh m1, [r6 + r1]
> > - punpcklbw m1, m4
> > - pmaddubsw m1, m5
> > -
> > - movh m2, [r6 + r1 * 2]
> > - punpcklbw m2, m4
> > - pmaddubsw m2, m5
> > -
> > - lea r6, [r6 + r1 * 2]
> > - movh m3, [r6 + r1]
> > - punpcklbw m3, m4
> > - pmaddubsw m3, m5
> > -
> > - add r5, 8
> > - cmp r5, r3
> > -
> > - movu [r2 + r5 * 2 + FENC_STRIDE * 0 - 16], m0
> > - movu [r2 + r5 * 2 + FENC_STRIDE * 2 - 16], m1
> > - movu [r2 + r5 * 2 + FENC_STRIDE * 4 - 16], m2
> > - movu [r2 + r5 * 2 + FENC_STRIDE * 6 - 16], m3
> > - je .nextH
> > - jmp .loopW
> > -
> > -
> > -.nextH:
> > - lea r0, [r0 + r1 * 4]
> > - add r2, FENC_STRIDE * 8
> > -
> > - sub r4d, 4
> > - jnz .loopH
> > -
> > - RET
> > -%endmacro
> > -PIXEL_WH_32xN 32, 32
> > -PIXEL_WH_32xN 32, 8
> > -PIXEL_WH_32xN 32, 16
> > -PIXEL_WH_32xN 32, 24
> > -PIXEL_WH_32xN 32, 64
> > -
> >
> -;-----------------------------------------------------------------------------
> > -; void pixelToShort(pixel *src, intptr_t srcStride, int16_t *dst, int
> width, int height)
> >
> -;-----------------------------------------------------------------------------
> > -%macro PIXEL_WH_64xN 2
> > -INIT_XMM ssse3
> > -cglobal pixelToShort_%1x%2, 3, 7, 6
> > -
> > - ; load width and height
> > - mov r3d, %1
> > - mov r4d, %2
> > -
> > - ; load constant
> > - mova m4, [pb_128]
> > - mova m5, [tab_c_64_n64]
> > -
> > -.loopH:
> > - xor r5d, r5d
> > -.loopW:
> > - lea r6, [r0 + r5]
> > -
> > - movh m0, [r6]
> > - punpcklbw m0, m4
> > - pmaddubsw m0, m5
> > -
> > - movh m1, [r6 + r1]
> > - punpcklbw m1, m4
> > - pmaddubsw m1, m5
> > -
> > - movh m2, [r6 + r1 * 2]
> > - punpcklbw m2, m4
> > - pmaddubsw m2, m5
> > -
> > - lea r6, [r6 + r1 * 2]
> > - movh m3, [r6 + r1]
> > - punpcklbw m3, m4
> > - pmaddubsw m3, m5
> > -
> > - add r5, 8
> > - cmp r5, r3
> > -
> > - movu [r2 + r5 * 2 + FENC_STRIDE * 0 - 16], m0
> > - movu [r2 + r5 * 2 + FENC_STRIDE * 2 - 16], m1
> > - movu [r2 + r5 * 2 + FENC_STRIDE * 4 - 16], m2
> > - movu [r2 + r5 * 2 + FENC_STRIDE * 6 - 16], m3
> > - je .nextH
> > - jmp .loopW
> > -
> > -
> > -.nextH:
> > - lea r0, [r0 + r1 * 4]
> > - add r2, FENC_STRIDE * 8
> > -
> > - sub r4d, 4
> > - jnz .loopH
> > -
> > - RET
> > -%endmacro
> > -PIXEL_WH_64xN 64, 64
> > -PIXEL_WH_64xN 64, 16
> > -PIXEL_WH_64xN 64, 32
> > -PIXEL_WH_64xN 64, 48
> >
> > %macro PROCESS_LUMA_W4_4R 0
> > movd m0, [r0]
> > diff -r 22a312799bb0 -r 8797b42373de source/common/x86/ipfilter8.h
> > --- a/source/common/x86/ipfilter8.h Fri Mar 27 22:59:30 2015 -0500
> > +++ b/source/common/x86/ipfilter8.h Wed Mar 25 19:39:35 2015 +0530
> > @@ -289,8 +289,6 @@
> > SETUP_CHROMA_420_HORIZ_FUNC_DEF(64, 16, cpu); \
> > SETUP_CHROMA_420_HORIZ_FUNC_DEF(16, 64, cpu)
> >
> > -void x265_chroma_p2s_sse2(const pixel* src, intptr_t srcStride,
> int16_t* dst, int width, int height);
> > -void x265_luma_p2s_sse2(const pixel* src, intptr_t srcStride, int16_t*
> dst, int width, int height);
> >
> > CHROMA_420_VERT_FILTERS(_sse2);
> > CHROMA_420_HORIZ_FILTERS(_sse4);
> > @@ -624,28 +622,6 @@
> > LUMA_SP_FILTERS(_avx2);
> > LUMA_SS_FILTERS(_avx2);
> > void x265_interp_8tap_hv_pp_8x8_sse4(const pixel* src, intptr_t
> srcStride, pixel* dst, intptr_t dstStride, int idxX, int idxY);
> > -void x265_pixelToShort_4x4_ssse3(const pixel* src, intptr_t srcStride,
> int16_t* dst);
> > -void x265_pixelToShort_4x8_ssse3(const pixel* src, intptr_t srcStride,
> int16_t* dst);
> > -void x265_pixelToShort_4x16_ssse3(const pixel* src, intptr_t srcStride,
> int16_t* dst);
> > -void x265_pixelToShort_8x4_ssse3(const pixel* src, intptr_t srcStride,
> int16_t* dst);
> > -void x265_pixelToShort_8x8_ssse3(const pixel* src, intptr_t srcStride,
> int16_t* dst);
> > -void x265_pixelToShort_8x16_ssse3(const pixel* src, intptr_t srcStride,
> int16_t* dst);
> > -void x265_pixelToShort_8x32_ssse3(const pixel* src, intptr_t srcStride,
> int16_t* dst);
> > -void x265_pixelToShort_16x4_ssse3(const pixel* src, intptr_t srcStride,
> int16_t* dst);
> > -void x265_pixelToShort_16x8_ssse3(const pixel* src, intptr_t srcStride,
> int16_t* dst);
> > -void x265_pixelToShort_16x12_ssse3(const pixel* src, intptr_t
> srcStride, int16_t* dst);
> > -void x265_pixelToShort_16x16_ssse3(const pixel* src, intptr_t
> srcStride, int16_t* dst);
> > -void x265_pixelToShort_16x32_ssse3(const pixel* src, intptr_t
> srcStride, int16_t* dst);
> > -void x265_pixelToShort_16x64_ssse3(const pixel* src, intptr_t
> srcStride, int16_t* dst);
> > -void x265_pixelToShort_32x8_ssse3(const pixel* src, intptr_t srcStride,
> int16_t* dst);
> > -void x265_pixelToShort_32x16_ssse3(const pixel* src, intptr_t
> srcStride, int16_t* dst);
> > -void x265_pixelToShort_32x24_ssse3(const pixel* src, intptr_t
> srcStride, int16_t* dst);
> > -void x265_pixelToShort_32x32_ssse3(const pixel* src, intptr_t
> srcStride, int16_t* dst);
> > -void x265_pixelToShort_32x64_ssse3(const pixel* src, intptr_t
> srcStride, int16_t* dst);
> > -void x265_pixelToShort_64x16_ssse3(const pixel* src, intptr_t
> srcStride, int16_t* dst);
> > -void x265_pixelToShort_64x32_ssse3(const pixel* src, intptr_t
> srcStride, int16_t* dst);
> > -void x265_pixelToShort_64x48_ssse3(const pixel* src, intptr_t
> srcStride, int16_t* dst);
> > -void x265_pixelToShort_64x64_ssse3(const pixel* src, intptr_t
> srcStride, int16_t* dst);
> > #undef LUMA_FILTERS
> > #undef LUMA_SP_FILTERS
> > #undef LUMA_SS_FILTERS
> > diff -r 22a312799bb0 -r 8797b42373de source/test/ipfilterharness.cpp
> > --- a/source/test/ipfilterharness.cpp Fri Mar 27 22:59:30 2015 -0500
> > +++ b/source/test/ipfilterharness.cpp Wed Mar 25 19:39:35 2015 +0530
> > @@ -61,55 +61,6 @@
> > }
> > }
> >
> > -bool IPFilterHarness::check_IPFilter_primitive(filter_p2s_wxh_t ref,
> filter_p2s_wxh_t opt, int isChroma, int csp)
> > -{
> > - intptr_t rand_srcStride;
> > - int min_size = isChroma ? 2 : 4;
> > - int max_size = isChroma ? (MAX_CU_SIZE >> 1) : MAX_CU_SIZE;
> > -
> > - if (isChroma && (csp == X265_CSP_I444))
> > - {
> > - min_size = 4;
> > - max_size = MAX_CU_SIZE;
> > - }
> > -
> > - for (int i = 0; i < ITERS; i++)
> > - {
> > - int index = i % TEST_CASES;
> > - int rand_height = (int16_t)rand() % 100;
> > - int rand_width = (int16_t)rand() % 100;
> > -
> > - rand_srcStride = rand_width + rand() % 100;
> > - if (rand_srcStride < rand_width)
> > - rand_srcStride = rand_width;
> > -
> > - rand_width &= ~(min_size - 1);
> > - rand_width = x265_clip3(min_size, max_size, rand_width);
> > -
> > - rand_height &= ~(min_size - 1);
> > - rand_height = x265_clip3(min_size, max_size, rand_height);
> > -
> > - ref(pixel_test_buff[index],
> > - rand_srcStride,
> > - IPF_C_output_s,
> > - rand_width,
> > - rand_height);
> > -
> > - checked(opt, pixel_test_buff[index],
> > - rand_srcStride,
> > - IPF_vec_output_s,
> > - rand_width,
> > - rand_height);
> > -
> > - if (memcmp(IPF_vec_output_s, IPF_C_output_s, TEST_BUF_SIZE *
> sizeof(int16_t)))
> > - return false;
> > -
> > - reportfail();
> > - }
> > -
> > - return true;
> > -}
> > -
> > bool IPFilterHarness::check_IPFilterChroma_primitive(filter_pp_t ref,
> filter_pp_t opt)
> > {
> > intptr_t rand_srcStride, rand_dstStride;
> > @@ -518,12 +469,13 @@
> > {
> > intptr_t rand_srcStride = rand() % 100;
> > int index = i % TEST_CASES;
> > + int16_t dstStride = MAX_CU_SIZE;
> >
> > - ref(pixel_test_buff[index] + i, rand_srcStride, IPF_C_output_s);
> > + ref(pixel_test_buff[index] + i, rand_srcStride, IPF_C_output_s,
> dstStride);
> >
> > - checked(opt, pixel_test_buff[index] + i, rand_srcStride,
> IPF_vec_output_s);
> > + checked(opt, pixel_test_buff[index] + i, rand_srcStride,
> IPF_vec_output_s, dstStride);
> >
> > - if (memcmp(IPF_vec_output_s, IPF_C_output_s, TEST_BUF_SIZE *
> sizeof(pixel)))
> > + if (memcmp(IPF_vec_output_s, IPF_C_output_s, TEST_BUF_SIZE *
> sizeof(int16_t)))
> > return false;
> >
> > reportfail();
> > @@ -532,18 +484,24 @@
> > return true;
> > }
> >
> > -bool IPFilterHarness::check_IPFilterChromaP2S_primitive(filter_p2s_t
> ref, filter_p2s_t opt)
> > +bool IPFilterHarness::check_IPFilterChromaP2S_primitive(filter_p2s_t
> ref, filter_p2s_t opt, int csp)
> > {
> > for (int i = 0; i < ITERS; i++)
> > {
> > intptr_t rand_srcStride = rand() % 100;
> > int index = i % TEST_CASES;
> > + int16_t dstStride;
> >
> > - ref(pixel_test_buff[index] + i, rand_srcStride, IPF_C_output_s);
> > + if (csp == X265_CSP_I444)
> > + dstStride = MAX_CU_SIZE;
> > + else
> > + dstStride = MAX_CU_SIZE >> (int16_t)(csp != X265_CSP_I444);
> >
> > - checked(opt, pixel_test_buff[index] + i, rand_srcStride,
> IPF_vec_output_s);
> > + ref(pixel_test_buff[index] + i, rand_srcStride, IPF_C_output_s,
> dstStride);
> >
> > - if (memcmp(IPF_vec_output_s, IPF_C_output_s, TEST_BUF_SIZE *
> sizeof(pixel)))
> > + checked(opt, pixel_test_buff[index] + i, rand_srcStride,
> IPF_vec_output_s, dstStride);
> > +
> > + if (memcmp(IPF_vec_output_s, IPF_C_output_s, TEST_BUF_SIZE *
> sizeof(int16_t)))
> > return false;
> >
> > reportfail();
> > @@ -554,15 +512,6 @@
> >
> > bool IPFilterHarness::testCorrectness(const EncoderPrimitives& ref,
> const EncoderPrimitives& opt)
> > {
> > - if (opt.luma_p2s)
> > - {
> > - // last parameter does not matter in case of luma
> > - if (!check_IPFilter_primitive(ref.luma_p2s, opt.luma_p2s, 0, 1))
> > - {
> > - printf("luma_p2s failed\n");
> > - return false;
> > - }
> > - }
> >
> > for (int value = 0; value < NUM_PU_SIZES; value++)
> > {
> > @@ -622,11 +571,11 @@
> > return false;
> > }
> > }
> > - if (opt.pu[value].filter_p2s)
> > + if (opt.pu[value].luma_p2s)
> > {
> > - if
> (!check_IPFilterLumaP2S_primitive(ref.pu[value].filter_p2s,
> opt.pu[value].filter_p2s))
> > + if
> (!check_IPFilterLumaP2S_primitive(ref.pu[value].luma_p2s,
> opt.pu[value].luma_p2s))
> > {
> > - printf("filter_p2s[%s]", lumaPartStr[value]);
> > + printf("luma_p2s[%s]", lumaPartStr[value]);
> > return false;
> > }
> > }
> > @@ -634,14 +583,6 @@
> >
> > for (int csp = X265_CSP_I420; csp < X265_CSP_COUNT; csp++)
> > {
> > - if (opt.chroma[csp].p2s)
> > - {
> > - if (!check_IPFilter_primitive(ref.chroma[csp].p2s,
> opt.chroma[csp].p2s, 1, csp))
> > - {
> > - printf("chroma_p2s[%s]", x265_source_csp_names[csp]);
> > - return false;
> > - }
> > - }
> > for (int value = 0; value < NUM_PU_SIZES; value++)
> > {
> > if (opt.chroma[csp].pu[value].filter_hpp)
> > @@ -694,7 +635,7 @@
> > }
> > if (opt.chroma[csp].pu[value].chroma_p2s)
> > {
> > - if
> (!check_IPFilterChromaP2S_primitive(ref.chroma[csp].pu[value].chroma_p2s,
> opt.chroma[csp].pu[value].chroma_p2s))
> > + if
> (!check_IPFilterChromaP2S_primitive(ref.chroma[csp].pu[value].chroma_p2s,
> opt.chroma[csp].pu[value].chroma_p2s, csp))
> > {
> > printf("chroma_p2s[%s]", chromaPartStr[csp][value]);
> > return false;
> > @@ -708,19 +649,10 @@
> >
> > void IPFilterHarness::measureSpeed(const EncoderPrimitives& ref, const
> EncoderPrimitives& opt)
> > {
> > - int height = 64;
> > - int width = 64;
> > int16_t srcStride = 96;
> > int16_t dstStride = 96;
> > int maxVerticalfilterHalfDistance = 3;
> >
> > - if (opt.luma_p2s)
> > - {
> > - printf("luma_p2s\t");
> > - REPORT_SPEEDUP(opt.luma_p2s, ref.luma_p2s,
> > - pixel_buff, srcStride, IPF_vec_output_s, width,
> height);
> > - }
> > -
> > for (int value = 0; value < NUM_PU_SIZES; value++)
> > {
> > if (opt.pu[value].luma_hpp)
> > @@ -777,23 +709,18 @@
> > pixel_buff + 3 * srcStride, srcStride,
> IPF_vec_output_p, srcStride, 1, 3);
> > }
> >
> > - if (opt.pu[value].filter_p2s)
> > + if (opt.pu[value].luma_p2s)
> > {
> > - printf("filter_p2s [%s]\t", lumaPartStr[value]);
> > - REPORT_SPEEDUP(opt.pu[value].filter_p2s,
> ref.pu[value].filter_p2s,
> > - pixel_buff, srcStride, IPF_vec_output_s);
> > + printf("luma_p2s[%s]\t", lumaPartStr[value]);
> > + REPORT_SPEEDUP(opt.pu[value].luma_p2s,
> ref.pu[value].luma_p2s,
> > + pixel_buff, srcStride,
> > + IPF_vec_output_s, dstStride);
> > }
> > }
> >
> > for (int csp = X265_CSP_I420; csp < X265_CSP_COUNT; csp++)
> > {
> > printf("= Color Space %s =\n", x265_source_csp_names[csp]);
> > - if (opt.chroma[csp].p2s)
> > - {
> > - printf("chroma_p2s\t");
> > - REPORT_SPEEDUP(opt.chroma[csp].p2s, ref.chroma[csp].p2s,
> > - pixel_buff, srcStride, IPF_vec_output_s,
> width, height);
> > - }
> > for (int value = 0; value < NUM_PU_SIZES; value++)
> > {
> > if (opt.chroma[csp].pu[value].filter_hpp)
> > @@ -836,13 +763,12 @@
> > short_buff +
> maxVerticalfilterHalfDistance * srcStride, srcStride,
> > IPF_vec_output_s, dstStride, 1);
> > }
> > -
> > if (opt.chroma[csp].pu[value].chroma_p2s)
> > {
> > printf("chroma_p2s[%s]\t", chromaPartStr[csp][value]);
> > REPORT_SPEEDUP(opt.chroma[csp].pu[value].chroma_p2s,
> ref.chroma[csp].pu[value].chroma_p2s,
> > pixel_buff, srcStride,
> > - IPF_vec_output_s);
> > + IPF_vec_output_s, dstStride);
> > }
> > }
> > }
> > diff -r 22a312799bb0 -r 8797b42373de source/test/ipfilterharness.h
> > --- a/source/test/ipfilterharness.h Fri Mar 27 22:59:30 2015 -0500
> > +++ b/source/test/ipfilterharness.h Wed Mar 25 19:39:35 2015 +0530
> > @@ -50,7 +50,6 @@
> > pixel pixel_test_buff[TEST_CASES][TEST_BUF_SIZE];
> > int16_t short_test_buff[TEST_CASES][TEST_BUF_SIZE];
> >
> > - bool check_IPFilter_primitive(filter_p2s_wxh_t ref,
> filter_p2s_wxh_t opt, int isChroma, int csp);
> > bool check_IPFilterChroma_primitive(filter_pp_t ref, filter_pp_t
> opt);
> > bool check_IPFilterChroma_ps_primitive(filter_ps_t ref, filter_ps_t
> opt);
> > bool check_IPFilterChroma_hps_primitive(filter_hps_t ref,
> filter_hps_t opt);
> > @@ -63,7 +62,7 @@
> > bool check_IPFilterLuma_ss_primitive(filter_ss_t ref, filter_ss_t
> opt);
> > bool check_IPFilterLumaHV_primitive(filter_hv_pp_t ref,
> filter_hv_pp_t opt);
> > bool check_IPFilterLumaP2S_primitive(filter_p2s_t ref, filter_p2s_t
> opt);
> > - bool check_IPFilterChromaP2S_primitive(filter_p2s_t ref,
> filter_p2s_t opt);
> > + bool check_IPFilterChromaP2S_primitive(filter_p2s_t ref,
> filter_p2s_t opt, int csp);
> >
> > public:
> >
> >
> > _______________________________________________
> > x265-devel mailing list
> > x265-devel at videolan.org
> > https://mailman.videolan.org/listinfo/x265-devel
> >
>
> --
> Steve Borho
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150331/037e87be/attachment-0001.html>
More information about the x265-devel mailing list