[x265] [PATCH] primitives: rename luma_p2s to convert_p2s and move into PU
Steve Borho
steve at borho.org
Thu Apr 2 07:50:33 CEST 2015
On 04/02, chen wrote:
>
>
>
> At 2015-04-01 23:31:39,"Steve Borho" <steve at borho.org> wrote:
> >On 04/01, rajesh at multicorewareinc.com wrote:
> >> # HG changeset patch
> >> # User Rajesh Paulraj<rajesh at multicorewareinc.com>
> >> # Date 1427889433 -19800
> >> # Wed Apr 01 17:27:13 2015 +0530
> >> # Node ID c26756d8ced6fe69e58b2bb77419b5f975a54de9
> >> # Parent ac85c775620f1dcb0df056874633cbf916098bd2
> >> primivites: rename luma_p2s to convert_p2s and move into PU
> >>
> >> diff -r ac85c775620f -r c26756d8ced6 source/common/ipfilter.cpp
> >> --- a/source/common/ipfilter.cpp Tue Mar 31 20:04:28 2015 -0500
> >> +++ b/source/common/ipfilter.cpp Wed Apr 01 17:27:13 2015 +0530
> >> @@ -34,27 +34,8 @@
> >> #endif
> >>
> >> namespace {
> >> -template<int dstStride, int width, int height>
> >> -void pixelToShort_c(const pixel* src, intptr_t srcStride, int16_t* dst)
> >> -{
> >> - int shift = IF_INTERNAL_PREC - X265_DEPTH;
> >> - int row, col;
> >> -
> >> - for (row = 0; row < height; row++)
> >> - {
> >> - for (col = 0; col < width; col++)
> >> - {
> >> - int16_t val = src[col] << shift;
> >> - dst[col] = val - (int16_t)IF_INTERNAL_OFFS;
> >> - }
> >> -
> >> - src += srcStride;
> >> - dst += dstStride;
> >> - }
> >> -}
> >> -
> >> -template<int dstStride>
> >> -void filterPixelToShort_c(const pixel* src, intptr_t srcStride, int16_t* dst, int width, int height)
> >> +template<int width, int height>
> >> +void filterPixelToShort_c(const pixel* src, intptr_t srcStride, int16_t* dst, int16_t dstStride)
> >> {
> >> int shift = IF_INTERNAL_PREC - X265_DEPTH;
> >> int row, col;
> >> @@ -398,7 +379,7 @@
> >> p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_vps = interp_vert_ps_c<4, W, H>; \
> >> p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_vsp = interp_vert_sp_c<4, W, H>; \
> >> p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_vss = interp_vert_ss_c<4, W, H>; \
> >> - p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].chroma_p2s = pixelToShort_c<MAX_CU_SIZE / 2, W, H>;
> >> + p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].chroma_p2s = filterPixelToShort_c<W, H>;
> >>
> >> #define CHROMA_422(W, H) \
> >> p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_hpp = interp_horiz_pp_c<4, W, H>; \
> >> @@ -407,7 +388,7 @@
> >> p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_vps = interp_vert_ps_c<4, W, H>; \
> >> p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_vsp = interp_vert_sp_c<4, W, H>; \
> >> p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_vss = interp_vert_ss_c<4, W, H>; \
> >> - p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].chroma_p2s = pixelToShort_c<MAX_CU_SIZE / 2, W, H>;
> >> + p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].chroma_p2s = filterPixelToShort_c<W, H>;
> >>
> >> #define CHROMA_444(W, H) \
> >> p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_hpp = interp_horiz_pp_c<4, W, H>; \
> >> @@ -416,7 +397,7 @@
> >> p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vps = interp_vert_ps_c<4, W, H>; \
> >> p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vsp = interp_vert_sp_c<4, W, H>; \
> >> p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vss = interp_vert_ss_c<4, W, H>; \
> >> - p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].chroma_p2s = pixelToShort_c<MAX_CU_SIZE, W, H>;
> >> + p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].chroma_p2s = filterPixelToShort_c<W, H>;
> >>
> >> #define LUMA(W, H) \
> >> p.pu[LUMA_ ## W ## x ## H].luma_hpp = interp_horiz_pp_c<8, W, H>; \
> >> @@ -426,7 +407,7 @@
> >> p.pu[LUMA_ ## W ## x ## H].luma_vsp = interp_vert_sp_c<8, W, H>; \
> >> p.pu[LUMA_ ## W ## x ## H].luma_vss = interp_vert_ss_c<8, W, H>; \
> >> p.pu[LUMA_ ## W ## x ## H].luma_hvpp = interp_hv_pp_c<8, W, H>; \
> >> - p.pu[LUMA_ ## W ## x ## H].filter_p2s = pixelToShort_c<MAX_CU_SIZE, W, H>
> >> + p.pu[LUMA_ ## W ## x ## H].convert_p2s = filterPixelToShort_c<W, H>;
> >>
> >> void setupFilterPrimitives_c(EncoderPrimitives& p)
> >> {
> >> @@ -530,11 +511,82 @@
> >> CHROMA_444(48, 64);
> >> CHROMA_444(64, 16);
> >> CHROMA_444(16, 64);
> >> - p.luma_p2s = filterPixelToShort_c<MAX_CU_SIZE>;
> >>
> >> - p.chroma[X265_CSP_I444].p2s = filterPixelToShort_c<MAX_CU_SIZE>;
> >> - p.chroma[X265_CSP_I420].p2s = filterPixelToShort_c<MAX_CU_SIZE / 2>;
> >> - p.chroma[X265_CSP_I422].p2s = filterPixelToShort_c<MAX_CU_SIZE / 2>;
> >> + p.pu[LUMA_4x4].convert_p2s = filterPixelToShort_c<4, 4>;
> >> + p.pu[LUMA_4x8].convert_p2s = filterPixelToShort_c<4, 8>;
> >> + p.pu[LUMA_4x16].convert_p2s = filterPixelToShort_c<4, 16>;
> >> + p.pu[LUMA_8x4].convert_p2s = filterPixelToShort_c<8, 4>;
> >> + p.pu[LUMA_8x8].convert_p2s = filterPixelToShort_c<8, 8>;
> >> + p.pu[LUMA_8x16].convert_p2s = filterPixelToShort_c<8, 16>;
> >> + p.pu[LUMA_8x32].convert_p2s = filterPixelToShort_c<8, 32>;
> >> + p.pu[LUMA_16x4].convert_p2s = filterPixelToShort_c<16, 4>;
> >> + p.pu[LUMA_16x8].convert_p2s = filterPixelToShort_c<16, 8>;
> >> + p.pu[LUMA_16x12].convert_p2s = filterPixelToShort_c<16, 12>;
> >> + p.pu[LUMA_16x16].convert_p2s = filterPixelToShort_c<16, 16>;
> >> + p.pu[LUMA_16x32].convert_p2s = filterPixelToShort_c<16, 32>;
> >> + p.pu[LUMA_16x64].convert_p2s = filterPixelToShort_c<16, 64>;
> >> + p.pu[LUMA_32x8].convert_p2s = filterPixelToShort_c<32, 8>;
> >> + p.pu[LUMA_32x16].convert_p2s = filterPixelToShort_c<32, 16>;
> >> + p.pu[LUMA_32x24].convert_p2s = filterPixelToShort_c<32, 24>;
> >> + p.pu[LUMA_32x32].convert_p2s = filterPixelToShort_c<32, 32>;
> >> + p.pu[LUMA_32x64].convert_p2s = filterPixelToShort_c<32, 64>;
> >> + p.pu[LUMA_64x16].convert_p2s = filterPixelToShort_c<64, 16>;
> >> + p.pu[LUMA_64x32].convert_p2s = filterPixelToShort_c<64, 32>;
> >> + p.pu[LUMA_64x48].convert_p2s = filterPixelToShort_c<64, 48>;
> >> + p.pu[LUMA_64x64].convert_p2s = filterPixelToShort_c<64, 64>;
> >> + p.pu[LUMA_12x16].convert_p2s = filterPixelToShort_c<12, 16>;
> >> + p.pu[LUMA_24x32].convert_p2s = filterPixelToShort_c<24, 32>;
> >> + p.pu[LUMA_48x64].convert_p2s = filterPixelToShort_c<48, 64>;
> >
> >aren't there macros for instantiating templates per PU?
> >
> >> + p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].chroma_p2s = filterPixelToShort_c<4, 4>;
> >> + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].chroma_p2s = filterPixelToShort_c<8, 8>;
> >> + p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].chroma_p2s = filterPixelToShort_c<16, 16>;
> >> + p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].chroma_p2s = filterPixelToShort_c<32, 32>;
> >> + p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].chroma_p2s = filterPixelToShort_c<4, 2>;
> >> + p.chroma[X265_CSP_I420].pu[CHROMA_420_2x4].chroma_p2s = filterPixelToShort_c<2, 4>;
> >> + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].chroma_p2s = filterPixelToShort_c<8, 4>;
> >> + p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].chroma_p2s = filterPixelToShort_c<4, 8>;
> >> + p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].chroma_p2s = filterPixelToShort_c<16, 8>;
> >> + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].chroma_p2s = filterPixelToShort_c<8, 16>;
> >> + p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].chroma_p2s = filterPixelToShort_c<32, 16>;
> >> + p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].chroma_p2s = filterPixelToShort_c<16, 32>;
> >> + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x6].chroma_p2s = filterPixelToShort_c<8, 6>;
> >> + p.chroma[X265_CSP_I420].pu[CHROMA_420_6x8].chroma_p2s = filterPixelToShort_c<6, 8>;
> >> + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x2].chroma_p2s = filterPixelToShort_c<8, 2>;
> >> + p.chroma[X265_CSP_I420].pu[CHROMA_420_2x8].chroma_p2s = filterPixelToShort_c<2, 8>;
> >> + p.chroma[X265_CSP_I420].pu[CHROMA_420_16x12].chroma_p2s = filterPixelToShort_c<16, 12>;
> >> + p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].chroma_p2s = filterPixelToShort_c<12, 16>;
> >> + p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].chroma_p2s = filterPixelToShort_c<16, 4>;
> >> + p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].chroma_p2s = filterPixelToShort_c<4, 16>;
> >> + p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].chroma_p2s = filterPixelToShort_c<32, 24>;
> >> + p.chroma[X265_CSP_I420].pu[CHROMA_420_24x32].chroma_p2s = filterPixelToShort_c<24, 32>;
> >> + p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].chroma_p2s = filterPixelToShort_c<32, 8>;
> >> + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].chroma_p2s = filterPixelToShort_c<8, 32>;
> >> +
> >> + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].chroma_p2s = filterPixelToShort_c<4, 8>;
> >> + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].chroma_p2s = filterPixelToShort_c<8, 16>;
> >> + p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].chroma_p2s = filterPixelToShort_c<16, 32>;
> >> + p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].chroma_p2s = filterPixelToShort_c<32, 64>;
> >> + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x4].chroma_p2s = filterPixelToShort_c<4, 4>;
> >> + p.chroma[X265_CSP_I422].pu[CHROMA_422_2x8].chroma_p2s = filterPixelToShort_c<2, 8>;
> >> + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].chroma_p2s = filterPixelToShort_c<8, 8>;
> >> + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].chroma_p2s = filterPixelToShort_c<4, 16>;
> >> + p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].chroma_p2s = filterPixelToShort_c<16, 16>;
> >> + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].chroma_p2s = filterPixelToShort_c<8, 32>;
> >> + p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].chroma_p2s = filterPixelToShort_c<32, 32>;
> >> + p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].chroma_p2s = filterPixelToShort_c<16, 64>;
> >> + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].chroma_p2s = filterPixelToShort_c<8, 12>;
> >> + p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].chroma_p2s = filterPixelToShort_c<6, 16>;
> >> + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].chroma_p2s = filterPixelToShort_c<8, 4>;
> >> + p.chroma[X265_CSP_I422].pu[CHROMA_422_2x16].chroma_p2s = filterPixelToShort_c<2, 16>;
> >> + p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].chroma_p2s = filterPixelToShort_c<16, 24>;
> >> + p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].chroma_p2s = filterPixelToShort_c<12, 32>;
> >> + p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].chroma_p2s = filterPixelToShort_c<16, 8>;
> >> + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].chroma_p2s = filterPixelToShort_c<4, 32>;
> >> + p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].chroma_p2s = filterPixelToShort_c<32, 48>;
> >> + p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].chroma_p2s = filterPixelToShort_c<24, 64>;
> >> + p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].chroma_p2s = filterPixelToShort_c<32, 16>;
> >> + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].chroma_p2s = filterPixelToShort_c<8, 64>;
> >>
> >> p.extendRowBorder = extendCURowColBorder;
> >> }
> >> diff -r ac85c775620f -r c26756d8ced6 source/common/predict.cpp
> >> --- a/source/common/predict.cpp Tue Mar 31 20:04:28 2015 -0500
> >> +++ b/source/common/predict.cpp Wed Apr 01 17:27:13 2015 +0530
> >> @@ -273,7 +273,8 @@
> >> void Predict::predInterLumaShort(const PredictionUnit& pu, ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const
> >> {
> >> int16_t* dst = dstSYuv.getLumaAddr(pu.puAbsPartIdx);
> >> - int dstStride = dstSYuv.m_size;
> >> + /* get dstStride from predInterLumaShort */
> >> + int16_t dstStride = MAX_CU_SIZE;
> >
> >No, this is not right. The stride of the destination buffer is its
> >m_size. I'm not reviewing any more of this.
> >
>
> There is an "X265_CHECK(dstStride == MAX_CU_SIZE, "stride expected to be max cu size\n");",
> so I suggested that he replace dstStride with MAX_CU_SIZE, but he forgot to replace the one in the X265_CHECK with dstSYuv.m_size.
The output stride has to be the stride of the YUV buffer. I don't know
how it ever worked before.
--
Steve Borho
More information about the x265-devel
mailing list