[x265] [PATCH] primitive function for luma and chroma for loops in addAvg()
Steve Borho
steve at borho.org
Sat Nov 16 06:48:02 CET 2013
On Fri, Nov 15, 2013 at 12:13 AM, <dnyaneshwar at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Dnyaneshwar Gorade <dnyaneshwar at multicorewareinc.com>
> # Date 1384495519 -19800
> # Fri Nov 15 11:35:19 2013 +0530
> # Node ID 3c396b064d578b480302e70f52dcc2bf4380f74b
> # Parent c4ca80d19105ccf1ba2ec14dd65915f2820a660d
> primitive function for luma and chroma for loops in addAvg()
>
> diff -r c4ca80d19105 -r 3c396b064d57 source/Lib/TLibCommon/TComYuv.cpp
> --- a/source/Lib/TLibCommon/TComYuv.cpp Tue Nov 12 19:10:23 2013 +0530
> +++ b/source/Lib/TLibCommon/TComYuv.cpp Fri Nov 15 11:35:19 2013 +0530
> @@ -590,7 +590,6 @@
>
> void TComYuv::addAvg(TShortYUV* srcYuv0, TShortYUV* srcYuv1, uint32_t
> partUnitIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma)
> {
> - int x, y;
> uint32_t src0Stride, src1Stride, dststride;
> int shiftNum, offset;
>
> @@ -606,6 +605,8 @@
> Pel* dstU = getCbAddr(partUnitIdx);
> Pel* dstV = getCrAddr(partUnitIdx);
>
> + int part = partitionFromSizes(width, height);
>
part needs to be passed into these functions.
> +
> if (bLuma)
> {
> src0Stride = srcYuv0->m_width;
> @@ -614,20 +615,7 @@
> shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
> offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
>
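shiftNum and offset are no longer used in this function now that the loops live in the primitive, so their declarations and assignments can be removed.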
>
> - for (y = 0; y < height; y++)
> - {
> - for (x = 0; x < width; x += 4)
> - {
> - dstY[x + 0] = ClipY((srcY0[x + 0] + srcY1[x + 0] +
> offset) >> shiftNum);
> - dstY[x + 1] = ClipY((srcY0[x + 1] + srcY1[x + 1] +
> offset) >> shiftNum);
> - dstY[x + 2] = ClipY((srcY0[x + 2] + srcY1[x + 2] +
> offset) >> shiftNum);
> - dstY[x + 3] = ClipY((srcY0[x + 3] + srcY1[x + 3] +
> offset) >> shiftNum);
> - }
> -
> - srcY0 += src0Stride;
> - srcY1 += src1Stride;
> - dstY += dststride;
> - }
> + primitives.luma_addAvg[part](dstY, dststride, srcY0, src0Stride,
> srcY1, src1Stride);
> }
> if (bChroma)
> {
> @@ -641,26 +629,8 @@
> width >>= m_hChromaShift;
> height >>= m_vChromaShift;
>
> - for (y = height - 1; y >= 0; y--)
> - {
> - for (x = width - 1; x >= 0; )
> - {
> - // note: chroma min width is 2
> - dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) >>
> shiftNum);
> - dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) >>
> shiftNum);
> - x--;
> - dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) >>
> shiftNum);
> - dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) >>
> shiftNum);
> - x--;
> - }
> -
> - srcU0 += src0Stride;
> - srcU1 += src1Stride;
> - srcV0 += src0Stride;
> - srcV1 += src1Stride;
> - dstU += dststride;
> - dstV += dststride;
> - }
> + primitives.chroma_addAvg[part](dstU, dststride, srcU0,
> src0Stride, srcU1, src1Stride);
> + primitives.chroma_addAvg[part](dstV, dststride, srcV0,
> src0Stride, srcV1, src1Stride);
> }
> }
>
> diff -r c4ca80d19105 -r 3c396b064d57 source/common/pixel.cpp
> --- a/source/common/pixel.cpp Tue Nov 12 19:10:23 2013 +0530
> +++ b/source/common/pixel.cpp Fri Nov 15 11:35:19 2013 +0530
> @@ -794,6 +794,27 @@
> a += dstride;
> }
> }
> +
> +template<int bx, int by>
> +void addAvg(pixel* dst, intptr_t dstStride, int16_t* src0, intptr_t
> src0Stride, int16_t* src1, intptr_t src1Stride)
> +{
> + int shiftNum, offset;
> + shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
> + offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
> +
> + for (int y = 0; y < by; y++)
> + {
> + for (int x = 0; x < bx; x += 2)
> + {
> + dst[x + 0] = ClipY((src0[x + 0] + src1[x + 0] + offset) >>
> shiftNum);
> + dst[x + 1] = ClipY((src0[x + 1] + src1[x + 1] + offset) >>
> shiftNum);
> + }
> +
> + src0 += src0Stride;
> + src1 += src1Stride;
> + dst += dstStride;
> + }
> +}
> } // end anonymous namespace
>
> namespace x265 {
> @@ -835,12 +856,14 @@
> p.satd[LUMA_16x64] = satd8<16, 64>;
>
> #define CHROMA(W, H) \
> + p.chroma_addAvg[CHROMA_ ## W ## x ## H] = addAvg<W, H>; \
> p.chroma_copy_pp[CHROMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
> p.chroma_copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
> p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>;\
> p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>;
>
> #define LUMA(W, H) \
> + p.luma_addAvg[LUMA_ ## W ## x ## H] = addAvg<W, H>; \
> p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
> p.luma_copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
> p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>;\
> diff -r c4ca80d19105 -r 3c396b064d57 source/common/primitives.h
> --- a/source/common/primitives.h Tue Nov 12 19:10:23 2013 +0530
> +++ b/source/common/primitives.h Fri Nov 15 11:35:19 2013 +0530
> @@ -208,6 +208,8 @@
>
> typedef void (*pixel_sub_ps_t)(int16_t *dst, intptr_t dstride, pixel
> *src0, pixel *src1, intptr_t sstride0, intptr_t sstride1);
>
> +typedef void (*addAvg_t)(pixel* dst, intptr_t dstStride, int16_t* src0,
> intptr_t src0Stride, int16_t* src1, intptr_t src1Stride);
> +
> /* Define a structure containing function pointers to optimized encoder
> * primitives. Each pointer can reference either an assembly routine,
> * a vectorized primitive, or a C function. */
> @@ -288,6 +290,9 @@
> var_t var[NUM_LUMA_PARTITIONS];
> ssim_4x4x2_core_t ssim_4x4x2_core;
> plane_copy_deinterleave_t plane_copy_deinterleave_c;
> +
> + addAvg_t luma_addAvg[NUM_LUMA_PARTITIONS];
> + addAvg_t chroma_addAvg[NUM_CHROMA_PARTITIONS];
> };
>
> /* This copy of the table is what gets used by the encoder.
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
--
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131115/0e541219/attachment.html>
More information about the x265-devel mailing list