[x265] [PATCH] TComYuv::addAvg, primitive function for luma and chroma loops

Steve Borho steve at borho.org
Mon Nov 18 22:58:35 CET 2013


This patch series is causing crashes in the encoder on my Mac, so I am discarding it for now.

On Nov 18, 2013, at 1:14 AM, dnyaneshwar at multicorewareinc.com wrote:

> # HG changeset patch
> # User Dnyaneshwar Gorade <dnyaneshwar at multicorewareinc.com>
> # Date 1384758687 -19800
> #      Mon Nov 18 12:41:27 2013 +0530
> # Node ID ee062baf96b18ab2ecd64a2e4219b2a5a3c09e5d
> # Parent  e2895ce7bbeb2c3d845fee2578758d0012fa2cb4
> TComYuv::addAvg, primitive function for luma and chroma loops
> 
> diff -r e2895ce7bbeb -r ee062baf96b1 source/Lib/TLibCommon/TComYuv.cpp
> --- a/source/Lib/TLibCommon/TComYuv.cpp	Sun Nov 17 11:24:13 2013 -0600
> +++ b/source/Lib/TLibCommon/TComYuv.cpp	Mon Nov 18 12:41:27 2013 +0530
> @@ -589,9 +589,7 @@
> 
> void TComYuv::addAvg(TShortYUV* srcYuv0, TShortYUV* srcYuv1, uint32_t partUnitIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma)
> {
> -    int x, y;
>     uint32_t src0Stride, src1Stride, dststride;
> -    int shiftNum, offset;
> 
>     int16_t* srcY0 = srcYuv0->getLumaAddr(partUnitIdx);
>     int16_t* srcU0 = srcYuv0->getCbAddr(partUnitIdx);
> @@ -610,29 +608,12 @@
>         src0Stride = srcYuv0->m_width;
>         src1Stride = srcYuv1->m_width;
>         dststride  = getStride();
> -        shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
> -        offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
> 
> -        for (y = 0; y < height; y++)
> -        {
> -            for (x = 0; x < width; x += 4)
> -            {
> -                dstY[x + 0] = ClipY((srcY0[x + 0] + srcY1[x + 0] + offset) >> shiftNum);
> -                dstY[x + 1] = ClipY((srcY0[x + 1] + srcY1[x + 1] + offset) >> shiftNum);
> -                dstY[x + 2] = ClipY((srcY0[x + 2] + srcY1[x + 2] + offset) >> shiftNum);
> -                dstY[x + 3] = ClipY((srcY0[x + 3] + srcY1[x + 3] + offset) >> shiftNum);
> -            }
> -
> -            srcY0 += src0Stride;
> -            srcY1 += src1Stride;
> -            dstY  += dststride;
> -        }
> +        int part = partitionFromSizes(width, height);
> +        primitives.luma_addAvg[part](dstY, dststride, srcY0, src0Stride, srcY1, src1Stride);
>     }
>     if (bChroma)
>     {
> -        shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
> -        offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
> -
>         src0Stride = srcYuv0->m_cwidth;
>         src1Stride = srcYuv1->m_cwidth;
>         dststride  = getCStride();
> @@ -640,26 +621,9 @@
>         width  >>= m_hChromaShift;
>         height >>= m_vChromaShift;
> 
> -        for (y = height - 1; y >= 0; y--)
> -        {
> -            for (x = width - 1; x >= 0; )
> -            {
> -                // note: chroma min width is 2
> -                dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) >> shiftNum);
> -                dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) >> shiftNum);
> -                x--;
> -                dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) >> shiftNum);
> -                dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) >> shiftNum);
> -                x--;
> -            }
> -
> -            srcU0 += src0Stride;
> -            srcU1 += src1Stride;
> -            srcV0 += src0Stride;
> -            srcV1 += src1Stride;
> -            dstU  += dststride;
> -            dstV  += dststride;
> -        }
> +        int part = partitionFromSizes(width, height);
> +        primitives.chroma_addAvg[part](dstU, dststride, srcU0, src0Stride, srcU1, src1Stride);
> +        primitives.chroma_addAvg[part](dstV, dststride, srcV0, src0Stride, srcV1, src1Stride);
>     }
> }
> 
> diff -r e2895ce7bbeb -r ee062baf96b1 source/common/pixel.cpp
> --- a/source/common/pixel.cpp	Sun Nov 17 11:24:13 2013 -0600
> +++ b/source/common/pixel.cpp	Mon Nov 18 12:41:27 2013 +0530
> @@ -794,6 +794,27 @@
>         a += dstride;
>     }
> }
> +
> +template<int bx, int by>
> +void addAvg(pixel* dst, intptr_t dstStride, int16_t* src0, intptr_t src0Stride, int16_t* src1, intptr_t src1Stride)
> +{
> +    int shiftNum, offset;
> +    shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
> +    offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
> +
> +    for (int y = 0; y < by; y++)
> +    {
> +        for (int x = 0; x < bx; x += 2)
> +        {
> +            dst[x + 0] = ClipY((src0[x + 0] + src1[x + 0] + offset) >> shiftNum);
> +            dst[x + 1] = ClipY((src0[x + 1] + src1[x + 1] + offset) >> shiftNum);
> +        }
> +
> +        src0 += src0Stride;
> +        src1 += src1Stride;
> +        dst  += dstStride;
> +    }
> +}
> }  // end anonymous namespace
> 
> namespace x265 {
> @@ -835,12 +856,14 @@
>     p.satd[LUMA_16x64] = satd8<16, 64>;
> 
> #define CHROMA(W, H) \
> +    p.chroma_addAvg[CHROMA_ ## W ## x ## H]  = addAvg<W, H>; \
>     p.chroma_copy_pp[CSP_I420][CHROMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
>     p.chroma_copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
>     p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>;\
>     p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>;
> 
> #define LUMA(W, H) \
> +    p.luma_addAvg[LUMA_ ## W ## x ## H]  = addAvg<W, H>; \
>     p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
>     p.luma_copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
>     p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>;\
> diff -r e2895ce7bbeb -r ee062baf96b1 source/common/primitives.h
> --- a/source/common/primitives.h	Sun Nov 17 11:24:13 2013 -0600
> +++ b/source/common/primitives.h	Mon Nov 18 12:41:27 2013 +0530
> @@ -219,6 +219,8 @@
> 
> typedef void (*pixel_sub_ps_t)(int16_t *dst, intptr_t dstride, pixel *src0, pixel *src1, intptr_t sstride0, intptr_t sstride1);
> 
> +typedef void (*addAvg_t)(pixel* dst, intptr_t dstStride, int16_t* src0, intptr_t src0Stride, int16_t* src1, intptr_t src1Stride);
> +
> /* Define a structure containing function pointers to optimized encoder
>  * primitives.  Each pointer can reference either an assembly routine,
>  * a vectorized primitive, or a C function. */
> @@ -301,6 +303,9 @@
>     var_t           var[NUM_LUMA_PARTITIONS];
>     ssim_4x4x2_core_t ssim_4x4x2_core;
>     plane_copy_deinterleave_t plane_copy_deinterleave_c;
> +
> +    addAvg_t        luma_addAvg[NUM_LUMA_PARTITIONS];
> +    addAvg_t        chroma_addAvg[NUM_CHROMA_PARTITIONS];
> };
> 
> /* This copy of the table is what gets used by the encoder.
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 842 bytes
Desc: Message signed with OpenPGP using GPGMail
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131118/757a130d/attachment.sig>


More information about the x265-devel mailing list