[x265] [PATCH] psy-rd: ported hadamard_ac transform function to x265 for uniform blocksizes(8, 16, 32, 64)

Sumalatha Polureddy sumalatha at multicorewareinc.com
Fri Mar 14 19:22:13 CET 2014


I will apply our white-space coding style and resend the patch.

Regards
Sumalatha


On Fri, Mar 14, 2014 at 10:48 PM, Steve Borho <steve at borho.org> wrote:

> On Thu, Mar 13, 2014 at 6:08 AM,  <sumalatha at multicorewareinc.com> wrote:
> > # HG changeset patch
> > # User Sumalatha Polureddy
> > # Date 1394708875 -19800
> > # Node ID dae8674085419080a8bdfd102ed416621a23f164
> > # Parent  879151f659622ace9ddecec8d6e1a631849e2a04
> > psy-rd: ported hadamard_ac transform function to x265 for uniform blocksizes(8,16,32,64)
> >
> > diff -r 879151f65962 -r dae867408541 source/common/pixel.cpp
> > --- a/source/common/pixel.cpp   Thu Mar 13 03:25:06 2014 -0500
> > +++ b/source/common/pixel.cpp   Thu Mar 13 16:37:55 2014 +0530
> > @@ -852,6 +852,70 @@
> >          dst  += dstStride;
> >      }
> >  }
>
> these functions do not follow x265's white-space coding style
>
> > +
> > +uint64_t pixel_hadamard_ac_8x8( pixel *pix, intptr_t stride )
> > +{
> > +    sum2_t tmp[32];
> > +    sum2_t a0, a1, a2, a3, dc;
> > +    sum2_t sum4 = 0, sum8 = 0;
> > +    for( int i = 0; i < 8; i++, pix+=stride )
> > +    {
> > +        sum2_t *t = tmp + (i&3) + (i&4)*4;
> > +        a0 = (pix[0] + pix[1]) + ((sum2_t)(pix[0] - pix[1]) << BITS_PER_SUM);
> > +        a1 = (pix[2] + pix[3]) + ((sum2_t)(pix[2] - pix[3]) << BITS_PER_SUM);
> > +        t[0] = a0 + a1;
> > +        t[4] = a0 - a1;
> > +        a2 = (pix[4] + pix[5]) + ((sum2_t)(pix[4] - pix[5]) << BITS_PER_SUM);
> > +        a3 = (pix[6] + pix[7]) + ((sum2_t)(pix[6] - pix[7]) << BITS_PER_SUM);
> > +        t[8] = a2 + a3;
> > +        t[12] = a2 - a3;
> > +    }
> > +    for( int i = 0; i < 8; i++ )
> > +    {
> > +        HADAMARD4(a0, a1, a2, a3, tmp[i * 4 + 0], tmp[i * 4 + 1], tmp[i * 4 + 2], tmp[i * 4 + 3]);
> > +        tmp[i * 4 + 0] = a0;
> > +        tmp[i * 4 + 1] = a1;
> > +        tmp[i * 4 + 2] = a2;
> > +        tmp[i * 4 + 3] = a3;
> > +        sum4 += abs2(a0) + abs2(a1) + abs2(a2) + abs2(a3);
> > +    }
> > +    for( int i = 0; i < 8; i++ )
> > +    {
> > +        HADAMARD4(a0, a1, a2, a3, tmp[i], tmp[8 + i], tmp[16 + i], tmp[24 + i]);
> > +        sum8 += abs2(a0) + abs2(a1) + abs2(a2) + abs2(a3);
> > +    }
> > +    dc = (sum_t)(tmp[0] + tmp[8] + tmp[16] + tmp[24]);
> > +    sum4 = (sum_t)sum4 + (sum4 >> BITS_PER_SUM) - dc;
> > +    sum8 = (sum_t)sum8 + (sum8 >> BITS_PER_SUM) - dc;
> > +    return ((uint64_t)sum8 << 32) + sum4;
> > +}
> > +
> > +uint64_t pixel_hadamard_ac_16x16( pixel *pix, intptr_t stride )
> > +{
> > +    uint64_t sum = pixel_hadamard_ac_8x8( pix, stride );
> > +    sum += pixel_hadamard_ac_8x8( pix + 8, stride );
> > +    sum += pixel_hadamard_ac_8x8( pix + 8 * stride, stride );
> > +    sum += pixel_hadamard_ac_8x8( pix + 8 * stride + 8, stride );
> > +    return ((sum >> 34) << 32) + ((uint32_t)sum >> 1);
> > +}
> > +uint64_t pixel_hadamard_ac_32x32( pixel *pix, intptr_t stride )
> > +{
> > +    uint64_t sum = pixel_hadamard_ac_16x16( pix, stride );
> > +    sum += pixel_hadamard_ac_16x16( pix + 16, stride );
> > +    sum += pixel_hadamard_ac_16x16( pix + 16 * stride, stride );
> > +    sum += pixel_hadamard_ac_16x16( pix + 16 * stride + 16, stride );
> > +    return ((sum >> 34) << 32) + ((uint32_t)sum >> 1);
> > +}
> > +
> > +uint64_t pixel_hadamard_ac_64x64( pixel *pix, intptr_t stride )
> > +{
> > +    uint64_t sum = pixel_hadamard_ac_32x32( pix, stride );
> > +    sum += pixel_hadamard_ac_32x32( pix + 32, stride );
> > +    sum += pixel_hadamard_ac_32x32( pix + 32 * stride, stride );
> > +    sum += pixel_hadamard_ac_32x32( pix + 32 * stride + 16, stride );
> > +    return ((sum >> 34) << 32) + ((uint32_t)sum >> 1);
> > +}
> > +
> >  }  // end anonymous namespace
> >
> >  namespace x265 {
> > @@ -1099,5 +1163,11 @@
> >      p.var[BLOCK_32x32] = pixel_var<32>;
> >      p.var[BLOCK_64x64] = pixel_var<64>;
> >      p.plane_copy_deinterleave_c = plane_copy_deinterleave_chroma;
> > +
> > +    p.pixel_hadamard_ac[LUMA_8x8] = pixel_hadamard_ac_8x8;
> > +    p.pixel_hadamard_ac[LUMA_16x16] = pixel_hadamard_ac_16x16;
> > +    p.pixel_hadamard_ac[LUMA_32x32] = pixel_hadamard_ac_32x32;
> > +    p.pixel_hadamard_ac[LUMA_64x64] = pixel_hadamard_ac_64x64;
> > +
> >  }
> >  }
> > diff -r 879151f65962 -r dae867408541 source/common/primitives.h
> > --- a/source/common/primitives.h        Thu Mar 13 03:25:06 2014 -0500
> > +++ b/source/common/primitives.h        Thu Mar 13 16:37:55 2014 +0530
> > @@ -163,6 +163,7 @@
> >  typedef void (*addAvg_t)(int16_t* src0, int16_t* src1, pixel* dst, intptr_t src0Stride, intptr_t src1Stride, intptr_t dstStride);
> >
> >  typedef void (*saoCuOrgE0_t)(pixel * rec, int8_t * offsetEo, int lcuWidth, int8_t signLeft);
> > +typedef uint64_t (*pixel_hadamard_ac_t)( pixel *pix, intptr_t stride );
> >
> >  /* Define a structure containing function pointers to optimized encoder
> >   * primitives.  Each pointer can reference either an assembly routine,
> > @@ -234,6 +235,8 @@
> >      // sao primitives
> >      saoCuOrgE0_t      saoCuOrgE0;
> >
> > +    pixel_hadamard_ac_t pixel_hadamard_ac[NUM_LUMA_PARTITIONS];
> > +
> >      struct
> >      {
> >          filter_pp_t     filter_vpp[NUM_LUMA_PARTITIONS];
> > _______________________________________________
> > x265-devel mailing list
> > x265-devel at videolan.org
> > https://mailman.videolan.org/listinfo/x265-devel
>
>
>
> --
> Steve Borho
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20140314/bf17731d/attachment-0001.html>


More information about the x265-devel mailing list