[x265] [PATCH 1 of 2] primitives: added C primitives to compute SSIM

Aarthi Priya Thirumalai aarthi at multicorewareinc.com
Sat Oct 5 06:07:41 CEST 2013


I understand. If HIGH_BIT_DEPTH = 1, you could still choose the uint32 version
of the primitives, but in ssim_end_1(int, ...) you might get an overflow when
the bit depth is > 9, as mentioned in the comments there. I could use a 64-bit
int there in all cases to store the intermediate calculations, if you want to
remove those #if conditions.


On Sat, Oct 5, 2013 at 12:53 AM, Steve Borho <steve at borho.org> wrote:

>
>
>
> On Fri, Oct 4, 2013 at 6:21 AM, Aarthi Thirumalai <
> aarthi at multicorewareinc.com> wrote:
>
>> # HG changeset patch
>> # User Aarthi Thirumalai
>> # Date 1380885375 -19800
>> #      Fri Oct 04 16:46:15 2013 +0530
>> # Node ID 92641f3d3195b8da2275cfc44b1921d8f81a54bc
>> # Parent  bf14f75b8cf99806c75cdc1a50b28b6cf265e3bd
>> primitives: added C primitives to compute SSIM
>>
>> diff -r bf14f75b8cf9 -r 92641f3d3195 source/common/pixel.cpp
>> --- a/source/common/pixel.cpp   Fri Oct 04 01:39:22 2013 -0500
>> +++ b/source/common/pixel.cpp   Fri Oct 04 16:46:15 2013 +0530
>> @@ -653,6 +653,85 @@
>>      }
>>  }
>>
>> +/* structural similarity metric */
>> +template<class T1>
>> +void ssim_4x4x2_core(const pixel *pix1, intptr_t stride1, const pixel
>> *pix2, intptr_t stride2, T1 sums[2][4])
>> +{
>> +    for (int z = 0; z < 2; z++)
>> +    {
>> +        T1 s1 = 0, s2 = 0, ss = 0, s12 = 0;
>> +        for (int y = 0; y < 4; y++)
>> +        {
>> +            for (int x = 0; x < 4; x++)
>> +            {
>> +                T1 a = pix1[x + y * stride1];
>> +                T1 b = pix2[x + y * stride2];
>> +                s1 += a;
>> +                s2 += b;
>> +                ss += a * a;
>> +                ss += b * b;
>> +                s12 += a * b;
>> +            }
>> +        }
>> +
>> +        sums[z][0] = s1;
>> +        sums[z][1] = s2;
>> +        sums[z][2] = ss;
>> +        sums[z][3] = s12;
>> +        pix1 += 4;
>> +        pix2 += 4;
>> +    }
>> +}
>> +
>> +template<class T1>
>> +float ssim_end_4(T1 sum0[5][4], T1 sum1[5][4], int width)
>> +{
>> +    float ssim = 0.0;
>> +
>> +    for (int i = 0; i < width; i++)
>> +    {
>> +        ssim += ssim_end_1(sum0[i][0] + sum0[i + 1][0] + sum1[i][0] +
>> sum1[i + 1][0],
>> +                           sum0[i][1] + sum0[i + 1][1] + sum1[i][1] +
>> sum1[i + 1][1],
>> +                           sum0[i][2] + sum0[i + 1][2] + sum1[i][2] +
>> sum1[i + 1][2],
>> +                           sum0[i][3] + sum0[i + 1][3] + sum1[i][3] +
>> sum1[i + 1][3]);
>> +    }
>> +
>> +    return ssim;
>> +}
>> +
>> +float ssim_end_1(int s1, int s2, int ss, int s12)
>> +{
>> +    static const uint32_t pixelMax = (1 << X265_DEPTH) - 1;
>> +
>> +    /* Maximum value for 10-bit is: ss*64 = (2^10-1)^2*16*4*64 =
>> 4286582784, which will overflow in some cases.
>> +    * s1*s1, s2*s2, and s1*s2 also obtain this value for edge cases:
>> ((2^10-1)*16*4)^2 = 4286582784.
>> +    * Maximum value for 9-bit is: ss*64 = (2^9-1)^2*16*4*64 =
>> 1069551616, which will not overflow. */
>> +
>> +#if X265_DEPTH > 9
>> +#define type int64_t
>> +#else
>> +#define type int32_t
>> +#endif
>>
>
> We keep cycling on this issue.
>
> With HIGH_BIT_DEPTH=1, X265_DEPTH evaluates to g_bitDepth (a global
> variable).
> With HIGH_BIT_DEPTH=0, X265_DEPTH evaluates to 8.
>
> So with high bit depth builds the bit depth is a runtime option (it could
> be 8, 10, or 12).  For 8bpp builds the bit depth must be 8.
>
> HIGH_BIT_DEPTH=0 builds will probably always use the uint32 version of
> this function.  The HIGH_BIT_DEPTH=1 builds will have to choose between
> uint32 or float at runtime.
>
>
>> +
>> +    static const type ssim_c1 = (type)(.01 * .01 * pixelMax * pixelMax *
>> 64 + .5);
>> +    static const type ssim_c2 = (type)(.03 * .03 * pixelMax * pixelMax *
>> 64 * 63 + .5);
>> +    type vars = ss * 64 - s1 * s1 - s2 * s2;
>> +    type covar = s12 * 64 - s1 * s2;
>> +    return (float)(2 * s1 * s2 + ssim_c1) * (float)(2 * covar + ssim_c2)
>> +           / ((float)(s1 * s1 + s2 * s2 + ssim_c1) * (float)(vars +
>> ssim_c2));
>> +}
>> +
>> +float ssim_end_1(float s1, float s2, float ss, float s12)
>> +{
>> +    static const float pixelMax = (1 << X265_DEPTH) - 1;
>> +    static const float ssim_c1 = (float)(.01 * .01 * pixelMax * pixelMax
>> * 64);
>> +    static const float ssim_c2 = (float)(.03 * .03 * pixelMax * pixelMax
>> * 64 * 63);
>> +    float vars = ss * 64 - s1 * s1 - s2 * s2;
>> +    float covar = s12 * 64 - s1 * s2;
>> +
>> +    return (2 * s1 * s2 + ssim_c1) * (2 * covar + ssim_c2)
>> +           / ((s1 * s1 + s2 * s2 + ssim_c1) * (vars + ssim_c2));
>> +}
>>  }  // end anonymous namespace
>>
>>  namespace x265 {
>> @@ -870,5 +949,10 @@
>>      p.scale1D_128to64 = scale1D_128to64;
>>      p.scale2D_64to32 = scale2D_64to32;
>>      p.frame_init_lowres_core = frame_init_lowres_core;
>> +
>> +    p.ssim_4x4x2_core_float = ssim_4x4x2_core<float>;
>> +    p.ssim_4x4x2_core_int   = ssim_4x4x2_core<int>;
>> +    p.ssim_end4_float       = ssim_end_4<float>;
>> +    p.ssim_end4_int         = ssim_end_4<int>;
>>  }
>>  }
>> diff -r bf14f75b8cf9 -r 92641f3d3195 source/common/primitives.h
>> --- a/source/common/primitives.h        Fri Oct 04 01:39:22 2013 -0500
>> +++ b/source/common/primitives.h        Fri Oct 04 16:46:15 2013 +0530
>> @@ -235,6 +235,10 @@
>>  typedef void (*scale_t)(pixel *dst, pixel *src, intptr_t stride);
>>  typedef void (*downscale_t)(pixel *src0, pixel *dstf, pixel *dsth, pixel
>> *dstv, pixel *dstc,
>>                              intptr_t src_stride, intptr_t dst_stride,
>> int width, int height);
>> +typedef void (*ssim_4x4x2_core_int_t)(const pixel *pix1, intptr_t
>> stride1, const pixel *pix2, intptr_t stride2, int sums[2][4]);
>> +typedef void (*ssim_4x4x2_core_float_t)(const pixel *pix1, intptr_t
>> stride1, const pixel *pix2, intptr_t stride2, float sums[2][4]);
>> +typedef float (*ssim_end4_int_t)(int sum0[5][4], int sum1[5][4], int
>> width);
>> +typedef float (*ssim_end4_float_t)(float sum0[5][4], float sum1[5][4],
>> int width);
>>
>>  /* Define a structure containing function pointers to optimized encoder
>>   * primitives.  Each pointer can reference either an assembly routine,
>> @@ -301,6 +305,13 @@
>>      scale_t         scale1D_128to64;
>>      scale_t         scale2D_64to32;
>>      downscale_t     frame_init_lowres_core;
>> +
>> +/* If the pixel depth > 15, use the ssim_float primitives to prevent
>> overflow, else
>> + * ssim_int primitives should be sufficient. */
>> +    ssim_4x4x2_core_int_t     ssim_4x4x2_core_int;
>> +    ssim_4x4x2_core_float_t   ssim_4x4x2_core_float;
>> +    ssim_end4_int_t           ssim_end4_int;
>> +    ssim_end4_float_t         ssim_end4_float;
>>  };
>>
>>  /* This copy of the table is what gets used by the encoder.
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>
>
>
> --
> Steve Borho
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131005/20abbc3e/attachment-0001.html>


More information about the x265-devel mailing list