[x265] [PATCH] asm: removed unused code in pixel_var module

Murugan Vairavel murugan at multicorewareinc.com
Tue Nov 26 08:49:09 CET 2013


Please ignore this patch. It needs some modifications in the C code.



On Tue, Nov 26, 2013 at 12:45 PM, <murugan at multicorewareinc.com> wrote:

> # HG changeset patch
> # User Murugan Vairavel <murugan at multicorewareinc.com>
> # Date 1385450061 -19800
> #      Tue Nov 26 12:44:21 2013 +0530
> # Node ID e866b2f9fcd2d4004e968243f18be1fa2a6c87a9
> # Parent  9e9767a887e3a91c0953b9bfa17c2f34f03ecf11
> asm: removed unused code in pixel_var module
>
> diff -r 9e9767a887e3 -r e866b2f9fcd2 source/common/pixel.cpp
> --- a/source/common/pixel.cpp   Mon Nov 25 19:28:33 2013 +0530
> +++ b/source/common/pixel.cpp   Tue Nov 26 12:44:21 2013 +0530
> @@ -968,17 +968,8 @@
>      p.ssim_4x4x2_core = ssim_4x4x2_core;
>      p.ssim_end_4 = ssim_end_4;
>
> -    p.var[LUMA_8x4] = pixel_var<8, 4>;
>      p.var[LUMA_8x8] = pixel_var<8, 8>;
> -    p.var[LUMA_8x16] = pixel_var<8, 16>;
> -    p.var[LUMA_8x32] = pixel_var<8, 32>;
> -    p.var[LUMA_16x4] = pixel_var<16, 4>;
> -    p.var[LUMA_16x8] = pixel_var<16, 8>;
> -    p.var[LUMA_16x12] = pixel_var<16, 12>;
>      p.var[LUMA_16x16] = pixel_var<16, 16>;
> -    p.var[LUMA_16x32] = pixel_var<16, 32>;
> -    p.var[LUMA_16x64] = pixel_var<16, 64>;
> -
>      p.plane_copy_deinterleave_c = plane_copy_deinterleave_chroma;
>  }
>  }
> diff -r 9e9767a887e3 -r e866b2f9fcd2 source/common/primitives.h
> --- a/source/common/primitives.h        Mon Nov 25 19:28:33 2013 +0530
> +++ b/source/common/primitives.h        Tue Nov 26 12:44:21 2013 +0530
> @@ -268,7 +268,7 @@
>      calcrecon_t     calcrecon[NUM_SQUARE_BLOCKS];
>      transpose_t     transpose[NUM_SQUARE_BLOCKS];
>
> -    var_t           var[NUM_LUMA_PARTITIONS];
> +    var_t           var[NUM_SQUARE_BLOCKS];
>      ssim_4x4x2_core_t ssim_4x4x2_core;
>      ssim_end4_t     ssim_end_4;
>
> diff -r 9e9767a887e3 -r e866b2f9fcd2 source/common/x86/asm-primitives.cpp
> --- a/source/common/x86/asm-primitives.cpp      Mon Nov 25 19:28:33 2013 +0530
> +++ b/source/common/x86/asm-primitives.cpp      Tue Nov 26 12:44:21 2013 +0530
> @@ -416,16 +416,8 @@
>      p.var[LUMA_ ## W ## x ## H] = x265_pixel_var_ ## W ## x ## H ## cpu;
>
>  #define LUMA_VAR(cpu) \
> -    SETUP_PIXEL_VAR_DEF(8,   4, cpu); \
>      SETUP_PIXEL_VAR_DEF(8,   8, cpu); \
> -    SETUP_PIXEL_VAR_DEF(8,  16, cpu); \
> -    SETUP_PIXEL_VAR_DEF(8,  32, cpu); \
> -    SETUP_PIXEL_VAR_DEF(16,  4, cpu); \
> -    SETUP_PIXEL_VAR_DEF(16,  8, cpu); \
> -    SETUP_PIXEL_VAR_DEF(16, 12, cpu); \
> -    SETUP_PIXEL_VAR_DEF(16, 16, cpu); \
> -    SETUP_PIXEL_VAR_DEF(16, 32, cpu); \
> -    SETUP_PIXEL_VAR_DEF(16, 64, cpu);
> +    SETUP_PIXEL_VAR_DEF(16, 16, cpu);
>
>  namespace x265 {
>  // private x265 namespace
> diff -r 9e9767a887e3 -r e866b2f9fcd2 source/common/x86/pixel-a.asm
> --- a/source/common/x86/pixel-a.asm     Mon Nov 25 19:28:33 2013 +0530
> +++ b/source/common/x86/pixel-a.asm     Tue Nov 26 12:44:21 2013 +0530
> @@ -1254,12 +1254,6 @@
>      VAR_2ROW 8*SIZEOF_PIXEL, 16
>      VAR_END 16, 16
>
> -cglobal pixel_var_8x16, 2,3
> -    FIX_STRIDES r1
> -    VAR_START 0
> -    VAR_2ROW r1, 8
> -    VAR_END 8, 16
> -
>  cglobal pixel_var_8x8, 2,3
>      FIX_STRIDES r1
>      VAR_START 0
> @@ -1301,18 +1295,6 @@
>
>  %if HIGH_BIT_DEPTH == 0
>  %macro VAR 0
> -cglobal pixel_var_8x4, 2,3,8
> -    VAR_START 1
> -    lea       r2,    [r1 * 3]
> -    movh      m0,    [r0]
> -    movh      m3,    [r0 + r1]
> -    movhps    m0,    [r0 + r1 * 2]
> -    movhps    m3,    [r0 + r2]
> -    DEINTB    1, 0, 4, 3, 7
> -    lea       r0,    [r0 + r1 * 4]
> -    VAR_CORE
> -    VAR_END 8, 4
> -
>  cglobal pixel_var_8x8, 2,3,8
>      VAR_START 1
>      lea       r2,    [r1 * 3]
> @@ -1331,142 +1313,6 @@
>      VAR_CORE
>      VAR_END 8, 8
>
> -
> -cglobal pixel_var_8x16, 2,4,8
> -    VAR_START 1
> -    lea       r2,    [r1 * 3]
> -    movh      m0,    [r0]
> -    movh      m3,    [r0 + r1]
> -    movhps    m0,    [r0 + r1 * 2]
> -    movhps    m3,    [r0 + r2]
> -    DEINTB    1, 0, 4, 3, 7
> -    lea       r0,    [r0 + r1 * 4]
> -    VAR_CORE
> -    movh      m0,    [r0]
> -    movh      m3,    [r0 + r1]
> -    movhps    m0,    [r0 + r1 * 2]
> -    movhps    m3,    [r0 + r2]
> -    DEINTB    1, 0, 4, 3, 7
> -    lea       r0,    [r0 + r1 * 4]
> -    VAR_CORE
> -    movh      m0,    [r0]
> -    movh      m3,    [r0 + r1]
> -    movhps    m0,    [r0 + r1 * 2]
> -    movhps    m3,    [r0 + r2]
> -    DEINTB    1, 0, 4, 3, 7
> -    lea       r0,    [r0 + r1 * 4]
> -    VAR_CORE
> -    movh      m0,    [r0]
> -    movh      m3,    [r0 + r1]
> -    movhps    m0,    [r0 + r1 * 2]
> -    movhps    m3,    [r0 + r2]
> -    DEINTB    1, 0, 4, 3, 7
> -    VAR_CORE
> -    VAR_END 8, 16
> -
> -cglobal pixel_var_8x32, 2,4,8
> -    VAR_START 1
> -    mov       r2d,   2
> -    lea       r3,    [r1 * 3]
> -.loop:
> -    movh      m0,    [r0]
> -    movh      m3,    [r0 + r1]
> -    movhps    m0,    [r0 + r1 * 2]
> -    movhps    m3,    [r0 + r3]
> -    DEINTB    1, 0, 4, 3, 7
> -    lea       r0,    [r0 + r1 * 4]
> -    VAR_CORE
> -    movh      m0,    [r0]
> -    movh      m3,    [r0 + r1]
> -    movhps    m0,    [r0 + r1 * 2]
> -    movhps    m3,    [r0 + r3]
> -    DEINTB    1, 0, 4, 3, 7
> -    lea       r0,    [r0 + r1 * 4]
> -    VAR_CORE
> -    movh      m0,    [r0]
> -    movh      m3,    [r0 + r1]
> -    movhps    m0,    [r0 + r1 * 2]
> -    movhps    m3,    [r0 + r3]
> -    DEINTB    1, 0, 4, 3, 7
> -    lea       r0,    [r0 + r1 * 4]
> -    VAR_CORE
> -    movh      m0,    [r0]
> -    movh      m3,    [r0 + r1]
> -    movhps    m0,    [r0 + r1 * 2]
> -    movhps    m3,    [r0 + r3]
> -    DEINTB    1, 0, 4, 3, 7
> -    lea       r0,    [r0 + r1 * 4]
> -    VAR_CORE
> -    dec    r2d
> -    jnz    .loop
> -    VAR_END 8, 32
> -
> -cglobal pixel_var_16x4, 2,3,8
> -    VAR_START 1
> -    lea       r2,    [r1 * 3]
> -    mova      m0,    [r0]
> -    mova      m3,    [r0 + r1]
> -    DEINTB    1, 0, 4, 3, 7
> -    VAR_CORE
> -    mova      m0,    [r0 + 2 * r1]
> -    mova      m3,    [r0 + r2]
> -    DEINTB    1, 0, 4, 3, 7
> -    VAR_CORE
> -    VAR_END 16, 4
> -
> -cglobal pixel_var_16x8, 2,3,8
> -    VAR_START 1
> -    lea       r2,    [r1 * 3]
> -    mova      m0,    [r0]
> -    mova      m3,    [r0 + r1]
> -    DEINTB    1, 0, 4, 3, 7
> -    VAR_CORE
> -    mova      m0,    [r0 + 2 * r1]
> -    mova      m3,    [r0 + r2]
> -    DEINTB    1, 0, 4, 3, 7
> -    lea       r0,    [r0 + r1 * 4]
> -    VAR_CORE
> -    mova      m0,    [r0]
> -    mova      m3,    [r0 + r1]
> -    DEINTB    1, 0, 4, 3, 7
> -    VAR_CORE
> -    mova      m0,    [r0 + 2 * r1]
> -    mova      m3,    [r0 + r2]
> -    DEINTB    1, 0, 4, 3, 7
> -    VAR_CORE
> -    VAR_END 16, 8
> -
> -cglobal pixel_var_16x12, 2,3,8
> -    VAR_START 1
> -    lea       r2,    [r1 * 3]
> -    mova      m0,    [r0]
> -    mova      m3,    [r0 + r1]
> -    DEINTB    1, 0, 4, 3, 7
> -    VAR_CORE
> -    mova      m0,    [r0 + 2 * r1]
> -    mova      m3,    [r0 + r2]
> -    DEINTB    1, 0, 4, 3, 7
> -    lea       r0,    [r0 + r1 * 4]
> -    VAR_CORE
> -    mova      m0,    [r0]
> -    mova      m3,    [r0 + r1]
> -    DEINTB    1, 0, 4, 3, 7
> -    VAR_CORE
> -    mova      m0,    [r0 + 2 * r1]
> -    mova      m3,    [r0 + r2]
> -    DEINTB    1, 0, 4, 3, 7
> -    lea       r0,    [r0 + r1 * 4]
> -    VAR_CORE
> -    mova      m0,    [r0]
> -    mova      m3,    [r0 + r1]
> -    DEINTB    1, 0, 4, 3, 7
> -    VAR_CORE
> -    mova      m0,    [r0 + 2 * r1]
> -    mova      m3,    [r0 + r2]
> -    DEINTB    1, 0, 4, 3, 7
> -    VAR_CORE
> -    VAR_END 16, 12
> -
>  cglobal pixel_var_16x16, 2,3,8
>      VAR_START 1
>      lea       r2,    [r1 * 3]
> @@ -1506,96 +1352,6 @@
>      DEINTB    1, 0, 4, 3, 7
>      VAR_CORE
>      VAR_END 16, 16
> -
> -cglobal pixel_var_16x32, 2,4,8
> -    VAR_START 1
> -    mov       r2d,   2
> -    lea       r3,    [r1 * 3]
> -.loop:
> -    mova      m0,    [r0]
> -    mova      m3,    [r0 + r1]
> -    DEINTB    1, 0, 4, 3, 7
> -    VAR_CORE
> -    mova      m0,    [r0 + 2 * r1]
> -    mova      m3,    [r0 + r3]
> -    DEINTB    1, 0, 4, 3, 7
> -    lea       r0,    [r0 + r1 * 4]
> -    VAR_CORE
> -    mova      m0,    [r0]
> -    mova      m3,    [r0 + r1]
> -    DEINTB    1, 0, 4, 3, 7
> -    VAR_CORE
> -    mova      m0,    [r0 + 2 * r1]
> -    mova      m3,    [r0 + r3]
> -    DEINTB    1, 0, 4, 3, 7
> -    lea       r0,    [r0 + r1 * 4]
> -    VAR_CORE
> -    mova      m0,    [r0]
> -    mova      m3,    [r0 + r1]
> -    DEINTB    1, 0, 4, 3, 7
> -    VAR_CORE
> -    mova      m0,    [r0 + 2 * r1]
> -    mova      m3,    [r0 + r3]
> -    DEINTB    1, 0, 4, 3, 7
> -    lea       r0,    [r0 + r1 * 4]
> -    VAR_CORE
> -    mova      m0,    [r0]
> -    mova      m3,    [r0 + r1]
> -    DEINTB    1, 0, 4, 3, 7
> -    VAR_CORE
> -    mova      m0,    [r0 + 2 * r1]
> -    mova      m3,    [r0 + r3]
> -    DEINTB    1, 0, 4, 3, 7
> -    lea       r0,    [r0 + r1 * 4]
> -    VAR_CORE
> -    dec    r2d
> -    jg    .loop
> -    VAR_END 16, 32
> -
> -cglobal pixel_var_16x64, 2,4,8
> -    VAR_START 1
> -    mov       r2d,   4
> -    lea       r3,    [r1 * 3]
> -.loop:
> -    mova      m0,    [r0]
> -    mova      m3,    [r0 + r1]
> -    DEINTB    1, 0, 4, 3, 7
> -    VAR_CORE
> -    mova      m0,    [r0 + 2 * r1]
> -    mova      m3,    [r0 + r3]
> -    DEINTB    1, 0, 4, 3, 7
> -    lea       r0,    [r0 + r1 * 4]
> -    VAR_CORE
> -    mova      m0,    [r0]
> -    mova      m3,    [r0 + r1]
> -    DEINTB    1, 0, 4, 3, 7
> -    VAR_CORE
> -    mova      m0,    [r0 + 2 * r1]
> -    mova      m3,    [r0 + r3]
> -    DEINTB    1, 0, 4, 3, 7
> -    lea       r0,    [r0 + r1 * 4]
> -    VAR_CORE
> -    mova      m0,    [r0]
> -    mova      m3,    [r0 + r1]
> -    DEINTB    1, 0, 4, 3, 7
> -    VAR_CORE
> -    mova      m0,    [r0 + 2 * r1]
> -    mova      m3,    [r0 + r3]
> -    DEINTB    1, 0, 4, 3, 7
> -    lea       r0,    [r0 + r1 * 4]
> -    VAR_CORE
> -    mova      m0,    [r0]
> -    mova      m3,    [r0 + r1]
> -    DEINTB    1, 0, 4, 3, 7
> -    VAR_CORE
> -    mova      m0,    [r0 + 2 * r1]
> -    mova      m3,    [r0 + r3]
> -    DEINTB    1, 0, 4, 3, 7
> -    lea       r0,    [r0 + r1 * 4]
> -    VAR_CORE
> -    dec    r2d
> -    jg    .loop
> -    VAR_END 16, 64
>  %endmacro ; VAR
>
>  INIT_XMM sse2
> diff -r 9e9767a887e3 -r e866b2f9fcd2 source/common/x86/pixel.h
> --- a/source/common/x86/pixel.h Mon Nov 25 19:28:33 2013 +0530
> +++ b/source/common/x86/pixel.h Tue Nov 26 12:44:21 2013 +0530
> @@ -351,16 +351,8 @@
>      uint64_t x265_pixel_var_ ## W ## x ## H ## cpu(pixel *pix, intptr_t pixstride);
>
>  #define LUMA_PIXELVAR_DEF(cpu) \
> -    SETUP_LUMA_PIXELVAR_FUNC(8,   4, cpu); \
>      SETUP_LUMA_PIXELVAR_FUNC(8,   8, cpu); \
> -    SETUP_LUMA_PIXELVAR_FUNC(8,  16, cpu); \
> -    SETUP_LUMA_PIXELVAR_FUNC(8,  32, cpu); \
> -    SETUP_LUMA_PIXELVAR_FUNC(16,  4, cpu); \
> -    SETUP_LUMA_PIXELVAR_FUNC(16,  8, cpu); \
> -    SETUP_LUMA_PIXELVAR_FUNC(16, 12, cpu); \
> -    SETUP_LUMA_PIXELVAR_FUNC(16, 16, cpu); \
> -    SETUP_LUMA_PIXELVAR_FUNC(16, 32, cpu); \
> -    SETUP_LUMA_PIXELVAR_FUNC(16, 48, cpu);
> +    SETUP_LUMA_PIXELVAR_FUNC(16, 16, cpu);
>
>  LUMA_PIXELVAR_DEF(_sse2);
>
> diff -r 9e9767a887e3 -r e866b2f9fcd2 source/test/pixelharness.cpp
> --- a/source/test/pixelharness.cpp      Mon Nov 25 19:28:33 2013 +0530
> +++ b/source/test/pixelharness.cpp      Tue Nov 26 12:44:21 2013 +0530
> @@ -777,15 +777,6 @@
>          }
>      }
>
> -    if (opt.var[part])
> -    {
> -        if (!check_pixel_var(ref.var[part], opt.var[part]))
> -        {
> -            printf("var[%s]: failed!\n", lumaPartStr[part]);
> -            return false;
> -        }
> -    }
> -
>      for(int i = 0; i < X265_CSP_COUNT; i++)
>      {
>          if (opt.chroma[i].copy_pp[part])
> @@ -905,6 +896,15 @@
>                  return false;
>              }
>          }
> +
> +    if (opt.var[i])
> +    {
> +        if (!check_pixel_var(ref.var[i], opt.var[i]))
> +        {
> +            printf("var[%dx%d] failed\n", 4 << i, 4 << i);
> +            return false;
> +        }
> +    }
>      }
>
>      if (opt.cvt32to16_shr)
> @@ -1080,12 +1080,6 @@
>          REPORT_SPEEDUP(opt.luma_add_ps[part], ref.luma_add_ps[part], pbuf1, FENC_STRIDE, pbuf2, sbuf1, STRIDE, STRIDE);
>      }
>
> -    if (opt.var[part])
> -    {
> -        HEADER("var[%s]", lumaPartStr[part]);
> -        REPORT_SPEEDUP(opt.var[part], ref.var[part], pbuf1, STRIDE);
> -    }
> -
>      for (int i = 0; i < X265_CSP_COUNT; i++)
>      {
>          if (opt.chroma[i].copy_pp[part])
> @@ -1179,6 +1173,12 @@
>              HEADER("transpose[%dx%d]", 4 << i, 4 << i);
>              REPORT_SPEEDUP(opt.transpose[i], ref.transpose[i], pbuf1, pbuf2, STRIDE);
>          }
> +
> +        if (opt.var[i])
> +        {
> +            HEADER("var[%dx%d]", 4 << i, 4 << i);
> +            REPORT_SPEEDUP(opt.var[i], ref.var[i], pbuf1, STRIDE);
> +        }
>      }
>
>      if (opt.cvt32to16_shr)
>



-- 
With Regards,

Murugan. V
+919659287478
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131126/08fb0384/attachment-0001.html>


More information about the x265-devel mailing list