[x265] [PATCH] asm: removed duplicate and redundant constants

Steve Borho steve at borho.org
Tue May 19 17:18:34 CEST 2015


On 05/19, dnyaneshwar at multicorewareinc.com wrote:
> # HG changeset patch
> # User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
> # Date 1432037856 -19800
> #      Tue May 19 17:47:36 2015 +0530
> # Node ID e6fc4b6f16b32debf4a252b47ad6fc9c82364188
> # Parent  b44cdf8dc08c77e84b8707992cd0006bbf23d864
> asm: removed duplicate and redundant constants

This one did apply; queued.

> diff -r b44cdf8dc08c -r e6fc4b6f16b3 source/common/x86/const-a.asm
> --- a/source/common/x86/const-a.asm	Tue May 19 15:18:08 2015 +0530
> +++ b/source/common/x86/const-a.asm	Tue May 19 17:47:36 2015 +0530
> @@ -92,7 +92,7 @@
>  const pw_0_15,              times  2 dw   0,   1,   2,   3,   4,   5,   6,   7
>  const pw_ppppmmmm,          times  1 dw   1,   1,   1,   1,  -1,  -1,  -1,  -1
>  const pw_ppmmppmm,          times  1 dw   1,   1,  -1,  -1,   1,   1,  -1,  -1
> -const pw_pmpmpmpm,          times  1 dw   1,  -1,   1,  -1,   1,  -1,   1,  -1
> +const pw_pmpmpmpm,          times 16 dw   1,  -1,   1,  -1,   1,  -1,   1,  -1
>  const pw_pmmpzzzz,          times  1 dw   1,  -1,  -1,   1,   0,   0,   0,   0
>  const multi_2Row,           times  1 dw   1,   2,   3,   4,   1,   2,   3,   4
>  const multiH,               times  1 dw   9,  10,  11,  12,  13,  14,  15,  16
> @@ -102,7 +102,9 @@
>  const pw_planar16_mul,      times  1 dw  15,  14,  13,  12,  11,  10,   9,   8,   7,   6,   5,   4,   3,   2,   1,   0
>  const pw_planar32_mul,      times  1 dw  31,  30,  29,  28,  27,  26,  25,  24,  23,  22,  21,  20,  19,  18,  17,  16
>  const pw_FFFFFFFFFFFFFFF0,           dw 0x00
> -                            times 7  dw 0xff
> +                            times  7 dw 0xff
> +const hmul_16p,             times 16 db   1
> +                            times  8 db   1,  -1
>  
>  
>  ;; 32-bit constants
> @@ -125,7 +127,6 @@
>  const pd_n32768,            times  4 dd 0xffff8000
>  
>  const trans8_shuf,          times  1 dd   0,   4,   1,   5,   2,   6,   3,   7
> -const deinterleave_shufd,   times  1 dd   0,   4,   1,   5,   2,   6,   3,   7
>  
>  const popcnt_table
>  %assign x 0
> diff -r b44cdf8dc08c -r e6fc4b6f16b3 source/common/x86/mc-a.asm
> --- a/source/common/x86/mc-a.asm	Tue May 19 15:18:08 2015 +0530
> +++ b/source/common/x86/mc-a.asm	Tue May 19 17:47:36 2015 +0530
> @@ -58,7 +58,6 @@
>  cextern pw_pixel_max
>  cextern sw_64
>  cextern pd_32
> -cextern deinterleave_shufd
>  
>  ;====================================================================================================================
>  ;void addAvg (int16_t* src0, int16_t* src1, pixel* dst, intptr_t src0Stride, intptr_t src1Stride, intptr_t dstStride)
> diff -r b44cdf8dc08c -r e6fc4b6f16b3 source/common/x86/pixel-a.asm
> --- a/source/common/x86/pixel-a.asm	Tue May 19 15:18:08 2015 +0530
> +++ b/source/common/x86/pixel-a.asm	Tue May 19 17:47:36 2015 +0530
> @@ -32,8 +32,6 @@
>  %include "x86util.asm"
>  
>  SECTION_RODATA 32
> -hmul_16p:  times 16 db 1
> -           times 8 db 1, -1
>  hmul_8p:   times 8 db 1
>             times 4 db 1, -1
>             times 8 db 1
> @@ -45,8 +43,7 @@
>             times 2 dw 1, -1
>             times 4 dw 1
>             times 2 dw 1, -1
> -ALIGN 32
> -hmul_w:    times 2 dw 1, -1, 1, -1, 1, -1, 1, -1
> +
>  ALIGN 32
>  transd_shuf1: SHUFFLE_MASK_W 0, 8, 2, 10, 4, 12, 6, 14
>  transd_shuf2: SHUFFLE_MASK_W 1, 9, 3, 11, 5, 13, 7, 15
> @@ -54,8 +51,6 @@
>  sw_f0:     dq 0xfff0, 0
>  pd_f0:     times 4 dd 0xffff0000
>  
> -pw_76543210: dw 0, 1, 2, 3, 4, 5, 6, 7
> -
>  SECTION .text
>  
>  cextern pb_0
> @@ -72,6 +67,7 @@
>  cextern pd_1
>  cextern popcnt_table
>  cextern pd_2
> +cextern hmul_16p
>  
>  ;=============================================================================
>  ; SATD
> @@ -9070,7 +9066,7 @@
>  INIT_XMM sse4
>  cglobal psyCost_ss_8x8, 4, 6, 15
>  
> -    mova            m13, [hmul_w]
> +    mova            m13, [pw_pmpmpmpm]
>      mova            m14, [pw_1]
>      add             r1, r1
>      add             r3, r3
> @@ -10220,7 +10216,7 @@
>  INIT_XMM sse4
>  cglobal psyCost_ss_16x16, 4, 9, 16
>  
> -    mova            m13, [hmul_w]
> +    mova            m13, [pw_pmpmpmpm]
>      mova            m14, [pw_1]
>      add             r1, r1
>      add             r3, r3
> @@ -10248,7 +10244,7 @@
>  INIT_XMM sse4
>  cglobal psyCost_ss_32x32, 4, 9, 16
>  
> -    mova            m13, [hmul_w]
> +    mova            m13, [pw_pmpmpmpm]
>      mova            m14, [pw_1]
>      add             r1, r1
>      add             r3, r3
> @@ -10276,7 +10272,7 @@
>  INIT_XMM sse4
>  cglobal psyCost_ss_64x64, 4, 9, 16
>  
> -    mova            m13, [hmul_w]
> +    mova            m13, [pw_pmpmpmpm]
>      mova            m14, [pw_1]
>      add             r1, r1
>      add             r3, r3
> @@ -10717,7 +10713,7 @@
>      and             rsp, ~63
>  
>      mova            m12, [pw_1]
> -    mova            m13, [hmul_w]
> +    mova            m13, [pw_pmpmpmpm]
>      add             r1, r1
>      add             r3, r3
>  
> @@ -10737,7 +10733,7 @@
>      and             rsp, ~63
>  
>      mova            m12, [pw_1]
> -    mova            m13, [hmul_w]
> +    mova            m13, [pw_pmpmpmpm]
>      add             r1, r1
>      add             r3, r3
>      pxor            m14, m14
> @@ -10771,7 +10767,7 @@
>      and             rsp, ~63
>  
>      mova            m12, [pw_1]
> -    mova            m13, [hmul_w]
> +    mova            m13, [pw_pmpmpmpm]
>      add             r1, r1
>      add             r3, r3
>      pxor            m14, m14
> @@ -10805,7 +10801,7 @@
>      and             rsp, ~63
>  
>      mova            m12, [pw_1]
> -    mova            m13, [hmul_w]
> +    mova            m13, [pw_pmpmpmpm]
>      add             r1, r1
>      add             r3, r3
>      pxor            m14, m14
> diff -r b44cdf8dc08c -r e6fc4b6f16b3 source/common/x86/pixel-util8.asm
> --- a/source/common/x86/pixel-util8.asm	Tue May 19 15:18:08 2015 +0530
> +++ b/source/common/x86/pixel-util8.asm	Tue May 19 17:47:36 2015 +0530
> @@ -45,13 +45,9 @@
>                          times 16 db 0
>  deinterleave_shuf:      times  2 db 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15
>  deinterleave_word_shuf: times  2 db 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15
> -hmul_16p:               times 16 db 1
> -                        times  8 db 1, -1
>  hmulw_16p:              times  8 dw 1
>                          times  4 dw 1, -1
>  
> -trans8_shuf:            dd 0, 4, 1, 5, 2, 6, 3, 7
> -
>  SECTION .text
>  
>  cextern pw_1
> @@ -72,6 +68,8 @@
>  cextern pb_16
>  cextern pb_32
>  cextern pb_64
> +cextern hmul_16p
> +cextern trans8_shuf
>  
>  ;-----------------------------------------------------------------------------
>  ; void getResidual(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride)
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho


More information about the x265-devel mailing list