[x265] [PATCH] asm: removed duplicate and redundant constants
Steve Borho
steve at borho.org
Tue May 19 17:18:34 CEST 2015
On 05/19, dnyaneshwar at multicorewareinc.com wrote:
> # HG changeset patch
> # User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
> # Date 1432037856 -19800
> # Tue May 19 17:47:36 2015 +0530
> # Node ID e6fc4b6f16b32debf4a252b47ad6fc9c82364188
> # Parent b44cdf8dc08c77e84b8707992cd0006bbf23d864
> asm: removed duplicate and redundant constants
This one did apply; queued.
> diff -r b44cdf8dc08c -r e6fc4b6f16b3 source/common/x86/const-a.asm
> --- a/source/common/x86/const-a.asm Tue May 19 15:18:08 2015 +0530
> +++ b/source/common/x86/const-a.asm Tue May 19 17:47:36 2015 +0530
> @@ -92,7 +92,7 @@
> const pw_0_15, times 2 dw 0, 1, 2, 3, 4, 5, 6, 7
> const pw_ppppmmmm, times 1 dw 1, 1, 1, 1, -1, -1, -1, -1
> const pw_ppmmppmm, times 1 dw 1, 1, -1, -1, 1, 1, -1, -1
> -const pw_pmpmpmpm, times 1 dw 1, -1, 1, -1, 1, -1, 1, -1
> +const pw_pmpmpmpm, times 16 dw 1, -1, 1, -1, 1, -1, 1, -1
> const pw_pmmpzzzz, times 1 dw 1, -1, -1, 1, 0, 0, 0, 0
> const multi_2Row, times 1 dw 1, 2, 3, 4, 1, 2, 3, 4
> const multiH, times 1 dw 9, 10, 11, 12, 13, 14, 15, 16
> @@ -102,7 +102,9 @@
> const pw_planar16_mul, times 1 dw 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
> const pw_planar32_mul, times 1 dw 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16
> const pw_FFFFFFFFFFFFFFF0, dw 0x00
> - times 7 dw 0xff
> + times 7 dw 0xff
> +const hmul_16p, times 16 db 1
> + times 8 db 1, -1
>
>
> ;; 32-bit constants
> @@ -125,7 +127,6 @@
> const pd_n32768, times 4 dd 0xffff8000
>
> const trans8_shuf, times 1 dd 0, 4, 1, 5, 2, 6, 3, 7
> -const deinterleave_shufd, times 1 dd 0, 4, 1, 5, 2, 6, 3, 7
>
> const popcnt_table
> %assign x 0
> diff -r b44cdf8dc08c -r e6fc4b6f16b3 source/common/x86/mc-a.asm
> --- a/source/common/x86/mc-a.asm Tue May 19 15:18:08 2015 +0530
> +++ b/source/common/x86/mc-a.asm Tue May 19 17:47:36 2015 +0530
> @@ -58,7 +58,6 @@
> cextern pw_pixel_max
> cextern sw_64
> cextern pd_32
> -cextern deinterleave_shufd
>
> ;====================================================================================================================
> ;void addAvg (int16_t* src0, int16_t* src1, pixel* dst, intptr_t src0Stride, intptr_t src1Stride, intptr_t dstStride)
> diff -r b44cdf8dc08c -r e6fc4b6f16b3 source/common/x86/pixel-a.asm
> --- a/source/common/x86/pixel-a.asm Tue May 19 15:18:08 2015 +0530
> +++ b/source/common/x86/pixel-a.asm Tue May 19 17:47:36 2015 +0530
> @@ -32,8 +32,6 @@
> %include "x86util.asm"
>
> SECTION_RODATA 32
> -hmul_16p: times 16 db 1
> - times 8 db 1, -1
> hmul_8p: times 8 db 1
> times 4 db 1, -1
> times 8 db 1
> @@ -45,8 +43,7 @@
> times 2 dw 1, -1
> times 4 dw 1
> times 2 dw 1, -1
> -ALIGN 32
> -hmul_w: times 2 dw 1, -1, 1, -1, 1, -1, 1, -1
> +
> ALIGN 32
> transd_shuf1: SHUFFLE_MASK_W 0, 8, 2, 10, 4, 12, 6, 14
> transd_shuf2: SHUFFLE_MASK_W 1, 9, 3, 11, 5, 13, 7, 15
> @@ -54,8 +51,6 @@
> sw_f0: dq 0xfff0, 0
> pd_f0: times 4 dd 0xffff0000
>
> -pw_76543210: dw 0, 1, 2, 3, 4, 5, 6, 7
> -
> SECTION .text
>
> cextern pb_0
> @@ -72,6 +67,7 @@
> cextern pd_1
> cextern popcnt_table
> cextern pd_2
> +cextern hmul_16p
>
> ;=============================================================================
> ; SATD
> @@ -9070,7 +9066,7 @@
> INIT_XMM sse4
> cglobal psyCost_ss_8x8, 4, 6, 15
>
> - mova m13, [hmul_w]
> + mova m13, [pw_pmpmpmpm]
> mova m14, [pw_1]
> add r1, r1
> add r3, r3
> @@ -10220,7 +10216,7 @@
> INIT_XMM sse4
> cglobal psyCost_ss_16x16, 4, 9, 16
>
> - mova m13, [hmul_w]
> + mova m13, [pw_pmpmpmpm]
> mova m14, [pw_1]
> add r1, r1
> add r3, r3
> @@ -10248,7 +10244,7 @@
> INIT_XMM sse4
> cglobal psyCost_ss_32x32, 4, 9, 16
>
> - mova m13, [hmul_w]
> + mova m13, [pw_pmpmpmpm]
> mova m14, [pw_1]
> add r1, r1
> add r3, r3
> @@ -10276,7 +10272,7 @@
> INIT_XMM sse4
> cglobal psyCost_ss_64x64, 4, 9, 16
>
> - mova m13, [hmul_w]
> + mova m13, [pw_pmpmpmpm]
> mova m14, [pw_1]
> add r1, r1
> add r3, r3
> @@ -10717,7 +10713,7 @@
> and rsp, ~63
>
> mova m12, [pw_1]
> - mova m13, [hmul_w]
> + mova m13, [pw_pmpmpmpm]
> add r1, r1
> add r3, r3
>
> @@ -10737,7 +10733,7 @@
> and rsp, ~63
>
> mova m12, [pw_1]
> - mova m13, [hmul_w]
> + mova m13, [pw_pmpmpmpm]
> add r1, r1
> add r3, r3
> pxor m14, m14
> @@ -10771,7 +10767,7 @@
> and rsp, ~63
>
> mova m12, [pw_1]
> - mova m13, [hmul_w]
> + mova m13, [pw_pmpmpmpm]
> add r1, r1
> add r3, r3
> pxor m14, m14
> @@ -10805,7 +10801,7 @@
> and rsp, ~63
>
> mova m12, [pw_1]
> - mova m13, [hmul_w]
> + mova m13, [pw_pmpmpmpm]
> add r1, r1
> add r3, r3
> pxor m14, m14
> diff -r b44cdf8dc08c -r e6fc4b6f16b3 source/common/x86/pixel-util8.asm
> --- a/source/common/x86/pixel-util8.asm Tue May 19 15:18:08 2015 +0530
> +++ b/source/common/x86/pixel-util8.asm Tue May 19 17:47:36 2015 +0530
> @@ -45,13 +45,9 @@
> times 16 db 0
> deinterleave_shuf: times 2 db 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15
> deinterleave_word_shuf: times 2 db 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15
> -hmul_16p: times 16 db 1
> - times 8 db 1, -1
> hmulw_16p: times 8 dw 1
> times 4 dw 1, -1
>
> -trans8_shuf: dd 0, 4, 1, 5, 2, 6, 3, 7
> -
> SECTION .text
>
> cextern pw_1
> @@ -72,6 +68,8 @@
> cextern pb_16
> cextern pb_32
> cextern pb_64
> +cextern hmul_16p
> +cextern trans8_shuf
>
> ;-----------------------------------------------------------------------------
> ; void getResidual(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride)
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
--
Steve Borho
More information about the x265-devel mailing list