[x265] [PATCH] asm: removed duplicate constants
Deepthi Nandakumar
deepthi at multicorewareinc.com
Wed May 20 08:43:14 CEST 2015
Thanks. There are already two patches with similar commit messages. Can you
add more detail to this commit message so they can be told apart?
On Wed, May 20, 2015 at 12:04 PM, <dnyaneshwar at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
> # Date 1432102991 -19800
> # Wed May 20 11:53:11 2015 +0530
> # Node ID 5244b9a0d9a20262c99801a42e346e0b3e07b315
> # Parent cdf14fea15a846f2deca436a8e057711607f41bf
> asm: removed duplicate constants
>
> diff -r cdf14fea15a8 -r 5244b9a0d9a2 source/common/x86/intrapred8.asm
> --- a/source/common/x86/intrapred8.asm Wed May 20 11:02:10 2015 +0530
> +++ b/source/common/x86/intrapred8.asm Wed May 20 11:53:11 2015 +0530
> @@ -664,14 +664,6 @@
> ALIGN 32
> ;; (blkSize - 1 - x)
> pw_planar4_0: dw 3, 2, 1, 0, 3, 2, 1, 0
> -pw_planar4_1: dw 3, 3, 3, 3, 3, 3, 3, 3
> -pw_planar8_0: dw 7, 6, 5, 4, 3, 2, 1, 0
> -pw_planar8_1: dw 7, 7, 7, 7, 7, 7, 7, 7
> -pw_planar16_0: dw 15, 14, 13, 12, 11, 10, 9, 8
> -pw_planar16_1: dw 15, 15, 15, 15, 15, 15, 15, 15
> -pw_planar32_1: dw 31, 31, 31, 31, 31, 31, 31, 31
> -pw_planar32_L: dw 31, 30, 29, 28, 27, 26, 25, 24
> -pw_planar32_H: dw 23, 22, 21, 20, 19, 18, 17, 16
>
> ALIGN 32
> c_ang8_mode_13: db 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14
> @@ -714,7 +706,9 @@
> SECTION .text
>
> cextern pw_2
> +cextern pw_3
> cextern pw_4
> +cextern pw_7
> cextern pw_8
> cextern pw_16
> cextern pw_15
> @@ -1151,7 +1145,7 @@
> pshufd m4, m4, 0xAA
>
> pmullw m3, [multi_2Row] ; (x + 1) * topRight
> - pmullw m0, m1, [pw_planar4_1] ; (blkSize - 1 - y) * above[x]
> + pmullw m0, m1, [pw_3] ; (blkSize - 1 - y) * above[x]
> paddw m3, [pw_4]
> paddw m3, m4
> paddw m3, m0
> @@ -1212,7 +1206,7 @@
> pshufd m4, m4, 0x44
>
> pmullw m3, [multiL] ; (x + 1) * topRight
> - pmullw m0, m1, [pw_planar8_1] ; (blkSize - 1 - y) * above[x]
> + pmullw m0, m1, [pw_7] ; (blkSize - 1 - y) * above[x]
> paddw m3, [pw_8]
> paddw m3, m4
> paddw m3, m0
> @@ -1226,7 +1220,7 @@
> pshufhw m5, m2, 0x55 * (%1 - 4)
> pshufd m5, m5, 0xAA
> %endif
> - pmullw m5, [pw_planar8_0]
> + pmullw m5, [pw_planar16_mul + mmsize]
> paddw m5, m3
> psraw m5, 4
> packuswb m5, m5
> @@ -1269,8 +1263,8 @@
>
> pmullw m4, m3, [multiH] ; (x + 1) * topRight
> pmullw m3, [multiL] ; (x + 1) * topRight
> - pmullw m1, m2, [pw_planar16_1] ; (blkSize - 1 - y) * above[x]
> - pmullw m5, m7, [pw_planar16_1] ; (blkSize - 1 - y) * above[x]
> + pmullw m1, m2, [pw_15] ; (blkSize - 1 - y) * above[x]
> + pmullw m5, m7, [pw_15] ; (blkSize - 1 - y) * above[x]
> paddw m4, [pw_16]
> paddw m3, [pw_16]
> paddw m4, m6
> @@ -1308,8 +1302,8 @@
> paddw m4, m1
> lea r0, [r0 + r1]
> %endif
> - pmullw m0, m5, [pw_planar8_0]
> - pmullw m5, [pw_planar16_0]
> + pmullw m0, m5, [pw_planar16_mul + mmsize]
> + pmullw m5, [pw_planar16_mul]
> paddw m0, m4
> paddw m5, m3
> psraw m5, 5
> @@ -1369,7 +1363,7 @@
> mova m9, m11
> mova m10, m11
>
> - mova m12, [pw_planar32_1]
> + mova m12, [pw_31]
> movh m4, [r2 + 1]
> punpcklbw m4, m7
> psubw m8, m4
> @@ -1394,10 +1388,10 @@
> pmullw m4, m12
> paddw m3, m4
>
> - mova m12, [pw_planar32_L]
> - mova m13, [pw_planar32_H]
> - mova m14, [pw_planar16_0]
> - mova m15, [pw_planar8_0]
> + mova m12, [pw_planar32_mul]
> + mova m13, [pw_planar32_mul + mmsize]
> + mova m14, [pw_planar16_mul]
> + mova m15, [pw_planar16_mul + mmsize]
> %macro PROCESS 1
> pmullw m5, %1, m12
> pmullw m6, %1, m13
> @@ -1480,33 +1474,33 @@
> punpcklbw m4, m7
> psubw m5, m6, m4
> mova [rsp + 0 * mmsize], m5
> - pmullw m4, [pw_planar32_1]
> + pmullw m4, [pw_31]
> paddw m0, m4
>
> movh m4, [r2 + 9]
> punpcklbw m4, m7
> psubw m5, m6, m4
> mova [rsp + 1 * mmsize], m5
> - pmullw m4, [pw_planar32_1]
> + pmullw m4, [pw_31]
> paddw m1, m4
>
> movh m4, [r2 + 17]
> punpcklbw m4, m7
> psubw m5, m6, m4
> mova [rsp + 2 * mmsize], m5
> - pmullw m4, [pw_planar32_1]
> + pmullw m4, [pw_31]
> paddw m2, m4
>
> movh m4, [r2 + 25]
> punpcklbw m4, m7
> psubw m5, m6, m4
> mova [rsp + 3 * mmsize], m5
> - pmullw m4, [pw_planar32_1]
> + pmullw m4, [pw_31]
> paddw m3, m4
>
> %macro PROCESS 1
> - pmullw m5, %1, [pw_planar32_L]
> - pmullw m6, %1, [pw_planar32_H]
> + pmullw m5, %1, [pw_planar32_mul]
> + pmullw m6, %1, [pw_planar32_mul + mmsize]
> paddw m5, m0
> paddw m6, m1
> psraw m5, 6
> @@ -1514,8 +1508,8 @@
> packuswb m5, m6
> movu [r0], m5
>
> - pmullw m5, %1, [pw_planar16_0]
> - pmullw %1, [pw_planar8_0]
> + pmullw m5, %1, [pw_planar16_mul]
> + pmullw %1, [pw_planar16_mul + mmsize]
> paddw m5, m2
> paddw %1, m3
> psraw m5, 6
> @@ -2476,7 +2470,7 @@
> pshufd m4, m4, 0xAA
>
> pmullw m3, [multi_2Row] ; (x + 1) * topRight
> - pmullw m0, m1, [pw_planar4_1] ; (blkSize - 1 - y) * above[x]
> + pmullw m0, m1, [pw_3] ; (blkSize - 1 - y) * above[x]
> mova m6, [pw_planar4_0]
> paddw m3, [pw_4]
> paddw m3, m4
> @@ -2535,8 +2529,8 @@
> punpcklbw m4, m0 ; v_bottomLeft
>
> pmullw m3, [multiL] ; (x + 1) * topRight
> - pmullw m0, m1, [pw_planar8_1] ; (blkSize - 1 - y) * above[x]
> - mova m6, [pw_planar8_0]
> + pmullw m0, m1, [pw_7] ; (blkSize - 1 - y) * above[x]
> + mova m6, [pw_planar16_mul + mmsize]
> paddw m3, [pw_8]
> paddw m3, m4
> paddw m3, m0
> @@ -2588,8 +2582,8 @@
>
> pmullw m4, m3, [multiH] ; (x + 1) * topRight
> pmullw m3, [multiL] ; (x + 1) * topRight
> - pmullw m1, m2, [pw_planar16_1] ; (blkSize - 1 - y) * above[x]
> - pmullw m5, m7, [pw_planar16_1] ; (blkSize - 1 - y) * above[x]
> + pmullw m1, m2, [pw_15] ; (blkSize - 1 - y) * above[x]
> + pmullw m5, m7, [pw_15] ; (blkSize - 1 - y) * above[x]
> paddw m4, [pw_16]
> paddw m3, [pw_16]
> paddw m4, m6
> @@ -2620,8 +2614,8 @@
> %endif
> %endif
> %endif
> - pmullw m0, m5, [pw_planar8_0]
> - pmullw m5, [pw_planar16_0]
> + pmullw m0, m5, [pw_planar16_mul + mmsize]
> + pmullw m5, [pw_planar16_mul]
> paddw m0, m4
> paddw m5, m3
> paddw m3, m6
> @@ -2740,25 +2734,25 @@
> paddw m3, [pw_32]
>
> pmovzxbw m4, [r2 + 1]
> - pmullw m5, m4, [pw_planar32_1]
> + pmullw m5, m4, [pw_31]
> paddw m0, m5
> psubw m5, m6, m4
> mova m8, m5
>
> pmovzxbw m4, [r2 + 9]
> - pmullw m5, m4, [pw_planar32_1]
> + pmullw m5, m4, [pw_31]
> paddw m1, m5
> psubw m5, m6, m4
> mova m9, m5
>
> pmovzxbw m4, [r2 + 17]
> - pmullw m5, m4, [pw_planar32_1]
> + pmullw m5, m4, [pw_31]
> paddw m2, m5
> psubw m5, m6, m4
> mova m10, m5
>
> pmovzxbw m4, [r2 + 25]
> - pmullw m5, m4, [pw_planar32_1]
> + pmullw m5, m4, [pw_31]
> paddw m3, m5
> psubw m5, m6, m4
> mova m11, m5
> @@ -2769,8 +2763,8 @@
> pshufb m4, m7
> punpcklbw m4, m7
>
> - pmullw m5, m4, [pw_planar32_L]
> - pmullw m6, m4, [pw_planar32_H]
> + pmullw m5, m4, [pw_planar32_mul]
> + pmullw m6, m4, [pw_planar32_mul + mmsize]
> paddw m5, m0
> paddw m6, m1
> paddw m0, m8
> @@ -2780,8 +2774,8 @@
> packuswb m5, m6
> movu [r0], m5
>
> - pmullw m5, m4, [pw_planar16_0]
> - pmullw m4, [pw_planar8_0]
> + pmullw m5, m4, [pw_planar16_mul]
> + pmullw m4, [pw_planar16_mul + mmsize]
> paddw m5, m2
> paddw m4, m3
> paddw m2, m10
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150520/61a7f48f/attachment-0001.html>
More information about the x265-devel
mailing list