[x265] [PATCH] asm: removed duplicate constants
Dnyaneshwar Gorade
dnyaneshwar at multicorewareinc.com
Wed May 20 09:24:56 CEST 2015
Sent a new patch with a modified commit message. Thanks.
On Wed, May 20, 2015 at 12:13 PM, Deepthi Nandakumar <
deepthi at multicorewareinc.com> wrote:
> Thanks, there are already 2 patches with similar commit messages. Can you
> add more details to the commit message?
>
> On Wed, May 20, 2015 at 12:04 PM, <dnyaneshwar at multicorewareinc.com>
> wrote:
>
>> # HG changeset patch
>> # User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
>> # Date 1432102991 -19800
>> # Wed May 20 11:53:11 2015 +0530
>> # Node ID 5244b9a0d9a20262c99801a42e346e0b3e07b315
>> # Parent cdf14fea15a846f2deca436a8e057711607f41bf
>> asm: removed duplicate constants
>>
>> diff -r cdf14fea15a8 -r 5244b9a0d9a2 source/common/x86/intrapred8.asm
>> --- a/source/common/x86/intrapred8.asm Wed May 20 11:02:10 2015 +0530
>> +++ b/source/common/x86/intrapred8.asm Wed May 20 11:53:11 2015 +0530
>> @@ -664,14 +664,6 @@
>> ALIGN 32
>> ;; (blkSize - 1 - x)
>> pw_planar4_0: dw 3, 2, 1, 0, 3, 2, 1, 0
>> -pw_planar4_1: dw 3, 3, 3, 3, 3, 3, 3, 3
>> -pw_planar8_0: dw 7, 6, 5, 4, 3, 2, 1, 0
>> -pw_planar8_1: dw 7, 7, 7, 7, 7, 7, 7, 7
>> -pw_planar16_0: dw 15, 14, 13, 12, 11, 10, 9, 8
>> -pw_planar16_1: dw 15, 15, 15, 15, 15, 15, 15, 15
>> -pw_planar32_1: dw 31, 31, 31, 31, 31, 31, 31, 31
>> -pw_planar32_L: dw 31, 30, 29, 28, 27, 26, 25, 24
>> -pw_planar32_H: dw 23, 22, 21, 20, 19, 18, 17, 16
>>
>> ALIGN 32
>> c_ang8_mode_13: db 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9,
>> 23, 9, 23, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14
>> @@ -714,7 +706,9 @@
>> SECTION .text
>>
>> cextern pw_2
>> +cextern pw_3
>> cextern pw_4
>> +cextern pw_7
>> cextern pw_8
>> cextern pw_16
>> cextern pw_15
>> @@ -1151,7 +1145,7 @@
>> pshufd m4, m4, 0xAA
>>
>> pmullw m3, [multi_2Row] ; (x + 1) * topRight
>> - pmullw m0, m1, [pw_planar4_1] ; (blkSize - 1 - y) *
>> above[x]
>> + pmullw m0, m1, [pw_3] ; (blkSize - 1 - y) *
>> above[x]
>> paddw m3, [pw_4]
>> paddw m3, m4
>> paddw m3, m0
>> @@ -1212,7 +1206,7 @@
>> pshufd m4, m4, 0x44
>>
>> pmullw m3, [multiL] ; (x + 1) * topRight
>> - pmullw m0, m1, [pw_planar8_1] ; (blkSize - 1 - y) *
>> above[x]
>> + pmullw m0, m1, [pw_7] ; (blkSize - 1 - y) *
>> above[x]
>> paddw m3, [pw_8]
>> paddw m3, m4
>> paddw m3, m0
>> @@ -1226,7 +1220,7 @@
>> pshufhw m5, m2, 0x55 * (%1 - 4)
>> pshufd m5, m5, 0xAA
>> %endif
>> - pmullw m5, [pw_planar8_0]
>> + pmullw m5, [pw_planar16_mul + mmsize]
>> paddw m5, m3
>> psraw m5, 4
>> packuswb m5, m5
>> @@ -1269,8 +1263,8 @@
>>
>> pmullw m4, m3, [multiH] ; (x + 1) * topRight
>> pmullw m3, [multiL] ; (x + 1) * topRight
>> - pmullw m1, m2, [pw_planar16_1] ; (blkSize - 1 - y) *
>> above[x]
>> - pmullw m5, m7, [pw_planar16_1] ; (blkSize - 1 - y) *
>> above[x]
>> + pmullw m1, m2, [pw_15] ; (blkSize - 1 - y) *
>> above[x]
>> + pmullw m5, m7, [pw_15] ; (blkSize - 1 - y) *
>> above[x]
>> paddw m4, [pw_16]
>> paddw m3, [pw_16]
>> paddw m4, m6
>> @@ -1308,8 +1302,8 @@
>> paddw m4, m1
>> lea r0, [r0 + r1]
>> %endif
>> - pmullw m0, m5, [pw_planar8_0]
>> - pmullw m5, [pw_planar16_0]
>> + pmullw m0, m5, [pw_planar16_mul + mmsize]
>> + pmullw m5, [pw_planar16_mul]
>> paddw m0, m4
>> paddw m5, m3
>> psraw m5, 5
>> @@ -1369,7 +1363,7 @@
>> mova m9, m11
>> mova m10, m11
>>
>> - mova m12, [pw_planar32_1]
>> + mova m12, [pw_31]
>> movh m4, [r2 + 1]
>> punpcklbw m4, m7
>> psubw m8, m4
>> @@ -1394,10 +1388,10 @@
>> pmullw m4, m12
>> paddw m3, m4
>>
>> - mova m12, [pw_planar32_L]
>> - mova m13, [pw_planar32_H]
>> - mova m14, [pw_planar16_0]
>> - mova m15, [pw_planar8_0]
>> + mova m12, [pw_planar32_mul]
>> + mova m13, [pw_planar32_mul + mmsize]
>> + mova m14, [pw_planar16_mul]
>> + mova m15, [pw_planar16_mul + mmsize]
>> %macro PROCESS 1
>> pmullw m5, %1, m12
>> pmullw m6, %1, m13
>> @@ -1480,33 +1474,33 @@
>> punpcklbw m4, m7
>> psubw m5, m6, m4
>> mova [rsp + 0 * mmsize], m5
>> - pmullw m4, [pw_planar32_1]
>> + pmullw m4, [pw_31]
>> paddw m0, m4
>>
>> movh m4, [r2 + 9]
>> punpcklbw m4, m7
>> psubw m5, m6, m4
>> mova [rsp + 1 * mmsize], m5
>> - pmullw m4, [pw_planar32_1]
>> + pmullw m4, [pw_31]
>> paddw m1, m4
>>
>> movh m4, [r2 + 17]
>> punpcklbw m4, m7
>> psubw m5, m6, m4
>> mova [rsp + 2 * mmsize], m5
>> - pmullw m4, [pw_planar32_1]
>> + pmullw m4, [pw_31]
>> paddw m2, m4
>>
>> movh m4, [r2 + 25]
>> punpcklbw m4, m7
>> psubw m5, m6, m4
>> mova [rsp + 3 * mmsize], m5
>> - pmullw m4, [pw_planar32_1]
>> + pmullw m4, [pw_31]
>> paddw m3, m4
>>
>> %macro PROCESS 1
>> - pmullw m5, %1, [pw_planar32_L]
>> - pmullw m6, %1, [pw_planar32_H]
>> + pmullw m5, %1, [pw_planar32_mul]
>> + pmullw m6, %1, [pw_planar32_mul + mmsize]
>> paddw m5, m0
>> paddw m6, m1
>> psraw m5, 6
>> @@ -1514,8 +1508,8 @@
>> packuswb m5, m6
>> movu [r0], m5
>>
>> - pmullw m5, %1, [pw_planar16_0]
>> - pmullw %1, [pw_planar8_0]
>> + pmullw m5, %1, [pw_planar16_mul]
>> + pmullw %1, [pw_planar16_mul + mmsize]
>> paddw m5, m2
>> paddw %1, m3
>> psraw m5, 6
>> @@ -2476,7 +2470,7 @@
>> pshufd m4, m4, 0xAA
>>
>> pmullw m3, [multi_2Row] ; (x + 1) * topRight
>> - pmullw m0, m1, [pw_planar4_1] ; (blkSize - 1 - y) *
>> above[x]
>> + pmullw m0, m1, [pw_3] ; (blkSize - 1 - y) *
>> above[x]
>> mova m6, [pw_planar4_0]
>> paddw m3, [pw_4]
>> paddw m3, m4
>> @@ -2535,8 +2529,8 @@
>> punpcklbw m4, m0 ; v_bottomLeft
>>
>> pmullw m3, [multiL] ; (x + 1) * topRight
>> - pmullw m0, m1, [pw_planar8_1] ; (blkSize - 1 - y) *
>> above[x]
>> - mova m6, [pw_planar8_0]
>> + pmullw m0, m1, [pw_7] ; (blkSize - 1 - y) *
>> above[x]
>> + mova m6, [pw_planar16_mul + mmsize]
>> paddw m3, [pw_8]
>> paddw m3, m4
>> paddw m3, m0
>> @@ -2588,8 +2582,8 @@
>>
>> pmullw m4, m3, [multiH] ; (x + 1) * topRight
>> pmullw m3, [multiL] ; (x + 1) * topRight
>> - pmullw m1, m2, [pw_planar16_1] ; (blkSize - 1 - y) *
>> above[x]
>> - pmullw m5, m7, [pw_planar16_1] ; (blkSize - 1 - y) *
>> above[x]
>> + pmullw m1, m2, [pw_15] ; (blkSize - 1 - y) *
>> above[x]
>> + pmullw m5, m7, [pw_15] ; (blkSize - 1 - y) *
>> above[x]
>> paddw m4, [pw_16]
>> paddw m3, [pw_16]
>> paddw m4, m6
>> @@ -2620,8 +2614,8 @@
>> %endif
>> %endif
>> %endif
>> - pmullw m0, m5, [pw_planar8_0]
>> - pmullw m5, [pw_planar16_0]
>> + pmullw m0, m5, [pw_planar16_mul + mmsize]
>> + pmullw m5, [pw_planar16_mul]
>> paddw m0, m4
>> paddw m5, m3
>> paddw m3, m6
>> @@ -2740,25 +2734,25 @@
>> paddw m3, [pw_32]
>>
>> pmovzxbw m4, [r2 + 1]
>> - pmullw m5, m4, [pw_planar32_1]
>> + pmullw m5, m4, [pw_31]
>> paddw m0, m5
>> psubw m5, m6, m4
>> mova m8, m5
>>
>> pmovzxbw m4, [r2 + 9]
>> - pmullw m5, m4, [pw_planar32_1]
>> + pmullw m5, m4, [pw_31]
>> paddw m1, m5
>> psubw m5, m6, m4
>> mova m9, m5
>>
>> pmovzxbw m4, [r2 + 17]
>> - pmullw m5, m4, [pw_planar32_1]
>> + pmullw m5, m4, [pw_31]
>> paddw m2, m5
>> psubw m5, m6, m4
>> mova m10, m5
>>
>> pmovzxbw m4, [r2 + 25]
>> - pmullw m5, m4, [pw_planar32_1]
>> + pmullw m5, m4, [pw_31]
>> paddw m3, m5
>> psubw m5, m6, m4
>> mova m11, m5
>> @@ -2769,8 +2763,8 @@
>> pshufb m4, m7
>> punpcklbw m4, m7
>>
>> - pmullw m5, m4, [pw_planar32_L]
>> - pmullw m6, m4, [pw_planar32_H]
>> + pmullw m5, m4, [pw_planar32_mul]
>> + pmullw m6, m4, [pw_planar32_mul + mmsize]
>> paddw m5, m0
>> paddw m6, m1
>> paddw m0, m8
>> @@ -2780,8 +2774,8 @@
>> packuswb m5, m6
>> movu [r0], m5
>>
>> - pmullw m5, m4, [pw_planar16_0]
>> - pmullw m4, [pw_planar8_0]
>> + pmullw m5, m4, [pw_planar16_mul]
>> + pmullw m4, [pw_planar16_mul + mmsize]
>> paddw m5, m2
>> paddw m4, m3
>> paddw m2, m10
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150520/cd53c21d/attachment-0001.html>
More information about the x265-devel
mailing list