[x265] [PATCH] asm: removed duplicate constants

Dnyaneshwar Gorade dnyaneshwar at multicorewareinc.com
Wed May 20 09:24:56 CEST 2015


Sent a new patch with a modified commit message. Thanks.

On Wed, May 20, 2015 at 12:13 PM, Deepthi Nandakumar <
deepthi at multicorewareinc.com> wrote:

> Thanks, there are already 2 patches with similar commit messages. Can you
> add more details to the commit message?
>
> On Wed, May 20, 2015 at 12:04 PM, <dnyaneshwar at multicorewareinc.com>
> wrote:
>
>> # HG changeset patch
>> # User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
>> # Date 1432102991 -19800
>> #      Wed May 20 11:53:11 2015 +0530
>> # Node ID 5244b9a0d9a20262c99801a42e346e0b3e07b315
>> # Parent  cdf14fea15a846f2deca436a8e057711607f41bf
>> asm: removed duplicate constants
>>
>> diff -r cdf14fea15a8 -r 5244b9a0d9a2 source/common/x86/intrapred8.asm
>> --- a/source/common/x86/intrapred8.asm  Wed May 20 11:02:10 2015 +0530
>> +++ b/source/common/x86/intrapred8.asm  Wed May 20 11:53:11 2015 +0530
>> @@ -664,14 +664,6 @@
>>  ALIGN 32
>>  ;; (blkSize - 1 - x)
>>  pw_planar4_0:         dw 3,  2,  1,  0,  3,  2,  1,  0
>> -pw_planar4_1:         dw 3,  3,  3,  3,  3,  3,  3,  3
>> -pw_planar8_0:         dw 7,  6,  5,  4,  3,  2,  1,  0
>> -pw_planar8_1:         dw 7,  7,  7,  7,  7,  7,  7,  7
>> -pw_planar16_0:        dw 15, 14, 13, 12, 11, 10, 9,  8
>> -pw_planar16_1:        dw 15, 15, 15, 15, 15, 15, 15, 15
>> -pw_planar32_1:        dw 31, 31, 31, 31, 31, 31, 31, 31
>> -pw_planar32_L:        dw 31, 30, 29, 28, 27, 26, 25, 24
>> -pw_planar32_H:        dw 23, 22, 21, 20, 19, 18, 17, 16
>>
>>  ALIGN 32
>>  c_ang8_mode_13:       db 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 9, 23, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14
>> @@ -714,7 +706,9 @@
>>  SECTION .text
>>
>>  cextern pw_2
>> +cextern pw_3
>>  cextern pw_4
>> +cextern pw_7
>>  cextern pw_8
>>  cextern pw_16
>>  cextern pw_15
>> @@ -1151,7 +1145,7 @@
>>      pshufd          m4, m4, 0xAA
>>
>>      pmullw          m3, [multi_2Row]        ; (x + 1) * topRight
>> -    pmullw          m0, m1, [pw_planar4_1]  ; (blkSize - 1 - y) * above[x]
>> +    pmullw          m0, m1, [pw_3]          ; (blkSize - 1 - y) * above[x]
>>      paddw           m3, [pw_4]
>>      paddw           m3, m4
>>      paddw           m3, m0
>> @@ -1212,7 +1206,7 @@
>>      pshufd          m4, m4, 0x44
>>
>>      pmullw          m3, [multiL]            ; (x + 1) * topRight
>> -    pmullw          m0, m1, [pw_planar8_1]  ; (blkSize - 1 - y) * above[x]
>> +    pmullw          m0, m1, [pw_7]          ; (blkSize - 1 - y) * above[x]
>>      paddw           m3, [pw_8]
>>      paddw           m3, m4
>>      paddw           m3, m0
>> @@ -1226,7 +1220,7 @@
>>      pshufhw         m5, m2, 0x55 * (%1 - 4)
>>      pshufd          m5, m5, 0xAA
>>  %endif
>> -    pmullw          m5, [pw_planar8_0]
>> +    pmullw          m5, [pw_planar16_mul + mmsize]
>>      paddw           m5, m3
>>      psraw           m5, 4
>>      packuswb        m5, m5
>> @@ -1269,8 +1263,8 @@
>>
>>      pmullw          m4, m3, [multiH]            ; (x + 1) * topRight
>>      pmullw          m3, [multiL]                ; (x + 1) * topRight
>> -    pmullw          m1, m2, [pw_planar16_1]     ; (blkSize - 1 - y) * above[x]
>> -    pmullw          m5, m7, [pw_planar16_1]     ; (blkSize - 1 - y) * above[x]
>> +    pmullw          m1, m2, [pw_15]             ; (blkSize - 1 - y) * above[x]
>> +    pmullw          m5, m7, [pw_15]             ; (blkSize - 1 - y) * above[x]
>>      paddw           m4, [pw_16]
>>      paddw           m3, [pw_16]
>>      paddw           m4, m6
>> @@ -1308,8 +1302,8 @@
>>      paddw           m4, m1
>>      lea             r0, [r0 + r1]
>>  %endif
>> -    pmullw          m0, m5, [pw_planar8_0]
>> -    pmullw          m5, [pw_planar16_0]
>> +    pmullw          m0, m5, [pw_planar16_mul + mmsize]
>> +    pmullw          m5, [pw_planar16_mul]
>>      paddw           m0, m4
>>      paddw           m5, m3
>>      psraw           m5, 5
>> @@ -1369,7 +1363,7 @@
>>      mova            m9, m11
>>      mova            m10, m11
>>
>> -    mova            m12, [pw_planar32_1]
>> +    mova            m12, [pw_31]
>>      movh            m4, [r2 + 1]
>>      punpcklbw       m4, m7
>>      psubw           m8, m4
>> @@ -1394,10 +1388,10 @@
>>      pmullw          m4, m12
>>      paddw           m3, m4
>>
>> -    mova            m12, [pw_planar32_L]
>> -    mova            m13, [pw_planar32_H]
>> -    mova            m14, [pw_planar16_0]
>> -    mova            m15, [pw_planar8_0]
>> +    mova            m12, [pw_planar32_mul]
>> +    mova            m13, [pw_planar32_mul + mmsize]
>> +    mova            m14, [pw_planar16_mul]
>> +    mova            m15, [pw_planar16_mul + mmsize]
>>  %macro PROCESS 1
>>      pmullw          m5, %1, m12
>>      pmullw          m6, %1, m13
>> @@ -1480,33 +1474,33 @@
>>      punpcklbw       m4, m7
>>      psubw           m5, m6, m4
>>      mova            [rsp + 0 * mmsize], m5
>> -    pmullw          m4, [pw_planar32_1]
>> +    pmullw          m4, [pw_31]
>>      paddw           m0, m4
>>
>>      movh            m4, [r2 + 9]
>>      punpcklbw       m4, m7
>>      psubw           m5, m6, m4
>>      mova            [rsp + 1 * mmsize], m5
>> -    pmullw          m4, [pw_planar32_1]
>> +    pmullw          m4, [pw_31]
>>      paddw           m1, m4
>>
>>      movh            m4, [r2 + 17]
>>      punpcklbw       m4, m7
>>      psubw           m5, m6, m4
>>      mova            [rsp + 2 * mmsize], m5
>> -    pmullw          m4, [pw_planar32_1]
>> +    pmullw          m4, [pw_31]
>>      paddw           m2, m4
>>
>>      movh            m4, [r2 + 25]
>>      punpcklbw       m4, m7
>>      psubw           m5, m6, m4
>>      mova            [rsp + 3 * mmsize], m5
>> -    pmullw          m4, [pw_planar32_1]
>> +    pmullw          m4, [pw_31]
>>      paddw           m3, m4
>>
>>  %macro PROCESS 1
>> -    pmullw          m5, %1, [pw_planar32_L]
>> -    pmullw          m6, %1, [pw_planar32_H]
>> +    pmullw          m5, %1, [pw_planar32_mul]
>> +    pmullw          m6, %1, [pw_planar32_mul + mmsize]
>>      paddw           m5, m0
>>      paddw           m6, m1
>>      psraw           m5, 6
>> @@ -1514,8 +1508,8 @@
>>      packuswb        m5, m6
>>      movu            [r0], m5
>>
>> -    pmullw          m5, %1, [pw_planar16_0]
>> -    pmullw          %1, [pw_planar8_0]
>> +    pmullw          m5, %1, [pw_planar16_mul]
>> +    pmullw          %1, [pw_planar16_mul + mmsize]
>>      paddw           m5, m2
>>      paddw           %1, m3
>>      psraw           m5, 6
>> @@ -2476,7 +2470,7 @@
>>      pshufd          m4, m4, 0xAA
>>
>>      pmullw          m3, [multi_2Row]        ; (x + 1) * topRight
>> -    pmullw          m0, m1, [pw_planar4_1]  ; (blkSize - 1 - y) * above[x]
>> +    pmullw          m0, m1, [pw_3]          ; (blkSize - 1 - y) * above[x]
>>      mova            m6, [pw_planar4_0]
>>      paddw           m3, [pw_4]
>>      paddw           m3, m4
>> @@ -2535,8 +2529,8 @@
>>      punpcklbw       m4, m0                  ; v_bottomLeft
>>
>>      pmullw          m3, [multiL]            ; (x + 1) * topRight
>> -    pmullw          m0, m1, [pw_planar8_1]  ; (blkSize - 1 - y) * above[x]
>> -    mova            m6, [pw_planar8_0]
>> +    pmullw          m0, m1, [pw_7]          ; (blkSize - 1 - y) * above[x]
>> +    mova            m6, [pw_planar16_mul + mmsize]
>>      paddw           m3, [pw_8]
>>      paddw           m3, m4
>>      paddw           m3, m0
>> @@ -2588,8 +2582,8 @@
>>
>>      pmullw          m4, m3, [multiH]            ; (x + 1) * topRight
>>      pmullw          m3, [multiL]                ; (x + 1) * topRight
>> -    pmullw          m1, m2, [pw_planar16_1]     ; (blkSize - 1 - y) * above[x]
>> -    pmullw          m5, m7, [pw_planar16_1]     ; (blkSize - 1 - y) * above[x]
>> +    pmullw          m1, m2, [pw_15]             ; (blkSize - 1 - y) * above[x]
>> +    pmullw          m5, m7, [pw_15]             ; (blkSize - 1 - y) * above[x]
>>      paddw           m4, [pw_16]
>>      paddw           m3, [pw_16]
>>      paddw           m4, m6
>> @@ -2620,8 +2614,8 @@
>>  %endif
>>  %endif
>>  %endif
>> -    pmullw          m0, m5, [pw_planar8_0]
>> -    pmullw          m5, [pw_planar16_0]
>> +    pmullw          m0, m5, [pw_planar16_mul + mmsize]
>> +    pmullw          m5, [pw_planar16_mul]
>>      paddw           m0, m4
>>      paddw           m5, m3
>>      paddw           m3, m6
>> @@ -2740,25 +2734,25 @@
>>      paddw           m3, [pw_32]
>>
>>      pmovzxbw        m4, [r2 + 1]
>> -    pmullw          m5, m4, [pw_planar32_1]
>> +    pmullw          m5, m4, [pw_31]
>>      paddw           m0, m5
>>      psubw           m5, m6, m4
>>      mova            m8, m5
>>
>>      pmovzxbw        m4, [r2 + 9]
>> -    pmullw          m5, m4, [pw_planar32_1]
>> +    pmullw          m5, m4, [pw_31]
>>      paddw           m1, m5
>>      psubw           m5, m6, m4
>>      mova            m9, m5
>>
>>      pmovzxbw        m4, [r2 + 17]
>> -    pmullw          m5, m4, [pw_planar32_1]
>> +    pmullw          m5, m4, [pw_31]
>>      paddw           m2, m5
>>      psubw           m5, m6, m4
>>      mova            m10, m5
>>
>>      pmovzxbw        m4, [r2 + 25]
>> -    pmullw          m5, m4, [pw_planar32_1]
>> +    pmullw          m5, m4, [pw_31]
>>      paddw           m3, m5
>>      psubw           m5, m6, m4
>>      mova            m11, m5
>> @@ -2769,8 +2763,8 @@
>>      pshufb          m4, m7
>>      punpcklbw       m4, m7
>>
>> -    pmullw          m5, m4, [pw_planar32_L]
>> -    pmullw          m6, m4, [pw_planar32_H]
>> +    pmullw          m5, m4, [pw_planar32_mul]
>> +    pmullw          m6, m4, [pw_planar32_mul + mmsize]
>>      paddw           m5, m0
>>      paddw           m6, m1
>>      paddw           m0, m8
>> @@ -2780,8 +2774,8 @@
>>      packuswb        m5, m6
>>      movu            [r0], m5
>>
>> -    pmullw          m5, m4, [pw_planar16_0]
>> -    pmullw          m4, [pw_planar8_0]
>> +    pmullw          m5, m4, [pw_planar16_mul]
>> +    pmullw          m4, [pw_planar16_mul + mmsize]
>>      paddw           m5, m2
>>      paddw           m4, m3
>>      paddw           m2, m10
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150520/cd53c21d/attachment-0001.html>


More information about the x265-devel mailing list