[x265] [PATCH] Fixed 32 bit bug in intrapred dc4 sse2

Deepthi Nandakumar deepthi at multicorewareinc.com
Thu Feb 26 05:31:41 CET 2015


Thanks, pushed.

On Thu, Feb 26, 2015 at 8:03 AM, <dtyx265 at gmail.com> wrote:

> # HG changeset patch
> # User David T Yuen <dtyx265 at gmail.com>
> # Date 1424917924 28800
> # Node ID 13346cb90bff040492f0688226f44182bb6b97d8
> # Parent  74c716607444c77b9d5ea1dce5b99c875f0b20fe
> Fixed 32 bit bug in intrapred dc4 sse2
>
> Changed the register the bytes are written from to one that supports
> low-byte access in 32-bit builds.
> Also moved the pw_257 constant to const-a.asm.
>
> diff -r 74c716607444 -r 13346cb90bff source/common/x86/const-a.asm
> --- a/source/common/x86/const-a.asm     Tue Feb 24 13:39:16 2015 +0530
> +++ b/source/common/x86/const-a.asm     Wed Feb 25 18:32:04 2015 -0800
> @@ -37,6 +37,7 @@
>  const pw_32,       times 16 dw 32
>  const pw_128,      times 16 dw 128
>  const pw_256,      times 16 dw 256
> +const pw_257,      times 16 dw 257
>  const pw_512,      times 16 dw 512
>  const pw_1023,     times 8  dw 1023
>  const pw_1024,     times 16 dw 1024
> diff -r 74c716607444 -r 13346cb90bff source/common/x86/intrapred8.asm
> --- a/source/common/x86/intrapred8.asm  Tue Feb 24 13:39:16 2015 +0530
> +++ b/source/common/x86/intrapred8.asm  Wed Feb 25 18:32:04 2015 -0800
> @@ -65,8 +65,6 @@
>  pw_planar32_L:        dw 31, 30, 29, 28, 27, 26, 25, 24
>  pw_planar32_H:        dw 23, 22, 21, 20, 19, 18, 17, 16
>
> -pw_257:         times 8 dw 257
> -
>  const ang_table
>  %assign x 0
>  %rep 32
> @@ -80,6 +78,7 @@
>  cextern pw_8
>  cextern pw_16
>  cextern pw_32
> +cextern pw_257
>  cextern pw_1024
>  cextern pb_unpackbd1
>  cextern multiL
> @@ -144,12 +143,21 @@
>      paddw       m2, m1
>      psraw       m2, 2
>      packuswb    m2, m2
> +%if ARCH_X86_64
>      movq        r4, m2
>      mov         [r0], r4b
>      shr         r4, 8
>      mov         [r0 + r1], r4b
>      shr         r4, 8
>      mov         [r0 + r1 * 2], r4b
> +%else
> +    movd        r2d, m2
> +    mov         [r0], r2b
> +    shr         r2, 8
> +    mov         [r0 + r1], r2b
> +    shr         r2, 8
> +    mov         [r0 + r1 * 2], r2b
> +%endif
>  .end:
>      RET
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150226/587c5655/attachment-0001.html>


More information about the x265-devel mailing list