[x265] [PATCH] fix bug in asm code for weightp, the sintel_trailer's width is not multiple of 16

Steve Borho steve at borho.org
Thu Jan 23 19:31:11 CET 2014


On Jan 23, 2014, at 7:51 AM, Min Chen <chenm003 at 163.com> wrote:

> # HG changeset patch
> # User Min Chen <chenm003 at 163.com>
> # Date 1390485088 -28800
> # Node ID c93c9f313e2d8da2f2d11f15c1619c1a5e967533
> # Parent  f1bd676fd90f5a9ac9353fcc164fc190786fd694
> fix bug in asm code for weightp, the sintel_trailer's width is not multiple of 16

I don't think the width can be a problem. We always apply the weight to a plane of reconstructed pixels whose borders have been extended, so we could weight in multiples of 32 or 64 and it would still be OK.

Did this change actually affect the outputs? If so, it indicates a bug.

> 
> diff -r f1bd676fd90f -r c93c9f313e2d source/common/pixel.cpp
> --- a/source/common/pixel.cpp	Thu Jan 23 12:44:25 2014 +0530
> +++ b/source/common/pixel.cpp	Thu Jan 23 21:51:28 2014 +0800
> @@ -511,6 +511,7 @@
> 
> void weight_pp_c(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)
> {
> +    assert((width % 8) == 0);
>     int x, y;
> 
>     for (y = 0; y <= height - 1; y++)
> diff -r f1bd676fd90f -r c93c9f313e2d source/common/x86/pixel-util8.asm
> --- a/source/common/x86/pixel-util8.asm	Thu Jan 23 12:44:25 2014 +0530
> +++ b/source/common/x86/pixel-util8.asm	Thu Jan 23 21:51:28 2014 +0800
> @@ -1198,8 +1198,7 @@
> ;void weight_pp(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)
> ;-----------------------------------------------------------------------------------------------------------------------------------------------
> INIT_XMM sse4
> -cglobal weight_pp, 6, 7, 6
> -
> +cglobal weight_pp, 6,7,6
>     mov         r6d, r6m
>     shl         r6d, 6
>     movd        m0, r6d         ; m0 = [w0<<6]
> @@ -1222,8 +1221,7 @@
>     mov         r6d, r4d
>     shr         r6d, 4
> .loopW:
> -    movh        m4, [r0]
> -    pmovzxbw    m4, m4
> +    pmovzxbw    m4, [r0]
> 
>     punpcklwd   m3, m4, m5
>     pmaddwd     m3, m0
> @@ -1237,11 +1235,9 @@
> 
>     packssdw    m3, m4
>     packuswb    m3, m3
> -
>     movh        [r1], m3
> 
> -    movh        m4, [r0 + 8]
> -    pmovzxbw    m4, m4
> +    pmovzxbw    m4, [r0 + 8]
> 
>     punpcklwd   m3, m4, m5
>     pmaddwd     m3, m0
> @@ -1255,21 +1251,43 @@
> 
>     packssdw    m3, m4
>     packuswb    m3, m3
> -
>     movh        [r1 + 8], m3
> 
>     add         r0, 16
>     add         r1, 16
> -
>     dec         r6d
> -    jnz         .loopW
> -
> -    lea         r0, [r0 + r2]
> -    lea         r1, [r1 + r3]
> +    jnz        .loopW
> +
> +    ; check left width
> +    test        r4d, 15
> +    jz         .next
> +
> +.width8:
> +    pmovzxbw    m4, [r0]
> +
> +    punpcklwd   m3, m4, m5
> +    pmaddwd     m3, m0
> +    psrad       m3, m1
> +    paddd       m3, m2
> +
> +    punpckhwd   m4, m5
> +    pmaddwd     m4, m0
> +    psrad       m4, m1
> +    paddd       m4, m2
> +
> +    packssdw    m3, m4
> +    packuswb    m3, m3
> +    movh        [r1], m3
> +
> +    add         r0, 8
> +    add         r1, 8
> +
> +.next:
> +    add         r0, r2
> +    add         r1, r3
> 
>     dec         r5d
>     jnz         .loopH
> -
>     RET
> 
> ;-------------------------------------------------------------------------------------------------------------------------------------------------
> 
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel



More information about the x265-devel mailing list