[x265] [PATCH] fix bug in asm code for weightp, the sintel_trailer's width is not multiple of 16
Steve Borho
steve at borho.org
Thu Jan 23 19:31:11 CET 2014
On Jan 23, 2014, at 7:51 AM, Min Chen <chenm003 at 163.com> wrote:
> # HG changeset patch
> # User Min Chen <chenm003 at 163.com>
> # Date 1390485088 -28800
> # Node ID c93c9f313e2d8da2f2d11f15c1619c1a5e967533
> # Parent f1bd676fd90f5a9ac9353fcc164fc190786fd694
> fix bug in asm code for weightp, the sintel_trailer's width is not multiple of 16
I don't think the width can be a problem. We always apply the weight to a plane of reconstructed pixels that has extended borders, so we could weight in multiples of 32 or 64 and it would still be OK.
Did this change actually affect the outputs? If so, it indicates a bug.
>
> diff -r f1bd676fd90f -r c93c9f313e2d source/common/pixel.cpp
> --- a/source/common/pixel.cpp Thu Jan 23 12:44:25 2014 +0530
> +++ b/source/common/pixel.cpp Thu Jan 23 21:51:28 2014 +0800
> @@ -511,6 +511,7 @@
>
> void weight_pp_c(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)
> {
> + assert((width % 8) == 0);
> int x, y;
>
> for (y = 0; y <= height - 1; y++)
> diff -r f1bd676fd90f -r c93c9f313e2d source/common/x86/pixel-util8.asm
> --- a/source/common/x86/pixel-util8.asm Thu Jan 23 12:44:25 2014 +0530
> +++ b/source/common/x86/pixel-util8.asm Thu Jan 23 21:51:28 2014 +0800
> @@ -1198,8 +1198,7 @@
> ;void weight_pp(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)
> ;-----------------------------------------------------------------------------------------------------------------------------------------------
> INIT_XMM sse4
> -cglobal weight_pp, 6, 7, 6
> -
> +cglobal weight_pp, 6,7,6
> mov r6d, r6m
> shl r6d, 6
> movd m0, r6d ; m0 = [w0<<6]
> @@ -1222,8 +1221,7 @@
> mov r6d, r4d
> shr r6d, 4
> .loopW:
> - movh m4, [r0]
> - pmovzxbw m4, m4
> + pmovzxbw m4, [r0]
>
> punpcklwd m3, m4, m5
> pmaddwd m3, m0
> @@ -1237,11 +1235,9 @@
>
> packssdw m3, m4
> packuswb m3, m3
> -
> movh [r1], m3
>
> - movh m4, [r0 + 8]
> - pmovzxbw m4, m4
> + pmovzxbw m4, [r0 + 8]
>
> punpcklwd m3, m4, m5
> pmaddwd m3, m0
> @@ -1255,21 +1251,43 @@
>
> packssdw m3, m4
> packuswb m3, m3
> -
> movh [r1 + 8], m3
>
> add r0, 16
> add r1, 16
> -
> dec r6d
> - jnz .loopW
> -
> - lea r0, [r0 + r2]
> - lea r1, [r1 + r3]
> + jnz .loopW
> +
> + ; check left width
> + test r4d, 15
> + jz .next
> +
> +.width8:
> + pmovzxbw m4, [r0]
> +
> + punpcklwd m3, m4, m5
> + pmaddwd m3, m0
> + psrad m3, m1
> + paddd m3, m2
> +
> + punpckhwd m4, m5
> + pmaddwd m4, m0
> + psrad m4, m1
> + paddd m4, m2
> +
> + packssdw m3, m4
> + packuswb m3, m3
> + movh [r1], m3
> +
> + add r0, 8
> + add r1, 8
> +
> +.next:
> + add r0, r2
> + add r1, r3
>
> dec r5d
> jnz .loopH
> -
> RET
>
> ;-------------------------------------------------------------------------------------------------------------------------------------------------
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
More information about the x265-devel mailing list