[x265] [PATCH] weighted prediction pixel, interface simplification
Steve Borho
steve at borho.org
Mon Oct 20 19:32:10 CEST 2014
On 10/20, praveen at multicorewareinc.com wrote:
> # HG changeset patch
> # User Praveen Tiwari
> # Date 1413793389 -19800
> # Node ID 3366be6ef59eec3d3ca69ed52942708b5d1b3bc6
> # Parent 1e09d0395826bdd01a4b4e46569853a2f04b9e95
> weighted prediction pixel, interface simplification
Queued, thanks
> diff -r 1e09d0395826 -r 3366be6ef59e source/common/pixel.cpp
> --- a/source/common/pixel.cpp Sun Oct 19 20:53:36 2014 -0500
> +++ b/source/common/pixel.cpp Mon Oct 20 13:53:09 2014 +0530
> @@ -640,11 +640,13 @@
> }
> }
>
> -void weight_pp_c(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)
> +void weight_pp_c(pixel *src, pixel *dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset)
> {
> int x, y;
>
> X265_CHECK(!(width & 15), "weightp alignment error\n");
> + X265_CHECK(!((w0 << 6) > 32767), "w0 using more than 16 bits, asm output will mismatch\n");
> + X265_CHECK(!(round > 32767), "round using more than 16 bits, asm output will mismatch\n");
>
> for (y = 0; y <= height - 1; y++)
> {
> @@ -656,8 +658,8 @@
> x++;
> }
>
> - src += srcStride;
> - dst += dstStride;
> + src += stride;
> + dst += stride;
> }
> }
>
> diff -r 1e09d0395826 -r 3366be6ef59e source/common/primitives.h
> --- a/source/common/primitives.h Sun Oct 19 20:53:36 2014 -0500
> +++ b/source/common/primitives.h Mon Oct 20 13:53:09 2014 +0530
> @@ -168,7 +168,7 @@
> typedef void (*dequant_normal_t)(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift);
> typedef int (*count_nonzero_t)(const int16_t *quantCoeff, int numCoeff);
>
> -typedef void (*weightp_pp_t)(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
> +typedef void (*weightp_pp_t)(pixel *src, pixel *dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset);
> typedef void (*weightp_sp_t)(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
> typedef void (*scale_t)(pixel *dst, pixel *src, intptr_t stride);
> typedef void (*downscale_t)(pixel *src0, pixel *dstf, pixel *dsth, pixel *dstv, pixel *dstc,
> diff -r 1e09d0395826 -r 3366be6ef59e source/common/x86/pixel-util.h
> --- a/source/common/x86/pixel-util.h Sun Oct 19 20:53:36 2014 -0500
> +++ b/source/common/x86/pixel-util.h Mon Oct 20 13:53:09 2014 +0530
> @@ -57,7 +57,7 @@
> void x265_dequant_normal_avx2(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift);
> int x265_count_nonzero_ssse3(const int16_t *quantCoeff, int numCoeff);
>
> -void x265_weight_pp_sse4(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
> +void x265_weight_pp_sse4(pixel *src, pixel *dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset);
> void x265_weight_sp_sse4(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
>
> void x265_pixel_ssim_4x4x2_core_mmx2(const uint8_t * pix1, intptr_t stride1,
> diff -r 1e09d0395826 -r 3366be6ef59e source/common/x86/pixel-util8.asm
> --- a/source/common/x86/pixel-util8.asm Sun Oct 19 20:53:36 2014 -0500
> +++ b/source/common/x86/pixel-util8.asm Mon Oct 20 13:53:09 2014 +0530
> @@ -1298,36 +1298,29 @@
>
>
> ;-----------------------------------------------------------------------------------------------------------------------------------------------
> -;void weight_pp(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)
> +;void weight_pp(pixel *src, pixel *dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset)
> ;-----------------------------------------------------------------------------------------------------------------------------------------------
> INIT_XMM sse4
> cglobal weight_pp, 6, 7, 6
>
> + shl r5d, 6 ; m0 = [w0<<6]
> mov r6d, r6m
> - shl r6d, 6
> - movd m0, r6d ; m0 = [w0<<6]
> -
> - movd m1, r7m ; m1 = [round]
> - punpcklwd m0, m1 ; assuming both (w0<<6) and round are using maximum of 16 bits each.
> - pshufd m0, m0, 0 ; m0 = [w0<<6 round]
> -
> - movd m1, r8m
> -
> - movd m2, r9m
> + shl r6d, 16
> + or r6d, r5d ; assuming both (w0<<6) and round are using maximum of 16 bits each.
> + movd m0, r6d
> + pshufd m0, m0, 0 ; m0 = [w0<<6, round]
> + movd m1, r7m
> + movd m2, r8m
> pshufd m2, m2, 0
> -
> mova m5, [pw_1]
> -
> - sub r2d, r4d
> - sub r3d, r4d
> + sub r2d, r3d
> + shr r3d, 4
>
> .loopH:
> - mov r6d, r4d
> - shr r6d, 4
> + mov r5d, r3d
> +
> .loopW:
> - movh m4, [r0]
> - pmovzxbw m4, m4
> -
> + pmovzxbw m4, [r0]
> punpcklwd m3, m4, m5
> pmaddwd m3, m0
> psrad m3, m1
> @@ -1340,12 +1333,9 @@
>
> packssdw m3, m4
> packuswb m3, m3
> -
> movh [r1], m3
>
> - movh m4, [r0 + 8]
> - pmovzxbw m4, m4
> -
> + pmovzxbw m4, [r0 + 8]
> punpcklwd m3, m4, m5
> pmaddwd m3, m0
> psrad m3, m1
> @@ -1358,21 +1348,19 @@
>
> packssdw m3, m4
> packuswb m3, m3
> -
> movh [r1 + 8], m3
>
> add r0, 16
> add r1, 16
>
> - dec r6d
> + dec r5d
> jnz .loopW
>
> lea r0, [r0 + r2]
> - lea r1, [r1 + r3]
> -
> - dec r5d
> + lea r1, [r1 + r2]
> +
> + dec r4d
> jnz .loopH
> -
> RET
>
> ;-------------------------------------------------------------------------------------------------------------------------------------------------
> diff -r 1e09d0395826 -r 3366be6ef59e source/encoder/reference.cpp
> --- a/source/encoder/reference.cpp Sun Oct 19 20:53:36 2014 -0500
> +++ b/source/encoder/reference.cpp Mon Oct 20 13:53:09 2014 +0530
> @@ -92,7 +92,7 @@
> // Computing weighted CU rows
> int correction = IF_INTERNAL_PREC - X265_DEPTH; // intermediate interpolation depth
> int padwidth = (width + 15) & ~15; // weightp assembly needs even 16 byte widths
> - primitives.weight_pp(src, dst, lumaStride, lumaStride, padwidth, height,
> + primitives.weight_pp(src, dst, lumaStride, padwidth, height,
> weight, round << correction, shift + correction, offset);
>
> // Extending Left & Right
> diff -r 1e09d0395826 -r 3366be6ef59e source/encoder/slicetype.cpp
> --- a/source/encoder/slicetype.cpp Sun Oct 19 20:53:36 2014 -0500
> +++ b/source/encoder/slicetype.cpp Mon Oct 20 13:53:09 2014 +0530
> @@ -1383,7 +1383,7 @@
> int correction = IF_INTERNAL_PREC - X265_DEPTH; // intermediate interpolation depth
> int widthHeight = (int)stride;
>
> - primitives.weight_pp(ref->buffer[0], m_wbuffer[0], stride, widthHeight, widthHeight, m_paddedLines,
> + primitives.weight_pp(ref->buffer[0], m_wbuffer[0], stride, widthHeight, m_paddedLines,
> scale, round << correction, denom + correction, offset);
> src = m_weightedRef.fpelPlane;
> }
> @@ -1481,7 +1481,7 @@
> int widthHeight = (int)stride;
>
> for (int i = 0; i < 4; i++)
> - primitives.weight_pp(ref->buffer[i], m_wbuffer[i], stride, widthHeight, widthHeight, m_paddedLines,
> + primitives.weight_pp(ref->buffer[i], m_wbuffer[i], stride, widthHeight, m_paddedLines,
> scale, round << correction, denom + correction, offset);
>
> m_weightedRef.isWeighted = true;
> diff -r 1e09d0395826 -r 3366be6ef59e source/encoder/weightPrediction.cpp
> --- a/source/encoder/weightPrediction.cpp Sun Oct 19 20:53:36 2014 -0500
> +++ b/source/encoder/weightPrediction.cpp Mon Oct 20 13:53:09 2014 +0530
> @@ -186,7 +186,7 @@
> int correction = IF_INTERNAL_PREC - X265_DEPTH; /* intermediate interpolation depth */
> int pwidth = ((width + 15) >> 4) << 4;
>
> - primitives.weight_pp(ref, weightTemp, stride, stride, pwidth, height,
> + primitives.weight_pp(ref, weightTemp, stride, pwidth, height,
> weight, round << correction, denom + correction, offset);
> ref = weightTemp;
> }
> diff -r 1e09d0395826 -r 3366be6ef59e source/test/pixelharness.cpp
> --- a/source/test/pixelharness.cpp Sun Oct 19 20:53:36 2014 -0500
> +++ b/source/test/pixelharness.cpp Mon Oct 20 13:53:09 2014 +0530
> @@ -334,8 +334,8 @@
> for (int i = 0; i < ITERS; i++)
> {
> int index = i % TEST_CASES;
> - checked(opt, pixel_test_buff[index] + j, opt_dest, stride, stride, width, height, w0, round, shift, offset);
> - ref(pixel_test_buff[index] + j, ref_dest, stride, stride, width, height, w0, round, shift, offset);
> + checked(opt, pixel_test_buff[index] + j, opt_dest, stride, width, height, w0, round, shift, offset);
> + ref(pixel_test_buff[index] + j, ref_dest, stride, width, height, w0, round, shift, offset);
>
> if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
> return false;
> @@ -1775,7 +1775,7 @@
> if (opt.weight_pp)
> {
> HEADER0("weight_pp");
> - REPORT_SPEEDUP(opt.weight_pp, ref.weight_pp, pbuf1, pbuf2, 64, 64, 32, 32, 128, 1 << 9, 10, 100);
> + REPORT_SPEEDUP(opt.weight_pp, ref.weight_pp, pbuf1, pbuf2, 64, 32, 32, 128, 1 << 9, 10, 100);
> }
>
> if (opt.weight_sp)
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
--
Steve Borho
More information about the x265-devel mailing list