[x265] [PATCH] asm: sse4 10bit code for sign primitive
Deepthi Nandakumar
deepthi at multicorewareinc.com
Thu Jun 25 12:04:13 CEST 2015
On Thu, Jun 25, 2015 at 2:19 PM, <rajesh at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Rajesh Paulraj<rajesh at multicorewareinc.com>
> # Date 1435219198 -19800
> # Thu Jun 25 13:29:58 2015 +0530
> # Node ID a03487d6295cf89b065eff36e5c1ec4ee4253243
> # Parent b1af4c36f48a4500a4912373ebcda9a5540b5c15
> asm: sse4 10bit code for sign primitive
>
> calSign 6.16x 356.91 2197.63
>
> diff -r b1af4c36f48a -r a03487d6295c source/common/x86/asm-primitives.cpp
> --- a/source/common/x86/asm-primitives.cpp Wed Jun 24 10:36:15 2015 -0500
> +++ b/source/common/x86/asm-primitives.cpp Thu Jun 25 13:29:58 2015 +0530
> @@ -1097,6 +1097,7 @@
> p.saoCuOrgE3[0] = PFX(saoCuOrgE3_sse4);
> p.saoCuOrgE3[1] = PFX(saoCuOrgE3_sse4);
> p.saoCuOrgB0 = PFX(saoCuOrgB0_sse4);
> + p.sign = x265_calculateSign_sse4;
>
> This should be PFX(calculateSign_sse4) rather than the hard-coded x265_ prefix, to match the PFX() assignments above.
> LUMA_ADDAVG(sse4);
> CHROMA_420_ADDAVG(sse4);
> diff -r b1af4c36f48a -r a03487d6295c source/common/x86/loopfilter.asm
> --- a/source/common/x86/loopfilter.asm Wed Jun 24 10:36:15 2015 -0500
> +++ b/source/common/x86/loopfilter.asm Thu Jun 25 13:29:58 2015 +0530
> @@ -40,6 +40,7 @@
> cextern pw_2
> cextern pw_1023
> cextern pb_movemask
> +cextern pw_1
>
>
>
> ;============================================================================================================
> @@ -1419,3 +1420,49 @@
>
> .end:
> RET
> +
>
> +;-----------------------------------------------------------------------------
> +; void calSign(int8_t *dst, const pixel *src1, const pixel *src2, const int endX)
>
> +;-----------------------------------------------------------------------------
> +%if HIGH_BIT_DEPTH
> +INIT_XMM sse4
> +cglobal calculateSign, 4, 7, 5
> + mova m0, [pw_1]
> + mov r4d, r3d
> + shr r3d, 4
> + add r3d, 1
> + mov r5, r0
> + movu m4, [r0 + r4]
> +.loop
> + movu m1, [r1] ; m2 = pRec[x]
> + movu m2, [r2] ; m3 = pTmpU[x]
> +
> + pcmpgtw m3, m1, m2
> + pcmpgtw m2, m1
> + pand m3, m0
> + por m3, m2
> + packsswb m3, m3
> + movh [r0], xm3
> +
> + movu m1, [r1 + 16] ; m2 = pRec[x]
> + movu m2, [r2 + 16] ; m3 = pTmpU[x]
> +
> + pcmpgtw m3, m1, m2
> + pcmpgtw m2, m1
> + pand m3, m0
> + por m3, m2
> + packsswb m3, m3
> + movh [r0 + 8], xm3
> +
> + add r0, 16
> + add r1, 32
> + add r2, 32
> + dec r3d
> + jnz .loop
> +
> + mov r6, r0
> + sub r6, r5
> + sub r4, r6
> + movu [r0 + r4], m4
> + RET
> +%endif
> diff -r b1af4c36f48a -r a03487d6295c source/common/x86/loopfilter.h
> --- a/source/common/x86/loopfilter.h Wed Jun 24 10:36:15 2015 -0500
> +++ b/source/common/x86/loopfilter.h Thu Jun 25 13:29:58 2015 +0530
> @@ -37,7 +37,8 @@
> void PFX(saoCuOrgB0_ ## cpu)(pixel* rec, const int8_t* offsetBo, int
> ctuWidth, int ctuHeight, intptr_t stride); \
> void PFX(saoCuStatsE2_ ## cpu)(const pixel *fenc, const pixel *rec,
> intptr_t stride, int8_t *upBuff1, int8_t *upBufft, int endX, int endY,
> int32_t *stats, int32_t *count); \
> void PFX(saoCuStatsE3_ ## cpu)(const pixel *fenc, const pixel *rec,
> intptr_t stride, int8_t *upBuff1, int endX, int endY, int32_t *stats,
> int32_t *count); \
> - void PFX(calSign_ ## cpu)(int8_t *dst, const pixel *src1, const pixel
> *src2, const int endX);
> + void PFX(calSign_ ## cpu)(int8_t *dst, const pixel *src1, const pixel
> *src2, const int endX); \
> + void PFX(calculateSign_ ## cpu)(int8_t *dst, const pixel *src1, const
> pixel *src2, const int endX);
>
> What's the difference between calculateSign_ and calSign_? They have the
same function signature and are assigned to the same primitive?
> DECL_SAO(sse4);
> DECL_SAO(avx2);
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150625/72b29db1/attachment.html>
More information about the x265-devel
mailing list