[x265] [PATCH] asm: avx2 code for weight_sp() for 8bpp
Sumalatha Polureddy
sumalatha at multicorewareinc.com
Thu Apr 2 12:05:46 CEST 2015
Yes, I will flag it in our AVX2 development progress list.
Regards
Sumalatha
On Thu, Apr 2, 2015 at 3:32 PM, chen <chenm003 at 163.com> wrote:
> We can use this version and improve it in the future.
> Please add a flag to the list so that we remember to improve it.
>
> At 2015-04-02 17:51:08,sumalatha at multicorewareinc.com wrote:
> ># HG changeset patch
> ># User Sumalatha Polureddy
> ># Date 1427968258 -19800
> ># Thu Apr 02 15:20:58 2015 +0530
> ># Node ID 7f976e1e89c5940a8bb2f5b965ebd9ed6e6948a6
> ># Parent ac85c775620f1dcb0df056874633cbf916098bd2
> >asm: avx2 code for weight_sp() for 8bpp
> >
> >sse4
> >weight_sp 16.40x 7768.71 127369.20
> >
> >avx2
> >weight_sp 25.83x 4918.74 127040.17
> >
> >diff -r ac85c775620f -r 7f976e1e89c5 source/common/x86/asm-primitives.cpp
> >--- a/source/common/x86/asm-primitives.cpp Tue Mar 31 20:04:28 2015 -0500
> >+++ b/source/common/x86/asm-primitives.cpp Thu Apr 02 15:20:58 2015 +0530
> >@@ -1604,6 +1604,7 @@
> >
> > p.scale1D_128to64 = x265_scale1D_128to64_avx2;
> > p.weight_pp = x265_weight_pp_avx2;
> >+ p.weight_sp = x265_weight_sp_avx2;
> >
> > // intra_pred functions
> > p.cu[BLOCK_8x8].intra_pred[3] = x265_intra_pred_ang8_3_avx2;
> >diff -r ac85c775620f -r 7f976e1e89c5 source/common/x86/pixel-util8.asm
> >--- a/source/common/x86/pixel-util8.asm Tue Mar 31 20:04:28 2015 -0500
> >+++ b/source/common/x86/pixel-util8.asm Thu Apr 02 15:20:58 2015 +0530
> >@@ -1492,6 +1492,84 @@
> > dec r5d
> > jnz .loopH
> > RET
> >+
> >+%if ARCH_X86_64
> >+INIT_YMM avx2
> >+cglobal weight_sp, 6, 9, 7
> >+ mov r7d, r7m
> >+ shl r7d, 16
> >+ or r7d, r6m
> >+ vpbroadcastd m0, r7d ; m0 = times 8 dw w0, round
> >+ movd xm1, r8m ; m1 = [shift]
> >+ vpbroadcastd m2, r9m ; m2 = times 16 dw offset
> >+ vpbroadcastw m3, [pw_1]
> >+ vpbroadcastw m4, [pw_2000]
> >+
> >+ add r2d, r2d ; 2 * srcstride
> >+
> >+ mov r7, r0
> >+ mov r8, r1
> >+.loopH:
> >+ mov r6d, r4d ; width
> >+
> >+ ; save old src and dst
> >+ mov r0, r7 ; src
> >+ mov r1, r8 ; dst
> >+.loopW:
> >+ movu m5, [r0]
> >+ paddw m5, m4
> >+
> >+ punpcklwd m6,m5, m3
> >+ pmaddwd m6, m0
> >+ psrad m6, xm1
> >+ paddd m6, m2
> >+
> >+ punpckhwd m5, m3
> >+ pmaddwd m5, m0
> >+ psrad m5, xm1
> >+ paddd m5, m2
> >+
> >+ packssdw m6, m5
> >+ packuswb m6, m6
> >+ vpermq m6, m6, 10001000b
> >+
> >+ sub r6d, 16
> >+ jl .width8
> >+ movu [r1], xm6
> >+ je .nextH
> >+ add r0, 32
> >+ add r1, 16
> >+ jmp .loopW
> >+
> >+.width8:
> >+ add r6d, 16
> >+ cmp r6d, 8
> >+ jl .width4
> >+ movq [r1], xm6
> >+ je .nextH
> >+ psrldq m6, 8
> >+ sub r6d, 8
> >+ add r1, 8
> >+
> >+.width4:
> >+ cmp r6d, 4
> >+ jl .width2
> >+ movd [r1], xm6
> >+ je .nextH
> >+ add r1, 4
> >+ pshufd m6, m6, 1
> >+
> >+.width2:
> >+ pextrw [r1], xm6, 0
> >+
> >+.nextH:
> >+ lea r7, [r7 + r2]
> >+ lea r8, [r8 + r3]
> >+
> >+ dec r5d
> >+ jnz .loopH
> >+ RET
> >+%endif
> > %endif ; end of (HIGH_BIT_DEPTH == 0)
> >
> >
> >diff -r ac85c775620f -r 7f976e1e89c5 source/common/x86/pixel.h
> >--- a/source/common/x86/pixel.h Tue Mar 31 20:04:28 2015 -0500
> >+++ b/source/common/x86/pixel.h Thu Apr 02 15:20:58 2015 +0530
> >@@ -272,6 +272,7 @@
>
> > int x265_psyCost_ss_16x16_avx2(const int16_t* source, intptr_t sstride, const int16_t* recon, intptr_t rstride);
>
> > int x265_psyCost_ss_32x32_avx2(const int16_t* source, intptr_t sstride, const int16_t* recon, intptr_t rstride);
>
> > int x265_psyCost_ss_64x64_avx2(const int16_t* source, intptr_t sstride, const int16_t* recon, intptr_t rstride);
>
> >+void x265_weight_sp_avx2(const int16_t* src, pixel* dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
> >
> > #undef DECL_PIXELS
> > #undef DECL_HEVC_SSD
> >_______________________________________________
> >x265-devel mailing list
> >x265-devel at videolan.org
> >https://mailman.videolan.org/listinfo/x265-devel
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150402/b1ddb992/attachment-0001.html>
More information about the x265-devel mailing list