[x265] [PATCH 3 of 8] asm:intra_pred_ang4_4_sse2 improved ~2% 647.49 -> 634.98 with nits and tweaks
dave
dtyx265 at gmail.com
Sat Mar 28 23:42:32 CET 2015
On 03/28/2015 03:20 PM, chen wrote:
> At 2015-03-29 05:35:21,dtyx265 at gmail.com wrote:
> ># HG changeset patch
> ># User David T Yuen <dtyx265 at gmail.com>
> ># Date 1427576216 25200
> ># Node ID 0a75e3d50518e73f5a199d7519f800a9ff1c2e2c
> ># Parent 6595ba5f989fdd521e268911ddf027665a610e25
> >asm:intra_pred_ang4_4_sse2 improved ~2% 647.49 -> 634.98 with nits and tweaks
> >
> >Corrected parameter count
> >Changed r3 and r4 to r3d and r4d
> >tweaked unpacking for performance
> >
> >diff -r 6595ba5f989f -r 0a75e3d50518 source/common/x86/intrapred8.asm
> >--- a/source/common/x86/intrapred8.asm Sat Mar 28 13:40:20 2015 -0700
> >+++ b/source/common/x86/intrapred8.asm Sat Mar 28 13:56:56 2015 -0700
> >@@ -1413,23 +1413,22 @@
> > movd [r0 + r1], m0
> > RET
> >
> >-cglobal intra_pred_ang4_4, 3,5,8
> >- xor r4, r4
> >- inc r4
> >+cglobal intra_pred_ang4_4, 4,5,8
> Why load the parameter into r3 here, only to overwrite it below?
> >+ xor r4d, r4d
> >+ inc r4d
> > cmp r3m, byte 32
After cmp sets flags, r3 is free to use.
> >- mov r3, 9
> >- cmove r3, r4
> >+ mov r3d, 9
> >+ cmove r3d, r4d
This is to allow reuse for transposed mode 32.
> >
> > movh m0, [r2 + r3] ; [8 7 6 5 4 3 2 1]
> >+ punpcklbw m0, m0
> >+ psrldq m0, 1
> >+ mova m2, m0
> >+ psrldq m2, 2 ; [x x x x x x x x 6 5 5 4 4 3 3 2]
> > mova m1, m0
> >- psrldq m1, 1 ; [x 8 7 6 5 4 3 2]
> >- punpcklbw m0, m1 ; [x 8 8 7 7 6 6 5 5 4 4 3 3 2 2 1]
> >- mova m1, m0
> >- psrldq m1, 2 ; [x x x x x x x x 6 5 5 4 4 3 3 2]
> >- mova m3, m0
> >- psrldq m3, 4 ; [x x x x x x x x 7 6 6 5 5 4 4 3]
> >- punpcklqdq m0, m1
> >- punpcklqdq m2, m1, m3
> >+ psrldq m1, 4 ; [x x x x x x x x 7 6 6 5 5 4 4 3]
> >+ punpcklqdq m0, m2
> >+ punpcklqdq m2, m1
> >
> > lea r3, [pw_ang_table + 18 * 16]
> > mova m4, [r3 + 3 * 16] ; [21]
> >_______________________________________________
> >x265-devel mailing list
> >x265-devel at videolan.org
> >https://mailman.videolan.org/listinfo/x265-devel
>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150328/fe800a10/attachment-0001.html>
More information about the x265-devel
mailing list