[x265] [PATCH Review Only, Need to code all modes] all_angs_pred_8x8, asm code for mode 3

chen chenm003 at 163.com
Thu Dec 5 16:08:39 CET 2013


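The code is correct; the only problem is the [r5 + N * 16] addressing.
If you disassemble your object code, you will find that there are 8 coefficient loads, but only 1 of them uses the short 1-byte displacement format (the offset must fit in a signed byte, -128..127); the other 7 use the 4-byte format.
If you instead initialize r5 to point at entry 21 of the table (ang_table + 21 * 16) and make the offsets relative to it, 6 of the instructions will use the short format.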
 
Of course, this is a minor issue: code size only affects the instruction decoder on Intel CPUs.

At 2013-12-05 20:07:13,praveen at multicorewareinc.com wrote:
># HG changeset patch
># User Praveen Tiwari
># Date 1386245216 -19800
># Node ID 629c95dc3424212d7cf8915f14f76785a728a887
># Parent  a375e50b53708d4f5ce16096d1b0f26c91cd69f1
>all_angs_pred_8x8, asm code for mode 3
>
>diff -r a375e50b5370 -r 629c95dc3424 source/common/x86/intrapred8.asm
>--- a/source/common/x86/intrapred8.asm	Thu Dec 05 17:25:33 2013 +0530
>+++ b/source/common/x86/intrapred8.asm	Thu Dec 05 17:36:56 2013 +0530
>@@ -1753,4 +1753,69 @@
> punpcklqdq    m1,         m0
> movu          [r0 + 48],  m1
> 
>+; mode 3
>+
>+mova          m7,         [pw_1024]
>+lea           r5,         [ang_table]
>+
>+movu          m0,         [r2 + 1]
>+
>+palignr       m1,         m0,              1
>+palignr       m2,         m0,              2
>+
>+punpcklbw     m3,         m0,        m1
>+pmaddubsw     m3,        [r5 + 26 * 16]
>+pmulhrsw      m3,         m7
>+
>+punpcklbw     m1,         m2
>+pmaddubsw     m1,         [r5 + 20 * 16]
>+pmulhrsw      m1,         m7
>+
>+packuswb      m3,         m1
>+
>+movu          [r0 + 64],  m3
>+
>+palignr       m1,         m0,              3
>+palignr       m3,         m0,              4
>+
>+punpcklbw     m2,         m1
>+pmaddubsw     m2,         [r5 + 14 * 16]
>+pmulhrsw      m2,         m7
>+
>+punpcklbw     m1,         m3
>+pmaddubsw     m1,         [r5 + 8 * 16]
>+pmulhrsw      m1,         m7
>+
>+packuswb      m2,         m1
>+
>+movu          [r0 + 80],  m2
>+
>+palignr       m1,         m0,              5
>+
>+punpcklbw     m3,         m1
>+pmaddubsw     m4,         m3,              [r5 + 2 * 16]
>+pmulhrsw      m4,         m7
>+
>+pmaddubsw     m3,         [r5 + 28 * 16]
>+pmulhrsw      m3,         m7
>+
>+packuswb      m4,         m3
>+
>+movu          [r0 + 96],  m4
>+
>+palignr       m2,         m0,              6
>+palignr       m3,         m0,              7
>+
>+punpcklbw     m1,         m2
>+pmaddubsw     m1,         [r5 + 22 * 16]
>+pmulhrsw      m1,         m7
>+
>+punpcklbw     m2,         m3
>+pmaddubsw     m2,         [r5 + 16 * 16]
>+pmulhrsw      m2,         m7
>+
>+packuswb      m1,         m2
>+
>+movu          [r0 + 112], m1
>+
> RET
>_______________________________________________
>x265-devel mailing list
>x265-devel at videolan.org
>https://mailman.videolan.org/listinfo/x265-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131205/1bd29ffe/attachment.html>


More information about the x265-devel mailing list