<div style="line-height:1.7;color:#000000;font-size:14px;font-family:arial"><DIV>Some problems:</DIV>
<DIV>1. The coefficients are loaded from the constant array with unaligned loads, e.g. 'movu m0, [r4+x]'.</DIV>
<DIV> </DIV>
<DIV>2. '%rep' generates large code; it is faster in the testbench but slower in the encoder.<BR><BR>3. The patch disables two of the 8x8 functions.</DIV><PRE><BR>At 2014-02-03 15:06:59, dnyaneshwar@multicorewareinc.com wrote:
># HG changeset patch
># User Dnyaneshwar G <dnyaneshwar@multicorewareinc.com>
># Date 1391410961 -19800
># Mon Feb 03 12:32:41 2014 +0530
># Node ID 7ad3e3504ea6e5f7355b21c4c7de44ad9e1c0a2a
># Parent aab88ed133647b779b0a1ca33a1e20584103ef7d
>asm: assembly code for IntraAng32x32 all modes
>
>diff -r aab88ed13364 -r 7ad3e3504ea6 source/common/x86/asm-primitives.cpp
>--- a/source/common/x86/asm-primitives.cpp Sun Feb 02 13:09:26 2014 -0600
>+++ b/source/common/x86/asm-primitives.cpp Mon Feb 03 12:32:41 2014 +0530
>@@ -572,9 +572,6 @@
> #define SETUP_INTRA_ANG32(mode, fno, cpu) \
> p.intra_pred[BLOCK_32x32][mode] = x265_intra_pred_ang32_ ## fno ## _ ## cpu;
>
>-#define SETUP_INTRA_ANG32(mode, fno, cpu) \
>- p.intra_pred[BLOCK_32x32][mode] = x265_intra_pred_ang32_ ## fno ## _ ## cpu;
>-
> namespace x265 {
> // private x265 namespace
> void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask)
>@@ -1013,10 +1010,37 @@
> SETUP_INTRA_ANG4(32, 4, sse4);
> SETUP_INTRA_ANG4(33, 3, sse4);
>
>+ SETUP_INTRA_ANG32(3, 3, sse4);
>+ SETUP_INTRA_ANG32(4, 4, sse4);
>+ SETUP_INTRA_ANG32(5, 5, sse4);
>+ SETUP_INTRA_ANG32(6, 6, sse4);
>+ SETUP_INTRA_ANG32(7, 7, sse4);
>+ SETUP_INTRA_ANG32(8, 8, sse4);
>+ SETUP_INTRA_ANG32(9, 9, sse4);
>+ SETUP_INTRA_ANG32(10, 10, sse4);
>+ SETUP_INTRA_ANG32(11, 11, sse4);
>+ SETUP_INTRA_ANG32(12, 12, sse4);
>+ SETUP_INTRA_ANG32(13, 13, sse4);
>+ SETUP_INTRA_ANG32(14, 14, sse4);
>+ SETUP_INTRA_ANG32(15, 15, sse4);
>+ SETUP_INTRA_ANG32(16, 16, sse4);
> SETUP_INTRA_ANG32(17, 17, sse4);
>-
>- SETUP_INTRA_ANG8(3, 3, sse4);
>- SETUP_INTRA_ANG8(33, 3, sse4);
>+ SETUP_INTRA_ANG32(18, 18, sse4);
</PRE></div>