[x265] [PATCH] asm: assembly code for IntraAng32x32 all modes

chen chenm003 at 163.com
Mon Feb 3 09:19:08 CET 2014


Some problems:
1. Coefficients are loaded from the constant array using unaligned loads, e.g. 'movu m0, [r4+x]'.
 
2. '%rep' produces large code; it is faster on the testbench but slower in the encoder.

3. Two of the 8x8 functions are disabled.

At 2014-02-03 15:06:59,dnyaneshwar at multicorewareinc.com wrote:
># HG changeset patch
># User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
># Date 1391410961 -19800
>#      Mon Feb 03 12:32:41 2014 +0530
># Node ID 7ad3e3504ea6e5f7355b21c4c7de44ad9e1c0a2a
># Parent  aab88ed133647b779b0a1ca33a1e20584103ef7d
>asm: assembly code for IntraAng32x32 all modes
>
>diff -r aab88ed13364 -r 7ad3e3504ea6 source/common/x86/asm-primitives.cpp
>--- a/source/common/x86/asm-primitives.cpp	Sun Feb 02 13:09:26 2014 -0600
>+++ b/source/common/x86/asm-primitives.cpp	Mon Feb 03 12:32:41 2014 +0530
>@@ -572,9 +572,6 @@
> #define SETUP_INTRA_ANG32(mode, fno, cpu) \
>     p.intra_pred[BLOCK_32x32][mode] = x265_intra_pred_ang32_ ## fno ## _ ## cpu;
> 
>-#define SETUP_INTRA_ANG32(mode, fno, cpu) \
>-    p.intra_pred[BLOCK_32x32][mode] = x265_intra_pred_ang32_ ## fno ## _ ## cpu;
>-
> namespace x265 {
> // private x265 namespace
> void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask)
>@@ -1013,10 +1010,37 @@
>         SETUP_INTRA_ANG4(32, 4, sse4);
>         SETUP_INTRA_ANG4(33, 3, sse4);
> 
>+        SETUP_INTRA_ANG32(3,  3,  sse4);
>+        SETUP_INTRA_ANG32(4,  4,  sse4);
>+        SETUP_INTRA_ANG32(5,  5,  sse4);
>+        SETUP_INTRA_ANG32(6,  6,  sse4);
>+        SETUP_INTRA_ANG32(7,  7,  sse4);
>+        SETUP_INTRA_ANG32(8,  8,  sse4);
>+        SETUP_INTRA_ANG32(9,  9,  sse4);
>+        SETUP_INTRA_ANG32(10, 10, sse4);
>+        SETUP_INTRA_ANG32(11, 11, sse4);
>+        SETUP_INTRA_ANG32(12, 12, sse4);
>+        SETUP_INTRA_ANG32(13, 13, sse4);
>+        SETUP_INTRA_ANG32(14, 14, sse4);
>+        SETUP_INTRA_ANG32(15, 15, sse4);
>+        SETUP_INTRA_ANG32(16, 16, sse4);
>         SETUP_INTRA_ANG32(17, 17, sse4);
>-
>-        SETUP_INTRA_ANG8(3, 3, sse4);
>-        SETUP_INTRA_ANG8(33, 3, sse4);
>+        SETUP_INTRA_ANG32(18, 18, sse4);
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20140203/930f1271/attachment.html>


More information about the x265-devel mailing list