[x265-commits] [x265] asm: split 8bpp version of all_angs from intrapred8.asm

Min Chen chenm003 at 163.com
Tue Mar 10 01:26:09 CET 2015


details:   http://hg.videolan.org/x265/rev/96465ffdf1a1
branches:  
changeset: 9670:96465ffdf1a1
user:      Min Chen <chenm003 at 163.com>
date:      Mon Mar 09 19:10:59 2015 -0500
description:
asm: split 8bpp version of all_angs from intrapred8.asm

Subject: [x265] search: fix GCC warnings and nits

details:   http://hg.videolan.org/x265/rev/726fe4088f31
branches:  
changeset: 9671:726fe4088f31
user:      Steve Borho <steve at borho.org>
date:      Mon Mar 09 19:21:25 2015 -0500
description:
search: fix GCC warnings and nits

diffstat:

 source/common/CMakeLists.txt             |      2 +-
 source/common/x86/intrapred8.asm         |  22973 +----------------------------
 source/common/x86/intrapred8_allangs.asm |  23008 +++++++++++++++++++++++++++++
 source/encoder/search.cpp                |      8 +-
 4 files changed, 23013 insertions(+), 22978 deletions(-)

diffs (truncated from 46057 to 300 lines):

diff -r bd4be3c9236e -r 726fe4088f31 source/common/CMakeLists.txt
--- a/source/common/CMakeLists.txt	Mon Mar 09 14:18:15 2015 -0700
+++ b/source/common/CMakeLists.txt	Mon Mar 09 19:21:25 2015 -0500
@@ -48,7 +48,7 @@ if(ENABLE_ASSEMBLY)
     if(HIGH_BIT_DEPTH)
         set(A_SRCS ${A_SRCS} sad16-a.asm intrapred16.asm ipfilter16.asm)
     else()
-        set(A_SRCS ${A_SRCS} sad-a.asm intrapred8.asm ipfilter8.asm loopfilter.asm)
+        set(A_SRCS ${A_SRCS} sad-a.asm intrapred8.asm intrapred8_allangs.asm ipfilter8.asm loopfilter.asm)
     endif()
 
     if(NOT X64)
diff -r bd4be3c9236e -r 726fe4088f31 source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm	Mon Mar 09 14:18:15 2015 -0700
+++ b/source/common/x86/intrapred8.asm	Mon Mar 09 19:21:25 2015 -0500
@@ -30,7 +30,9 @@ pb_0_8        times 8 db  0,  8
 pb_unpackbw1  times 2 db  1,  8,  2,  8,  3,  8,  4,  8
 pb_swap8:     times 2 db  7,  6,  5,  4,  3,  2,  1,  0
 c_trans_4x4           db  0,  4,  8, 12,  1,  5,  9, 13,  2,  6, 10, 14,  3,  7, 11, 15
-tab_Si:               db  0,  1,  2,  3,  4,  5,  6,  7,  0,  1,  2,  3,  4,  5,  6,  7
+const tab_S1,         db 15, 14, 12, 11, 10,  9,  7,  6,  5,  4,  2,  1,  0,  0,  0,  0
+const tab_S2,         db 0, 1, 3, 5, 7, 9, 11, 13, 0, 0, 0, 0, 0, 0, 0, 0
+const tab_Si,         db  0,  1,  2,  3,  4,  5,  6,  7,  0,  1,  2,  3,  4,  5,  6,  7
 pb_fact0:             db  0,  2,  4,  6,  8, 10, 12, 14,  0,  0,  0,  0,  0,  0,  0,  0
 c_mode32_12_0:        db  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 13,  7,  0
 c_mode32_13_0:        db  3,  6, 10, 13,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
@@ -43,7 +45,6 @@ c_mode32_17_0:        db 15, 14, 12, 11,
 c_mode32_18_0:        db 15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1,  0
 c_shuf8_0:            db  0,  1,  1,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  7,  8
 c_deinterval8:        db  0,  8,  1,  9,  2, 10,  3, 11,  4, 12,  5, 13,  6, 14,  7, 15
-tab_S1:               db 15, 14, 12, 11, 10,  9,  7,  6,  5,  4,  2,  1,  0,  0,  0,  0
 pb_unpackbq:          db  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  1,  1,  1
 c_mode16_12:    db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 6
 c_mode16_13:    db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 11, 7, 4
@@ -52,7 +53,6 @@ c_mode16_15:          db  0,  0,  0,  0,
 c_mode16_16:          db  8,  6,  5,  3,  2,  0, 15, 14, 12, 11,  9,  8,  6,  5,  3,  2
 c_mode16_17:          db  4,  2,  1,  0, 15, 14, 12, 11, 10,  9,  7,  6,  5,  4,  2,  1
 c_mode16_18:    db 0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1
-tab_S2:         db 0, 1, 3, 5, 7, 9, 11, 13, 0, 0, 0, 0, 0, 0, 0, 0
 
 ALIGN 32
 trans8_shuf:          dd 0, 4, 1, 5, 2, 6, 3, 7
@@ -9717,22973 +9717,6 @@ cglobal intra_pred_ang32_33, 3,7,8
     jnz        .loop
     RET
 
-;-----------------------------------------------------------------------------
-; void all_angs_pred_4x4(pixel *dest, pixel *refPix, pixel *filtPix, int bLuma)
-;-----------------------------------------------------------------------------
-INIT_XMM sse4
-cglobal all_angs_pred_4x4, 4, 4, 8
-
-; mode 2
-
-movh      m0,         [r1 + 10]
-movd      [r0],       m0
-
-palignr   m1,         m0,      1
-movd      [r0 + 4],   m1
-
-palignr   m1,         m0,      2
-movd      [r0 + 8],   m1
-
-palignr   m1,         m0,      3
-movd      [r0 + 12],  m1
-
-; mode 3
-
-mova          m2,        [pw_1024]
-
-pslldq        m1,        m0,         1
-pinsrb        m1,        [r1 + 9],   0
-punpcklbw     m1,        m0
-
-lea           r3,        [ang_table]
-
-pmaddubsw     m6,        m1,        [r3 + 26 * 16]
-pmulhrsw      m6,        m2
-packuswb      m6,        m6
-movd          [r0 + 16], m6
-
-palignr       m0,        m1,        2
-
-mova          m7,        [r3 + 20 * 16]
-
-pmaddubsw     m3,        m0,        m7
-pmulhrsw      m3,        m2
-packuswb      m3,        m3
-movd          [r0 + 20], m3
-
-; mode 6 [row 3]
-movd          [r0 + 76], m3
-
-palignr       m3,        m1,       4
-
-pmaddubsw     m4,        m3,        [r3 + 14 * 16]
-pmulhrsw      m4,        m2
-packuswb      m4,        m4
-movd          [r0 + 24], m4
-
-palignr       m4,        m1,        6
-
-pmaddubsw     m4,        [r3 + 8 * 16]
-pmulhrsw      m4,        m2
-packuswb      m4,        m4
-movd          [r0 + 28], m4
-
-; mode 4
-
-pmaddubsw     m5,        m1,        [r3 + 21 * 16]
-pmulhrsw      m5,        m2
-packuswb      m5,        m5
-movd          [r0 + 32], m5
-
-pmaddubsw     m5,        m0,        [r3 + 10 * 16]
-pmulhrsw      m5,        m2
-packuswb      m5,        m5
-movd          [r0 + 36], m5
-
-pmaddubsw     m5,        m0,        [r3 + 31 * 16]
-pmulhrsw      m5,        m2
-packuswb      m5,        m5
-movd          [r0 + 40], m5
-
-pmaddubsw     m4,        m3,        m7
-pmulhrsw      m4,        m2
-packuswb      m4,        m4
-movd          [r0 + 44], m4
-
-; mode 5
-
-pmaddubsw     m5,        m1,        [r3 + 17 * 16]
-pmulhrsw      m5,        m2
-packuswb      m5,        m5
-movd          [r0 + 48], m5
-
-pmaddubsw     m5,        m0,        [r3 + 2 * 16]
-pmulhrsw      m5,        m2
-packuswb      m5,        m5
-movd          [r0 + 52], m5
-
-pmaddubsw     m5,        m0,        [r3 + 19 * 16]
-pmulhrsw      m5,        m2
-packuswb      m5,        m5
-movd          [r0 + 56], m5
-
-pmaddubsw     m4,        m3,        [r3 + 4 * 16]
-pmulhrsw      m4,        m2
-packuswb      m4,        m4
-movd          [r0 + 60], m4
-
-; mode 6
-
-pmaddubsw     m5,        m1,        [r3 + 13 * 16]
-pmulhrsw      m5,        m2
-packuswb      m5,        m5
-movd          [r0 + 64], m5
-
-movd          [r0 + 68], m6
-
-pmaddubsw     m5,        m0,        [r3 + 7 * 16]
-pmulhrsw      m5,        m2
-packuswb      m5,        m5
-movd          [r0 + 72], m5
-
-; mode 7
-
-pmaddubsw     m5,        m1,        [r3 + 9 * 16]
-pmulhrsw      m5,        m2
-packuswb      m5,        m5
-movd          [r0 + 80], m5
-
-pmaddubsw     m5,        m1,        [r3 + 18 * 16]
-pmulhrsw      m5,        m2
-packuswb      m5,        m5
-movd          [r0 + 84], m5
-
-pmaddubsw     m5,        m1,        [r3 + 27 * 16]
-pmulhrsw      m5,        m2
-packuswb      m5,        m5
-movd          [r0 + 88], m5
-
-pmaddubsw     m5,        m0,        [r3 + 4 * 16]
-pmulhrsw      m5,        m2
-packuswb      m5,        m5
-movd          [r0 + 92], m5
-
-; mode 8
-
-pmaddubsw     m5,        m1,        [r3 + 5 * 16]
-pmulhrsw      m5,        m2
-packuswb      m5,        m5
-movd          [r0 + 96], m5
-
-pmaddubsw     m5,         m1,       [r3 + 10 * 16]
-pmulhrsw      m5,         m2
-packuswb      m5,         m5
-movd          [r0 + 100], m5
-
-pmaddubsw     m5,         m1,        [r3 + 15 * 16]
-pmulhrsw      m5,         m2
-packuswb      m5,         m5
-movd          [r0 + 104], m5
-
-pmaddubsw     m5,         m1,        [r3 + 20 * 16]
-pmulhrsw      m5,         m2
-packuswb      m5,         m5
-movd          [r0 + 108], m5
-
-; mode 9
-
-pmaddubsw     m5,         m1,        [r3 + 2 * 16]
-pmulhrsw      m5,         m2
-packuswb      m5,         m5
-movd          [r0 + 112], m5
-
-pmaddubsw     m5,         m1,        [r3 + 4 * 16]
-pmulhrsw      m5,         m2
-packuswb      m5,         m5
-movd          [r0 + 116], m5
-
-pmaddubsw     m5,         m1,        [r3 + 6 * 16]
-pmulhrsw      m5,         m2
-packuswb      m5,         m5
-movd          [r0 + 120], m5
-
-pmaddubsw     m5,         m1,        [r3 + 8 * 16]
-pmulhrsw      m5,         m2
-packuswb      m5,         m5
-movd          [r0 + 124], m5
-
-; mode 10
-
-movd         m3,         [r1 + 9]
-pshufd       m4,         m3,        0
-movu         [r0 + 128], m4
-
-pxor         m5,         m5
-movd         m7,         [r1 + 1]
-pshufd       m4,         m7,        0
-punpcklbw    m4,         m5
-
-pinsrb       m7,         [r1],      0
-pshufb       m6,         m7,        m5
-punpcklbw    m6,         m5
-
-psubw        m4,         m6
-psraw        m4,         1
-
-pshufb       m6,         m3,       m5
-punpcklbw    m6,         m5
-
-paddw        m4,         m6
-packuswb     m4,         m5
-
-pextrb       [r0 + 128],  m4,    0
-pextrb       [r0 + 132],  m4,    1
-pextrb       [r0 + 136],  m4,    2
-pextrb       [r0 + 140],  m4,    3
-
-; mode 11
-
-pslldq        m1,        m1,         2
-pinsrb        m1,        [r1],       0
-pinsrb        m1,        [r1 + 9],   1
-
-pmaddubsw     m3,         m1,        [r3 + 30 * 16]
-pmulhrsw      m3,         m2
-packuswb      m3,         m3
-movd          [r0 + 144], m3
-
-pmaddubsw     m3,         m1,        [r3 + 28 * 16]
-pmulhrsw      m3,         m2
-packuswb      m3,         m3
-movd          [r0 + 148], m3
-
-pmaddubsw     m3,         m1,        [r3 + 26 * 16]
-pmulhrsw      m3,         m2
-packuswb      m3,         m3
-movd          [r0 + 152], m3
-
-pmaddubsw     m3,         m1,        [r3 + 24 * 16]
-pmulhrsw      m3,         m2
-packuswb      m3,         m3
-movd          [r0 + 156], m3
-
-; mode 12
-
-pmaddubsw     m3,         m1,        [r3 + 27 * 16]
-pmulhrsw      m3,         m2
-packuswb      m3,         m3
-movd          [r0 + 160], m3
-
-pmaddubsw     m3,         m1,        [r3 + 22 * 16]
-pmulhrsw      m3,         m2
-packuswb      m3,         m3
-movd          [r0 + 164], m3
-
-pmaddubsw     m3,         m1,        [r3 + 17 * 16]
-pmulhrsw      m3,         m2


More information about the x265-commits mailing list