[x265-commits] [x265] asm: split 8bpp version of all_angs from intrapred8.asm
Min Chen
chenm003 at 163.com
Tue Mar 10 01:26:09 CET 2015
details: http://hg.videolan.org/x265/rev/96465ffdf1a1
branches:
changeset: 9670:96465ffdf1a1
user: Min Chen <chenm003 at 163.com>
date: Mon Mar 09 19:10:59 2015 -0500
description:
asm: split 8bpp version of all_angs from intrapred8.asm
Subject: [x265] search: fix GCC warnings and nits
details: http://hg.videolan.org/x265/rev/726fe4088f31
branches:
changeset: 9671:726fe4088f31
user: Steve Borho <steve at borho.org>
date: Mon Mar 09 19:21:25 2015 -0500
description:
search: fix GCC warnings and nits
diffstat:
source/common/CMakeLists.txt | 2 +-
source/common/x86/intrapred8.asm | 22973 +----------------------------
source/common/x86/intrapred8_allangs.asm | 23008 +++++++++++++++++++++++++++++
source/encoder/search.cpp | 8 +-
4 files changed, 23013 insertions(+), 22978 deletions(-)
diffs (truncated from 46057 to 300 lines):
diff -r bd4be3c9236e -r 726fe4088f31 source/common/CMakeLists.txt
--- a/source/common/CMakeLists.txt Mon Mar 09 14:18:15 2015 -0700
+++ b/source/common/CMakeLists.txt Mon Mar 09 19:21:25 2015 -0500
@@ -48,7 +48,7 @@ if(ENABLE_ASSEMBLY)
if(HIGH_BIT_DEPTH)
set(A_SRCS ${A_SRCS} sad16-a.asm intrapred16.asm ipfilter16.asm)
else()
- set(A_SRCS ${A_SRCS} sad-a.asm intrapred8.asm ipfilter8.asm loopfilter.asm)
+ set(A_SRCS ${A_SRCS} sad-a.asm intrapred8.asm intrapred8_allangs.asm ipfilter8.asm loopfilter.asm)
endif()
if(NOT X64)
diff -r bd4be3c9236e -r 726fe4088f31 source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm Mon Mar 09 14:18:15 2015 -0700
+++ b/source/common/x86/intrapred8.asm Mon Mar 09 19:21:25 2015 -0500
@@ -30,7 +30,9 @@ pb_0_8 times 8 db 0, 8
pb_unpackbw1 times 2 db 1, 8, 2, 8, 3, 8, 4, 8
pb_swap8: times 2 db 7, 6, 5, 4, 3, 2, 1, 0
c_trans_4x4 db 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15
-tab_Si: db 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7
+const tab_S1, db 15, 14, 12, 11, 10, 9, 7, 6, 5, 4, 2, 1, 0, 0, 0, 0
+const tab_S2, db 0, 1, 3, 5, 7, 9, 11, 13, 0, 0, 0, 0, 0, 0, 0, 0
+const tab_Si, db 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7
pb_fact0: db 0, 2, 4, 6, 8, 10, 12, 14, 0, 0, 0, 0, 0, 0, 0, 0
c_mode32_12_0: db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 7, 0
c_mode32_13_0: db 3, 6, 10, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
@@ -43,7 +45,6 @@ c_mode32_17_0: db 15, 14, 12, 11,
c_mode32_18_0: db 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
c_shuf8_0: db 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
c_deinterval8: db 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15
-tab_S1: db 15, 14, 12, 11, 10, 9, 7, 6, 5, 4, 2, 1, 0, 0, 0, 0
pb_unpackbq: db 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1
c_mode16_12: db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 6
c_mode16_13: db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 11, 7, 4
@@ -52,7 +53,6 @@ c_mode16_15: db 0, 0, 0, 0,
c_mode16_16: db 8, 6, 5, 3, 2, 0, 15, 14, 12, 11, 9, 8, 6, 5, 3, 2
c_mode16_17: db 4, 2, 1, 0, 15, 14, 12, 11, 10, 9, 7, 6, 5, 4, 2, 1
c_mode16_18: db 0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1
-tab_S2: db 0, 1, 3, 5, 7, 9, 11, 13, 0, 0, 0, 0, 0, 0, 0, 0
ALIGN 32
trans8_shuf: dd 0, 4, 1, 5, 2, 6, 3, 7
@@ -9717,22973 +9717,6 @@ cglobal intra_pred_ang32_33, 3,7,8
jnz .loop
RET
-;-----------------------------------------------------------------------------
-; void all_angs_pred_4x4(pixel *dest, pixel *refPix, pixel *filtPix, int bLuma)
-;-----------------------------------------------------------------------------
-INIT_XMM sse4
-cglobal all_angs_pred_4x4, 4, 4, 8
-
-; mode 2
-
-movh m0, [r1 + 10]
-movd [r0], m0
-
-palignr m1, m0, 1
-movd [r0 + 4], m1
-
-palignr m1, m0, 2
-movd [r0 + 8], m1
-
-palignr m1, m0, 3
-movd [r0 + 12], m1
-
-; mode 3
-
-mova m2, [pw_1024]
-
-pslldq m1, m0, 1
-pinsrb m1, [r1 + 9], 0
-punpcklbw m1, m0
-
-lea r3, [ang_table]
-
-pmaddubsw m6, m1, [r3 + 26 * 16]
-pmulhrsw m6, m2
-packuswb m6, m6
-movd [r0 + 16], m6
-
-palignr m0, m1, 2
-
-mova m7, [r3 + 20 * 16]
-
-pmaddubsw m3, m0, m7
-pmulhrsw m3, m2
-packuswb m3, m3
-movd [r0 + 20], m3
-
-; mode 6 [row 3]
-movd [r0 + 76], m3
-
-palignr m3, m1, 4
-
-pmaddubsw m4, m3, [r3 + 14 * 16]
-pmulhrsw m4, m2
-packuswb m4, m4
-movd [r0 + 24], m4
-
-palignr m4, m1, 6
-
-pmaddubsw m4, [r3 + 8 * 16]
-pmulhrsw m4, m2
-packuswb m4, m4
-movd [r0 + 28], m4
-
-; mode 4
-
-pmaddubsw m5, m1, [r3 + 21 * 16]
-pmulhrsw m5, m2
-packuswb m5, m5
-movd [r0 + 32], m5
-
-pmaddubsw m5, m0, [r3 + 10 * 16]
-pmulhrsw m5, m2
-packuswb m5, m5
-movd [r0 + 36], m5
-
-pmaddubsw m5, m0, [r3 + 31 * 16]
-pmulhrsw m5, m2
-packuswb m5, m5
-movd [r0 + 40], m5
-
-pmaddubsw m4, m3, m7
-pmulhrsw m4, m2
-packuswb m4, m4
-movd [r0 + 44], m4
-
-; mode 5
-
-pmaddubsw m5, m1, [r3 + 17 * 16]
-pmulhrsw m5, m2
-packuswb m5, m5
-movd [r0 + 48], m5
-
-pmaddubsw m5, m0, [r3 + 2 * 16]
-pmulhrsw m5, m2
-packuswb m5, m5
-movd [r0 + 52], m5
-
-pmaddubsw m5, m0, [r3 + 19 * 16]
-pmulhrsw m5, m2
-packuswb m5, m5
-movd [r0 + 56], m5
-
-pmaddubsw m4, m3, [r3 + 4 * 16]
-pmulhrsw m4, m2
-packuswb m4, m4
-movd [r0 + 60], m4
-
-; mode 6
-
-pmaddubsw m5, m1, [r3 + 13 * 16]
-pmulhrsw m5, m2
-packuswb m5, m5
-movd [r0 + 64], m5
-
-movd [r0 + 68], m6
-
-pmaddubsw m5, m0, [r3 + 7 * 16]
-pmulhrsw m5, m2
-packuswb m5, m5
-movd [r0 + 72], m5
-
-; mode 7
-
-pmaddubsw m5, m1, [r3 + 9 * 16]
-pmulhrsw m5, m2
-packuswb m5, m5
-movd [r0 + 80], m5
-
-pmaddubsw m5, m1, [r3 + 18 * 16]
-pmulhrsw m5, m2
-packuswb m5, m5
-movd [r0 + 84], m5
-
-pmaddubsw m5, m1, [r3 + 27 * 16]
-pmulhrsw m5, m2
-packuswb m5, m5
-movd [r0 + 88], m5
-
-pmaddubsw m5, m0, [r3 + 4 * 16]
-pmulhrsw m5, m2
-packuswb m5, m5
-movd [r0 + 92], m5
-
-; mode 8
-
-pmaddubsw m5, m1, [r3 + 5 * 16]
-pmulhrsw m5, m2
-packuswb m5, m5
-movd [r0 + 96], m5
-
-pmaddubsw m5, m1, [r3 + 10 * 16]
-pmulhrsw m5, m2
-packuswb m5, m5
-movd [r0 + 100], m5
-
-pmaddubsw m5, m1, [r3 + 15 * 16]
-pmulhrsw m5, m2
-packuswb m5, m5
-movd [r0 + 104], m5
-
-pmaddubsw m5, m1, [r3 + 20 * 16]
-pmulhrsw m5, m2
-packuswb m5, m5
-movd [r0 + 108], m5
-
-; mode 9
-
-pmaddubsw m5, m1, [r3 + 2 * 16]
-pmulhrsw m5, m2
-packuswb m5, m5
-movd [r0 + 112], m5
-
-pmaddubsw m5, m1, [r3 + 4 * 16]
-pmulhrsw m5, m2
-packuswb m5, m5
-movd [r0 + 116], m5
-
-pmaddubsw m5, m1, [r3 + 6 * 16]
-pmulhrsw m5, m2
-packuswb m5, m5
-movd [r0 + 120], m5
-
-pmaddubsw m5, m1, [r3 + 8 * 16]
-pmulhrsw m5, m2
-packuswb m5, m5
-movd [r0 + 124], m5
-
-; mode 10
-
-movd m3, [r1 + 9]
-pshufd m4, m3, 0
-movu [r0 + 128], m4
-
-pxor m5, m5
-movd m7, [r1 + 1]
-pshufd m4, m7, 0
-punpcklbw m4, m5
-
-pinsrb m7, [r1], 0
-pshufb m6, m7, m5
-punpcklbw m6, m5
-
-psubw m4, m6
-psraw m4, 1
-
-pshufb m6, m3, m5
-punpcklbw m6, m5
-
-paddw m4, m6
-packuswb m4, m5
-
-pextrb [r0 + 128], m4, 0
-pextrb [r0 + 132], m4, 1
-pextrb [r0 + 136], m4, 2
-pextrb [r0 + 140], m4, 3
-
-; mode 11
-
-pslldq m1, m1, 2
-pinsrb m1, [r1], 0
-pinsrb m1, [r1 + 9], 1
-
-pmaddubsw m3, m1, [r3 + 30 * 16]
-pmulhrsw m3, m2
-packuswb m3, m3
-movd [r0 + 144], m3
-
-pmaddubsw m3, m1, [r3 + 28 * 16]
-pmulhrsw m3, m2
-packuswb m3, m3
-movd [r0 + 148], m3
-
-pmaddubsw m3, m1, [r3 + 26 * 16]
-pmulhrsw m3, m2
-packuswb m3, m3
-movd [r0 + 152], m3
-
-pmaddubsw m3, m1, [r3 + 24 * 16]
-pmulhrsw m3, m2
-packuswb m3, m3
-movd [r0 + 156], m3
-
-; mode 12
-
-pmaddubsw m3, m1, [r3 + 27 * 16]
-pmulhrsw m3, m2
-packuswb m3, m3
-movd [r0 + 160], m3
-
-pmaddubsw m3, m1, [r3 + 22 * 16]
-pmulhrsw m3, m2
-packuswb m3, m3
-movd [r0 + 164], m3
-
-pmaddubsw m3, m1, [r3 + 17 * 16]
-pmulhrsw m3, m2
More information about the x265-commits mailing list