[x265] [PATCH] asm: improve intra_pred_dc4_sse4 by merging the top and left filters to reduce code
Min Chen
chenm003 at 163.com
Tue Mar 3 03:32:42 CET 2015
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1425349956 28800
# Node ID e10810d6958f2d0b8b0c671e8fc251a7be0fdd4f
# Parent 64214b2faa324d91a015190b8dc69716ebab41f8
asm: improve intra_pred_dc4_sse4 by merging the top and left filters to reduce code
---
source/common/x86/intrapred8.asm | 21 ++++++++++-----------
1 files changed, 11 insertions(+), 11 deletions(-)
diff -r 64214b2faa32 -r e10810d6958f source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm Mon Mar 02 16:54:00 2015 -0800
+++ b/source/common/x86/intrapred8.asm Mon Mar 02 18:32:36 2015 -0800
@@ -123,6 +123,7 @@
cextern pw_32
cextern pw_257
cextern pw_1024
+cextern pw_4096
cextern pb_unpackbd1
cextern multiL
cextern multiH
@@ -218,9 +219,7 @@
test r4d, r4d
- mov r4d, 4096
- movd m2, r4d
- pmulhrsw m1, m2 ; m1 = (sum + 4) / 8
+ pmulhrsw m1, [pw_4096] ; m1 = (sum + 4) / 8
movd r4d, m1 ; r4d = dc_val
pshufb m1, m0 ; m1 = byte [dc_val ...]
@@ -237,9 +236,13 @@
add r4d, r3d ; r4d = DC * 3 + 2
movd m1, r4d
pshuflw m1, m1, 0 ; m1 = pixDCx3
+ pshufd m1, m1, 0
; filter top
- pmovzxbw m2, [r2]
+ movd m2, [r2]
+ movd m0, [r2 + 9]
+ punpckldq m2, m0
+ pmovzxbw m2, m2
paddw m2, m1
psraw m2, 2
packuswb m2, m2
@@ -255,13 +258,9 @@
; filter left
add r0, r1
- pmovzxbw m2, [r2 + 9]
- paddw m2, m1
- psraw m2, 2
- packuswb m2, m2
- pextrb [r0], m2, 0
- pextrb [r0 + r1], m2, 1
- pextrb [r0 + r1 * 2], m2, 2
+ pextrb [r0], m2, 4
+ pextrb [r0 + r1], m2, 5
+ pextrb [r0 + r1 * 2], m2, 6
.end:
RET
More information about the x265-devel mailing list