[x265] [PATCH] asm: improve intra_pred_dc4_sse4 by merging and reducing code

Min Chen chenm003 at 163.com
Tue Mar 3 03:32:42 CET 2015


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1425349956 28800
# Node ID e10810d6958f2d0b8b0c671e8fc251a7be0fdd4f
# Parent  64214b2faa324d91a015190b8dc69716ebab41f8
asm: improve intra_pred_dc4_sse4 by merging and reducing code
---
 source/common/x86/intrapred8.asm     |   21 ++++++++++-----------
 1 files changed, 11 insertions(+), 11 deletions(-)

diff -r 64214b2faa32 -r e10810d6958f source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm	Mon Mar 02 16:54:00 2015 -0800
+++ b/source/common/x86/intrapred8.asm	Mon Mar 02 18:32:36 2015 -0800
@@ -123,6 +123,7 @@
 cextern pw_32
 cextern pw_257
 cextern pw_1024
+cextern pw_4096
 cextern pb_unpackbd1
 cextern multiL
 cextern multiH
@@ -218,9 +219,7 @@
 
     test        r4d, r4d
 
-    mov         r4d, 4096
-    movd        m2, r4d
-    pmulhrsw    m1, m2              ; m1 = (sum + 4) / 8
+    pmulhrsw    m1, [pw_4096]       ; m1 = (sum + 4) / 8
     movd        r4d, m1             ; r4d = dc_val
     pshufb      m1, m0              ; m1 = byte [dc_val ...]
 
@@ -237,9 +236,13 @@
     add         r4d, r3d            ; r4d = DC * 3 + 2
     movd        m1, r4d
     pshuflw     m1, m1, 0           ; m1 = pixDCx3
+    pshufd      m1, m1, 0
 
     ; filter top
-    pmovzxbw    m2, [r2]
+    movd        m2, [r2]
+    movd        m0, [r2 + 9]
+    punpckldq   m2, m0
+    pmovzxbw    m2, m2
     paddw       m2, m1
     psraw       m2, 2
     packuswb    m2, m2
@@ -255,13 +258,9 @@
 
     ; filter left
     add         r0, r1
-    pmovzxbw    m2, [r2 + 9]
-    paddw       m2, m1
-    psraw       m2, 2
-    packuswb    m2, m2
-    pextrb      [r0], m2, 0
-    pextrb      [r0 + r1], m2, 1
-    pextrb      [r0 + r1 * 2], m2, 2
+    pextrb      [r0], m2, 4
+    pextrb      [r0 + r1], m2, 5
+    pextrb      [r0 + r1 * 2], m2, 6
 
 .end:
     RET



More information about the x265-devel mailing list