[x265] [PATCH] copy_cnt_4: combine mova and paddb to reduce code size, same speedup

Praveen Tiwari praveen at multicorewareinc.com
Tue Sep 9 08:07:07 CEST 2014


# HG changeset patch
# User Praveen Tiwari
# Date 1410242818 -19800
# Node ID ebb84e9dbb0fa0e8c4c9304b2efd57f8ac3d0c05
# Parent  5026f08bd7d64ab0ee22dcc98dd034030aa65db9
copy_cnt_4: combine mova and paddb to reduce code size, same speedup

diff -r 5026f08bd7d6 -r ebb84e9dbb0f source/common/x86/blockcopy8.asm
--- a/source/common/x86/blockcopy8.asm	Tue Sep 09 11:29:07 2014 +0530
+++ b/source/common/x86/blockcopy8.asm	Tue Sep 09 11:36:58 2014 +0530
@@ -3978,8 +3978,7 @@
     not         ax
     popcnt      ax, ax
 %else
-    mova        m1, [pb_1]
-    paddb       m0, m1
+    paddb       m0, [pb_1]
     psadbw      m0, m2
     pshufd      m1, m0, 2
     paddw       m0, m1


More information about the x265-devel mailing list