[x265] [PATCH] copy_cnt_4: combine mova and paddb to reduce code size, same speedup
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Tue Sep 9 08:07:07 CEST 2014
# HG changeset patch
# User Praveen Tiwari
# Date 1410242818 -19800
# Node ID ebb84e9dbb0fa0e8c4c9304b2efd57f8ac3d0c05
# Parent 5026f08bd7d64ab0ee22dcc98dd034030aa65db9
copy_cnt_4: combine mova and paddb to reduce code size, same speedup
diff -r 5026f08bd7d6 -r ebb84e9dbb0f source/common/x86/blockcopy8.asm
--- a/source/common/x86/blockcopy8.asm Tue Sep 09 11:29:07 2014 +0530
+++ b/source/common/x86/blockcopy8.asm Tue Sep 09 11:36:58 2014 +0530
@@ -3978,8 +3978,7 @@
not ax
popcnt ax, ax
%else
- mova m1, [pb_1]
- paddb m0, m1
+ paddb m0, [pb_1]
psadbw m0, m2
pshufd m1, m0, 2
paddw m0, m1
More information about the x265-devel mailing list