[x265] [PATCH] asm: created common asm macro for pixel_sad_64xN functions
dnyaneshwar at multicorewareinc.com
dnyaneshwar at multicorewareinc.com
Wed Oct 30 08:28:00 CET 2013
# HG changeset patch
# User Dnyaneshwar Gorade <dnyaneshwar at multicorewareinc.com>
# Date 1383118037 -19800
# Wed Oct 30 12:57:17 2013 +0530
# Node ID f5e6b97fa8021cc40777a5112a90552f091ff523
# Parent e574ab7ef2d0dffc20a28097fc04cad30f742f0b
asm: created common asm macro for pixel_sad_64xN functions
diff -r e574ab7ef2d0 -r f5e6b97fa802 source/common/x86/sad-a.asm
--- a/source/common/x86/sad-a.asm Wed Oct 30 12:54:18 2013 +0530
+++ b/source/common/x86/sad-a.asm Wed Oct 30 12:57:17 2013 +0530
@@ -196,6 +196,68 @@
paddd m0, m1
%endmacro
+%macro PROCESS_SAD_64x4 0 ; accumulate into m0 the SAD of 4 rows ([r2] vs [r0], offsets 0..63 per row); clobbers m1-m4; r0 += 4*r1, r2 += 4*r3
+ movu m1, [r2] ; row 0: load the [r2] row in four chunks at offsets 0/16/32/48 (assumes mN are 16-byte xmm registers - confirm INIT_XMM at the use site)
+ movu m2, [r2 + 16] ; movu is presumably x86inc's unaligned move, so [r2] needs no particular alignment
+ movu m3, [r2 + 32]
+ movu m4, [r2 + 48]
+ psadbw m1, [r0] ; sum of absolute byte differences against the matching [r0] chunk
+ psadbw m2, [r0 + 16] ; NOTE(review): in legacy (non-VEX) SSE encoding this memory operand must be 16-byte aligned - confirm r0 and r1 guarantee that
+ psadbw m3, [r0 + 32]
+ psadbw m4, [r0 + 48]
+ paddd m1, m2 ; fold the four chunk results pairwise ...
+ paddd m3, m4
+ paddd m0, m1 ; ... then add both pairs to the running total; m0 is never zeroed in this macro, so the caller must initialise it
+ paddd m0, m3
+ lea r2, [r2 + r3] ; step both pointers to the next row; r3/r1 are presumably the byte strides of r2/r0 - confirm against callers
+ lea r0, [r0 + r1]
+
+ movu m1, [r2] ; row 1: identical sequence to row 0
+ movu m2, [r2 + 16]
+ movu m3, [r2 + 32]
+ movu m4, [r2 + 48]
+ psadbw m1, [r0]
+ psadbw m2, [r0 + 16]
+ psadbw m3, [r0 + 32]
+ psadbw m4, [r0 + 48]
+ paddd m1, m2
+ paddd m3, m4
+ paddd m0, m1
+ paddd m0, m3
+ lea r2, [r2 + r3]
+ lea r0, [r0 + r1]
+
+ movu m1, [r2] ; row 2: identical sequence to row 0
+ movu m2, [r2 + 16]
+ movu m3, [r2 + 32]
+ movu m4, [r2 + 48]
+ psadbw m1, [r0]
+ psadbw m2, [r0 + 16]
+ psadbw m3, [r0 + 32]
+ psadbw m4, [r0 + 48]
+ paddd m1, m2
+ paddd m3, m4
+ paddd m0, m1
+ paddd m0, m3
+ lea r2, [r2 + r3]
+ lea r0, [r0 + r1]
+
+ movu m1, [r2] ; row 3: identical sequence to row 0
+ movu m2, [r2 + 16]
+ movu m3, [r2 + 32]
+ movu m4, [r2 + 48]
+ psadbw m1, [r0]
+ psadbw m2, [r0 + 16]
+ psadbw m3, [r0 + 32]
+ psadbw m4, [r0 + 48]
+ paddd m1, m2
+ paddd m3, m4
+ paddd m0, m1
+ paddd m0, m3
+ lea r2, [r2 + r3] ; pointers are advanced after the last row too, so back-to-back invocations continue with the next 4 rows
+ lea r0, [r0 + r1]
+%endmacro ; no horizontal reduction is done here: m0 still holds per-lane partial sums when the macro ends
+
%macro SAD_W16 0
;-----------------------------------------------------------------------------
; int pixel_sad_16x16( uint8_t *, intptr_t, uint8_t *, intptr_t )
More information about the x265-devel
mailing list