[x265] [PATCH] asm: created common asm macro for pixel_sad_64xN functions

dnyaneshwar at multicorewareinc.com dnyaneshwar at multicorewareinc.com
Wed Oct 30 08:28:00 CET 2013


# HG changeset patch
# User Dnyaneshwar Gorade <dnyaneshwar at multicorewareinc.com>
# Date 1383118037 -19800
#      Wed Oct 30 12:57:17 2013 +0530
# Node ID f5e6b97fa8021cc40777a5112a90552f091ff523
# Parent  e574ab7ef2d0dffc20a28097fc04cad30f742f0b
asm: created common asm macro for pixel_sad_64xN functions

diff -r e574ab7ef2d0 -r f5e6b97fa802 source/common/x86/sad-a.asm
--- a/source/common/x86/sad-a.asm	Wed Oct 30 12:54:18 2013 +0530
+++ b/source/common/x86/sad-a.asm	Wed Oct 30 12:57:17 2013 +0530
@@ -196,6 +196,68 @@
     paddd       m0, m1
 %endmacro
 
+%macro PROCESS_SAD_64x4 0      ; adds the SAD of 4 rows (offsets 0..63 of each row) at [r0] vs [r2] into m0
+    movu    m1,  [r2]          ; row 1: unaligned loads of the four chunks at r2+0/16/32/48 (assumes 16-byte m registers -- TODO confirm)
+    movu    m2,  [r2 + 16]
+    movu    m3,  [r2 + 32]
+    movu    m4,  [r2 + 48]
+    psadbw  m1,  [r0]          ; sum of absolute byte differences against the matching chunk at r0
+    psadbw  m2,  [r0 + 16]     ; NOTE(review): non-VEX psadbw with a memory operand requires 16-byte alignment -- presumably r0 rows are aligned; confirm
+    psadbw  m3,  [r0 + 32]
+    psadbw  m4,  [r0 + 48]
+    paddd   m1,  m2            ; combine the four partial sums pairwise ...
+    paddd   m3,  m4
+    paddd   m0,  m1            ; ... then fold them into m0; m0 is only accumulated here, never initialized by this macro
+    paddd   m0,  m3
+    lea     r2,  [r2 + r3]     ; advance r2 by r3 (presumably its row stride -- verify against callers)
+    lea     r0,  [r0 + r1]     ; advance r0 by r1 (presumably its row stride -- verify against callers)
+
+    movu    m1,  [r2]          ; row 2: same sequence as row 1
+    movu    m2,  [r2 + 16]
+    movu    m3,  [r2 + 32]
+    movu    m4,  [r2 + 48]
+    psadbw  m1,  [r0]
+    psadbw  m2,  [r0 + 16]
+    psadbw  m3,  [r0 + 32]
+    psadbw  m4,  [r0 + 48]
+    paddd   m1,  m2
+    paddd   m3,  m4
+    paddd   m0,  m1
+    paddd   m0,  m3
+    lea     r2,  [r2 + r3]
+    lea     r0,  [r0 + r1]
+
+    movu    m1,  [r2]          ; row 3: same sequence as row 1
+    movu    m2,  [r2 + 16]
+    movu    m3,  [r2 + 32]
+    movu    m4,  [r2 + 48]
+    psadbw  m1,  [r0]
+    psadbw  m2,  [r0 + 16]
+    psadbw  m3,  [r0 + 32]
+    psadbw  m4,  [r0 + 48]
+    paddd   m1,  m2
+    paddd   m3,  m4
+    paddd   m0,  m1
+    paddd   m0,  m3
+    lea     r2,  [r2 + r3]
+    lea     r0,  [r0 + r1]
+
+    movu    m1,  [r2]          ; row 4: same sequence as row 1
+    movu    m2,  [r2 + 16]
+    movu    m3,  [r2 + 32]
+    movu    m4,  [r2 + 48]
+    psadbw  m1,  [r0]
+    psadbw  m2,  [r0 + 16]
+    psadbw  m3,  [r0 + 32]
+    psadbw  m4,  [r0 + 48]
+    paddd   m1,  m2
+    paddd   m3,  m4
+    paddd   m0,  m1
+    paddd   m0,  m3
+    lea     r2,  [r2 + r3]     ; pointers are advanced after the last row too, so on exit r0/r2 sit 4 rows further on
+    lea     r0,  [r0 + r1]
+%endmacro                      ; on exit: m0 updated, m1-m4 overwritten, r0/r2 advanced; r1/r3 unchanged
+
 %macro SAD_W16 0
 ;-----------------------------------------------------------------------------
 ; int pixel_sad_16x16( uint8_t *, intptr_t, uint8_t *, intptr_t )


More information about the x265-devel mailing list