[x265] [PATCH] asm: pixel_satd_64xN for 16bpp

yuvaraj at multicorewareinc.com yuvaraj at multicorewareinc.com
Tue Dec 3 10:43:33 CET 2013


# HG changeset patch
# User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
# Date 1386063805 -19800
#      Tue Dec 03 15:13:25 2013 +0530
# Node ID a616349e2a19c18369a9cf4524202fa6ebe5b6be
# Parent  70be1456ef76e3289d91842e0de59cfa0bf06817
asm: pixel_satd_64xN for 16bpp

diff -r 70be1456ef76 -r a616349e2a19 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Tue Dec 03 15:04:38 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp	Tue Dec 03 15:13:25 2013 +0530
@@ -500,6 +500,10 @@
         p.satd[LUMA_12x16] = x265_pixel_satd_12x16_sse2;
         p.satd[LUMA_24x32] = x265_pixel_satd_24x32_sse2;
         p.satd[LUMA_48x64] = x265_pixel_satd_48x64_sse2;
+        p.satd[LUMA_64x16] = x265_pixel_satd_64x16_sse2;
+        p.satd[LUMA_64x32] = x265_pixel_satd_64x32_sse2;
+        p.satd[LUMA_64x48] = x265_pixel_satd_64x48_sse2;
+        p.satd[LUMA_64x64] = x265_pixel_satd_64x64_sse2;
 
         p.sa8d_inter[LUMA_8x8] = x265_pixel_sa8d_8x8_sse2;
         p.sa8d_inter[LUMA_16x16] = x265_pixel_sa8d_16x16_sse2;
diff -r 70be1456ef76 -r a616349e2a19 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm	Tue Dec 03 15:04:38 2013 +0530
+++ b/source/common/x86/pixel-a.asm	Tue Dec 03 15:13:25 2013 +0530
@@ -1644,35 +1644,42 @@
     mov r7, r2
     call pixel_satd_8x8_internal
     call pixel_satd_8x8_internal
-    lea r0, [r6 + 8]
-    lea r2, [r7 + 8]
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    lea r0, [r6 + 16]
-    lea r2, [r7 + 16]
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    lea r0, [r6 + 24]
-    lea r2, [r7 + 24]
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    lea r0, [r6 + 32]
-    lea r2, [r7 + 32]
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    lea r0, [r6 + 40]
-    lea r2, [r7 + 40]
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    lea r0, [r6 + 48]
-    lea r2, [r7 + 48]
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    lea r0, [r6 + 56]
-    lea r2, [r7 + 56]
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    SATD_END_SSE2 m6
+    SATD_ACCUM m6, m0, m7
+    lea r0, [r6 + 8*SIZEOF_PIXEL]
+    lea r2, [r7 + 8*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_ACCUM m6, m0, m7
+    lea r0, [r6 + 16*SIZEOF_PIXEL]
+    lea r2, [r7 + 16*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_ACCUM m6, m0, m7
+    lea r0, [r6 + 24*SIZEOF_PIXEL]
+    lea r2, [r7 + 24*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_ACCUM m6, m0, m7
+    lea r0, [r6 + 32*SIZEOF_PIXEL]
+    lea r2, [r7 + 32*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_ACCUM m6, m0, m7
+    lea r0, [r6 + 40*SIZEOF_PIXEL]
+    lea r2, [r7 + 40*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_ACCUM m6, m0, m7
+    lea r0, [r6 + 48*SIZEOF_PIXEL]
+    lea r2, [r7 + 48*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_ACCUM m6, m0, m7
+    lea r0, [r6 + 56*SIZEOF_PIXEL]
+    lea r2, [r7 + 56*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_END_SSE2 m6, m7
 %else
 cglobal pixel_satd_64x16, 4,7,8,0-4    ;if !WIN64
     SATD_START_SSE2 m6, m7
@@ -1680,42 +1687,52 @@
     mov [rsp], r2
     call pixel_satd_8x8_internal
     call pixel_satd_8x8_internal
-    lea r0, [r6 + 8]
+%if HIGH_BIT_DEPTH
+    pxor       m7, m7
+%endif
+    SATD_ACCUM m6, m0, m7
+    lea r0, [r6 + 8*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2,8
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    lea r0, [r6 + 16]
+    add r2,8*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_ACCUM m6, m0, m7
+    lea r0, [r6 + 16*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2,16
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    lea r0, [r6 + 24]
+    add r2,16*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_ACCUM m6, m0, m7
+    lea r0, [r6 + 24*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2,24
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    lea r0, [r6 + 32]
+    add r2,24*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_ACCUM m6, m0, m7
+    lea r0, [r6 + 32*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2,32
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    lea r0, [r6 + 40]
+    add r2,32*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_ACCUM m6, m0, m7
+    lea r0, [r6 + 40*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2,40
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    lea r0, [r6 + 48]
+    add r2,40*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_ACCUM m6, m0, m7
+    lea r0, [r6 + 48*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2,48
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    lea r0, [r6 + 56]
+    add r2,48*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_ACCUM m6, m0, m7
+    lea r0, [r6 + 56*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2,56
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    SATD_END_SSE2 m6
+    add r2,56*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_END_SSE2 m6, m7
 %endif
 
 %if WIN64
@@ -1727,44 +1744,44 @@
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    lea r0, [r6 + 8]
-    lea r2, [r7 + 8]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 16]
-    lea r2, [r7 + 16]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 24]
-    lea r2, [r7 + 24]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 32]
-    lea r2, [r7 + 32]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 40]
-    lea r2, [r7 + 40]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 48]
-    lea r2, [r7 + 48]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 56]
-    lea r2, [r7 + 56]
+    lea r0, [r6 + 8*SIZEOF_PIXEL]
+    lea r2, [r7 + 8*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 16*SIZEOF_PIXEL]
+    lea r2, [r7 + 16*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 24*SIZEOF_PIXEL]
+    lea r2, [r7 + 24*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 32*SIZEOF_PIXEL]
+    lea r2, [r7 + 32*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 40*SIZEOF_PIXEL]
+    lea r2, [r7 + 40*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 48*SIZEOF_PIXEL]
+    lea r2, [r7 + 48*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 56*SIZEOF_PIXEL]
+    lea r2, [r7 + 56*SIZEOF_PIXEL]
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
@@ -1785,51 +1802,51 @@
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    lea r0, [r6 + 8]
+    lea r0, [r6 + 8*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 8
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 16]
+    add r2, 8*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 16*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 16
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 24]
+    add r2, 16*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 24*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 24
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 32]
+    add r2, 24*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 32*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 32
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 40]
+    add r2, 32*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 40*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 40
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 48]
+    add r2, 40*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 48*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 48
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 56]
+    add r2, 48*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 56*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 56
+    add r2, 56*SIZEOF_PIXEL
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
@@ -1854,56 +1871,56 @@
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    lea r0, [r6 + 8]
-    lea r2, [r7 + 8]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 16]
-    lea r2, [r7 + 16]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 24]
-    lea r2, [r7 + 24]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 32]
-    lea r2, [r7 + 32]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 40]
-    lea r2, [r7 + 40]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 48]
-    lea r2, [r7 + 48]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 56]
-    lea r2, [r7 + 56]
+    lea r0, [r6 + 8*SIZEOF_PIXEL]
+    lea r2, [r7 + 8*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 16*SIZEOF_PIXEL]
+    lea r2, [r7 + 16*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 24*SIZEOF_PIXEL]
+    lea r2, [r7 + 24*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 32*SIZEOF_PIXEL]
+    lea r2, [r7 + 32*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 40*SIZEOF_PIXEL]
+    lea r2, [r7 + 40*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 48*SIZEOF_PIXEL]
+    lea r2, [r7 + 48*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 56*SIZEOF_PIXEL]
+    lea r2, [r7 + 56*SIZEOF_PIXEL]
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
@@ -1928,63 +1945,63 @@
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    lea r0, [r6 + 8]
+    lea r0, [r6 + 8*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 8
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 16]
+    add r2, 8*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 16*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 16
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 24]
+    add r2, 16*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 24*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 24
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 32]
+    add r2, 24*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 32*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 32
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 40]
+    add r2, 32*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 40*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 40
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 48]
+    add r2, 40*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 48*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 48
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 56]
+    add r2, 48*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 56*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 56
+    add r2, 56*SIZEOF_PIXEL
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
@@ -2013,68 +2030,68 @@
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    lea r0, [r6 + 8]
-    lea r2, [r7 + 8]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 16]
-    lea r2, [r7 + 16]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 24]
-    lea r2, [r7 + 24]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 32]
-    lea r2, [r7 + 32]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 40]
-    lea r2, [r7 + 40]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 48]
-    lea r2, [r7 + 48]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 56]
-    lea r2, [r7 + 56]
+    lea r0, [r6 + 8*SIZEOF_PIXEL]
+    lea r2, [r7 + 8*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 16*SIZEOF_PIXEL]
+    lea r2, [r7 + 16*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 24*SIZEOF_PIXEL]
+    lea r2, [r7 + 24*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 32*SIZEOF_PIXEL]
+    lea r2, [r7 + 32*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 40*SIZEOF_PIXEL]
+    lea r2, [r7 + 40*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 48*SIZEOF_PIXEL]
+    lea r2, [r7 + 48*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 56*SIZEOF_PIXEL]
+    lea r2, [r7 + 56*SIZEOF_PIXEL]
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
@@ -2103,75 +2120,75 @@
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    lea r0, [r6 + 8]
+    lea r0, [r6 + 8*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 8
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 16]
+    add r2, 8*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 16*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 16
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 24]
+    add r2, 16*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 24*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 24
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 32]
+    add r2, 24*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 32*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 32
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 40]
+    add r2, 32*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 40*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 40
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 48]
+    add r2, 40*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 48*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 48
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 56]
+    add r2, 48*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 56*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 56
+    add r2, 56*SIZEOF_PIXEL
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2


More information about the x265-devel mailing list