[x265] [PATCH] asm: 16bpp support for satd_64xN

Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
Mon Dec 2 14:21:45 CET 2013


# HG changeset patch
# User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
# Date 1385990487 -19800
#      Mon Dec 02 18:51:27 2013 +0530
# Node ID 55597521ae176f6628c4bf3951b0e454be2f33eb
# Parent  fa3a3eced7228599400f9403dba159d433d05222
asm: 16bpp support for satd_64xN

diff -r fa3a3eced722 -r 55597521ae17 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Mon Dec 02 17:50:12 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp	Mon Dec 02 18:51:27 2013 +0530
@@ -489,6 +489,8 @@
     if (cpuMask & X265_CPU_SSE2)
     {
         INIT6(satd, _sse2);
+        HEVC_SATD(sse2);
+        p.satd[LUMA_4x4] = x265_pixel_satd_4x4_mmx2;
         p.satd[LUMA_4x16] = x265_pixel_satd_4x16_sse2;
         p.satd[LUMA_8x32] = x265_pixel_satd_8x32_sse2;
         p.satd[LUMA_16x4] = x265_pixel_satd_16x4_sse2;
@@ -496,14 +498,9 @@
         p.satd[LUMA_16x32] = x265_pixel_satd_16x32_sse2;
         p.satd[LUMA_16x64] = x265_pixel_satd_16x64_sse2;
         p.satd[LUMA_12x16] = x265_pixel_satd_12x16_sse2;
-        p.satd[LUMA_24x32] = x265_pixel_satd_24x32_sse2;
-        p.satd[LUMA_48x64] = x265_pixel_satd_48x64_sse2;
         p.satd[LUMA_32x8] = x265_pixel_satd_32x8_sse2;
         p.satd[LUMA_32x16] = x265_pixel_satd_32x16_sse2;
         p.satd[LUMA_32x24] = x265_pixel_satd_32x24_sse2;
-        p.satd[LUMA_32x32] = x265_pixel_satd_32x32_sse2;
-        p.satd[LUMA_32x64] = x265_pixel_satd_32x64_sse2;
-
 
         p.sa8d_inter[LUMA_8x8] = x265_pixel_sa8d_8x8_sse2;
         p.sa8d_inter[LUMA_16x16] = x265_pixel_sa8d_16x16_sse2;
diff -r fa3a3eced722 -r 55597521ae17 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm	Mon Dec 02 17:50:12 2013 +0530
+++ b/source/common/x86/pixel-a.asm	Mon Dec 02 18:51:27 2013 +0530
@@ -4255,35 +4255,42 @@
     mov r7, r2
     call pixel_satd_8x8_internal
     call pixel_satd_8x8_internal
-    lea r0, [r6 + 8]
-    lea r2, [r7 + 8]
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    lea r0, [r6 + 16]
-    lea r2, [r7 + 16]
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    lea r0, [r6 + 24]
-    lea r2, [r7 + 24]
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    lea r0, [r6 + 32]
-    lea r2, [r7 + 32]
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    lea r0, [r6 + 40]
-    lea r2, [r7 + 40]
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    lea r0, [r6 + 48]
-    lea r2, [r7 + 48]
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    lea r0, [r6 + 56]
-    lea r2, [r7 + 56]
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    SATD_END_SSE2 m6
+    SATD_ACCUM m6, m0, m7
+    lea r0, [r6 + 8*SIZEOF_PIXEL]
+    lea r2, [r7 + 8*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_ACCUM m6, m0, m7
+    lea r0, [r6 + 16*SIZEOF_PIXEL]
+    lea r2, [r7 + 16*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_ACCUM m6, m0, m7
+    lea r0, [r6 + 24*SIZEOF_PIXEL]
+    lea r2, [r7 + 24*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_ACCUM m6, m0, m7
+    lea r0, [r6 + 32*SIZEOF_PIXEL]
+    lea r2, [r7 + 32*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_ACCUM m6, m0, m7
+    lea r0, [r6 + 40*SIZEOF_PIXEL]
+    lea r2, [r7 + 40*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_ACCUM m6, m0, m7
+    lea r0, [r6 + 48*SIZEOF_PIXEL]
+    lea r2, [r7 + 48*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_ACCUM m6, m0, m7
+    lea r0, [r6 + 56*SIZEOF_PIXEL]
+    lea r2, [r7 + 56*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_END_SSE2 m6, m7
 %else
 cglobal pixel_satd_64x16, 4,7,8,0-4    ;if !WIN64
     SATD_START_SSE2 m6, m7
@@ -4291,42 +4298,50 @@
     mov [rsp], r2
     call pixel_satd_8x8_internal
     call pixel_satd_8x8_internal
-    lea r0, [r6 + 8]
+    pxor       m7, m7
+    SATD_ACCUM m6, m0, m7
+    lea r0, [r6 + 8*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2,8
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    lea r0, [r6 + 16]
+    add r2,8*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_ACCUM m6, m0, m7
+    lea r0, [r6 + 16*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2,16
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    lea r0, [r6 + 24]
+    add r2,16*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_ACCUM m6, m0, m7
+    lea r0, [r6 + 24*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2,24
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    lea r0, [r6 + 32]
+    add r2,24*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_ACCUM m6, m0, m7
+    lea r0, [r6 + 32*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2,32
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    lea r0, [r6 + 40]
+    add r2,32*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_ACCUM m6, m0, m7
+    lea r0, [r6 + 40*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2,40
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    lea r0, [r6 + 48]
+    add r2,40*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_ACCUM m6, m0, m7
+    lea r0, [r6 + 48*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2,48
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    lea r0, [r6 + 56]
+    add r2,48*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_ACCUM m6, m0, m7
+    lea r0, [r6 + 56*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2,56
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    SATD_END_SSE2 m6
+    add r2,56*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_END_SSE2 m6, m7
 %endif
 
 %if WIN64
@@ -4338,44 +4353,44 @@
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    lea r0, [r6 + 8]
-    lea r2, [r7 + 8]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 16]
-    lea r2, [r7 + 16]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 24]
-    lea r2, [r7 + 24]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 32]
-    lea r2, [r7 + 32]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 40]
-    lea r2, [r7 + 40]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 48]
-    lea r2, [r7 + 48]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 56]
-    lea r2, [r7 + 56]
+    lea r0, [r6 + 8*SIZEOF_PIXEL]
+    lea r2, [r7 + 8*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 16*SIZEOF_PIXEL]
+    lea r2, [r7 + 16*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 24*SIZEOF_PIXEL]
+    lea r2, [r7 + 24*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 32*SIZEOF_PIXEL]
+    lea r2, [r7 + 32*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 40*SIZEOF_PIXEL]
+    lea r2, [r7 + 40*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 48*SIZEOF_PIXEL]
+    lea r2, [r7 + 48*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 56*SIZEOF_PIXEL]
+    lea r2, [r7 + 56*SIZEOF_PIXEL]
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
@@ -4396,51 +4411,51 @@
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    lea r0, [r6 + 8]
+    lea r0, [r6 + 8*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 8
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 16]
+    add r2, 8*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 16*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 16
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 24]
+    add r2, 16*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 24*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 24
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 32]
+    add r2, 24*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 32*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 32
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 40]
+    add r2, 32*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 40*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 40
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 48]
+    add r2, 40*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 48*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 48
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 56]
+    add r2, 48*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 56*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 56
+    add r2, 56*SIZEOF_PIXEL
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
@@ -4465,56 +4480,56 @@
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    lea r0, [r6 + 8]
-    lea r2, [r7 + 8]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 16]
-    lea r2, [r7 + 16]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 24]
-    lea r2, [r7 + 24]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 32]
-    lea r2, [r7 + 32]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 40]
-    lea r2, [r7 + 40]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 48]
-    lea r2, [r7 + 48]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 56]
-    lea r2, [r7 + 56]
+    lea r0, [r6 + 8*SIZEOF_PIXEL]
+    lea r2, [r7 + 8*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 16*SIZEOF_PIXEL]
+    lea r2, [r7 + 16*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 24*SIZEOF_PIXEL]
+    lea r2, [r7 + 24*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 32*SIZEOF_PIXEL]
+    lea r2, [r7 + 32*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 40*SIZEOF_PIXEL]
+    lea r2, [r7 + 40*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 48*SIZEOF_PIXEL]
+    lea r2, [r7 + 48*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 56*SIZEOF_PIXEL]
+    lea r2, [r7 + 56*SIZEOF_PIXEL]
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
@@ -4539,63 +4554,63 @@
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    lea r0, [r6 + 8]
+    lea r0, [r6 + 8*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 8
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 16]
+    add r2, 8*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 16*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 16
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 24]
+    add r2, 16*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 24*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 24
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 32]
+    add r2, 24*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 32*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 32
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 40]
+    add r2, 32*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 40*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 40
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 48]
+    add r2, 40*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 48*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 48
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 56]
+    add r2, 48*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 56*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 56
+    add r2, 56*SIZEOF_PIXEL
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
@@ -4624,68 +4639,68 @@
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    lea r0, [r6 + 8]
-    lea r2, [r7 + 8]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 16]
-    lea r2, [r7 + 16]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 24]
-    lea r2, [r7 + 24]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 32]
-    lea r2, [r7 + 32]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 40]
-    lea r2, [r7 + 40]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 48]
-    lea r2, [r7 + 48]
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 56]
-    lea r2, [r7 + 56]
+    lea r0, [r6 + 8*SIZEOF_PIXEL]
+    lea r2, [r7 + 8*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 16*SIZEOF_PIXEL]
+    lea r2, [r7 + 16*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 24*SIZEOF_PIXEL]
+    lea r2, [r7 + 24*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 32*SIZEOF_PIXEL]
+    lea r2, [r7 + 32*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 40*SIZEOF_PIXEL]
+    lea r2, [r7 + 40*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 48*SIZEOF_PIXEL]
+    lea r2, [r7 + 48*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 56*SIZEOF_PIXEL]
+    lea r2, [r7 + 56*SIZEOF_PIXEL]
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
@@ -4714,75 +4729,75 @@
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    lea r0, [r6 + 8]
+    lea r0, [r6 + 8*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 8
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 16]
+    add r2, 8*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 16*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 16
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 24]
+    add r2, 16*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 24*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 24
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 32]
+    add r2, 24*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 32*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 32
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 40]
+    add r2, 32*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 40*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 40
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 48]
+    add r2, 40*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 48*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 48
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    lea r0, [r6 + 56]
+    add r2, 48*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 56*SIZEOF_PIXEL]
     mov r2, [rsp]
-    add r2, 56
+    add r2, 56*SIZEOF_PIXEL
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2


More information about the x265-devel mailing list