[x265] [PATCH] asm: pixel_satd - 12x16, 24x32, 48x64 for 16bpp
yuvaraj at multicorewareinc.com
yuvaraj at multicorewareinc.com
Tue Dec 3 10:35:37 CET 2013
# HG changeset patch
# User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
# Date 1386063278 -19800
# Tue Dec 03 15:04:38 2013 +0530
# Node ID 70be1456ef76e3289d91842e0de59cfa0bf06817
# Parent 21adddaee4606b718fe96f4bb2f5aebcbdf80c2a
asm: pixel_satd - 12x16, 24x32, 48x64 for 16bpp
diff -r 21adddaee460 -r 70be1456ef76 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Tue Dec 03 11:53:32 2013 +0800
+++ b/source/common/x86/asm-primitives.cpp Tue Dec 03 15:04:38 2013 +0530
@@ -497,6 +497,9 @@
p.satd[LUMA_16x12] = x265_pixel_satd_16x12_sse2;
p.satd[LUMA_16x32] = x265_pixel_satd_16x32_sse2;
p.satd[LUMA_16x64] = x265_pixel_satd_16x64_sse2;
+ p.satd[LUMA_12x16] = x265_pixel_satd_12x16_sse2;
+ p.satd[LUMA_24x32] = x265_pixel_satd_24x32_sse2;
+ p.satd[LUMA_48x64] = x265_pixel_satd_48x64_sse2;
p.sa8d_inter[LUMA_8x8] = x265_pixel_sa8d_8x8_sse2;
p.sa8d_inter[LUMA_16x16] = x265_pixel_sa8d_16x16_sse2;
diff -r 21adddaee460 -r 70be1456ef76 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm Tue Dec 03 11:53:32 2013 +0800
+++ b/source/common/x86/pixel-a.asm Tue Dec 03 15:04:38 2013 +0530
@@ -1502,48 +1502,48 @@
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
- lea r0, [r6 + 8]
- lea r2, [r7 + 8]
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- lea r0, [r6 + 16]
- lea r2, [r7 + 16]
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- lea r0, [r6 + 24]
- lea r2, [r7 + 24]
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- lea r0, [r6 + 32]
- lea r2, [r7 + 32]
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- lea r0, [r6 + 40]
- lea r2, [r7 + 40]
+ lea r0, [r6 + 8*SIZEOF_PIXEL]
+ lea r2, [r7 + 8*SIZEOF_PIXEL]
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ lea r0, [r6 + 16*SIZEOF_PIXEL]
+ lea r2, [r7 + 16*SIZEOF_PIXEL]
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ lea r0, [r6 + 24*SIZEOF_PIXEL]
+ lea r2, [r7 + 24*SIZEOF_PIXEL]
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ lea r0, [r6 + 32*SIZEOF_PIXEL]
+ lea r2, [r7 + 32*SIZEOF_PIXEL]
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ lea r0, [r6 + 40*SIZEOF_PIXEL]
+ lea r2, [r7 + 40*SIZEOF_PIXEL]
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
@@ -1572,53 +1572,53 @@
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
- lea r0, [r6 + 8]
+ lea r0, [r6 + 8*SIZEOF_PIXEL]
mov r2, [rsp]
- add r2,8
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- lea r0, [r6 + 16]
+ add r2,8*SIZEOF_PIXEL
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ lea r0, [r6 + 16*SIZEOF_PIXEL]
mov r2, [rsp]
- add r2,16
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- lea r0, [r6 + 24]
+ add r2,16*SIZEOF_PIXEL
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ lea r0, [r6 + 24*SIZEOF_PIXEL]
mov r2, [rsp]
- add r2,24
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- lea r0, [r6 + 32]
+ add r2,24*SIZEOF_PIXEL
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ lea r0, [r6 + 32*SIZEOF_PIXEL]
mov r2, [rsp]
- add r2,32
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- lea r0, [r6 + 40]
+ add r2,32*SIZEOF_PIXEL
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ lea r0, [r6 + 40*SIZEOF_PIXEL]
mov r2, [rsp]
- add r2,40
+ add r2,40*SIZEOF_PIXEL
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
@@ -2278,14 +2278,14 @@
lea r0, [r0 + r1*2*SIZEOF_PIXEL]
lea r2, [r2 + r3*2*SIZEOF_PIXEL]
SATD_4x8_SSE vertical, 1, add
- lea r0, [r6 + 4]
- lea r2, [r7 + 4]
+ lea r0, [r6 + 4*SIZEOF_PIXEL]
+ lea r2, [r7 + 4*SIZEOF_PIXEL]
SATD_4x8_SSE vertical, 1, add
lea r0, [r0 + r1*2*SIZEOF_PIXEL]
lea r2, [r2 + r3*2*SIZEOF_PIXEL]
SATD_4x8_SSE vertical, 1, add
- lea r0, [r6 + 8]
- lea r2, [r7 + 8]
+ lea r0, [r6 + 8*SIZEOF_PIXEL]
+ lea r2, [r7 + 8*SIZEOF_PIXEL]
SATD_4x8_SSE vertical, 1, add
lea r0, [r0 + r1*2*SIZEOF_PIXEL]
lea r2, [r2 + r3*2*SIZEOF_PIXEL]
@@ -2305,16 +2305,16 @@
lea r0, [r0 + r1*2*SIZEOF_PIXEL]
lea r2, [r2 + r3*2*SIZEOF_PIXEL]
SATD_4x8_SSE vertical, 1, add
- lea r0, [r6 + 4]
+ lea r0, [r6 + 4*SIZEOF_PIXEL]
mov r2, [rsp]
- add r2, 4
+ add r2, 4*SIZEOF_PIXEL
SATD_4x8_SSE vertical, 1, add
lea r0, [r0 + r1*2*SIZEOF_PIXEL]
lea r2, [r2 + r3*2*SIZEOF_PIXEL]
SATD_4x8_SSE vertical, 1, add
- lea r0, [r6 + 8]
+ lea r0, [r6 + 8*SIZEOF_PIXEL]
mov r2, [rsp]
- add r2, 8
+ add r2, 8*SIZEOF_PIXEL
SATD_4x8_SSE vertical, 1, add
lea r0, [r0 + r1*2*SIZEOF_PIXEL]
lea r2, [r2 + r3*2*SIZEOF_PIXEL]
@@ -2333,19 +2333,21 @@
call pixel_satd_8x8_internal
call pixel_satd_8x8_internal
call pixel_satd_8x8_internal
- lea r0, [r6 + 8]
- lea r2, [r7 + 8]
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- lea r0, [r6 + 16]
- lea r2, [r7 + 16]
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- SATD_END_SSE2 m6
+ SATD_ACCUM m6, m0, m7
+ lea r0, [r6 + 8*SIZEOF_PIXEL]
+ lea r2, [r7 + 8*SIZEOF_PIXEL]
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ SATD_ACCUM m6, m0, m7
+ lea r0, [r6 + 16*SIZEOF_PIXEL]
+ lea r2, [r7 + 16*SIZEOF_PIXEL]
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ SATD_END_SSE2 m6, m7
%else
cglobal pixel_satd_24x32, 4,7,8,0-4
SATD_START_SSE2 m6, m7
@@ -2355,21 +2357,26 @@
call pixel_satd_8x8_internal
call pixel_satd_8x8_internal
call pixel_satd_8x8_internal
- lea r0, [r6 + 8]
+%if HIGH_BIT_DEPTH
+ pxor m7, m7
+%endif
+ SATD_ACCUM m6, m0, m7
+ lea r0, [r6 + 8*SIZEOF_PIXEL]
mov r2, [rsp]
- add r2, 8
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- lea r0, [r6 + 16]
+ add r2, 8*SIZEOF_PIXEL
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ SATD_ACCUM m6, m0, m7
+ lea r0, [r6 + 16*SIZEOF_PIXEL]
mov r2, [rsp]
- add r2, 16
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- SATD_END_SSE2 m6
+ add r2, 16*SIZEOF_PIXEL
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ SATD_END_SSE2 m6, m7
%endif ;WIN64
cglobal pixel_satd_8x32, 4,6,8
More information about the x265-devel mailing list