[x265] [PATCH] asm: assembly code for pixel_satd_32x32
yuvaraj at multicorewareinc.com
yuvaraj at multicorewareinc.com
Wed Nov 13 12:14:16 CET 2013
# HG changeset patch
# User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
# Date 1384341217 -19800
# Wed Nov 13 16:43:37 2013 +0530
# Node ID 4ee655b93b0388268bbec051205f02d83861549b
# Parent 2ffe634ebd71a74e0af2749b1b9de894a191d1d0
asm: assembly code for pixel_satd_32x32
diff -r 2ffe634ebd71 -r 4ee655b93b03 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Wed Nov 13 13:08:03 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp Wed Nov 13 16:43:37 2013 +0530
@@ -59,7 +59,7 @@
#define INIT8(name, cpu) INIT8_NAME(name, name, cpu)
#define HEVC_SATD(cpu) \
- p.satd[LUMA_32x32] = cmp<32, 32, 16, 16, x265_pixel_satd_16x16_ ## cpu>; \
+ p.satd[LUMA_32x32] = x265_pixel_satd_32x32_ ## cpu; \
p.satd[LUMA_24x32] = x265_pixel_satd_24x32_ ## cpu; \
p.satd[LUMA_64x64] = cmp<64, 64, 16, 16, x265_pixel_satd_16x16_ ## cpu>; \
p.satd[LUMA_64x32] = cmp<64, 32, 16, 16, x265_pixel_satd_16x16_ ## cpu>; \
@@ -538,6 +538,7 @@
p.sa8d[BLOCK_16x16] = x265_pixel_sa8d_16x16_avx;
SA8D_INTER_FROM_BLOCK(avx);
ASSGN_SSE(avx);
+ HEVC_SATD(avx);
p.sad_x3[LUMA_12x16] = x265_pixel_sad_x3_12x16_avx;
p.sad_x4[LUMA_12x16] = x265_pixel_sad_x4_12x16_avx;
diff -r 2ffe634ebd71 -r 4ee655b93b03 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm Wed Nov 13 13:08:03 2013 +0530
+++ b/source/common/x86/pixel-a.asm Wed Nov 13 16:43:37 2013 +0530
@@ -1792,6 +1792,33 @@
call pixel_satd_16x4_internal
SATD_END_SSE2 m10
+cglobal pixel_satd_32x32, 4,8,12 ;if WIN64 && notcpuflag(avx). XMM count raised from 8: m10 is used below and xmm6-xmm15 are callee-saved on WIN64 (12 mirrors the x264 16x4-based functions - confirm against pixel_satd_16x4_internal)
+ SATD_START_SSE2 m10, m7 ; m10 is the accumulator later handed to SATD_END_SSE2
+ mov r6, r0 ; keep the original r0/r2 so the second 16-wide column can be addressed from them
+ mov r7, r2
+%if vertical
+ mova m7, [pw_00ff]
+%endif
+ call pixel_satd_16x4_internal ; first column: 8 helper calls (presumably 4 rows each, pointers advanced by the helper - confirm)
+ call pixel_satd_16x4_internal
+ call pixel_satd_16x4_internal
+ call pixel_satd_16x4_internal
+ call pixel_satd_16x4_internal
+ call pixel_satd_16x4_internal
+ call pixel_satd_16x4_internal
+ call pixel_satd_16x4_internal
+ lea r0, [r6 + 16] ; second column: saved base pointers + 16
+ lea r2, [r7 + 16]
+ call pixel_satd_16x4_internal ; second column: same 8 helper calls
+ call pixel_satd_16x4_internal
+ call pixel_satd_16x4_internal
+ call pixel_satd_16x4_internal
+ call pixel_satd_16x4_internal
+ call pixel_satd_16x4_internal
+ call pixel_satd_16x4_internal
+ call pixel_satd_16x4_internal
+ SATD_END_SSE2 m10 ; NOTE(review): accumulator lane width is not visible here - confirm a 32x32 sum cannot overflow it
+
%else
cglobal pixel_satd_32x8, 4,6,8 ;if !WIN64
@@ -1916,6 +1943,68 @@
SATD_END_SSE2 m6
%endif
+%if ARCH_X86_64
+cglobal pixel_satd_32x32, 4,8,8 ;64-bit (WIN64 and UNIX64): args arrive in registers, so r0mp/r2mp cannot restore them; bases are kept in r6/r7 instead
+ SATD_START_SSE2 m6, m7
+ mov r6, r0 ; r6/r7 hold the original r0/r2 for the column offsets below
+ mov r7, r2
+ call pixel_satd_8x8_internal ; column at +0: 4 helper calls (presumably 8 rows each, pointers advanced by the helper - confirm)
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ lea r0, [r6 + 8] ; column at +8, rebuilt from the saved bases
+ lea r2, [r7 + 8]
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ lea r0, [r6 + 16] ; column at +16
+ lea r2, [r7 + 16]
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ lea r0, [r6 + 24] ; column at +24
+ lea r2, [r7 + 24]
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ SATD_END_SSE2 m6 ; NOTE(review): accumulator lane width is not visible here - confirm a 32x32 sum cannot overflow it
+%else
+cglobal pixel_satd_32x32, 4,6,8 ;if !ARCH_X86_64 (32-bit): args live on the stack, so r0mp/r2mp reload the original pointers
+ SATD_START_SSE2 m6, m7
+ call pixel_satd_8x8_internal ; column at +0
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ mov r0, r0mp ; reload the original pointer arguments, then step to column +8
+ mov r2, r2mp
+ add r0, 8
+ add r2, 8
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ mov r0, r0mp ; column at +16
+ mov r2, r2mp
+ add r0, 16
+ add r2, 16
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ mov r0, r0mp ; column at +24
+ mov r2, r2mp
+ add r0, 24
+ add r2, 24
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ SATD_END_SSE2 m6
+%endif
+
cglobal pixel_satd_16x4, 4,6,8
SATD_START_SSE2 m6, m7
BACKUP_POINTERS
More information about the x265-devel
mailing list