[x265] [PATCH] asm: assembly code for pixel_satd_24x32
yuvaraj at multicorewareinc.com
yuvaraj at multicorewareinc.com
Wed Nov 13 08:38:38 CET 2013
# HG changeset patch
# User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
# Date 1384328283 -19800
# Wed Nov 13 13:08:03 2013 +0530
# Node ID 2ffe634ebd71a74e0af2749b1b9de894a191d1d0
# Parent c4ca80d19105ccf1ba2ec14dd65915f2820a660d
asm: assembly code for pixel_satd_24x32
diff -r c4ca80d19105 -r 2ffe634ebd71 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Tue Nov 12 19:10:23 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp Wed Nov 13 13:08:03 2013 +0530
@@ -60,7 +60,7 @@
#define HEVC_SATD(cpu) \
p.satd[LUMA_32x32] = cmp<32, 32, 16, 16, x265_pixel_satd_16x16_ ## cpu>; \
- p.satd[LUMA_24x32] = cmp<24, 32, 8, 16, x265_pixel_satd_8x16_ ## cpu>; \
+ p.satd[LUMA_24x32] = x265_pixel_satd_24x32_ ## cpu; \
p.satd[LUMA_64x64] = cmp<64, 64, 16, 16, x265_pixel_satd_16x16_ ## cpu>; \
p.satd[LUMA_64x32] = cmp<64, 32, 16, 16, x265_pixel_satd_16x16_ ## cpu>; \
p.satd[LUMA_32x64] = cmp<32, 64, 16, 16, x265_pixel_satd_16x16_ ## cpu>; \
@@ -359,7 +359,6 @@
INIT8(sad_x3, _mmx2);
INIT8(sad_x4, _mmx2);
INIT8(satd, _mmx2);
- HEVC_SATD(mmx2);
p.satd[LUMA_8x32] = x265_pixel_satd_8x32_sse2;
p.satd[LUMA_12x16] = x265_pixel_satd_12x16_sse2;
p.satd[LUMA_16x4] = x265_pixel_satd_16x4_sse2;
@@ -588,10 +587,17 @@
{
INIT2(sad_x4, _avx2);
INIT4(satd, _avx2);
- HEVC_SATD(avx2);
INIT2_NAME(sse_pp, ssd, _avx2);
p.sa8d[BLOCK_8x8] = x265_pixel_sa8d_8x8_avx2;
SA8D_INTER_FROM_BLOCK8(avx2);
+ p.satd[LUMA_32x32] = cmp<32, 32, 16, 16, x265_pixel_satd_16x16_avx2>;
+ p.satd[LUMA_24x32] = cmp<24, 32, 8, 16, x265_pixel_satd_8x16_avx2>;
+ p.satd[LUMA_64x64] = cmp<64, 64, 16, 16, x265_pixel_satd_16x16_avx2>;
+ p.satd[LUMA_64x32] = cmp<64, 32, 16, 16, x265_pixel_satd_16x16_avx2>;
+ p.satd[LUMA_32x64] = cmp<32, 64, 16, 16, x265_pixel_satd_16x16_avx2>;
+ p.satd[LUMA_64x48] = cmp<64, 48, 16, 16, x265_pixel_satd_16x16_avx2>;
+ p.satd[LUMA_48x64] = cmp<48, 64, 16, 16, x265_pixel_satd_16x16_avx2>;
+ p.satd[LUMA_64x16] = cmp<64, 16, 16, 16, x265_pixel_satd_16x16_avx2>;
p.sad_x4[LUMA_16x12] = x265_pixel_sad_x4_16x12_avx2;
p.sad_x4[LUMA_16x32] = x265_pixel_sad_x4_16x32_avx2;
diff -r c4ca80d19105 -r 2ffe634ebd71 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm Tue Nov 12 19:10:23 2013 +0530
+++ b/source/common/x86/pixel-a.asm Wed Nov 13 13:08:03 2013 +0530
@@ -2051,6 +2051,54 @@
RET
%endif
+%if ARCH_X86_64 ; was WIN64: on any x86-64 ABI r0mp/r2mp alias r0/r2 (register args), so the stack-reload path below cannot rewind them
+cglobal pixel_satd_24x32, 4,8,8 ; x86inc cglobal: 4 args, 8 GPRs (r6/r7 hold the pointer backups), 8 vector regs
+    SATD_START_SSE2 m6, m7
+    mov r6, r0 ; r6 = r0 as passed in; base for the +8 / +16 column offsets
+    mov r7, r2 ; r7 = r2 as passed in
+    call pixel_satd_8x8_internal ; column 0: four stacked 8x8 calls (presumably each call advances r0/r2 by 8 rows - confirm in pixel_satd_8x8_internal)
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    lea r0, [r6 + 8] ; column 1: rewind to the saved base, step 8 to the right
+    lea r2, [r7 + 8]
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    lea r0, [r6 + 16] ; column 2: rewind to the saved base, step 16 to the right
+    lea r2, [r7 + 16]
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_END_SSE2 m6 ; m6 is the register handed to SATD_START_SSE2 above
+%else ; x86-32: too few GPRs for backups; the arguments still live on the stack, so reload them from there
+cglobal pixel_satd_24x32, 4,6,8 ; x86inc cglobal: 4 args, 6 GPRs, 8 vector regs
+    SATD_START_SSE2 m6, m7
+    call pixel_satd_8x8_internal ; column 0
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    mov r0, r0mp ; column 1: reload the original pointer arguments from their stack slots
+    mov r2, r2mp
+    add r0, 8
+    add r2, 8
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    mov r0, r0mp ; column 2: reload again, then step 16 to the right
+    mov r2, r2mp
+    add r0, 16
+    add r2, 16
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_END_SSE2 m6
+%endif ; ARCH_X86_64
+
cglobal pixel_satd_8x32, 4,6,8
SATD_START_SSE2 m6, m7
%if vertical
More information about the x265-devel
mailing list