[x265] [PATCH] asm: assembly code for x265_pixel_satd_32x16
yuvaraj at multicorewareinc.com
yuvaraj at multicorewareinc.com
Mon Nov 11 15:36:42 CET 2013
# HG changeset patch
# User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
# Date 1384180564 -19800
# Mon Nov 11 20:06:04 2013 +0530
# Node ID fffe6456b8a1a3b06685b1d755c98b3fe7005ab9
# Parent 1130addeb3b8f9daffa9c0f5d2852e1812169c02
asm: assembly code for x265_pixel_satd_32x16
diff -r 1130addeb3b8 -r fffe6456b8a1 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Mon Nov 11 17:01:26 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp Mon Nov 11 20:06:04 2013 +0530
@@ -294,6 +294,7 @@
HEVC_SATD(mmx2);
p.satd[LUMA_12x16] = cmp<12, 16, 4, 16, x265_pixel_satd_4x16_mmx2>;
p.satd[LUMA_32x8] = x265_pixel_satd_32x8_sse2;
+ p.satd[LUMA_32x16] = x265_pixel_satd_32x16_sse2;
p.sa8d[BLOCK_4x4] = x265_pixel_satd_4x4_mmx2;
p.frame_init_lowres_core = x265_frame_init_lowres_core_mmx2;
@@ -444,6 +445,7 @@
p.satd[LUMA_4x16] = x265_pixel_satd_4x16_sse4;
p.satd[LUMA_12x16] = cmp<12, 16, 4, 16, x265_pixel_satd_4x16_sse4>;
p.satd[LUMA_32x8] = x265_pixel_satd_32x8_sse4;
+ p.satd[LUMA_32x16] = x265_pixel_satd_32x16_sse4;
p.sa8d[BLOCK_8x8] = x265_pixel_sa8d_8x8_sse4;
p.sa8d[BLOCK_16x16] = x265_pixel_sa8d_16x16_sse4;
SA8D_INTER_FROM_BLOCK(sse4);
@@ -469,6 +471,7 @@
p.satd[LUMA_4x16] = x265_pixel_satd_4x16_avx;
p.satd[LUMA_12x16] = cmp<12, 16, 4, 16, x265_pixel_satd_4x16_avx>;
p.satd[LUMA_32x8] = x265_pixel_satd_32x8_avx;
+ p.satd[LUMA_32x16] = x265_pixel_satd_32x16_avx;
p.sa8d[BLOCK_8x8] = x265_pixel_sa8d_8x8_avx;
p.sa8d[BLOCK_16x16] = x265_pixel_sa8d_16x16_avx;
SA8D_INTER_FROM_BLOCK(avx);
diff -r 1130addeb3b8 -r fffe6456b8a1 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm Mon Nov 11 17:01:26 2013 +0530
+++ b/source/common/x86/pixel-a.asm Mon Nov 11 20:06:04 2013 +0530
@@ -1761,7 +1761,70 @@
call pixel_satd_16x4_internal
SATD_END_SSE2 m10
+;-----------------------------------------------------------------------------
+; pixel_satd_32x16 -- variant assembled when WIN64 && notcpuflag(avx)
+; Sums the results of eight pixel_satd_16x4_internal calls: four starting at
+; the original r0/r2 pointers and four starting 16 bytes to the right.
+; In:    r0/r2 = the two block pointers (r1/r3 presumably their strides --
+;        only r0 and r2 are touched directly here; confirm against the helper)
+; Out:   produced by SATD_END_SSE2 from the m10 accumulator
+; NOTE(review): the +16 column offset assumes 1-byte pixels -- confirm.
+;
+; XMM count: this body uses m10 as accumulator, so at least 11 registers must
+; be declared; on WIN64 xmm6-xmm15 are callee-saved and x86inc only preserves
+; the ones covered by the third cglobal argument. 12 is used to match upstream
+; x264 callers of pixel_satd_16x4_internal (assumed to use up to m11 -- the
+; helper is defined outside this hunk, TODO confirm).
+;-----------------------------------------------------------------------------
+cglobal pixel_satd_32x16, 4,8,12 ;if WIN64 && notcpuflag(avx)
+    SATD_START_SSE2 m10, m7
+    mov r6, r0                      ; keep the original pointers so the second
+    mov r7, r2                      ; 16-wide column can start from the top row
+%if vertical
+    mova m7, [pw_00ff]
+%endif
+    ; left 16 columns: 4 calls x 4 rows (helper presumably advances r0/r2)
+    call pixel_satd_16x4_internal
+    call pixel_satd_16x4_internal
+    call pixel_satd_16x4_internal
+    call pixel_satd_16x4_internal
+    ; right 16 columns: restart from the saved pointers, offset by 16
+    lea r0, [r6 + 16]
+    lea r2, [r7 + 16]
+    call pixel_satd_16x4_internal
+    call pixel_satd_16x4_internal
+    call pixel_satd_16x4_internal
+    call pixel_satd_16x4_internal
+    SATD_END_SSE2 m10
+
%else
+;-----------------------------------------------------------------------------
+; pixel_satd_32x16 -- variant for every configuration except
+; WIN64 && notcpuflag(avx).
+; Sums the results of eight pixel_satd_8x8_internal calls: two per 8-byte-wide
+; column, for the four columns at offsets 0, 8, 16 and 24 from r0/r2.
+; In:    r0/r2 = the two block pointers
+; Out:   produced by SATD_END_SSE2 from the m6 accumulator
+; NOTE(review): the 8/16/24 column offsets assume 1-byte pixels, and the helper
+; (defined outside this hunk) is presumed to advance r0/r2 by 8 rows per call.
+;
+; Pointer rewind between columns:
+;  * x86-64 (WIN64 and UNIX64): arguments arrive in registers, so x86inc
+;    defines r0mp/r2mp as the registers themselves and "mov r0, r0mp" would be
+;    a no-op. The original pointers are therefore kept in r6/r7.
+;  * x86-32: arguments live on the stack, so they are reloaded via r0mp/r2mp.
+;-----------------------------------------------------------------------------
+cglobal pixel_satd_32x16, 4,8,8
+%if ARCH_X86_64 ;WIN64 && cpuflag(avx), or UNIX64
+    SATD_START_SSE2 m6, m7
+    mov r6, r0                      ; saved base pointers for the column rewinds
+    mov r7, r2
+    ; column 0
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    ; column 1
+    lea r0, [r6 + 8]
+    lea r2, [r7 + 8]
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    ; column 2
+    lea r0, [r6 + 16]
+    lea r2, [r7 + 16]
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    ; column 3
+    lea r0, [r6 + 24]
+    lea r2, [r7 + 24]
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_END_SSE2 m6
+%else ;if !ARCH_X86_64 (arguments are reloaded from the stack)
+    SATD_START_SSE2 m6, m7
+    ; column 0
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    ; column 1
+    mov r0, r0mp
+    mov r2, r2mp
+    add r0, 8
+    add r2, 8
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    ; column 2
+    mov r0, r0mp
+    mov r2, r2mp
+    add r0, 16
+    add r2, 16
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    ; column 3
+    mov r0, r0mp
+    mov r2, r2mp
+    add r0, 24
+    add r2, 24
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_END_SSE2 m6
+%endif
cglobal pixel_satd_32x8, 4,6,8 ;if !WIN64
SATD_START_SSE2 m6, m7
More information about the x265-devel
mailing list