[x265] [PATCH] asm: instantiate some sad_x3 and sad_x4 functions for HEVC partitions
Steve Borho
steve at borho.org
Thu Oct 24 09:26:19 CEST 2013
# HG changeset patch
# User Steve Borho <steve at borho.org>
# Date 1382599561 18000
# Thu Oct 24 02:26:01 2013 -0500
# Node ID 3b8fa23f68ececed69fe59f3772dc5adda1b31c9
# Parent e8f05b1c543a66734e53ab8fb95674cc81e7ac7c
asm: instantiate some sad_x3 and sad_x4 functions for HEVC partitions
diff -r e8f05b1c543a -r 3b8fa23f68ec source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Wed Oct 23 23:59:51 2013 -0500
+++ b/source/common/x86/asm-primitives.cpp Thu Oct 24 02:26:01 2013 -0500
@@ -269,7 +269,15 @@
p.sad_x4[LUMA_8x4] = x265_pixel_sad_x4_8x4_ssse3;
p.sad_x4[LUMA_8x8] = x265_pixel_sad_x4_8x8_ssse3;
+ p.sad_x3[LUMA_8x16] = x265_pixel_sad_x3_8x16_ssse3;
p.sad_x4[LUMA_8x16] = x265_pixel_sad_x4_8x16_ssse3;
+ p.sad_x3[LUMA_8x32] = x265_pixel_sad_x3_8x32_ssse3;
+ p.sad_x4[LUMA_8x32] = x265_pixel_sad_x4_8x32_ssse3;
+
+ p.sad_x3[LUMA_16x12] = x265_pixel_sad_x3_16x12_ssse3;
+ p.sad_x4[LUMA_16x12] = x265_pixel_sad_x4_16x12_ssse3;
+ p.sad_x3[LUMA_16x32] = x265_pixel_sad_x3_16x32_ssse3;
+ p.sad_x4[LUMA_16x32] = x265_pixel_sad_x4_16x32_ssse3;
}
if (cpuMask & X265_CPU_SSE4)
{
@@ -291,6 +299,13 @@
p.sa8d[BLOCK_16x16] = x265_pixel_sa8d_16x16_avx;
SA8D_INTER_FROM_BLOCK(avx);
ASSGN_SSE(avx);
+
+ p.sad_x3[LUMA_16x4] = x265_pixel_sad_x3_16x4_avx;
+ p.sad_x4[LUMA_16x4] = x265_pixel_sad_x4_16x4_avx;
+ p.sad_x3[LUMA_16x12] = x265_pixel_sad_x3_16x12_avx;
+ p.sad_x4[LUMA_16x12] = x265_pixel_sad_x4_16x12_avx;
+ p.sad_x3[LUMA_16x32] = x265_pixel_sad_x3_16x32_avx;
+ p.sad_x4[LUMA_16x32] = x265_pixel_sad_x4_16x32_avx;
}
if (cpuMask & X265_CPU_XOP)
{
@@ -310,6 +325,11 @@
INIT2_NAME(sse_pp, ssd, _avx2);
p.sa8d[BLOCK_8x8] = x265_pixel_sa8d_8x8_avx2;
SA8D_INTER_FROM_BLOCK8(avx2);
+
+ p.sad_x3[LUMA_16x12] = x265_pixel_sad_x3_16x12_avx2;
+ p.sad_x4[LUMA_16x12] = x265_pixel_sad_x4_16x12_avx2;
+ p.sad_x3[LUMA_16x32] = x265_pixel_sad_x3_16x32_avx2;
+ p.sad_x4[LUMA_16x32] = x265_pixel_sad_x4_16x32_avx2;
}
#endif // if HIGH_BIT_DEPTH
}
diff -r e8f05b1c543a -r 3b8fa23f68ec source/common/x86/pixel.h
--- a/source/common/x86/pixel.h Wed Oct 23 23:59:51 2013 -0500
+++ b/source/common/x86/pixel.h Thu Oct 24 02:26:01 2013 -0500
@@ -29,8 +29,13 @@
#define X265_I386_PIXEL_H
#define DECL_PIXELS(ret, name, suffix, args) \
+ ret x265_pixel_ ## name ## _16x64_ ## suffix args; \
+ ret x265_pixel_ ## name ## _16x32_ ## suffix args; \
ret x265_pixel_ ## name ## _16x16_ ## suffix args; \
+ ret x265_pixel_ ## name ## _16x12_ ## suffix args; \
ret x265_pixel_ ## name ## _16x8_ ## suffix args; \
+ ret x265_pixel_ ## name ## _16x4_ ## suffix args; \
+ ret x265_pixel_ ## name ## _8x32_ ## suffix args; \
ret x265_pixel_ ## name ## _8x16_ ## suffix args; \
ret x265_pixel_ ## name ## _8x8_ ## suffix args; \
ret x265_pixel_ ## name ## _8x4_ ## suffix args; \
@@ -58,6 +63,7 @@
DECL_X4(sad, sse2)
DECL_X4(sad, sse3)
DECL_X4(sad, ssse3)
+DECL_X4(sad, avx)
DECL_X4(sad, avx2)
DECL_X1(ssd, mmx)
DECL_X1(ssd, mmx2)
diff -r e8f05b1c543a -r 3b8fa23f68ec source/common/x86/sad-a.asm
--- a/source/common/x86/sad-a.asm Wed Oct 23 23:59:51 2013 -0500
+++ b/source/common/x86/sad-a.asm Thu Oct 24 02:26:01 2013 -0500
@@ -1504,8 +1504,10 @@
INIT_XMM sse3
SAD_X_SSE2 3, 16, 16, 7
SAD_X_SSE2 3, 16, 8, 7
+SAD_X_SSE2 3, 16, 4, 7
SAD_X_SSE2 4, 16, 16, 7
SAD_X_SSE2 4, 16, 8, 7
+SAD_X_SSE2 4, 16, 4, 7
%macro SAD_X_SSSE3 3
cglobal pixel_sad_x%1_%2x%3, 2+%1,3+%1,8
@@ -1518,19 +1520,32 @@
%endmacro
INIT_XMM ssse3
+SAD_X_SSE2 3, 16, 32, 7
SAD_X_SSE2 3, 16, 16, 7
+SAD_X_SSE2 3, 16, 12, 7
SAD_X_SSE2 3, 16, 8, 7
+SAD_X_SSE2 3, 8, 32, 7
+SAD_X_SSE2 3, 8, 16, 7
+SAD_X_SSE2 4, 16, 32, 7
SAD_X_SSE2 4, 16, 16, 7
+SAD_X_SSE2 4, 16, 12, 7
SAD_X_SSE2 4, 16, 8, 7
+SAD_X_SSSE3 4, 8, 32
SAD_X_SSSE3 4, 8, 16
SAD_X_SSSE3 4, 8, 8
SAD_X_SSSE3 4, 8, 4
INIT_XMM avx
+SAD_X_SSE2 3, 16, 32, 6
SAD_X_SSE2 3, 16, 16, 6
+SAD_X_SSE2 3, 16, 12, 6
SAD_X_SSE2 3, 16, 8, 6
+SAD_X_SSE2 3, 16, 4, 6
+SAD_X_SSE2 4, 16, 32, 7
SAD_X_SSE2 4, 16, 16, 7
+SAD_X_SSE2 4, 16, 12, 7
SAD_X_SSE2 4, 16, 8, 7
+SAD_X_SSE2 4, 16, 4, 7
%macro SAD_X_AVX2 4
cglobal pixel_sad_x%1_%2x%3, 2+%1,3+%1,%4
@@ -1543,9 +1558,13 @@
%endmacro
INIT_YMM avx2
+SAD_X_AVX2 3, 16, 32, 7
SAD_X_AVX2 3, 16, 16, 7
+SAD_X_AVX2 3, 16, 12, 7
SAD_X_AVX2 3, 16, 8, 7
+SAD_X_AVX2 4, 16, 32, 8
SAD_X_AVX2 4, 16, 16, 8
+SAD_X_AVX2 4, 16, 12, 8
SAD_X_AVX2 4, 16, 8, 8
;=============================================================================
More information about the x265-devel mailing list