[x265] [PATCH] added asm primitives for 10bpp sad functions
yuvaraj at multicorewareinc.com
yuvaraj at multicorewareinc.com
Fri Mar 14 07:36:56 CET 2014
# HG changeset patch
# User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
# Date 1394778953 -19800
# Fri Mar 14 12:05:53 2014 +0530
# Node ID 347addacca36eff95bbe884cdcb41af3c5c9314b
# Parent 7b5699e6bb75d28631d9fc942f3f30a3652ef8a2
added asm primitives for 10bpp sad functions
diff -r 7b5699e6bb75 -r 347addacca36 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Thu Mar 13 18:29:54 2014 -0500
+++ b/source/common/x86/asm-primitives.cpp Fri Mar 14 12:05:53 2014 +0530
@@ -126,6 +126,25 @@
p.sad_x4[LUMA_64x48] = x265_pixel_sad_x4_64x48_ ## cpu; \
p.sad_x4[LUMA_64x64] = x265_pixel_sad_x4_64x64_ ## cpu
+#define SAD(cpu) \
+ p.sad[LUMA_8x32] = x265_pixel_sad_8x32_ ## cpu; \
+ p.sad[LUMA_16x4] = x265_pixel_sad_16x4_ ## cpu; \
+ p.sad[LUMA_16x12] = x265_pixel_sad_16x12_ ## cpu; \
+ p.sad[LUMA_16x32] = x265_pixel_sad_16x32_ ## cpu; \
+ p.sad[LUMA_16x64] = x265_pixel_sad_16x64_ ## cpu; \
+ p.sad[LUMA_32x8] = x265_pixel_sad_32x8_ ## cpu; \
+ p.sad[LUMA_32x16] = x265_pixel_sad_32x16_ ## cpu; \
+ p.sad[LUMA_32x24] = x265_pixel_sad_32x24_ ## cpu; \
+ p.sad[LUMA_32x32] = x265_pixel_sad_32x32_ ## cpu; \
+ p.sad[LUMA_32x64] = x265_pixel_sad_32x64_ ## cpu; \
+ p.sad[LUMA_64x16] = x265_pixel_sad_64x16_ ## cpu; \
+ p.sad[LUMA_64x32] = x265_pixel_sad_64x32_ ## cpu; \
+ p.sad[LUMA_64x48] = x265_pixel_sad_64x48_ ## cpu; \
+ p.sad[LUMA_64x64] = x265_pixel_sad_64x64_ ## cpu; \
+ p.sad[LUMA_48x64] = x265_pixel_sad_48x64_ ## cpu; \
+ p.sad[LUMA_24x32] = x265_pixel_sad_24x32_ ## cpu; \
+ p.sad[LUMA_12x16] = x265_pixel_sad_12x16_ ## cpu
+
#define ASSGN_SSE(cpu) \
p.sse_pp[LUMA_8x8] = x265_pixel_ssd_8x8_ ## cpu; \
p.sse_pp[LUMA_8x4] = x265_pixel_ssd_8x4_ ## cpu; \
@@ -914,6 +933,10 @@
#if HIGH_BIT_DEPTH
if (cpuMask & X265_CPU_SSE2)
{
+ INIT8(sad, _mmx2);
+ INIT2(sad, _sse2);
+ SAD(sse2);
+
INIT6(satd, _sse2);
HEVC_SATD(sse2);
p.satd[LUMA_4x4] = x265_pixel_satd_4x4_mmx2;
@@ -1096,29 +1119,9 @@
LUMA_VAR(_sse2);
- p.sad[LUMA_8x32] = x265_pixel_sad_8x32_sse2;
- p.sad[LUMA_16x4] = x265_pixel_sad_16x4_sse2;
- p.sad[LUMA_16x12] = x265_pixel_sad_16x12_sse2;
- p.sad[LUMA_16x32] = x265_pixel_sad_16x32_sse2;
- p.sad[LUMA_16x64] = x265_pixel_sad_16x64_sse2;
-
- p.sad[LUMA_32x8] = x265_pixel_sad_32x8_sse2;
- p.sad[LUMA_32x16] = x265_pixel_sad_32x16_sse2;
- p.sad[LUMA_32x24] = x265_pixel_sad_32x24_sse2;
- p.sad[LUMA_32x32] = x265_pixel_sad_32x32_sse2;
- p.sad[LUMA_32x64] = x265_pixel_sad_32x64_sse2;
-
- p.sad[LUMA_64x16] = x265_pixel_sad_64x16_sse2;
- p.sad[LUMA_64x32] = x265_pixel_sad_64x32_sse2;
- p.sad[LUMA_64x48] = x265_pixel_sad_64x48_sse2;
- p.sad[LUMA_64x64] = x265_pixel_sad_64x64_sse2;
-
- p.sad[LUMA_48x64] = x265_pixel_sad_48x64_sse2;
- p.sad[LUMA_24x32] = x265_pixel_sad_24x32_sse2;
- p.sad[LUMA_12x16] = x265_pixel_sad_12x16_sse2;
-
ASSGN_SSE(sse2);
INIT2(sad, _sse2);
+ SAD(sse2);
INIT2(sad_x3, _sse2);
INIT2(sad_x4, _sse2);
HEVC_SATD(sse2);
diff -r 7b5699e6bb75 -r 347addacca36 source/common/x86/sad16-a.asm
--- a/source/common/x86/sad16-a.asm Thu Mar 13 18:29:54 2014 -0500
+++ b/source/common/x86/sad16-a.asm Fri Mar 14 12:05:53 2014 +0530
@@ -321,6 +321,7 @@
SAD 16, 12
SAD 16, 16
SAD 16, 32
+SAD 16, 64
INIT_XMM sse2
SAD 8, 4
More information about the x265-devel mailing list