[x265] [PATCH] asm: code for pixel_var_32x32 and 64x64 blocks
murugan at multicorewareinc.com
murugan at multicorewareinc.com
Wed Nov 27 10:49:49 CET 2013
# HG changeset patch
# User Murugan Vairavel <murugan at multicorewareinc.com>
# Date 1385545753 -19800
# Wed Nov 27 15:19:13 2013 +0530
# Branch stable
# Node ID d770e8e65dc41c224cdea78efd588c5b2155c606
# Parent 417f794274e5692851b558eaa609e6fbdac1d50f
asm: code for pixel_var_32x32 and 64x64 blocks
diff -r 417f794274e5 -r d770e8e65dc4 source/common/pixel.cpp
--- a/source/common/pixel.cpp Wed Nov 27 01:49:09 2013 -0600
+++ b/source/common/pixel.cpp Wed Nov 27 15:19:13 2013 +0530
@@ -985,6 +985,8 @@
p.var[BLOCK_8x8] = pixel_var<8>;
p.var[BLOCK_16x16] = pixel_var<16>;
+ p.var[BLOCK_32x32] = pixel_var<32>;
+ p.var[BLOCK_64x64] = pixel_var<64>;
p.plane_copy_deinterleave_c = plane_copy_deinterleave_chroma;
}
}
diff -r 417f794274e5 -r d770e8e65dc4 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Wed Nov 27 01:49:09 2013 -0600
+++ b/source/common/x86/asm-primitives.cpp Wed Nov 27 15:19:13 2013 +0530
@@ -440,7 +440,9 @@
#define LUMA_VAR(cpu) \
SETUP_PIXEL_VAR_DEF(8, 8, cpu); \
- SETUP_PIXEL_VAR_DEF(16, 16, cpu);
+ SETUP_PIXEL_VAR_DEF(16, 16, cpu); \
+ SETUP_PIXEL_VAR_DEF(32, 32, cpu); \
+ SETUP_PIXEL_VAR_DEF(64, 64, cpu);
namespace x265 {
// private x265 namespace
diff -r 417f794274e5 -r d770e8e65dc4 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm Wed Nov 27 01:49:09 2013 -0600
+++ b/source/common/x86/pixel-a.asm Wed Nov 27 15:19:13 2013 +0530
@@ -1612,7 +1612,13 @@
HADDW m5, m2
%endif
%else ; !HIGH_BIT_DEPTH
+%if %1 == 64
+ HADDW m5, m2
+ movd m7, r4d
+ paddd m5, m7
+%else
HADDW m5, m2
+%endif
%endif ; HIGH_BIT_DEPTH
HADDD m6, m1
%if ARCH_X86_64
@@ -1738,9 +1744,7 @@
VAR_CORE
VAR_END 8, 8
-cglobal pixel_var_16x16, 2,3,8
- VAR_START 1
- lea r2, [r1 * 3]
+cglobal pixel_var_16x16_internal
mova m0, [r0]
mova m3, [r0 + r1]
DEINTB 1, 0, 4, 3, 7
@@ -1776,7 +1780,74 @@
mova m3, [r0 + r2]
DEINTB 1, 0, 4, 3, 7
VAR_CORE
+ ret
+
+cglobal pixel_var_16x16, 2,3,8 ; 16x16 entry point: set up, run the shared 16x16 kernel once, then VAR_END
+ VAR_START 1 ; setup macro defined outside this patch; presumably clears the accumulators - confirm
+ lea r2, [r1 * 3] ; r2 = 3 * r1; the kernel addresses [r0 + r2] alongside [r0 + r1]
+ call pixel_var_16x16_internal ; shared kernel body, ends in a plain ret
VAR_END 16, 16
+
+cglobal pixel_var_32x32, 2,4,8 ; 32x32 entry point: four passes of the shared 16x16 kernel (2 columns x 2 bands)
+ VAR_START 1 ; setup macro defined outside this patch; presumably clears the accumulators - confirm
+ lea r2, [r1 * 3] ; r2 = 3 * r1; the kernel addresses [r0 + r2] alongside [r0 + r1]
+ mov r3, r0 ; keep the original r0 so the second column can be addressed from it
+ call pixel_var_16x16_internal ; first column, first band
+ lea r0, [r0 + r1 * 4] ; step r0 by 4 * r1; assumes the kernel leaves r0 that far short of the next band - TODO confirm
+ call pixel_var_16x16_internal ; first column, second band
+ lea r0, [r3 + 16] ; restart at the saved base plus 16 bytes (second column)
+ call pixel_var_16x16_internal ; second column, first band
+ lea r0, [r0 + r1 * 4] ; same 4 * r1 step as above
+ call pixel_var_16x16_internal ; second column, second band
+ VAR_END 32, 32 ; final reduction macro (only partly visible in this patch)
+
+cglobal pixel_var_64x64, 2,6,8 ; 64x64 entry point: four 16-byte-wide columns, each four passes of the 16x16 kernel
+ VAR_START 1 ; setup macro defined outside this patch; presumably clears the accumulators - confirm
+ lea r2, [r1 * 3] ; r2 = 3 * r1; the kernel addresses [r0 + r2] alongside [r0 + r1]
+ mov r3, r0 ; keep the original r0 as the base for the later columns
+ call pixel_var_16x16_internal ; column 0, band 0
+ lea r0, [r0 + r1 * 4] ; step r0 by 4 * r1; assumes the kernel leaves r0 that far short of the next band - TODO confirm
+ call pixel_var_16x16_internal ; column 0, band 1
+ lea r0, [r0 + r1 * 4]
+ call pixel_var_16x16_internal ; column 0, band 2
+ lea r0, [r0 + r1 * 4]
+ call pixel_var_16x16_internal ; column 0, band 3
+ HADDW m5, m2 ; reduce m5 horizontally (m2 is the scratch operand); presumably done per column to keep word lanes from overflowing - confirm
+ movd r4d, m5 ; r4d = partial total for column 0
+ pxor m5, m5 ; restart the m5 accumulator for the next column
+ lea r0, [r3 + 16] ; saved base + 16 bytes: column 1
+ call pixel_var_16x16_internal ; column 1, band 0
+ lea r0, [r0 + r1 * 4]
+ call pixel_var_16x16_internal ; column 1, band 1
+ lea r0, [r0 + r1 * 4]
+ call pixel_var_16x16_internal ; column 1, band 2
+ lea r0, [r0 + r1 * 4]
+ call pixel_var_16x16_internal ; column 1, band 3
+ HADDW m5, m2 ; reduce column 1's m5 the same way
+ movd r5d, m5 ; r5d = partial total for column 1
+ add r4, r5 ; fold it into the running total kept in r4
+ pxor m5, m5 ; restart the m5 accumulator
+ lea r0, [r3 + 32] ; saved base + 32 bytes: column 2
+ call pixel_var_16x16_internal ; column 2, band 0
+ lea r0, [r0 + r1 * 4]
+ call pixel_var_16x16_internal ; column 2, band 1
+ lea r0, [r0 + r1 * 4]
+ call pixel_var_16x16_internal ; column 2, band 2
+ lea r0, [r0 + r1 * 4]
+ call pixel_var_16x16_internal ; column 2, band 3
+ lea r0, [r3 + 48] ; saved base + 48 bytes: column 3 (set before the flush; the flush does not touch r0)
+ HADDW m5, m2 ; reduce column 2's m5
+ movd r5d, m5 ; r5d = partial total for column 2
+ add r4, r5 ; running total in r4 now covers columns 0-2
+ pxor m5, m5 ; restart the m5 accumulator for the last column
+ call pixel_var_16x16_internal ; column 3, band 0
+ lea r0, [r0 + r1 * 4]
+ call pixel_var_16x16_internal ; column 3, band 1
+ lea r0, [r0 + r1 * 4]
+ call pixel_var_16x16_internal ; column 3, band 2
+ lea r0, [r0 + r1 * 4]
+ call pixel_var_16x16_internal ; column 3, band 3; its total stays in m5 for VAR_END
+ VAR_END 64, 64 ; the %1 == 64 branch adds r4d back into m5; NOTE(review): visible only under !HIGH_BIT_DEPTH - confirm the HBD path
%endmacro ; VAR
INIT_XMM sse2
diff -r 417f794274e5 -r d770e8e65dc4 source/common/x86/pixel.h
--- a/source/common/x86/pixel.h Wed Nov 27 01:49:09 2013 -0600
+++ b/source/common/x86/pixel.h Wed Nov 27 15:19:13 2013 +0530
@@ -357,7 +357,9 @@
#define LUMA_PIXELVAR_DEF(cpu) \
SETUP_LUMA_PIXELVAR_FUNC(8, 8, cpu); \
- SETUP_LUMA_PIXELVAR_FUNC(16, 16, cpu);
+ SETUP_LUMA_PIXELVAR_FUNC(16, 16, cpu); \
+ SETUP_LUMA_PIXELVAR_FUNC(32, 32, cpu); \
+ SETUP_LUMA_PIXELVAR_FUNC(64, 64, cpu);
LUMA_PIXELVAR_DEF(_sse2);
More information about the x265-devel
mailing list