[x265] [PATCH] asm: 10bpp code for scale1D_128to64 module
murugan at multicorewareinc.com
murugan at multicorewareinc.com
Wed Dec 4 14:15:35 CET 2013
# HG changeset patch
# User Murugan Vairavel <murugan at multicorewareinc.com>
# Date 1386162865 -19800
# Wed Dec 04 18:44:25 2013 +0530
# Node ID a525d3fde24f8c076def1b67122e6f7f69e60d35
# Parent 4347192eae502a5f963d7e79655ba753e677b58b
asm: 10bpp code for scale1D_128to64 module
diff -r 4347192eae50 -r a525d3fde24f source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Wed Dec 04 17:11:43 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp Wed Dec 04 18:44:25 2013 +0530
@@ -576,6 +576,7 @@
}
if (cpuMask & X265_CPU_SSSE3)
{
+ p.scale1D_128to64 = x265_scale1D_128to64_ssse3;
}
if (cpuMask & X265_CPU_SSE4)
{
diff -r 4347192eae50 -r a525d3fde24f source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm Wed Dec 04 17:11:43 2013 +0530
+++ b/source/common/x86/pixel-util8.asm Wed Dec 04 18:44:25 2013 +0530
@@ -44,6 +44,7 @@
mask_ff: times 16 db 0xff
times 16 db 0
deinterleave_shuf: db 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15
+deinterleave_word_shuf: db 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 ; pshufb mask: words 0,2,4,6 -> low qword, words 1,3,5,7 -> high qword
hmul_16p: times 16 db 1
times 8 db 1, -1
@@ -1643,7 +1644,98 @@
;-----------------------------------------------------------------
INIT_XMM ssse3
cglobal scale1D_128to64, 2, 2, 8, dest, src1, stride
-
+%if HIGH_BIT_DEPTH
+ mova m7, [deinterleave_word_shuf] ; m7 = pshufb mask; its low 8 bytes select words 0, 2, 4, 6
+ ; Word path: average each adjacent pair of 16-bit words; 256 bytes at r1 -> 128 bytes at r0, in 4 groups of 64 source bytes.
+ movu m0, [r1] ; group 1 of 4: source bytes 0-63 -> dest bytes 0-31
+ palignr m1, m0, 2 ; m1 words 0-6 = m0 words 1-7; top word is whatever m1 held before
+ movu m2, [r1 + 16]
+ palignr m3, m2, 2 ; same one-word shift for m2
+ movu m4, [r1 + 32]
+ palignr m5, m4, 2 ; same one-word shift for m4
+ movu m6, [r1 + 48]
+ pavgw m0, m1 ; word i = (w[i] + w[i+1] + 1) >> 1; even i are the wanted pair averages
+ palignr m1, m6, 2 ; m1 is free again only after the pavgw above, so its order matters
+ pavgw m2, m3
+ pavgw m4, m5
+ pavgw m6, m1
+ pshufb m0, m0, m7 ; gather the even-indexed averages into the low qword
+ pshufb m2, m2, m7
+ pshufb m4, m4, m7
+ pshufb m6, m6, m7
+ punpcklqdq m0, m2 ; low qwords of m0 and m2 = 8 output words
+ movu [r0], m0
+ punpcklqdq m4, m6 ; low qwords of m4 and m6 = next 8 output words
+ movu [r0 + 16], m4
+ ; The stale top word from palignr only affects result word 7, which the low
+ ; half of the pshufb mask never selects, so it is not part of any stored qword.
+ ; Group 2 of 4: source bytes 64-127 -> dest bytes 32-63 (same sequence as group 1).
+ movu m0, [r1 + 64]
+ palignr m1, m0, 2
+ movu m2, [r1 + 80]
+ palignr m3, m2, 2
+ movu m4, [r1 + 96]
+ palignr m5, m4, 2
+ movu m6, [r1 + 112]
+ pavgw m0, m1
+ palignr m1, m6, 2
+ pavgw m2, m3
+ pavgw m4, m5
+ pavgw m6, m1
+ pshufb m0, m0, m7
+ pshufb m2, m2, m7
+ pshufb m4, m4, m7
+ pshufb m6, m6, m7
+ punpcklqdq m0, m2
+ movu [r0 + 32], m0
+ punpcklqdq m4, m6
+ movu [r0 + 48], m4
+ ; Group 3 of 4: source bytes 128-191 -> dest bytes 64-95.
+ movu m0, [r1 + 128]
+ palignr m1, m0, 2
+ movu m2, [r1 + 144]
+ palignr m3, m2, 2
+ movu m4, [r1 + 160]
+ palignr m5, m4, 2
+ movu m6, [r1 + 176]
+ pavgw m0, m1
+ palignr m1, m6, 2
+ pavgw m2, m3
+ pavgw m4, m5
+ pavgw m6, m1
+ pshufb m0, m0, m7
+ pshufb m2, m2, m7
+ pshufb m4, m4, m7
+ pshufb m6, m6, m7
+ ; Pack the low qwords pairwise and store 32 bytes of results.
+ punpcklqdq m0, m2
+ movu [r0 + 64], m0
+ punpcklqdq m4, m6
+ movu [r0 + 80], m4
+ ; Group 4 of 4: source bytes 192-255 -> dest bytes 96-127.
+ movu m0, [r1 + 192]
+ palignr m1, m0, 2
+ movu m2, [r1 + 208]
+ palignr m3, m2, 2
+ movu m4, [r1 + 224]
+ palignr m5, m4, 2
+ movu m6, [r1 + 240]
+ pavgw m0, m1
+ palignr m1, m6, 2
+ pavgw m2, m3
+ pavgw m4, m5
+ pavgw m6, m1
+ pshufb m0, m0, m7
+ pshufb m2, m2, m7
+ pshufb m4, m4, m7
+ pshufb m6, m6, m7
+ ; Pack the low qwords pairwise and store the final 32 bytes of results.
+ punpcklqdq m0, m2
+ movu [r0 + 96], m0
+ punpcklqdq m4, m6
+ movu [r0 + 112], m4
+
+%else
mova m7, [deinterleave_shuf]
movu m0, [r1]
@@ -1697,7 +1789,7 @@
movu [r0 + 32], m0
punpcklqdq m4, m6
movu [r0 + 48], m4
-
+%endif
RET
;-----------------------------------------------------------------
More information about the x265-devel
mailing list