[x265] [PATCH] asm: assembly code for IntraPred_DC[16x16]
dnyaneshwar at multicorewareinc.com
dnyaneshwar at multicorewareinc.com
Thu Nov 21 12:07:22 CET 2013
# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1385030994 -19800
# Thu Nov 21 16:19:54 2013 +0530
# Node ID 0cc83d3c357a5541bd7c159c4af1d1a3063860ae
# Parent 5768141583e8a6a828bb1837a789b9efd2f0493c
asm: assembly code for IntraPred_DC[16x16]
diff -r 5768141583e8 -r 0cc83d3c357a source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Thu Nov 21 15:55:57 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp Thu Nov 21 16:19:54 2013 +0530
@@ -657,6 +657,7 @@
p.quant = x265_quant_sse4;
p.intra_pred_dc[BLOCK_4x4] = x265_intra_pred_dc4_sse4;
p.intra_pred_dc[BLOCK_8x8] = x265_intra_pred_dc8_sse4;
+ p.intra_pred_dc[BLOCK_16x16] = x265_intra_pred_dc16_sse4;
}
if (cpuMask & X265_CPU_AVX)
{
diff -r 5768141583e8 -r 0cc83d3c357a source/common/x86/intrapred.asm
--- a/source/common/x86/intrapred.asm Thu Nov 21 15:55:57 2013 +0530
+++ b/source/common/x86/intrapred.asm Thu Nov 21 16:19:54 2013 +0530
@@ -173,3 +173,119 @@
.end
RET
+
+;-------------------------------------------------------------------------------------------
+; void intra_pred_dc(pixel* above, pixel* left, pixel* dst, intptr_t dstStride, int filter)
+;-------------------------------------------------------------------------------------------
+INIT_XMM sse4
+cglobal intra_pred_dc16, 5, 7, 4, above, left, dst, dstStride, filter
+ ; 16x16 DC intra prediction: r0=above, r1=left, r2=dst, r3=dstStride, r4=filter (cglobal argument order); r5, r6, m0-m3 are scratch
+ pxor m0, m0 ; m0 = 0: zero operand for psadbw and all-zero shuffle mask for pshufb
+ movu m1, [r0] ; m1 = above[0..15]
+ movu m2, [r1] ; m2 = left[0..15]
+ psadbw m1, m0 ; SAD against zero = byte sum of each 8-byte half of above
+ psadbw m2, m0 ; same for left
+ paddw m1, m2 ; per-half sums of above + left
+ pshufd m2, m1, 2 ; bring the upper-half sum (dword 2) down to dword 0
+ paddw m1, m2 ; low word of m1 = sum of all 32 neighbour bytes
+
+ movd r5d, m1 ; r5d = sum
+ add r5d, 16 ; rounding offset (half the divisor)
+ shr r5d, 5 ; sum = sum / 32
+ movd m1, r5d
+ pshufb m1, m0 ; m1 = byte [dc_val ...]
+
+ test r4d, r4d ; ZF = (filter == 0); consumed by the jz after the stores, which use only mov/movu/lea (flags untouched)
+
+ ; store DC 16x16
+ mov r6, r2 ; r6 = dst row 0, kept for the filter pass because r2 is advanced below
+ movu [r2], m1 ; row 0
+ movu [r2 + r3], m1 ; row 1
+ lea r2, [r2 + 2 * r3] ; advance two rows
+ movu [r2], m1 ; row 2
+ movu [r2 + r3], m1 ; row 3
+ lea r2, [r2 + 2 * r3]
+ movu [r2], m1 ; row 4
+ movu [r2 + r3], m1 ; row 5
+ lea r2, [r2 + 2 * r3]
+ movu [r2], m1 ; row 6
+ movu [r2 + r3], m1 ; row 7
+ lea r2, [r2 + 2 * r3]
+ movu [r2], m1 ; row 8
+ movu [r2 + r3], m1 ; row 9
+ lea r2, [r2 + 2 * r3]
+ movu [r2], m1 ; row 10
+ movu [r2 + r3], m1 ; row 11
+ lea r2, [r2 + 2 * r3]
+ movu [r2], m1 ; row 12
+ movu [r2 + r3], m1 ; row 13
+ lea r2, [r2 + 2 * r3]
+ movu [r2], m1 ; row 14
+ movu [r2 + r3], m1 ; row 15
+
+ ; Do DC Filter
+ jz .end ; filter == 0 (flags from the test above): the plain DC fill is the final result
+ lea r4d, [r5d * 2 + 2] ; r4d = DC * 2 + 2
+ add r5d, r4d ; r5d = DC * 3 + 2
+ movd m1, r5d
+ pshuflw m1, m1, 0 ; m1 = pixDCx3
+ pshufd m1, m1, 0 ; replicate to all 8 word lanes
+
+ ; filter top
+ pmovzxbw m2, [r0] ; above[0..7] widened to words
+ paddw m2, m1 ; above[x] + DC * 3 + 2
+ psraw m2, 2 ; >> 2
+ packuswb m2, m2 ; back to bytes; low 8 bytes hold the result
+ movh [r6], m2 ; row 0, columns 0..7
+ pmovzxbw m3, [r0 + 8] ; above[8..15] widened to words
+ paddw m3, m1
+ psraw m3, 2
+ packuswb m3, m3
+ movh [r6 + 8], m3 ; row 0, columns 8..15
+
+ ; filter top-left
+ movzx r0d, byte [r0] ; r0d = above[0]; the above pointer is not used after this
+ add r4d, r0d ; r4d = DC * 2 + 2 + above[0]
+ movzx r0d, byte [r1] ; r0d = left[0]
+ add r0d, r4d ; r0d = above[0] + left[0] + DC * 2 + 2
+ shr r0d, 2
+ mov [r6], r0b ; replaces the row 0, column 0 byte written by the top filter
+
+ ; filter left
+ add r6, r3 ; r6 = row 1
+ pmovzxbw m2, [r1 + 1] ; left[1..8] widened to words
+ paddw m2, m1 ; left[y] + DC * 3 + 2
+ psraw m2, 2 ; >> 2
+ packuswb m2, m2 ; bytes 0..7 = filtered column 0 for rows 1..8
+ pextrb [r6], m2, 0 ; row 1
+ pextrb [r6 + r3], m2, 1 ; row 2
+ pextrb [r6 + r3 * 2], m2, 2 ; row 3
+ lea r6, [r6 + r3 * 2]
+ add r6, r3 ; r6 = row 4
+ pextrb [r6], m2, 3 ; row 4
+ add r6, r3 ; r6 = row 5
+ pextrb [r6], m2, 4 ; row 5
+ pextrb [r6 + r3], m2, 5 ; row 6
+ pextrb [r6 + r3 * 2], m2, 6 ; row 7
+ lea r6, [r6 + r3 * 2]
+ add r6, r3 ; r6 = row 8
+ pextrb [r6], m2, 7 ; row 8
+
+ add r6, r3 ; r6 = row 9
+ pmovzxbw m3, [r1 + 9] ; loads left[9..16]; only 7 results are stored. NOTE(review): reads one byte past left[15] - confirm the caller's buffer allows it
+ paddw m3, m1
+ psraw m3, 2
+ packuswb m3, m3 ; bytes 0..6 = filtered column 0 for rows 9..15
+ pextrb [r6], m3, 0 ; row 9
+ pextrb [r6 + r3], m3, 1 ; row 10
+ pextrb [r6 + r3 * 2], m3, 2 ; row 11
+ lea r6, [r6 + r3 * 2]
+ add r6, r3 ; r6 = row 12
+ pextrb [r6], m3, 3 ; row 12
+ add r6, r3 ; r6 = row 13
+ pextrb [r6], m3, 4 ; row 13
+ pextrb [r6 + r3], m3, 5 ; row 14
+ pextrb [r6 + r3 * 2], m3, 6 ; row 15
+
+.end
+ RET
diff -r 5768141583e8 -r 0cc83d3c357a source/common/x86/intrapred.h
--- a/source/common/x86/intrapred.h Thu Nov 21 15:55:57 2013 +0530
+++ b/source/common/x86/intrapred.h Thu Nov 21 16:19:54 2013 +0530
@@ -28,5 +28,6 @@
void x265_intra_pred_dc4_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride, int filter);
void x265_intra_pred_dc8_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride, int filter);
+void x265_intra_pred_dc16_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride, int filter);
#endif // ifndef X265_INTRAPRED_H
More information about the x265-devel
mailing list