[x265] [PATCH] asm: assembly code for IntraPred_DC[8x8]
dnyaneshwar at multicorewareinc.com
dnyaneshwar at multicorewareinc.com
Thu Nov 21 11:41:26 CET 2013
# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1385029557 -19800
# Thu Nov 21 15:55:57 2013 +0530
# Node ID 5768141583e8a6a828bb1837a789b9efd2f0493c
# Parent db1151bb4974f1288745ba39dfd6e1838113feb7
asm: assembly code for IntraPred_DC[8x8]
diff -r db1151bb4974 -r 5768141583e8 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Wed Nov 20 18:36:04 2013 -0600
+++ b/source/common/x86/asm-primitives.cpp Thu Nov 21 15:55:57 2013 +0530
@@ -656,6 +656,7 @@
p.calcresidual[BLOCK_32x32] = x265_getResidual32_sse4;
p.quant = x265_quant_sse4;
p.intra_pred_dc[BLOCK_4x4] = x265_intra_pred_dc4_sse4;
+ p.intra_pred_dc[BLOCK_8x8] = x265_intra_pred_dc8_sse4;
}
if (cpuMask & X265_CPU_AVX)
{
diff -r db1151bb4974 -r 5768141583e8 source/common/x86/intrapred.asm
--- a/source/common/x86/intrapred.asm Wed Nov 20 18:36:04 2013 -0600
+++ b/source/common/x86/intrapred.asm Thu Nov 21 15:55:57 2013 +0530
@@ -94,3 +94,82 @@
.end:
RET
+
+
+;-------------------------------------------------------------------------------------------
+; void intra_pred_dc8(pixel* above, pixel* left, pixel* dst, intptr_t dstStride, int filter)
+; Fills the 8x8 block at dst with DC = (sum(above[0..7]) + sum(left[0..7]) + 8) >> 4; when
+; filter != 0, row 0 and column 0 are then re-written blended with above[] / left[].
+;-------------------------------------------------------------------------------------------
+INIT_XMM sse4
+cglobal intra_pred_dc8, 5, 7, 3, above, left, dst, dstStride, filter
+    ; r0 = above, r1 = left, r2 = dst, r3 = dstStride, r4d = filter, r5d = DC, r6 = saved dst
+    pxor        m0, m0
+    movh        m1, [r0]                    ; m1 = above[0..7]
+    movh        m2, [r1]                    ; m2 = left[0..7]
+    punpcklqdq  m1, m2                      ; m1 = above (low half) | left (high half)
+    psadbw      m1, m0                      ; one byte-sum per 64-bit half
+    pshufd      m2, m1, 2                   ; bring the high-half sum down to dword 0
+    paddw       m1, m2                      ; low word = sum of all 16 pixels
+    movd        r5d, m1
+    add         r5d, 8
+    shr         r5d, 4                      ; DC = (sum + 8) / 16
+    movd        m1, r5d
+    pshufb      m1, m0                      ; m1 = byte [dc_val ...]
+
+    test        r4d, r4d                    ; ZF is consumed by "jz .end" below; the mov/lea/movh in between leave flags alone
+    ; store DC 8x8
+    mov         r6, r2                      ; r6 = address of row 0, kept for the filter pass
+    movh        [r2], m1                    ; row 0
+    movh        [r2 + r3], m1               ; row 1
+    lea         r2, [r2 + 2 * r3]
+    movh        [r2], m1                    ; row 2
+    movh        [r2 + r3], m1               ; row 3
+    lea         r2, [r2 + 2 * r3]
+    movh        [r2], m1                    ; row 4
+    movh        [r2 + r3], m1               ; row 5
+    lea         r2, [r2 + 2 * r3]
+    movh        [r2], m1                    ; row 6
+    movh        [r2 + r3], m1               ; row 7
+
+    ; Do DC Filter (skipped when filter == 0)
+    jz          .end
+    lea         r4d, [r5 * 2 + 2]           ; r4d = DC * 2 + 2 (only the low 32 bits are kept, so full-width r5 is safe)
+    add         r5d, r4d                    ; r5d = DC * 3 + 2
+    movd        m1, r5d
+    pshuflw     m1, m1, 0                   ; m1 = pixDCx3
+    pshufd      m1, m1, 0                   ; ... replicated into all 8 words
+
+    ; filter top: dst[0][x] = (above[x] + DC * 3 + 2) >> 2
+    pmovzxbw    m2, [r0]
+    paddw       m2, m1
+    psraw       m2, 2
+    packuswb    m2, m2
+    movh        [r6], m2                    ; writes all 8 pixels of row 0; pixel 0 is replaced just below
+
+    ; filter top-left: dst[0][0] = (above[0] + left[0] + DC * 2 + 2) >> 2
+    movzx       r0d, byte [r0]              ; above pointer is no longer needed, reuse r0 as scratch
+    add         r4d, r0d
+    movzx       r0d, byte [r1]
+    add         r0d, r4d
+    shr         r0d, 2
+    mov         [r6], r0b
+
+    ; filter left: dst[y][0] = (left[y] + DC * 3 + 2) >> 2 for y = 1..7
+    add         r6, r3                      ; r6 = row 1
+    pmovzxbw    m2, [r1 + 1]                ; loads left[1..8]; the last element is never stored
+    paddw       m2, m1
+    psraw       m2, 2
+    packuswb    m2, m2
+    pextrb      [r6], m2, 0                 ; row 1
+    pextrb      [r6 + r3], m2, 1            ; row 2
+    pextrb      [r6 + 2 * r3], m2, 2        ; row 3
+    lea         r6, [r6 + r3 * 2]           ; r6 = row 3
+    pextrb      [r6 + r3], m2, 3            ; row 4
+    pextrb      [r6 + 2 * r3], m2, 4        ; row 5
+    pextrb      [r6 + 4 * r3], m2, 6        ; row 7
+    lea         r3, [r3 * 3]                ; r3 = 3 * stride (x86 has no *3 scale factor)
+    pextrb      [r6 + r3], m2, 5            ; row 6
+
+.end:
+    RET
diff -r db1151bb4974 -r 5768141583e8 source/common/x86/intrapred.h
--- a/source/common/x86/intrapred.h Wed Nov 20 18:36:04 2013 -0600
+++ b/source/common/x86/intrapred.h Thu Nov 21 15:55:57 2013 +0530
@@ -27,5 +27,6 @@
#define X265_INTRAPRED_H
void x265_intra_pred_dc4_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride, int filter);
+void x265_intra_pred_dc8_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride, int filter);
#endif // ifndef X265_INTRAPRED_H
More information about the x265-devel mailing list