[x265] [PATCH] asm: assembly code for IntraPred_DC[32x32], Disabled DC filter for cuSize > 16 in testbench
dnyaneshwar at multicorewareinc.com
dnyaneshwar at multicorewareinc.com
Thu Nov 21 12:13:59 CET 2013
# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1385032379 -19800
# Thu Nov 21 16:42:59 2013 +0530
# Node ID 2a7d0c62bff43cd72ef6162a8197b3046e6c175c
# Parent 0cc83d3c357a5541bd7c159c4af1d1a3063860ae
asm: assembly code for IntraPred_DC[32x32], Disabled DC filter for cuSize > 16 in testbench
diff -r 0cc83d3c357a -r 2a7d0c62bff4 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Thu Nov 21 16:19:54 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp Thu Nov 21 16:42:59 2013 +0530
@@ -658,6 +658,7 @@
p.intra_pred_dc[BLOCK_4x4] = x265_intra_pred_dc4_sse4;
p.intra_pred_dc[BLOCK_8x8] = x265_intra_pred_dc8_sse4;
p.intra_pred_dc[BLOCK_16x16] = x265_intra_pred_dc16_sse4;
+ p.intra_pred_dc[BLOCK_32x32] = x265_intra_pred_dc32_sse4;
}
if (cpuMask & X265_CPU_AVX)
{
diff -r 0cc83d3c357a -r 2a7d0c62bff4 source/common/x86/intrapred.asm
--- a/source/common/x86/intrapred.asm Thu Nov 21 16:19:54 2013 +0530
+++ b/source/common/x86/intrapred.asm Thu Nov 21 16:42:59 2013 +0530
@@ -289,3 +289,76 @@
.end
RET
+
+;-------------------------------------------------------------------------------------------
+; void intra_pred_dc(pixel* above, pixel* left, pixel* dst, intptr_t dstStride, int filter)
+;
+; 32x32 DC intra prediction: every byte of the 32x32 block at dst is set to
+; (sum(above[0..31]) + sum(left[0..31]) + 32) >> 6.
+;
+; Register roles (x86inc numbering):
+;   r0 = above, r1 = left, r3 = dstStride
+;   r2 = dst, advanced by 2 * dstStride after each pair of rows
+;   r4 = scratch for the scalar DC value (only 4 arguments are loaded; filter is not read)
+;   m0 = zero, m1..m4 = partial sums, then m1 = DC value replicated to 16 bytes
+;
+; cglobal declares 5 XMM registers because m0..m4 are all used below.
+; All loads and stores are movu, so no operand needs to be 16-byte aligned.
+;-------------------------------------------------------------------------------------------
+INIT_XMM sse4
+cglobal intra_pred_dc32, 4, 5, 5, above, left, dst, dstStride, filter
+
+    ; psadbw against zero sums each run of 8 bytes into the low word of its qword
+    pxor        m0, m0
+    movu        m1, [r0]
+    movu        m2, [r0 + 16]
+    movu        m3, [r1]
+    movu        m4, [r1 + 16]
+    psadbw      m1, m0
+    psadbw      m2, m0
+    psadbw      m3, m0
+    psadbw      m4, m0
+
+    ; combine the four vectors into a single pair of qword sums in m1
+    paddw       m1, m2
+    paddw       m3, m4
+    paddw       m1, m3
+
+    ; move dword 2 (upper qword's sum) down to dword 0 and add it in
+    pshufd      m2, m1, 2
+    paddw       m1, m2
+
+    ; dc = (sum + 32) >> 6: rounded mean of the 64 bytes that were summed
+    movd        r4d, m1
+    add         r4d, 32
+    shr         r4d, 6
+
+    ; pshufb with an all-zero mask copies byte 0 (the DC value) to every byte
+    movd        m1, r4d
+    pshufb      m1, m0
+
+    ; 4 repetitions x 4 row pairs = 32 rows, each stored as two 16-byte halves
+%rep 4
+    movu        [r2], m1
+    movu        [r2 + 16], m1
+    movu        [r2 + r3], m1
+    movu        [r2 + r3 + 16], m1
+    lea         r2, [r2 + 2 * r3]
+    movu        [r2], m1
+    movu        [r2 + 16], m1
+    movu        [r2 + r3], m1
+    movu        [r2 + r3 + 16], m1
+    lea         r2, [r2 + 2 * r3]
+    movu        [r2], m1
+    movu        [r2 + 16], m1
+    movu        [r2 + r3], m1
+    movu        [r2 + r3 + 16], m1
+    lea         r2, [r2 + 2 * r3]
+    movu        [r2], m1
+    movu        [r2 + 16], m1
+    movu        [r2 + r3], m1
+    movu        [r2 + r3 + 16], m1
+    lea         r2, [r2 + 2 * r3]
+%endrep
+
+    RET
diff -r 0cc83d3c357a -r 2a7d0c62bff4 source/common/x86/intrapred.h
--- a/source/common/x86/intrapred.h Thu Nov 21 16:19:54 2013 +0530
+++ b/source/common/x86/intrapred.h Thu Nov 21 16:42:59 2013 +0530
@@ -29,5 +29,6 @@
void x265_intra_pred_dc4_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride, int filter);
void x265_intra_pred_dc8_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride, int filter);
void x265_intra_pred_dc16_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride, int filter);
+void x265_intra_pred_dc32_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride, int filter);
#endif // ifndef X265_INTRAPRED_H
diff -r 0cc83d3c357a -r 2a7d0c62bff4 source/test/intrapredharness.cpp
--- a/source/test/intrapredharness.cpp Thu Nov 21 16:19:54 2013 +0530
+++ b/source/test/intrapredharness.cpp Thu Nov 21 16:42:59 2013 +0530
@@ -75,6 +75,8 @@
for (int i = 0; i <= 100; i++)
{
int rand_filter = rand() & 1;
+ if (width > 16)
+ rand_filter = 0;
pixel left[MAX_CU_SIZE * 2 + 1];
for (int k = 0; k < width * 2 + 1; k++)
@@ -296,9 +298,12 @@
printf("intra_dc_%dx%d[filter=0]", size, size);
REPORT_SPEEDUP(opt.intra_pred_dc[i], ref.intra_pred_dc[i],
pixel_buff + srcStride, pixel_buff, pixel_out_vec, FENC_STRIDE, 0);
- printf("intra_dc_%dx%d[filter=1]", size, size);
- REPORT_SPEEDUP(opt.intra_pred_dc[i], ref.intra_pred_dc[i],
+ if (size <= 16)
+ {
+ printf("intra_dc_%dx%d[filter=1]", size, size);
+ REPORT_SPEEDUP(opt.intra_pred_dc[i], ref.intra_pred_dc[i],
pixel_buff + srcStride, pixel_buff, pixel_out_vec, FENC_STRIDE, 1);
+ }
}
}
if (opt.intra_pred_planar)
More information about the x265-devel mailing list