[x265] [PATCH] blockcopy_ps_16x4, asm code is now sse4
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Mon Nov 11 12:24:39 CET 2013
# HG changeset patch
# User Praveen Tiwari
# Date 1384169067 -19800
# Node ID 6ca58aa3d85bc962510509c7f04be76c9a9cf591
# Parent bce503510c7262f2e12583b57b2a03370cecd33a
blockcopy_ps_16x4, asm code is now sse4
diff -r bce503510c72 -r 6ca58aa3d85b source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Mon Nov 11 16:44:45 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp Mon Nov 11 16:54:27 2013 +0530
@@ -366,12 +366,6 @@
p.blockfill_s[BLOCK_8x8] = x265_blockfill_s_8x8_sse2;
p.blockfill_s[BLOCK_16x16] = x265_blockfill_s_16x16_sse2;
p.blockfill_s[BLOCK_32x32] = x265_blockfill_s_32x32_sse2;
-
- p.chroma_copy_ps[CHROMA_16x4] = x265_blockcopy_ps_16x4_sse2;
- p.chroma_copy_ps[CHROMA_16x8] = x265_blockcopy_ps_16x8_sse2;
- p.chroma_copy_ps[CHROMA_16x12] = x265_blockcopy_ps_16x12_sse2;
- p.chroma_copy_ps[CHROMA_16x16] = x265_blockcopy_ps_16x16_sse2;
- p.chroma_copy_ps[CHROMA_16x32] = x265_blockcopy_ps_16x32_sse2;
#if X86_64
p.satd[LUMA_8x32] = x265_pixel_satd_8x32_sse2;
p.satd[LUMA_16x4] = x265_pixel_satd_16x4_sse2;
@@ -467,6 +461,11 @@
p.chroma_copy_ps[CHROMA_8x8] = x265_blockcopy_ps_8x8_sse4;
p.chroma_copy_ps[CHROMA_8x16] = x265_blockcopy_ps_8x16_sse4;
p.chroma_copy_ps[CHROMA_8x32] = x265_blockcopy_ps_8x32_sse4;
+ p.chroma_copy_ps[CHROMA_16x4] = x265_blockcopy_ps_16x4_sse4;
+ p.chroma_copy_ps[CHROMA_16x8] = x265_blockcopy_ps_16x8_sse4;
+ p.chroma_copy_ps[CHROMA_16x12] = x265_blockcopy_ps_16x12_sse4;
+ p.chroma_copy_ps[CHROMA_16x16] = x265_blockcopy_ps_16x16_sse4;
+ p.chroma_copy_ps[CHROMA_16x32] = x265_blockcopy_ps_16x32_sse4;
}
if (cpuMask & X265_CPU_AVX)
{
diff -r bce503510c72 -r 6ca58aa3d85b source/common/x86/blockcopy8.asm
--- a/source/common/x86/blockcopy8.asm Mon Nov 11 16:44:45 2013 +0530
+++ b/source/common/x86/blockcopy8.asm Mon Nov 11 16:54:27 2013 +0530
@@ -1790,7 +1790,7 @@
;-----------------------------------------------------------------------------
; void blockcopy_ps_16x4(int16_t *dest, intptr_t destStride, pixel *src, intptr_t srcStride);
;-----------------------------------------------------------------------------
-INIT_XMM sse2
+INIT_XMM sse4
cglobal blockcopy_ps_16x4, 4, 4, 3, dest, destStride, src, srcStride
add r1, r1
@@ -1829,7 +1829,7 @@
; void blockcopy_ps_%1x%2(int16_t *dest, intptr_t destStride, pixel *src, intptr_t srcStride);
;-----------------------------------------------------------------------------
%macro BLOCKCOPY_PS_W16_H4 2
-INIT_XMM sse2
+INIT_XMM sse4
cglobal blockcopy_ps_%1x%2, 4, 5, 3, dest, destStride, src, srcStride
add r1, r1
diff -r bce503510c72 -r 6ca58aa3d85b source/common/x86/blockcopy8.h
--- a/source/common/x86/blockcopy8.h Mon Nov 11 16:44:45 2013 +0530
+++ b/source/common/x86/blockcopy8.h Mon Nov 11 16:54:27 2013 +0530
@@ -99,7 +99,12 @@
SETUP_CHROMA_BLOCKCOPY_FUNC_SSE4(8, 6, cpu); \
SETUP_CHROMA_BLOCKCOPY_FUNC_SSE4(8, 8, cpu); \
SETUP_CHROMA_BLOCKCOPY_FUNC_SSE4(8, 16, cpu); \
- SETUP_CHROMA_BLOCKCOPY_FUNC_SSE4(8, 32, cpu);
+ SETUP_CHROMA_BLOCKCOPY_FUNC_SSE4(8, 32, cpu); \
+ SETUP_CHROMA_BLOCKCOPY_FUNC_SSE4(16, 4, cpu); \
+ SETUP_CHROMA_BLOCKCOPY_FUNC_SSE4(16, 8, cpu); \
+ SETUP_CHROMA_BLOCKCOPY_FUNC_SSE4(16, 12, cpu); \
+ SETUP_CHROMA_BLOCKCOPY_FUNC_SSE4(16, 16, cpu); \
+ SETUP_CHROMA_BLOCKCOPY_FUNC_SSE4(16, 32, cpu);
CHROMA_BLOCKCOPY_DEF_SSE4(_sse4);
@@ -112,12 +117,6 @@
void x265_blockfill_s_16x16_sse2(int16_t *dst, intptr_t dstride, int16_t val);
void x265_blockfill_s_32x32_sse2(int16_t *dst, intptr_t dstride, int16_t val);
-void x265_blockcopy_ps_16x4_sse2(int16_t *dst, intptr_t dstStride, pixel *src, intptr_t srcStride);
-void x265_blockcopy_ps_16x8_sse2(int16_t *dst, intptr_t dstStride, pixel *src, intptr_t srcStride);
-void x265_blockcopy_ps_16x12_sse2(int16_t *dst, intptr_t dstStride, pixel *src, intptr_t srcStride);
-void x265_blockcopy_ps_16x16_sse2(int16_t *dst, intptr_t dstStride, pixel *src, intptr_t srcStride);
-void x265_blockcopy_ps_16x32_sse2(int16_t *dst, intptr_t dstStride, pixel *src, intptr_t srcStride);
-
#undef SETUP_CHROMA_BLOCKCOPY_FUNC
#undef SETUP_LUMA_BLOCK_FUNC
#undef CHROMA_BLOCKCOPY_DEF
More information about the x265-devel mailing list