[x265] [PATCH] used sse4 for 2x4, 2x8 and 6x8
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Wed Nov 6 16:06:35 CET 2013
# HG changeset patch
# User Praveen Tiwari
# Date 1383750385 -19800
# Node ID 43a7ee44f88fbbdb3c1b6e3fc00b523e20f4f9b7
# Parent c6c2f999c37ee5846f056183fbbfccf1a8e8a82d
Use SSE4 versions of blockcopy_sp for the 2x4, 2x8 and 6x8 chroma partitions
diff -r c6c2f999c37e -r 43a7ee44f88f source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Wed Nov 06 20:28:15 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp Wed Nov 06 20:36:25 2013 +0530
@@ -330,13 +330,13 @@
//This function pointer initaliztion is temprory will be remove later with macro definitions.
//It is used to avoid linker error until all partion are coded and commit smaller patches, easier to review.
- p.chroma_copy_sp[CHROMA_2x4] = x265_blockcopy_sp_2x4_sse2;
- p.chroma_copy_sp[CHROMA_2x8] = x265_blockcopy_sp_2x8_sse2;
+ p.chroma_copy_sp[CHROMA_2x4] = x265_blockcopy_sp_2x4_sse4;
+ p.chroma_copy_sp[CHROMA_2x8] = x265_blockcopy_sp_2x8_sse4;
p.chroma_copy_sp[CHROMA_4x2] = x265_blockcopy_sp_4x2_sse2;
p.chroma_copy_sp[CHROMA_4x4] = x265_blockcopy_sp_4x4_sse2;
p.chroma_copy_sp[CHROMA_4x8] = x265_blockcopy_sp_4x8_sse2;
p.chroma_copy_sp[CHROMA_4x16] = x265_blockcopy_sp_4x16_sse2;
- p.chroma_copy_sp[CHROMA_6x8] = x265_blockcopy_sp_6x8_sse2;
+ p.chroma_copy_sp[CHROMA_6x8] = x265_blockcopy_sp_6x8_sse4;
p.chroma_copy_sp[CHROMA_8x2] = x265_blockcopy_sp_8x2_sse2;
p.chroma_copy_sp[CHROMA_8x4] = x265_blockcopy_sp_8x4_sse2;
p.chroma_copy_sp[CHROMA_8x6] = x265_blockcopy_sp_8x6_sse2;
diff -r c6c2f999c37e -r 43a7ee44f88f source/common/x86/blockcopy8.asm
--- a/source/common/x86/blockcopy8.asm Wed Nov 06 20:28:15 2013 +0530
+++ b/source/common/x86/blockcopy8.asm Wed Nov 06 20:36:25 2013 +0530
@@ -802,7 +802,7 @@
;-----------------------------------------------------------------------------
; void blockcopy_sp_2x4(pixel *dest, intptr_t destStride, int16_t *src, intptr_t srcStride)
;-----------------------------------------------------------------------------
-INIT_XMM sse2
+INIT_XMM sse4
cglobal blockcopy_sp_2x4, 4, 6, 5, dest, destStride, src, srcStride
add r3, r3
@@ -839,7 +839,7 @@
;-----------------------------------------------------------------------------
; void blockcopy_sp_2x8(pixel *dest, intptr_t destStride, int16_t *src, intptr_t srcStride)
;-----------------------------------------------------------------------------
-INIT_XMM sse2
+INIT_XMM sse4
cglobal blockcopy_sp_2x8, 4, 7, 8, dest, destStride, src, srcStride
add r3, r3
@@ -1058,7 +1058,7 @@
; void blockcopy_sp_6x8(pixel *dest, intptr_t destStride, int16_t *src, intptr_t srcStride)
;-----------------------------------------------------------------------------
%macro BLOCKCOPY_SP_W6_H4 2
-INIT_XMM sse2
+INIT_XMM sse4
cglobal blockcopy_sp_6x8, 4, 7, 8, dest, destStride, src, srcStride
mov r5d, %2
diff -r c6c2f999c37e -r 43a7ee44f88f source/common/x86/pixel.h
--- a/source/common/x86/pixel.h Wed Nov 06 20:28:15 2013 +0530
+++ b/source/common/x86/pixel.h Wed Nov 06 20:36:25 2013 +0530
@@ -332,6 +332,10 @@
CHROMA_BLOCKCOPY_DEF(_sse2);
LUMA_BLOCKCOPY_DEF(_sse2);
+void x265_blockcopy_sp_2x4_sse4(pixel * a, intptr_t stridea, int16_t * b, intptr_t strideb);
+void x265_blockcopy_sp_2x8_sse4(pixel * a, intptr_t stridea, int16_t * b, intptr_t strideb);
+void x265_blockcopy_sp_6x8_sse4(pixel * a, intptr_t stridea, int16_t * b, intptr_t strideb);
+
#undef DECL_PIXELS
#undef DECL_SUF
#undef DECL_HEVC_SSD
More information about the x265-devel mailing list