[x265] [PATCH] blockcopy_sp: use sse4 for 2x4, 2x8 and 6x8

praveen at multicorewareinc.com praveen at multicorewareinc.com
Wed Nov 6 16:06:35 CET 2013


# HG changeset patch
# User Praveen Tiwari
# Date 1383750385 -19800
# Node ID 43a7ee44f88fbbdb3c1b6e3fc00b523e20f4f9b7
# Parent  c6c2f999c37ee5846f056183fbbfccf1a8e8a82d
blockcopy_sp: use sse4 for 2x4, 2x8 and 6x8

diff -r c6c2f999c37e -r 43a7ee44f88f source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Wed Nov 06 20:28:15 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp	Wed Nov 06 20:36:25 2013 +0530
@@ -330,13 +330,13 @@
 //This function pointer initaliztion is temprory will be remove later with macro definitions.
 //It is used to avoid linker error until all partion are coded and commit smaller patches, easier to review.
 
-        p.chroma_copy_sp[CHROMA_2x4] = x265_blockcopy_sp_2x4_sse2;
-        p.chroma_copy_sp[CHROMA_2x8] = x265_blockcopy_sp_2x8_sse2;
+        p.chroma_copy_sp[CHROMA_2x4] = x265_blockcopy_sp_2x4_sse4;
+        p.chroma_copy_sp[CHROMA_2x8] = x265_blockcopy_sp_2x8_sse4;
         p.chroma_copy_sp[CHROMA_4x2] = x265_blockcopy_sp_4x2_sse2;
         p.chroma_copy_sp[CHROMA_4x4] = x265_blockcopy_sp_4x4_sse2;
         p.chroma_copy_sp[CHROMA_4x8] = x265_blockcopy_sp_4x8_sse2;
         p.chroma_copy_sp[CHROMA_4x16] = x265_blockcopy_sp_4x16_sse2;
-        p.chroma_copy_sp[CHROMA_6x8] = x265_blockcopy_sp_6x8_sse2;
+        p.chroma_copy_sp[CHROMA_6x8] = x265_blockcopy_sp_6x8_sse4;
         p.chroma_copy_sp[CHROMA_8x2] = x265_blockcopy_sp_8x2_sse2;
         p.chroma_copy_sp[CHROMA_8x4] = x265_blockcopy_sp_8x4_sse2;
         p.chroma_copy_sp[CHROMA_8x6] = x265_blockcopy_sp_8x6_sse2;
diff -r c6c2f999c37e -r 43a7ee44f88f source/common/x86/blockcopy8.asm
--- a/source/common/x86/blockcopy8.asm	Wed Nov 06 20:28:15 2013 +0530
+++ b/source/common/x86/blockcopy8.asm	Wed Nov 06 20:36:25 2013 +0530
@@ -802,7 +802,7 @@
 ;-----------------------------------------------------------------------------
 ; void blockcopy_sp_2x4(pixel *dest, intptr_t destStride, int16_t *src, intptr_t srcStride)
 ;-----------------------------------------------------------------------------
-INIT_XMM sse2
+INIT_XMM sse4
 cglobal blockcopy_sp_2x4, 4, 6, 5, dest, destStride, src, srcStride
 
 add        r3,     r3
@@ -839,7 +839,7 @@
 ;-----------------------------------------------------------------------------
 ; void blockcopy_sp_2x8(pixel *dest, intptr_t destStride, int16_t *src, intptr_t srcStride)
 ;-----------------------------------------------------------------------------
-INIT_XMM sse2
+INIT_XMM sse4
 cglobal blockcopy_sp_2x8, 4, 7, 8, dest, destStride, src, srcStride
 
 add        r3,      r3
@@ -1058,7 +1058,7 @@
 ; void blockcopy_sp_6x8(pixel *dest, intptr_t destStride, int16_t *src, intptr_t srcStride)
 ;-----------------------------------------------------------------------------
 %macro BLOCKCOPY_SP_W6_H4 2
-INIT_XMM sse2
+INIT_XMM sse4
 cglobal blockcopy_sp_6x8, 4, 7, 8, dest, destStride, src, srcStride
 
 mov       r5d,    %2
diff -r c6c2f999c37e -r 43a7ee44f88f source/common/x86/pixel.h
--- a/source/common/x86/pixel.h	Wed Nov 06 20:28:15 2013 +0530
+++ b/source/common/x86/pixel.h	Wed Nov 06 20:36:25 2013 +0530
@@ -332,6 +332,10 @@
 CHROMA_BLOCKCOPY_DEF(_sse2);
 LUMA_BLOCKCOPY_DEF(_sse2);
 
+void x265_blockcopy_sp_2x4_sse4(pixel * a, intptr_t stridea, int16_t * b, intptr_t strideb);
+void x265_blockcopy_sp_2x8_sse4(pixel * a, intptr_t stridea, int16_t * b, intptr_t strideb);
+void x265_blockcopy_sp_6x8_sse4(pixel * a, intptr_t stridea, int16_t * b, intptr_t strideb);
+
 #undef DECL_PIXELS
 #undef DECL_SUF
 #undef DECL_HEVC_SSD


More information about the x265-devel mailing list