[x265] [PATCH] asm: enable 16bpp primitives of cvt32to16 and cvt16to32 for all block sizes

murugan at multicorewareinc.com murugan at multicorewareinc.com
Wed Sep 3 15:07:49 CEST 2014


# HG changeset patch
# User Murugan Vairavel <murugan at multicorewareinc.com>
# Date 1409748896 -19800
#      Wed Sep 03 18:24:56 2014 +0530
# Node ID f00888b9e54ff40367cf5a5d3380bca0142d9ba6
# Parent  62c4779fb0bb35d5d8a69678e9e8aa81272f0115
asm: enable 16bpp primitives of cvt32to16_shl and cvt16to32_shr for all block sizes

diff -r 62c4779fb0bb -r f00888b9e54f source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Thu Aug 28 13:14:34 2014 +0530
+++ b/source/common/x86/asm-primitives.cpp	Wed Sep 03 18:24:56 2014 +0530
@@ -1337,6 +1337,10 @@
         p.sad_x4[LUMA_12x16] = x265_pixel_sad_x4_12x16_mmx2;
 
         p.cvt32to16_shr = x265_cvt32to16_shr_sse2;
+        p.cvt32to16_shl[BLOCK_4x4] = x265_cvt32to16_shl_4_sse2;
+        p.cvt32to16_shl[BLOCK_8x8] = x265_cvt32to16_shl_8_sse2;
+        p.cvt32to16_shl[BLOCK_16x16] = x265_cvt32to16_shl_16_sse2;
+        p.cvt32to16_shl[BLOCK_32x32] = x265_cvt32to16_shl_32_sse2;
 
         CHROMA_PIXELSUB_PS(_sse2);
         CHROMA_PIXELSUB_PS_422(_sse2);
@@ -1409,6 +1413,10 @@
         p.nquant = x265_nquant_sse4;
         p.dequant_normal = x265_dequant_normal_sse4;
         p.cvt16to32_shl = x265_cvt16to32_shl_sse4;
+        p.cvt16to32_shr[BLOCK_4x4] = x265_cvt16to32_shr_4_sse4;
+        p.cvt16to32_shr[BLOCK_8x8] = x265_cvt16to32_shr_8_sse4;
+        p.cvt16to32_shr[BLOCK_16x16] = x265_cvt16to32_shr_16_sse4;
+        p.cvt16to32_shr[BLOCK_32x32] = x265_cvt16to32_shr_32_sse4;
         p.intra_pred[BLOCK_4x4][0] = x265_intra_pred_planar4_sse4;
         p.intra_pred[BLOCK_8x8][0] = x265_intra_pred_planar8_sse4;
         p.intra_pred[BLOCK_16x16][0] = x265_intra_pred_planar16_sse4;


More information about the x265-devel mailing list