[x265] [PATCH] pixel-sse3.cpp: Replace convert32to16_shr vector class function with intrinsic

dnyaneshwar at multicorewareinc.com dnyaneshwar at multicorewareinc.com
Fri Oct 11 13:49:11 CEST 2013


# HG changeset patch
# User Dnyaneshwar Gorade <dnyaneshwar at multicorewareinc.com>
# Date 1381492124 -19800
#      Fri Oct 11 17:18:44 2013 +0530
# Node ID 6217616334e4a26c024f11dfdd94c872ac2e0da3
# Parent  e0adda303a27534d6a5310753c257a85bc54bb8b
pixel-sse3.cpp: Replace the vector-class implementation of convert32to16_shr with SSE intrinsics.

diff -r e0adda303a27 -r 6217616334e4 source/common/vec/pixel-sse3.cpp
--- a/source/common/vec/pixel-sse3.cpp	Fri Oct 11 16:13:21 2013 +0530
+++ b/source/common/vec/pixel-sse3.cpp	Fri Oct 11 17:18:44 2013 +0530
@@ -619,17 +619,17 @@
 void convert32to16_shr(short *dst, int *org, int shift, int num)
 {
     int i;
-    Vec4i round = _mm_set1_epi32(1 << (shift - 1));
+    __m128i round = _mm_set1_epi32(1 << (shift - 1));
 
     for (i = 0; i < num; i += 4)
     {
-        Vec4i im32;
-        Vec8s im16;
+        __m128i im32;
+        __m128i im16;
 
-        im32.load(org);
-        im32 = (im32 + round) >> shift;
-        im16 = compress_saturated(im32, im32);
-        store_partial(const_int(8), dst, im16);
+        im32 = _mm_loadu_si128((__m128i const*)org);
+        im32 = _mm_sra_epi32(_mm_add_epi32(im32, round), _mm_cvtsi32_si128(shift));
+        im16 = _mm_packs_epi32(im32, im32);
+        _mm_storeu_si128((__m128i*)dst, im16);
 
         org += 4;
         dst += 4;


More information about the x265-devel mailing list