[x265] [PATCH] pixel-sse3.cpp: Replace convert32to16_shr vector class function with intrinsic
dnyaneshwar at multicorewareinc.com
dnyaneshwar at multicorewareinc.com
Fri Oct 11 13:49:11 CEST 2013
# HG changeset patch
# User Dnyaneshwar Gorade <dnyaneshwar at multicorewareinc.com>
# Date 1381492124 -19800
# Fri Oct 11 17:18:44 2013 +0530
# Node ID 6217616334e4a26c024f11dfdd94c872ac2e0da3
# Parent e0adda303a27534d6a5310753c257a85bc54bb8b
pixel-sse3.cpp: Replace convert32to16_shr vector class function with intrinsic.
diff -r e0adda303a27 -r 6217616334e4 source/common/vec/pixel-sse3.cpp
--- a/source/common/vec/pixel-sse3.cpp Fri Oct 11 16:13:21 2013 +0530
+++ b/source/common/vec/pixel-sse3.cpp Fri Oct 11 17:18:44 2013 +0530
@@ -619,17 +619,17 @@
void convert32to16_shr(short *dst, int *org, int shift, int num)
{
int i;
- Vec4i round = _mm_set1_epi32(1 << (shift - 1));
+ __m128i round = _mm_set1_epi32(1 << (shift - 1));
for (i = 0; i < num; i += 4)
{
- Vec4i im32;
- Vec8s im16;
+ __m128i im32;
+ __m128i im16;
- im32.load(org);
- im32 = (im32 + round) >> shift;
- im16 = compress_saturated(im32, im32);
- store_partial(const_int(8), dst, im16);
+ im32 = _mm_loadu_si128((__m128i const*)org);
+ im32 = _mm_sra_epi32(_mm_add_epi32(im32, round), _mm_cvtsi32_si128(shift));
+ im16 = _mm_packs_epi32(im32, im32);
+ _mm_storeu_si128((__m128i*)dst, im16);
org += 4;
dst += 4;
More information about the x265-devel mailing list