[x265] [PATCH] pixel: fixed the sign-extension bug in sse_sp8
yuvaraj at multicorewareinc.com
yuvaraj at multicorewareinc.com
Tue Oct 15 07:50:31 CEST 2013
# HG changeset patch
# User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
# Date 1381816184 -19800
# Tue Oct 15 11:19:44 2013 +0530
# Node ID 84f270db613a098be715a71d1e70cc084dcea9a4
# Parent 28d86f3516ed9616cb49c72619e8438e84a87c77
pixel: cleared the bug in sse_sp8
diff -r 28d86f3516ed -r 84f270db613a source/common/vec/pixel-sse41.cpp
--- a/source/common/vec/pixel-sse41.cpp Tue Oct 15 11:10:18 2013 +0530
+++ b/source/common/vec/pixel-sse41.cpp Tue Oct 15 11:19:44 2013 +0530
@@ -4995,12 +4995,13 @@
}
#define SSE_SP8x1 \
- T10 = _mm_unpacklo_epi16(T00, _mm_setzero_si128()); \
+ sign = _mm_srai_epi16(T00, 15); \
+ T10 = _mm_unpacklo_epi16(T00, sign); \
T11 = _mm_unpacklo_epi16(T02, _mm_setzero_si128()); \
T12 = _mm_sub_epi32(T10, T11); \
T13 = _mm_mullo_epi32(T12, T12); \
sum0 = _mm_add_epi32(sum0, T13); \
- T10 = _mm_unpackhi_epi16(T00, _mm_setzero_si128()); \
+ T10 = _mm_unpackhi_epi16(T00, sign); \
T11 = _mm_unpackhi_epi16(T02, _mm_setzero_si128()); \
T12 = _mm_sub_epi32(T10, T11); \
T13 = _mm_mullo_epi32(T12, T12); \
@@ -5016,6 +5017,7 @@
{
__m128i T00, T01, T02;
__m128i T10, T11, T12, T13;
+ __m128i sign;
T00 = _mm_loadu_si128((__m128i*)(fenc));
T01 = _mm_loadu_si128((__m128i*)(fref));
More information about the x265-devel
mailing list