[x265] [PATCH] pixel: sign-extend fenc samples in sse_sp12 to fix handling of negative values
yuvaraj at multicorewareinc.com
yuvaraj at multicorewareinc.com
Tue Oct 15 08:00:32 CEST 2013
# HG changeset patch
# User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
# Date 1381816463 -19800
# Tue Oct 15 11:24:23 2013 +0530
# Node ID 48263b45f563a0e1bbd09dfef96b8db71cbefd78
# Parent 84f270db613a098be715a71d1e70cc084dcea9a4
pixel: sign-extend the 16-bit fenc samples in sse_sp12 (previously zero-extended) to fix incorrect results for negative values.
diff -r 84f270db613a -r 48263b45f563 source/common/vec/pixel-sse41.cpp
--- a/source/common/vec/pixel-sse41.cpp Tue Oct 15 11:19:44 2013 +0530
+++ b/source/common/vec/pixel-sse41.cpp Tue Oct 15 11:24:23 2013 +0530
@@ -5045,18 +5045,20 @@
{
__m128i T00, T01;
__m128i T10, T11, T12, T13;
+ __m128i sign;
T00 = _mm_loadu_si128((__m128i*)(fenc));
T01 = _mm_loadu_si128((__m128i*)(fref));
T01 = _mm_srli_si128(_mm_slli_si128(T01, 4), 4); //masking last 4 8-bit integers
- T10 = _mm_unpacklo_epi16(T00, _mm_setzero_si128());
+ sign = _mm_srai_epi16(T00, 15);
+ T10 = _mm_unpacklo_epi16(T00, sign);
T11 = _mm_unpacklo_epi8(T01, _mm_setzero_si128());
T11 = _mm_unpacklo_epi16(T11, _mm_setzero_si128());
T12 = _mm_sub_epi32(T10, T11);
T13 = _mm_mullo_epi32(T12, T12);
sum0 = _mm_add_epi32(sum0, T13);
- T10 = _mm_unpackhi_epi16(T00, _mm_setzero_si128());
+ T10 = _mm_unpackhi_epi16(T00, sign);
T11 = _mm_unpacklo_epi8(T01, _mm_setzero_si128());
T11 = _mm_unpackhi_epi16(T11, _mm_setzero_si128());
T12 = _mm_sub_epi32(T10, T11);
@@ -5065,7 +5067,7 @@
T00 = _mm_loadu_si128((__m128i*)(fenc + 8));
- T10 = _mm_unpacklo_epi16(T00, _mm_setzero_si128());
+ T10 = _mm_unpacklo_epi16(T00, sign);
T11 = _mm_unpackhi_epi8(T01, _mm_setzero_si128());
T11 = _mm_unpacklo_epi16(T11, _mm_setzero_si128());
T12 = _mm_sub_epi32(T10, T11);
More information about the x265-devel
mailing list