[x265] [PATCH] pixel: sse_sp12: sign-extend the 16-bit fenc values instead of zero-extending them

yuvaraj at multicorewareinc.com yuvaraj at multicorewareinc.com
Tue Oct 15 08:00:32 CEST 2013


# HG changeset patch
# User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
# Date 1381816463 -19800
#      Tue Oct 15 11:24:23 2013 +0530
# Node ID 48263b45f563a0e1bbd09dfef96b8db71cbefd78
# Parent  84f270db613a098be715a71d1e70cc084dcea9a4
pixel: sse_sp12: sign-extend the 16-bit fenc values instead of zero-extending them.

diff -r 84f270db613a -r 48263b45f563 source/common/vec/pixel-sse41.cpp
--- a/source/common/vec/pixel-sse41.cpp	Tue Oct 15 11:19:44 2013 +0530
+++ b/source/common/vec/pixel-sse41.cpp	Tue Oct 15 11:24:23 2013 +0530
@@ -5045,18 +5045,20 @@
     {
         __m128i T00, T01;
         __m128i T10, T11, T12, T13;
+        __m128i sign;
         T00 = _mm_loadu_si128((__m128i*)(fenc));
         T01 = _mm_loadu_si128((__m128i*)(fref));
         T01 = _mm_srli_si128(_mm_slli_si128(T01, 4), 4);    //masking last 4 8-bit integers
 
-        T10 = _mm_unpacklo_epi16(T00, _mm_setzero_si128());
+        sign = _mm_srai_epi16(T00, 15);
+        T10 = _mm_unpacklo_epi16(T00, sign);
         T11 = _mm_unpacklo_epi8(T01, _mm_setzero_si128());
         T11 = _mm_unpacklo_epi16(T11, _mm_setzero_si128());
         T12 = _mm_sub_epi32(T10, T11);
         T13 = _mm_mullo_epi32(T12, T12);
         sum0 = _mm_add_epi32(sum0, T13);
 
-        T10 = _mm_unpackhi_epi16(T00, _mm_setzero_si128());
+        T10 = _mm_unpackhi_epi16(T00, sign);
         T11 = _mm_unpacklo_epi8(T01, _mm_setzero_si128());
         T11 = _mm_unpackhi_epi16(T11, _mm_setzero_si128());
         T12 = _mm_sub_epi32(T10, T11);
@@ -5065,7 +5067,7 @@
 
         T00 = _mm_loadu_si128((__m128i*)(fenc + 8));
 
-        T10 = _mm_unpacklo_epi16(T00, _mm_setzero_si128());
+        T10 = _mm_unpacklo_epi16(T00, sign);
         T11 = _mm_unpackhi_epi8(T01, _mm_setzero_si128());
         T11 = _mm_unpacklo_epi16(T11, _mm_setzero_si128());
         T12 = _mm_sub_epi32(T10, T11);


More information about the x265-devel mailing list