[x265] [PATCH] pixel: fix sign-extension bug in sse_sp8

yuvaraj at multicorewareinc.com yuvaraj at multicorewareinc.com
Tue Oct 15 07:50:31 CEST 2013


# HG changeset patch
# User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
# Date 1381816184 -19800
#      Tue Oct 15 11:19:44 2013 +0530
# Node ID 84f270db613a098be715a71d1e70cc084dcea9a4
# Parent  28d86f3516ed9616cb49c72619e8438e84a87c77
pixel: fix sse_sp8 bug by sign-extending the 16-bit fenc values instead of zero-extending them

diff -r 28d86f3516ed -r 84f270db613a source/common/vec/pixel-sse41.cpp
--- a/source/common/vec/pixel-sse41.cpp	Tue Oct 15 11:10:18 2013 +0530
+++ b/source/common/vec/pixel-sse41.cpp	Tue Oct 15 11:19:44 2013 +0530
@@ -4995,12 +4995,13 @@
 }
 
 #define SSE_SP8x1 \
-    T10 = _mm_unpacklo_epi16(T00, _mm_setzero_si128()); \
+    sign = _mm_srai_epi16(T00, 15); \
+    T10 = _mm_unpacklo_epi16(T00, sign); \
     T11 = _mm_unpacklo_epi16(T02, _mm_setzero_si128()); \
     T12 = _mm_sub_epi32(T10, T11); \
     T13 = _mm_mullo_epi32(T12, T12); \
     sum0 = _mm_add_epi32(sum0, T13); \
-    T10 = _mm_unpackhi_epi16(T00, _mm_setzero_si128()); \
+    T10 = _mm_unpackhi_epi16(T00, sign); \
     T11 = _mm_unpackhi_epi16(T02, _mm_setzero_si128()); \
     T12 = _mm_sub_epi32(T10, T11); \
     T13 = _mm_mullo_epi32(T12, T12); \
@@ -5016,6 +5017,7 @@
     {
         __m128i T00, T01, T02;
         __m128i T10, T11, T12, T13;
+        __m128i sign;
 
         T00 = _mm_loadu_si128((__m128i*)(fenc));
         T01 = _mm_loadu_si128((__m128i*)(fref));


More information about the x265-devel mailing list