[x265] [PATCH] pixel: modified weightUnidir to clear the bug

yuvaraj at multicorewareinc.com yuvaraj at multicorewareinc.com
Tue Oct 15 08:10:41 CEST 2013


# HG changeset patch
# User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
# Date 1381817402 -19800
#      Tue Oct 15 11:40:02 2013 +0530
# Node ID 7d7fb8a60c5d39d7f55261359560b84a7f0f8138
# Parent  9493e8bb3581b547a5c9f2e13cffb83958b24654
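pixel: modified weightUnidir to sign-extend the 16-bit source samples (instead of zero-extending them) when widening to 32 bits, which fixes the bug with negative source values.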

diff -r 9493e8bb3581 -r 7d7fb8a60c5d source/common/vec/pixel-sse41.cpp
--- a/source/common/vec/pixel-sse41.cpp	Tue Oct 15 11:29:00 2013 +0530
+++ b/source/common/vec/pixel-sse41.cpp	Tue Oct 15 11:40:02 2013 +0530
@@ -4883,7 +4883,7 @@
 
 void weightUnidir(short *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)
 {
-    __m128i w00, roundoff, ofs, fs, tmpsrc, tmpdst, tmp;
+    __m128i w00, roundoff, ofs, fs, tmpsrc, tmpdst, tmp, sign;
     int x, y;
 
     w00 = _mm_set1_epi32(w0);
@@ -4895,7 +4895,8 @@
         for (x = 0; x <= width - 4; x += 4)
         {
             tmpsrc = _mm_loadl_epi64((__m128i*)(src + x));
-            tmpsrc = _mm_unpacklo_epi16(tmpsrc, _mm_setzero_si128());
+            sign = _mm_srai_epi16(tmpsrc, 15);
+            tmpsrc = _mm_unpacklo_epi16(tmpsrc, sign);
             tmpdst = _mm_add_epi32(_mm_srai_epi32(_mm_add_epi32(_mm_mullo_epi32(w00, _mm_add_epi32(tmpsrc, ofs)), roundoff), shift), fs);
             *(uint32_t*)(dst + x) = _mm_cvtsi128_si32(_mm_packus_epi16(_mm_packs_epi32(tmpdst, tmpdst), _mm_setzero_si128()));
         }
@@ -4903,7 +4904,8 @@
         if (width > x)
         {
             tmpsrc = _mm_loadl_epi64((__m128i*)(src + x));
-            tmpsrc = _mm_unpacklo_epi16(tmpsrc, _mm_setzero_si128());
+            sign = _mm_srai_epi16(tmpsrc, 15);
+            tmpsrc = _mm_unpacklo_epi16(tmpsrc, sign);
             tmpdst = _mm_add_epi32(_mm_srai_epi32(_mm_add_epi32(_mm_mullo_epi32(w00, _mm_add_epi32(tmpsrc, ofs)), roundoff), shift), fs);
             tmp = _mm_packus_epi16(_mm_packs_epi32(tmpdst, tmpdst), _mm_setzero_si128());
             union


More information about the x265-devel mailing list