[x265] [PATCH] pixel: modified weightUnidir to clear the bug

Steve Borho steve at borho.org
Tue Oct 15 08:28:22 CEST 2013


On Tue, Oct 15, 2013 at 1:10 AM, <yuvaraj at multicorewareinc.com> wrote:

> # HG changeset patch
> # User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
> # Date 1381817402 -19800
> #      Tue Oct 15 11:40:02 2013 +0530
> # Node ID 7d7fb8a60c5d39d7f55261359560b84a7f0f8138
> # Parent  9493e8bb3581b547a5c9f2e13cffb83958b24654
> pixel: modified weightUnidir to clear the bug.
>

Queued.  Did the unit test for this primitive detect this bug, or did
you find it by examining the code?


>
> diff -r 9493e8bb3581 -r 7d7fb8a60c5d source/common/vec/pixel-sse41.cpp
> --- a/source/common/vec/pixel-sse41.cpp Tue Oct 15 11:29:00 2013 +0530
> +++ b/source/common/vec/pixel-sse41.cpp Tue Oct 15 11:40:02 2013 +0530
> @@ -4883,7 +4883,7 @@
>
>  void weightUnidir(short *src, pixel *dst, intptr_t srcStride, intptr_t
> dstStride, int width, int height, int w0, int round, int shift, int offset)
>  {
> -    __m128i w00, roundoff, ofs, fs, tmpsrc, tmpdst, tmp;
> +    __m128i w00, roundoff, ofs, fs, tmpsrc, tmpdst, tmp, sign;
>      int x, y;
>
>      w00 = _mm_set1_epi32(w0);
> @@ -4895,7 +4895,8 @@
>          for (x = 0; x <= width - 4; x += 4)
>          {
>              tmpsrc = _mm_loadl_epi64((__m128i*)(src + x));
> -            tmpsrc = _mm_unpacklo_epi16(tmpsrc, _mm_setzero_si128());
> +            sign = _mm_srai_epi16(tmpsrc, 15);
> +            tmpsrc = _mm_unpacklo_epi16(tmpsrc, sign);
>              tmpdst =
> _mm_add_epi32(_mm_srai_epi32(_mm_add_epi32(_mm_mullo_epi32(w00,
> _mm_add_epi32(tmpsrc, ofs)), roundoff), shift), fs);
>              *(uint32_t*)(dst + x) =
> _mm_cvtsi128_si32(_mm_packus_epi16(_mm_packs_epi32(tmpdst, tmpdst),
> _mm_setzero_si128()));
>          }
> @@ -4903,7 +4904,8 @@
>          if (width > x)
>          {
>              tmpsrc = _mm_loadl_epi64((__m128i*)(src + x));
> -            tmpsrc = _mm_unpacklo_epi16(tmpsrc, _mm_setzero_si128());
> +            sign = _mm_srai_epi16(tmpsrc, 15);
> +            tmpsrc = _mm_unpacklo_epi16(tmpsrc, sign);
>              tmpdst =
> _mm_add_epi32(_mm_srai_epi32(_mm_add_epi32(_mm_mullo_epi32(w00,
> _mm_add_epi32(tmpsrc, ofs)), roundoff), shift), fs);
>              tmp = _mm_packus_epi16(_mm_packs_epi32(tmpdst, tmpdst),
> _mm_setzero_si128());
>              union
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>



-- 
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131015/bac7ecaa/attachment.html>


More information about the x265-devel mailing list