<div dir="ltr"><br><div class="gmail_extra"><br><br><div class="gmail_quote">On Tue, Oct 8, 2013 at 3:38 AM,  <span dir="ltr">&lt;<a href="mailto:dnyaneshwar@multicorewareinc.com" target="_blank">dnyaneshwar@multicorewareinc.com</a>&gt;</span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Dnyaneshwar Gorade &lt;<a href="mailto:dnyaneshwar@multicorewareinc.com">dnyaneshwar@multicorewareinc.com</a>&gt;<br>
# Date 1381221459 -19800<br>
#      Tue Oct 08 14:07:39 2013 +0530<br>
# Node ID 9d22be0b84ff2d5f3a8d4ee4d319a75f7f9c73a4<br>
# Parent  d85c49059b6a30af455cf47ad38ea172c579cb9e<br>
pixel8.inc: replace calcRecons vector class function with intrinsic.<br></blockquote><div><br></div><div>I've moved these functions to pixel-sse3.cpp since that is their minimum SIMD requirement.</div><div><br></div>
<div>As a follow-up, can you remove the Hungarian prefixes from all the function arguments?</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
diff -r d85c49059b6a -r 9d22be0b84ff source/common/vec/pixel8.inc<br>
--- a/source/common/vec/pixel8.inc      Tue Oct 08 13:52:58 2013 +0530<br>
+++ b/source/common/vec/pixel8.inc      Tue Oct 08 14:07:39 2013 +0530<br>
@@ -190,26 +190,34 @@<br>
     {<br>
         for (int x = 0; x &lt; blockSize; x += 16)<br>
         {<br>
-            Vec8s vresi, vpred, vres, vsum1, vsum2;<br>
-            Vec16uc tmp;<br>
+            __m128i resi, pred, sum1, sum2;<br>
+            __m128i temp;<br>
<br>
-            tmp.load(pPred + x);<br>
+            temp = _mm_loadu_si128((__m128i const*)(pPred + x));<br>
+            pred = _mm_unpacklo_epi8(temp, _mm_setzero_si128());         // interleave with zero extensions<br>
<br>
-            vpred = extend_low(tmp);<br>
-            vresi.load(pResi + x);<br>
-            vsum1 = vpred + vresi;<br>
-            vsum1 = min(255, max(vsum1, 0));<br>
-            vsum1.store(pRecQt + x);<br>
+            resi = _mm_loadu_si128((__m128i const*)(pResi + x));<br>
+            sum1 = _mm_add_epi16(pred, resi);<br>
<br>
-            vpred = extend_high(tmp);<br>
-            vresi.load(pResi + x + 8);<br>
-            vsum2 = vpred + vresi;<br>
-            vsum2 = min(255, max(vsum2, 0));<br>
-            vsum2.store(pRecQt + x + 8);<br>
+            __m128i maxval = _mm_set1_epi16(0xff);                       // broadcast value 255(32-bit integer) to all elements of maxval<br>
+            __m128i minval = _mm_set1_epi16(0x00);                       // broadcast value 0(32-bit integer) to all elements of minval<br>
+            sum1 = _mm_min_epi16(maxval, _mm_max_epi16(sum1, minval));<br>
+            _mm_storeu_si128((__m128i*)(pRecQt + x), sum1);<br>
<br>
-            tmp = compress(vsum1, vsum2);<br>
-            tmp.store(pReco + x);<br>
-            tmp.store(pRecIPred + x);<br>
+            pred = _mm_unpackhi_epi8(temp, _mm_setzero_si128());         // interleave with zero extensions<br>
+            resi = _mm_loadu_si128((__m128i const*)(pResi + x + 8));<br>
+            sum2 = _mm_add_epi16(pred, resi);<br>
+<br>
+            sum2 = _mm_min_epi16(maxval, _mm_max_epi16(sum2, minval));<br>
+            _mm_storeu_si128((__m128i*)(pRecQt + x + 8), sum2);<br>
+<br>
+            __m128i mask = _mm_set1_epi32(0x00FF00FF);                   // mask for low bytes<br>
+            __m128i low_mask  = _mm_and_si128(sum1, mask);               // bytes of low<br>
+            __m128i high_mask = _mm_and_si128(sum2, mask);               // bytes of high<br>
+            temp = _mm_packus_epi16(low_mask, high_mask);                // unsigned pack<br>
+<br>
+            _mm_storeu_si128((__m128i*)(pReco + x), temp);<br>
+            _mm_storeu_si128((__m128i*)(pRecIPred + x), temp);<br>
         }<br>
<br>
         pPred     += stride;<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div><br><br clear="all"><div><br></div>-- <br>Steve Borho
</div></div>