<div dir="ltr"><br><div class="gmail_extra"><br><br><div class="gmail_quote">On Mon, Oct 7, 2013 at 8:45 AM,  <span dir="ltr">&lt;<a href="mailto:yuvaraj@multicorewareinc.com" target="_blank">yuvaraj@multicorewareinc.com</a>&gt;</span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Yuvaraj Venkatesh &lt;<a href="mailto:yuvaraj@multicorewareinc.com">yuvaraj@multicorewareinc.com</a>&gt;<br>
# Date 1381153465 -19800<br>
#      Mon Oct 07 19:14:25 2013 +0530<br>
# Node ID 52ee436b58f9aa48757063bd678672d0ab56be01<br>
# Parent  c010342f7605c86867824f5b525a8f84c0d2de1c<br>
Replacing Residual4 from vector class to intrinsic.<br></blockquote><div><br></div><div>These two patches have tab-stops.  I've queued them for default after changing tabs to spaces and improving the commit messages.</div>
<div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
diff -r c010342f7605 -r 52ee436b58f9 source/common/vec/pixel8.inc<br>
--- a/source/common/vec/pixel8.inc      Sun Oct 06 02:09:00 2013 -0500<br>
+++ b/source/common/vec/pixel8.inc      Mon Oct 07 19:14:25 2013 +0530<br>
@@ -29,19 +29,35 @@<br>
<br>
 void getResidual4(pixel *fenc, pixel *pred, short *resi, int stride)<br>
 {<br>
-    for (int y = 0; y < 4; y++)<br>
-    {<br>
-        Vec16uc f;<br>
-        f.fromUint32(*(uint32_t*)fenc);<br>
-        Vec16uc p;<br>
-        p.fromUint32(*(uint32_t*)pred);<br>
-        Vec8s r = extend_low(f) - extend_low(p);<br>
-        store_partial(const_int(8), resi, r);<br>
+    __m128i T00, T01, T02;<br>
<br>
-        fenc += stride;<br>
-        pred += stride;<br>
-        resi += stride;<br>
-    }<br>
+    T00 = _mm_cvtsi32_si128(*(uint32_t*)fenc);<br>
+    T01 = _mm_cvtsi32_si128(*(uint32_t*)pred);<br>
+       T00 = _mm_unpacklo_epi8(T00, _mm_setzero_si128());<br>
+       T01 = _mm_unpacklo_epi8(T01, _mm_setzero_si128());<br>
+       T02 = _mm_sub_epi16(T00, T01);<br>
+    _mm_storel_epi64((__m128i*)resi, T02);<br>
+<br>
+       T00 = _mm_cvtsi32_si128(*(uint32_t*)(fenc + stride));<br>
+    T01 = _mm_cvtsi32_si128(*(uint32_t*)(pred + stride));<br>
+       T00 = _mm_unpacklo_epi8(T00, _mm_setzero_si128());<br>
+       T01 = _mm_unpacklo_epi8(T01, _mm_setzero_si128());<br>
+       T02 = _mm_sub_epi16(T00, T01);<br>
+    _mm_storel_epi64((__m128i*)(resi + stride), T02);<br>
+<br>
+       T00 = _mm_cvtsi32_si128(*(uint32_t*)(fenc + (2) * stride));<br>
+    T01 = _mm_cvtsi32_si128(*(uint32_t*)(pred + (2) * stride));<br>
+       T00 = _mm_unpacklo_epi8(T00, _mm_setzero_si128());<br>
+       T01 = _mm_unpacklo_epi8(T01, _mm_setzero_si128());<br>
+       T02 = _mm_sub_epi16(T00, T01);<br>
+    _mm_storel_epi64((__m128i*)(resi + (2) * stride), T02);<br>
+<br>
+       T00 = _mm_cvtsi32_si128(*(uint32_t*)(fenc + (3) * stride));<br>
+    T01 = _mm_cvtsi32_si128(*(uint32_t*)(pred + (3) * stride));<br>
+       T00 = _mm_unpacklo_epi8(T00, _mm_setzero_si128());<br>
+       T01 = _mm_unpacklo_epi8(T01, _mm_setzero_si128());<br>
+       T02 = _mm_sub_epi16(T00, T01);<br>
+    _mm_storel_epi64((__m128i*)(resi + (3) * stride), T02);<br>
 }<br>
<br>
 void getResidual8(pixel *fenc, pixel *pred, short *resi, int stride)<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div><br><br clear="all"><div><br></div>-- <br>Steve Borho
</div></div>