[x265] [PATCH] +1x for all versions of sse_pp8

praveen at multicorewareinc.com praveen at multicorewareinc.com
Tue Aug 6 12:09:25 CEST 2013


# HG changeset patch
# User praveentiwari
# Date 1375783754 -19800
# Node ID ac392e714e8d3c2b566f60092e4edbfb449ce15d
# Parent  c9149cee2317cfc7a604ffa92f7746e22f0f226b
sse_pp8: replace vector-class code with SSE intrinsics (+1x speedup for all versions of sse_pp8)

diff -r c9149cee2317 -r ac392e714e8d source/common/vec/sse.inc
--- a/source/common/vec/sse.inc	Tue Aug 06 01:56:50 2013 -0500
+++ b/source/common/vec/sse.inc	Tue Aug 06 15:39:14 2013 +0530
@@ -51,22 +51,25 @@
 int sse_pp8(pixel* Org, intptr_t strideOrg, pixel* Cur, intptr_t strideCur)
 {
     int rows = ly;
-    Vec16uc m1, n1;
+    __m128i sum = _mm_set1_epi32(0);
 
-    Vec8us diff(0);
-    Vec4i sum(0);
     for (; rows != 0; rows--)
     {
-        m1.load(Org);
-        n1.load(Cur);
-        diff = extend_low(m1) - extend_low(n1);
-        diff = diff * diff;
-        sum += (extend_low(diff) + (extend_high(diff)));
+        __m128i m1 = _mm_loadu_si128((__m128i const*)(Org));
+        m1 = _mm_cvtepu8_epi16(m1);
+        __m128i n1 = _mm_loadu_si128((__m128i const*)(Cur));
+        n1 = _mm_cvtepu8_epi16(n1);
+
+        __m128i diff = _mm_sub_epi16(m1, n1);
+        sum = _mm_add_epi32(sum, _mm_madd_epi16(diff, diff));
+
         Org += strideOrg;
         Cur += strideCur;
     }
 
-    return horizontal_add(sum);
+    sum = _mm_add_epi32(sum, _mm_srli_si128(sum, 8));
+    sum = _mm_add_epi32(sum, _mm_srli_si128(sum, 4));
+    return _mm_cvtsi128_si32(sum);
 }
 
 template<int ly>


More information about the x265-devel mailing list