[x265] [PATCH] +1x for all versions of sse_pp8
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Tue Aug 6 12:09:25 CEST 2013
# HG changeset patch
# User praveentiwari
# Date 1375783754 -19800
# Node ID ac392e714e8d3c2b566f60092e4edbfb449ce15d
# Parent c9149cee2317cfc7a604ffa92f7746e22f0f226b
+1x for all versions of sse_pp8
diff -r c9149cee2317 -r ac392e714e8d source/common/vec/sse.inc
--- a/source/common/vec/sse.inc Tue Aug 06 01:56:50 2013 -0500
+++ b/source/common/vec/sse.inc Tue Aug 06 15:39:14 2013 +0530
@@ -51,22 +51,25 @@
int sse_pp8(pixel* Org, intptr_t strideOrg, pixel* Cur, intptr_t strideCur)
{
int rows = ly;
- Vec16uc m1, n1;
+ __m128i sum = _mm_set1_epi32(0);
- Vec8us diff(0);
- Vec4i sum(0);
for (; rows != 0; rows--)
{
- m1.load(Org);
- n1.load(Cur);
- diff = extend_low(m1) - extend_low(n1);
- diff = diff * diff;
- sum += (extend_low(diff) + (extend_high(diff)));
+ __m128i m1 = _mm_loadu_si128((__m128i const*)(Org));
+ m1 = _mm_cvtepu8_epi16(m1);
+ __m128i n1 = _mm_loadu_si128((__m128i const*)(Cur));
+ n1 = _mm_cvtepu8_epi16(n1);
+
+ __m128i diff = _mm_sub_epi16(m1, n1);
+ sum = _mm_add_epi32(sum, _mm_madd_epi16(diff, diff));
+
Org += strideOrg;
Cur += strideCur;
}
- return horizontal_add(sum);
+ sum = _mm_add_epi32(sum, _mm_srli_si128(sum, 8));
+ sum = _mm_add_epi32(sum, _mm_srli_si128(sum, 4));
+ return _mm_cvtsi128_si32(sum);
}
template<int ly>
More information about the x265-devel mailing list