[x265] [PATCH] filterHorizontal_p_s: saving instructions with control execution
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Mon Aug 5 08:18:08 CEST 2013
# HG changeset patch
# User praveentiwari
# Date 1375683478 -19800
# Node ID d1fb0c00c4bba5c66af936c3387d6046b5416537
# Parent 498d4e97c74ce5bb12a49f933969529214ac0472
filterHorizontal_p_s: saving instructions with control execution
diff -r 498d4e97c74c -r d1fb0c00c4bb source/common/vec/ipfilter8.inc
--- a/source/common/vec/ipfilter8.inc Mon Aug 05 11:40:20 2013 +0530
+++ b/source/common/vec/ipfilter8.inc Mon Aug 05 11:47:58 2013 +0530
@@ -1175,22 +1175,25 @@
for (; col < (width - 7); col += 8)
{
__m128i srcCoeff = _mm_loadu_si128((__m128i*)(src + col));
+ __m128i sum;
- __m128i T00 = _mm_shuffle_epi8(srcCoeff, Tm5);
- __m128i T20 = _mm_maddubs_epi16(T00, S);
+ if (N == 4)
+ {
+ __m128i T00 = _mm_shuffle_epi8(srcCoeff, Tm5);
+ __m128i T20 = _mm_maddubs_epi16(T00, S);
- __m128i T30 = _mm_shuffle_epi8(srcCoeff, Tm6);
- __m128i T40 = _mm_maddubs_epi16(T30, S);
+ __m128i T30 = _mm_shuffle_epi8(srcCoeff, Tm6);
+ __m128i T40 = _mm_maddubs_epi16(T30, S);
- __m128i sum = _mm_hadd_epi16(T20, T40);
+ sum = _mm_hadd_epi16(T20, T40);
+ }
+ else // (N == 8)
+ {
+ __m128i T00 = _mm_shuffle_epi8(srcCoeff, Tm1);
+ __m128i T20 = _mm_maddubs_epi16(T00, T10);
- if (N == 8)
- {
- T00 = _mm_shuffle_epi8(srcCoeff, Tm1);
- T20 = _mm_maddubs_epi16(T00, T10);
-
- T30 = _mm_shuffle_epi8(srcCoeff, Tm2);
- T40 = _mm_maddubs_epi16(T30, T10);
+ __m128i T30 = _mm_shuffle_epi8(srcCoeff, Tm2);
+ __m128i T40 = _mm_maddubs_epi16(T30, T10);
__m128i T50 = _mm_shuffle_epi8(srcCoeff, Tm3);
__m128i T60 = _mm_maddubs_epi16(T50, T10);
More information about the x265-devel mailing list