[x265] [PATCH] filterHorizontal_p_p: saving instruction with control execution
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Mon Aug 5 08:03:00 CEST 2013
# HG changeset patch
# User praveentiwari
# Date 1375682565 -19800
# Node ID 4bef3786de8b374a0936c190beaac74d7fbd465e
# Parent 37cbf6432e63b88044a718b6bd5c73d61e52262d
filterHorizontal_p_p: save instructions by executing only the N == 4 or the N == 8 path
diff -r 37cbf6432e63 -r 4bef3786de8b source/common/vec/ipfilter8.inc
--- a/source/common/vec/ipfilter8.inc Sun Aug 04 15:32:40 2013 +0530
+++ b/source/common/vec/ipfilter8.inc Mon Aug 05 11:32:45 2013 +0530
@@ -749,6 +749,8 @@
__m128i Tm5 = _mm_setr_epi8(0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6);
__m128i Tm6 = _mm_setr_epi8(4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10);
+ __m128i sum;
+
for (row = 0; row < height; row++)
{
col = 0;
@@ -756,21 +758,24 @@
{
__m128i srcCoeff = _mm_loadu_si128((__m128i*)(src + col));
- __m128i T00 = _mm_shuffle_epi8(srcCoeff, Tm5);
- __m128i T20 = _mm_maddubs_epi16(T00, S);
+ if (N == 4)
+ {
+ __m128i T00 = _mm_shuffle_epi8(srcCoeff, Tm5);
+ __m128i T20 = _mm_maddubs_epi16(T00, S);
- __m128i T30 = _mm_shuffle_epi8(srcCoeff, Tm6);
- __m128i T40 = _mm_maddubs_epi16(T30, S);
+ __m128i T30 = _mm_shuffle_epi8(srcCoeff, Tm6);
+ __m128i T40 = _mm_maddubs_epi16(T30, S);
- __m128i sum = _mm_hadd_epi16(T20, T40);
+ sum = _mm_hadd_epi16(T20, T40);
+ }
- if (N == 8)
+ else // (N == 8)
{
- T00 = _mm_shuffle_epi8(srcCoeff, Tm1);
- T20 = _mm_maddubs_epi16(T00, T10);
+ __m128i T00 = _mm_shuffle_epi8(srcCoeff, Tm1);
+ __m128i T20 = _mm_maddubs_epi16(T00, T10);
- T30 = _mm_shuffle_epi8(srcCoeff, Tm2);
- T40 = _mm_maddubs_epi16(T30, T10);
+ __m128i T30 = _mm_shuffle_epi8(srcCoeff, Tm2);
+ __m128i T40 = _mm_maddubs_epi16(T30, T10);
__m128i T50 = _mm_shuffle_epi8(srcCoeff, Tm3);
__m128i T60 = _mm_maddubs_epi16(T50, T10);
More information about the x265-devel mailing list