[x265] [PATCH] filterHorizontal_p_s: save instructions by computing the N == 4 sums only when N == 4

praveen at multicorewareinc.com praveen at multicorewareinc.com
Mon Aug 5 08:18:08 CEST 2013


# HG changeset patch
# User praveentiwari
# Date 1375683478 -19800
# Node ID d1fb0c00c4bba5c66af936c3387d6046b5416537
# Parent  498d4e97c74ce5bb12a49f933969529214ac0472
filterHorizontal_p_s: save instructions by computing the N == 4 sums only when N == 4 (previously computed unconditionally and then recomputed for N == 8)

diff -r 498d4e97c74c -r d1fb0c00c4bb source/common/vec/ipfilter8.inc
--- a/source/common/vec/ipfilter8.inc	Mon Aug 05 11:40:20 2013 +0530
+++ b/source/common/vec/ipfilter8.inc	Mon Aug 05 11:47:58 2013 +0530
@@ -1175,22 +1175,25 @@
         for (; col < (width - 7); col += 8)
         {
             __m128i srcCoeff = _mm_loadu_si128((__m128i*)(src + col));
+            __m128i sum;
 
-            __m128i T00 = _mm_shuffle_epi8(srcCoeff, Tm5);
-            __m128i T20 = _mm_maddubs_epi16(T00, S);
+            if (N == 4)
+            {
+                __m128i T00 = _mm_shuffle_epi8(srcCoeff, Tm5);
+                __m128i T20 = _mm_maddubs_epi16(T00, S);
 
-            __m128i T30 = _mm_shuffle_epi8(srcCoeff, Tm6);
-            __m128i T40 = _mm_maddubs_epi16(T30, S);
+                __m128i T30 = _mm_shuffle_epi8(srcCoeff, Tm6);
+                __m128i T40 = _mm_maddubs_epi16(T30, S);
 
-            __m128i sum = _mm_hadd_epi16(T20, T40);
+                sum = _mm_hadd_epi16(T20, T40);
+            }
+            else  // (N == 8)
+            {
+                __m128i T00 = _mm_shuffle_epi8(srcCoeff, Tm1);
+                __m128i T20 = _mm_maddubs_epi16(T00, T10);
 
-            if (N == 8)
-            {
-                T00 = _mm_shuffle_epi8(srcCoeff, Tm1);
-                T20 = _mm_maddubs_epi16(T00, T10);
-
-                T30 = _mm_shuffle_epi8(srcCoeff, Tm2);
-                T40 = _mm_maddubs_epi16(T30, T10);
+                __m128i T30 = _mm_shuffle_epi8(srcCoeff, Tm2);
+                __m128i T40 = _mm_maddubs_epi16(T30, T10);
 
                 __m128i T50 = _mm_shuffle_epi8(srcCoeff, Tm3);
                 __m128i T60 = _mm_maddubs_epi16(T50, T10);


More information about the x265-devel mailing list