[x265] [PATCH] filterHorizontal_p_p: save instructions with conditional execution

praveen at multicorewareinc.com praveen at multicorewareinc.com
Mon Aug 5 08:03:00 CEST 2013


# HG changeset patch
# User praveentiwari
# Date 1375682565 -19800
# Node ID 4bef3786de8b374a0936c190beaac74d7fbd465e
# Parent  37cbf6432e63b88044a718b6bd5c73d61e52262d
filterHorizontal_p_p: save instructions with conditional execution

diff -r 37cbf6432e63 -r 4bef3786de8b source/common/vec/ipfilter8.inc
--- a/source/common/vec/ipfilter8.inc	Sun Aug 04 15:32:40 2013 +0530
+++ b/source/common/vec/ipfilter8.inc	Mon Aug 05 11:32:45 2013 +0530
@@ -749,6 +749,8 @@
     __m128i Tm5 = _mm_setr_epi8(0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6);
     __m128i Tm6 = _mm_setr_epi8(4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10);
 
+    __m128i sum;
+
     for (row = 0; row < height; row++)
     {
         col = 0;
@@ -756,21 +758,24 @@
         {
             __m128i srcCoeff = _mm_loadu_si128((__m128i*)(src + col));
 
-            __m128i T00 = _mm_shuffle_epi8(srcCoeff, Tm5);
-            __m128i T20 = _mm_maddubs_epi16(T00, S);
+            if (N == 4)
+            {
+                __m128i T00 = _mm_shuffle_epi8(srcCoeff, Tm5);
+                __m128i T20 = _mm_maddubs_epi16(T00, S);
 
-            __m128i T30 = _mm_shuffle_epi8(srcCoeff, Tm6);
-            __m128i T40 = _mm_maddubs_epi16(T30, S);
+                __m128i T30 = _mm_shuffle_epi8(srcCoeff, Tm6);
+                __m128i T40 = _mm_maddubs_epi16(T30, S);
 
-            __m128i sum = _mm_hadd_epi16(T20, T40);
+                sum = _mm_hadd_epi16(T20, T40);
+            }
 
-            if (N == 8)
+            else // (N == 8)
             {
-                T00 = _mm_shuffle_epi8(srcCoeff, Tm1);
-                T20 = _mm_maddubs_epi16(T00, T10);
+                __m128i T00 = _mm_shuffle_epi8(srcCoeff, Tm1);
+                __m128i T20 = _mm_maddubs_epi16(T00, T10);
 
-                T30 = _mm_shuffle_epi8(srcCoeff, Tm2);
-                T40 = _mm_maddubs_epi16(T30, T10);
+                __m128i T30 = _mm_shuffle_epi8(srcCoeff, Tm2);
+                __m128i T40 = _mm_maddubs_epi16(T30, T10);
 
                 __m128i T50 = _mm_shuffle_epi8(srcCoeff, Tm3);
                 __m128i T60 = _mm_maddubs_epi16(T50, T10);


More information about the x265-devel mailing list