[x265] [PATCH] pixel8.inc: fix sad_x3 64-bit build failure

praveen at multicorewareinc.com praveen at multicorewareinc.com
Sun Aug 25 13:06:01 CEST 2013


# HG changeset patch
# User praveentiwari
# Date 1377428751 -19800
# Node ID c79295ecc2b8d85c08643442e628f25f916a64a2
# Parent  97ec335a940f7e2768c7cc311e078d19b12a8168
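pixel8.inc: fix sad_x3 64-bit build failure

Remove the stray second argument from the _mm_cvtsi128_si32() calls and
correct the preprocessor guard from x86_64 to X86_64.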

diff -r 97ec335a940f -r c79295ecc2b8 source/common/vec/pixel8.inc
--- a/source/common/vec/pixel8.inc	Sun Aug 25 16:02:25 2013 +0530
+++ b/source/common/vec/pixel8.inc	Sun Aug 25 16:35:51 2013 +0530
@@ -1367,17 +1367,17 @@
         T20 = _mm_sad_epu8(R00, R01);
         sum0 = _mm_shuffle_epi32(T20, 2);
         sum0 = _mm_add_epi32(sum0, T20);
-        res[0] = _mm_cvtsi128_si32(sum0, 0);
+        res[0] = _mm_cvtsi128_si32(sum0);
 
         T20 = _mm_sad_epu8(R00, R02);
         sum0 = _mm_shuffle_epi32(T20, 2);
         sum0 = _mm_add_epi32(sum0, T20);
-        res[1] = _mm_cvtsi128_si32(sum0, 0);
+        res[1] = _mm_cvtsi128_si32(sum0);
 
         T20 = _mm_sad_epu8(R00, R03);
         sum0 = _mm_shuffle_epi32(T20, 2);
         sum0 = _mm_add_epi32(sum0, T20);
-        res[2] = _mm_cvtsi128_si32(sum0, 0);
+        res[2] = _mm_cvtsi128_si32(sum0);
     }
 
     else if (ly == 8)
@@ -1418,17 +1418,17 @@
         T20 = _mm_sad_epu8(R00, R01);
         sum0 = _mm_shuffle_epi32(T20, 2);
         sum0 = _mm_add_epi32(sum0, T20);
-        res[0] = _mm_cvtsi128_si32(sum0, 0);
+        res[0] = _mm_cvtsi128_si32(sum0);
 
         T20 = _mm_sad_epu8(R00, R02);
         sum0 = _mm_shuffle_epi32(T20, 2);
         sum0 = _mm_add_epi32(sum0, T20);
-        res[1] = _mm_cvtsi128_si32(sum0, 0);
+        res[1] = _mm_cvtsi128_si32(sum0);
 
         T20 = _mm_sad_epu8(R00, R03);
         sum0 = _mm_shuffle_epi32(T20, 2);
         sum0 = _mm_add_epi32(sum0, T20);
-        res[2] = _mm_cvtsi128_si32(sum0, 0);
+        res[2] = _mm_cvtsi128_si32(sum0);
 
         T00 = _mm_loadl_epi64((__m128i*)(fenc + (4) * FENC_STRIDE));
         T01 = _mm_loadl_epi64((__m128i*)(fenc + (5) * FENC_STRIDE));
@@ -1465,17 +1465,17 @@
         T20 = _mm_sad_epu8(R00, R01);
         sum0 = _mm_shuffle_epi32(T20, 2);
         sum0 = _mm_add_epi32(sum0, T20);
-        res[0] = res[0] + _mm_cvtsi128_si32(sum0, 0);
+        res[0] = res[0] + _mm_cvtsi128_si32(sum0);
 
         T20 = _mm_sad_epu8(R00, R02);
         sum0 = _mm_shuffle_epi32(T20, 2);
         sum0 = _mm_add_epi32(sum0, T20);
-        res[1] = res[1] + _mm_cvtsi128_si32(sum0, 0);
+        res[1] = res[1] + _mm_cvtsi128_si32(sum0);
 
         T20 = _mm_sad_epu8(R00, R03);
         sum0 = _mm_shuffle_epi32(T20, 2);
         sum0 = _mm_add_epi32(sum0, T20);
-        res[2] = res[2] + _mm_cvtsi128_si32(sum0, 0);
+        res[2] = res[2] + _mm_cvtsi128_si32(sum0);
     }
 
     else if (ly == 16)
@@ -1516,17 +1516,17 @@
         T20 = _mm_sad_epu8(R00, R01);
         sum0 = _mm_shuffle_epi32(T20, 2);
         sum0 = _mm_add_epi32(sum0, T20);
-        res[0] = _mm_cvtsi128_si32(sum0, 0);
+        res[0] = _mm_cvtsi128_si32(sum0);
 
         T20 = _mm_sad_epu8(R00, R02);
         sum0 = _mm_shuffle_epi32(T20, 2);
         sum0 = _mm_add_epi32(sum0, T20);
-        res[1] = _mm_cvtsi128_si32(sum0, 0);
+        res[1] = _mm_cvtsi128_si32(sum0);
 
         T20 = _mm_sad_epu8(R00, R03);
         sum0 = _mm_shuffle_epi32(T20, 2);
         sum0 = _mm_add_epi32(sum0, T20);
-        res[2] = _mm_cvtsi128_si32(sum0, 0);
+        res[2] = _mm_cvtsi128_si32(sum0);
 
         T00 = _mm_loadl_epi64((__m128i*)(fenc + (4) * FENC_STRIDE));
         T01 = _mm_loadl_epi64((__m128i*)(fenc + (5) * FENC_STRIDE));
@@ -1563,17 +1563,17 @@
         T20 = _mm_sad_epu8(R00, R01);
         sum0 = _mm_shuffle_epi32(T20, 2);
         sum0 = _mm_add_epi32(sum0, T20);
-        res[0] = res[0] + _mm_cvtsi128_si32(sum0, 0);
+        res[0] = res[0] + _mm_cvtsi128_si32(sum0);
 
         T20 = _mm_sad_epu8(R00, R02);
         sum0 = _mm_shuffle_epi32(T20, 2);
         sum0 = _mm_add_epi32(sum0, T20);
-        res[1] = res[1] + _mm_cvtsi128_si32(sum0, 0);
+        res[1] = res[1] + _mm_cvtsi128_si32(sum0);
 
         T20 = _mm_sad_epu8(R00, R03);
         sum0 = _mm_shuffle_epi32(T20, 2);
         sum0 = _mm_add_epi32(sum0, T20);
-        res[2] = res[2] + _mm_cvtsi128_si32(sum0, 0);
+        res[2] = res[2] + _mm_cvtsi128_si32(sum0);
 
         T00 = _mm_loadl_epi64((__m128i*)(fenc + (8) * FENC_STRIDE));
         T01 = _mm_loadl_epi64((__m128i*)(fenc + (9) * FENC_STRIDE));
@@ -1610,17 +1610,17 @@
         T20 = _mm_sad_epu8(R00, R01);
         sum0 = _mm_shuffle_epi32(T20, 2);
         sum0 = _mm_add_epi32(sum0, T20);
-        res[0] = res[0] + _mm_cvtsi128_si32(sum0, 0);
+        res[0] = res[0] + _mm_cvtsi128_si32(sum0);
 
         T20 = _mm_sad_epu8(R00, R02);
         sum0 = _mm_shuffle_epi32(T20, 2);
         sum0 = _mm_add_epi32(sum0, T20);
-        res[1] = res[1] + _mm_cvtsi128_si32(sum0, 0);
+        res[1] = res[1] + _mm_cvtsi128_si32(sum0);
 
         T20 = _mm_sad_epu8(R00, R03);
         sum0 = _mm_shuffle_epi32(T20, 2);
         sum0 = _mm_add_epi32(sum0, T20);
-        res[2] = res[2] + _mm_cvtsi128_si32(sum0, 0);
+        res[2] = res[2] + _mm_cvtsi128_si32(sum0);
 
         T00 = _mm_loadl_epi64((__m128i*)(fenc + (12) * FENC_STRIDE));
         T01 = _mm_loadl_epi64((__m128i*)(fenc + (13) * FENC_STRIDE));
@@ -1657,17 +1657,17 @@
         T20 = _mm_sad_epu8(R00, R01);
         sum0 = _mm_shuffle_epi32(T20, 2);
         sum0 = _mm_add_epi32(sum0, T20);
-        res[0] = res[0] + _mm_cvtsi128_si32(sum0, 0);
+        res[0] = res[0] + _mm_cvtsi128_si32(sum0);
 
         T20 = _mm_sad_epu8(R00, R02);
         sum0 = _mm_shuffle_epi32(T20, 2);
         sum0 = _mm_add_epi32(sum0, T20);
-        res[1] = res[1] + _mm_cvtsi128_si32(sum0, 0);
+        res[1] = res[1] + _mm_cvtsi128_si32(sum0);
 
         T20 = _mm_sad_epu8(R00, R03);
         sum0 = _mm_shuffle_epi32(T20, 2);
         sum0 = _mm_add_epi32(sum0, T20);
-        res[2] = res[2] + _mm_cvtsi128_si32(sum0, 0);
+        res[2] = res[2] + _mm_cvtsi128_si32(sum0);
     }
     else if ((ly % 8) == 0)
     {
@@ -1709,17 +1709,17 @@
             T20 = _mm_sad_epu8(R00, R01);
             sum0 = _mm_shuffle_epi32(T20, 2);
             sum0 = _mm_add_epi32(sum0, T20);
-            res[0] = res[0] + _mm_cvtsi128_si32(sum0, 0);
+            res[0] = res[0] + _mm_cvtsi128_si32(sum0);
 
             T20 = _mm_sad_epu8(R00, R02);
             sum0 = _mm_shuffle_epi32(T20, 2);
             sum0 = _mm_add_epi32(sum0, T20);
-            res[1] = res[1] + _mm_cvtsi128_si32(sum0, 0);
+            res[1] = res[1] + _mm_cvtsi128_si32(sum0);
 
             T20 = _mm_sad_epu8(R00, R03);
             sum0 = _mm_shuffle_epi32(T20, 2);
             sum0 = _mm_add_epi32(sum0, T20);
-            res[2] = res[2] + _mm_cvtsi128_si32(sum0, 0);
+            res[2] = res[2] + _mm_cvtsi128_si32(sum0);
 
             T00 = _mm_loadl_epi64((__m128i*)(fenc + (i + 4) * FENC_STRIDE));
             T01 = _mm_loadl_epi64((__m128i*)(fenc + (i + 5) * FENC_STRIDE));
@@ -1756,17 +1756,17 @@
             T20 = _mm_sad_epu8(R00, R01);
             sum0 = _mm_shuffle_epi32(T20, 2);
             sum0 = _mm_add_epi32(sum0, T20);
-            res[0] = res[0] + _mm_cvtsi128_si32(sum0, 0);
+            res[0] = res[0] + _mm_cvtsi128_si32(sum0);
 
             T20 = _mm_sad_epu8(R00, R02);
             sum0 = _mm_shuffle_epi32(T20, 2);
             sum0 = _mm_add_epi32(sum0, T20);
-            res[1] = res[1] + _mm_cvtsi128_si32(sum0, 0);
+            res[1] = res[1] + _mm_cvtsi128_si32(sum0);
 
             T20 = _mm_sad_epu8(R00, R03);
             sum0 = _mm_shuffle_epi32(T20, 2);
             sum0 = _mm_add_epi32(sum0, T20);
-            res[2] = res[2] + _mm_cvtsi128_si32(sum0, 0);
+            res[2] = res[2] + _mm_cvtsi128_si32(sum0);
         }
     }
     else
@@ -1809,17 +1809,17 @@
             T20 = _mm_sad_epu8(R00, R01);
             sum0 = _mm_shuffle_epi32(T20, 2);
             sum0 = _mm_add_epi32(sum0, T20);
-            res[0] = res[0] + _mm_cvtsi128_si32(sum0, 0);
+            res[0] = res[0] + _mm_cvtsi128_si32(sum0);
 
             T20 = _mm_sad_epu8(R00, R02);
             sum0 = _mm_shuffle_epi32(T20, 2);
             sum0 = _mm_add_epi32(sum0, T20);
-            res[1] = res[1] + _mm_cvtsi128_si32(sum0, 0);
+            res[1] = res[1] + _mm_cvtsi128_si32(sum0);
 
             T20 = _mm_sad_epu8(R00, R03);
             sum0 = _mm_shuffle_epi32(T20, 2);
             sum0 = _mm_add_epi32(sum0, T20);
-            res[2] = res[2] + _mm_cvtsi128_si32(sum0, 0);
+            res[2] = res[2] + _mm_cvtsi128_si32(sum0);
         }
     }
 }
@@ -2186,7 +2186,7 @@
 
 #endif /* ifdef X86_64 */
 
-#ifdef x86_64
+#ifdef X86_64
 template<int ly>
 void sad_x3_8(pixel *fenc, pixel *fref1, pixel *fref2, pixel *fref3, intptr_t frefstride, int *res)
 {


More information about the x265-devel mailing list