[x265] [PATCH] pixel8.inc: sad_x3 64-bit build failure fixed
praveen at multicorewareinc.com
Sun Aug 25 13:06:01 CEST 2013
# HG changeset patch
# User praveentiwari
# Date 1377428751 -19800
# Node ID c79295ecc2b8d85c08643442e628f25f916a64a2
# Parent 97ec335a940f7e2768c7cc311e078d19b12a8168
pixel8.inc: sad_x3 64-bit build failure fixed
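
The build break is in the 64-bit (X86_64) code path, where _mm_cvtsi128_si32 was being passed a second argument; the SSE2 intrinsic takes a single __m128i and returns its low 32 bits. The hunks below drop the spurious ", 0", and the final hunk corrects the guard spelling from x86_64 to X86_64. For reference only, here is a minimal sketch of the reduction pattern these hunks use, with the corrected one-argument call; the helper name, strides, and parameter names are illustrative and are not taken from pixel8.inc:

#include <emmintrin.h>
#include <stdint.h>

/* Illustrative sketch, not part of the patch: SAD of two 8-pixel rows. */
static int sad_8x2_example(const uint8_t *fenc, intptr_t fencStride,
                           const uint8_t *fref, intptr_t frefStride)
{
    __m128i e0 = _mm_loadl_epi64((const __m128i*)fenc);
    __m128i e1 = _mm_loadl_epi64((const __m128i*)(fenc + fencStride));
    __m128i r0 = _mm_loadl_epi64((const __m128i*)fref);
    __m128i r1 = _mm_loadl_epi64((const __m128i*)(fref + frefStride));
    __m128i enc = _mm_unpacklo_epi64(e0, e1);   /* pack two rows into one register */
    __m128i ref = _mm_unpacklo_epi64(r0, r1);
    __m128i t20 = _mm_sad_epu8(enc, ref);       /* partial SADs in both 64-bit lanes */
    __m128i sum = _mm_add_epi32(_mm_shuffle_epi32(t20, 2), t20); /* fold high lane into low */
    return _mm_cvtsi128_si32(sum);              /* correct one-argument form */
}
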
diff -r 97ec335a940f -r c79295ecc2b8 source/common/vec/pixel8.inc
--- a/source/common/vec/pixel8.inc Sun Aug 25 16:02:25 2013 +0530
+++ b/source/common/vec/pixel8.inc Sun Aug 25 16:35:51 2013 +0530
@@ -1367,17 +1367,17 @@
T20 = _mm_sad_epu8(R00, R01);
sum0 = _mm_shuffle_epi32(T20, 2);
sum0 = _mm_add_epi32(sum0, T20);
- res[0] = _mm_cvtsi128_si32(sum0, 0);
+ res[0] = _mm_cvtsi128_si32(sum0);
T20 = _mm_sad_epu8(R00, R02);
sum0 = _mm_shuffle_epi32(T20, 2);
sum0 = _mm_add_epi32(sum0, T20);
- res[1] = _mm_cvtsi128_si32(sum0, 0);
+ res[1] = _mm_cvtsi128_si32(sum0);
T20 = _mm_sad_epu8(R00, R03);
sum0 = _mm_shuffle_epi32(T20, 2);
sum0 = _mm_add_epi32(sum0, T20);
- res[2] = _mm_cvtsi128_si32(sum0, 0);
+ res[2] = _mm_cvtsi128_si32(sum0);
}
else if (ly == 8)
@@ -1418,17 +1418,17 @@
T20 = _mm_sad_epu8(R00, R01);
sum0 = _mm_shuffle_epi32(T20, 2);
sum0 = _mm_add_epi32(sum0, T20);
- res[0] = _mm_cvtsi128_si32(sum0, 0);
+ res[0] = _mm_cvtsi128_si32(sum0);
T20 = _mm_sad_epu8(R00, R02);
sum0 = _mm_shuffle_epi32(T20, 2);
sum0 = _mm_add_epi32(sum0, T20);
- res[1] = _mm_cvtsi128_si32(sum0, 0);
+ res[1] = _mm_cvtsi128_si32(sum0);
T20 = _mm_sad_epu8(R00, R03);
sum0 = _mm_shuffle_epi32(T20, 2);
sum0 = _mm_add_epi32(sum0, T20);
- res[2] = _mm_cvtsi128_si32(sum0, 0);
+ res[2] = _mm_cvtsi128_si32(sum0);
T00 = _mm_loadl_epi64((__m128i*)(fenc + (4) * FENC_STRIDE));
T01 = _mm_loadl_epi64((__m128i*)(fenc + (5) * FENC_STRIDE));
@@ -1465,17 +1465,17 @@
T20 = _mm_sad_epu8(R00, R01);
sum0 = _mm_shuffle_epi32(T20, 2);
sum0 = _mm_add_epi32(sum0, T20);
- res[0] = res[0] + _mm_cvtsi128_si32(sum0, 0);
+ res[0] = res[0] + _mm_cvtsi128_si32(sum0);
T20 = _mm_sad_epu8(R00, R02);
sum0 = _mm_shuffle_epi32(T20, 2);
sum0 = _mm_add_epi32(sum0, T20);
- res[1] = res[1] + _mm_cvtsi128_si32(sum0, 0);
+ res[1] = res[1] + _mm_cvtsi128_si32(sum0);
T20 = _mm_sad_epu8(R00, R03);
sum0 = _mm_shuffle_epi32(T20, 2);
sum0 = _mm_add_epi32(sum0, T20);
- res[2] = res[2] + _mm_cvtsi128_si32(sum0, 0);
+ res[2] = res[2] + _mm_cvtsi128_si32(sum0);
}
else if (ly == 16)
@@ -1516,17 +1516,17 @@
T20 = _mm_sad_epu8(R00, R01);
sum0 = _mm_shuffle_epi32(T20, 2);
sum0 = _mm_add_epi32(sum0, T20);
- res[0] = _mm_cvtsi128_si32(sum0, 0);
+ res[0] = _mm_cvtsi128_si32(sum0);
T20 = _mm_sad_epu8(R00, R02);
sum0 = _mm_shuffle_epi32(T20, 2);
sum0 = _mm_add_epi32(sum0, T20);
- res[1] = _mm_cvtsi128_si32(sum0, 0);
+ res[1] = _mm_cvtsi128_si32(sum0);
T20 = _mm_sad_epu8(R00, R03);
sum0 = _mm_shuffle_epi32(T20, 2);
sum0 = _mm_add_epi32(sum0, T20);
- res[2] = _mm_cvtsi128_si32(sum0, 0);
+ res[2] = _mm_cvtsi128_si32(sum0);
T00 = _mm_loadl_epi64((__m128i*)(fenc + (4) * FENC_STRIDE));
T01 = _mm_loadl_epi64((__m128i*)(fenc + (5) * FENC_STRIDE));
@@ -1563,17 +1563,17 @@
T20 = _mm_sad_epu8(R00, R01);
sum0 = _mm_shuffle_epi32(T20, 2);
sum0 = _mm_add_epi32(sum0, T20);
- res[0] = res[0] + _mm_cvtsi128_si32(sum0, 0);
+ res[0] = res[0] + _mm_cvtsi128_si32(sum0);
T20 = _mm_sad_epu8(R00, R02);
sum0 = _mm_shuffle_epi32(T20, 2);
sum0 = _mm_add_epi32(sum0, T20);
- res[1] = res[1] + _mm_cvtsi128_si32(sum0, 0);
+ res[1] = res[1] + _mm_cvtsi128_si32(sum0);
T20 = _mm_sad_epu8(R00, R03);
sum0 = _mm_shuffle_epi32(T20, 2);
sum0 = _mm_add_epi32(sum0, T20);
- res[2] = res[2] + _mm_cvtsi128_si32(sum0, 0);
+ res[2] = res[2] + _mm_cvtsi128_si32(sum0);
T00 = _mm_loadl_epi64((__m128i*)(fenc + (8) * FENC_STRIDE));
T01 = _mm_loadl_epi64((__m128i*)(fenc + (9) * FENC_STRIDE));
@@ -1610,17 +1610,17 @@
T20 = _mm_sad_epu8(R00, R01);
sum0 = _mm_shuffle_epi32(T20, 2);
sum0 = _mm_add_epi32(sum0, T20);
- res[0] = res[0] + _mm_cvtsi128_si32(sum0, 0);
+ res[0] = res[0] + _mm_cvtsi128_si32(sum0);
T20 = _mm_sad_epu8(R00, R02);
sum0 = _mm_shuffle_epi32(T20, 2);
sum0 = _mm_add_epi32(sum0, T20);
- res[1] = res[1] + _mm_cvtsi128_si32(sum0, 0);
+ res[1] = res[1] + _mm_cvtsi128_si32(sum0);
T20 = _mm_sad_epu8(R00, R03);
sum0 = _mm_shuffle_epi32(T20, 2);
sum0 = _mm_add_epi32(sum0, T20);
- res[2] = res[2] + _mm_cvtsi128_si32(sum0, 0);
+ res[2] = res[2] + _mm_cvtsi128_si32(sum0);
T00 = _mm_loadl_epi64((__m128i*)(fenc + (12) * FENC_STRIDE));
T01 = _mm_loadl_epi64((__m128i*)(fenc + (13) * FENC_STRIDE));
@@ -1657,17 +1657,17 @@
T20 = _mm_sad_epu8(R00, R01);
sum0 = _mm_shuffle_epi32(T20, 2);
sum0 = _mm_add_epi32(sum0, T20);
- res[0] = res[0] + _mm_cvtsi128_si32(sum0, 0);
+ res[0] = res[0] + _mm_cvtsi128_si32(sum0);
T20 = _mm_sad_epu8(R00, R02);
sum0 = _mm_shuffle_epi32(T20, 2);
sum0 = _mm_add_epi32(sum0, T20);
- res[1] = res[1] + _mm_cvtsi128_si32(sum0, 0);
+ res[1] = res[1] + _mm_cvtsi128_si32(sum0);
T20 = _mm_sad_epu8(R00, R03);
sum0 = _mm_shuffle_epi32(T20, 2);
sum0 = _mm_add_epi32(sum0, T20);
- res[2] = res[2] + _mm_cvtsi128_si32(sum0, 0);
+ res[2] = res[2] + _mm_cvtsi128_si32(sum0);
}
else if ((ly % 8) == 0)
{
@@ -1709,17 +1709,17 @@
T20 = _mm_sad_epu8(R00, R01);
sum0 = _mm_shuffle_epi32(T20, 2);
sum0 = _mm_add_epi32(sum0, T20);
- res[0] = res[0] + _mm_cvtsi128_si32(sum0, 0);
+ res[0] = res[0] + _mm_cvtsi128_si32(sum0);
T20 = _mm_sad_epu8(R00, R02);
sum0 = _mm_shuffle_epi32(T20, 2);
sum0 = _mm_add_epi32(sum0, T20);
- res[1] = res[1] + _mm_cvtsi128_si32(sum0, 0);
+ res[1] = res[1] + _mm_cvtsi128_si32(sum0);
T20 = _mm_sad_epu8(R00, R03);
sum0 = _mm_shuffle_epi32(T20, 2);
sum0 = _mm_add_epi32(sum0, T20);
- res[2] = res[2] + _mm_cvtsi128_si32(sum0, 0);
+ res[2] = res[2] + _mm_cvtsi128_si32(sum0);
T00 = _mm_loadl_epi64((__m128i*)(fenc + (i + 4) * FENC_STRIDE));
T01 = _mm_loadl_epi64((__m128i*)(fenc + (i + 5) * FENC_STRIDE));
@@ -1756,17 +1756,17 @@
T20 = _mm_sad_epu8(R00, R01);
sum0 = _mm_shuffle_epi32(T20, 2);
sum0 = _mm_add_epi32(sum0, T20);
- res[0] = res[0] + _mm_cvtsi128_si32(sum0, 0);
+ res[0] = res[0] + _mm_cvtsi128_si32(sum0);
T20 = _mm_sad_epu8(R00, R02);
sum0 = _mm_shuffle_epi32(T20, 2);
sum0 = _mm_add_epi32(sum0, T20);
- res[1] = res[1] + _mm_cvtsi128_si32(sum0, 0);
+ res[1] = res[1] + _mm_cvtsi128_si32(sum0);
T20 = _mm_sad_epu8(R00, R03);
sum0 = _mm_shuffle_epi32(T20, 2);
sum0 = _mm_add_epi32(sum0, T20);
- res[2] = res[2] + _mm_cvtsi128_si32(sum0, 0);
+ res[2] = res[2] + _mm_cvtsi128_si32(sum0);
}
}
else
@@ -1809,17 +1809,17 @@
T20 = _mm_sad_epu8(R00, R01);
sum0 = _mm_shuffle_epi32(T20, 2);
sum0 = _mm_add_epi32(sum0, T20);
- res[0] = res[0] + _mm_cvtsi128_si32(sum0, 0);
+ res[0] = res[0] + _mm_cvtsi128_si32(sum0);
T20 = _mm_sad_epu8(R00, R02);
sum0 = _mm_shuffle_epi32(T20, 2);
sum0 = _mm_add_epi32(sum0, T20);
- res[1] = res[1] + _mm_cvtsi128_si32(sum0, 0);
+ res[1] = res[1] + _mm_cvtsi128_si32(sum0);
T20 = _mm_sad_epu8(R00, R03);
sum0 = _mm_shuffle_epi32(T20, 2);
sum0 = _mm_add_epi32(sum0, T20);
- res[2] = res[2] + _mm_cvtsi128_si32(sum0, 0);
+ res[2] = res[2] + _mm_cvtsi128_si32(sum0);
}
}
}
@@ -2186,7 +2186,7 @@
#endif /* ifdef X86_64 */
-#ifdef x86_64
+#ifdef X86_64
template<int ly>
void sad_x3_8(pixel *fenc, pixel *fref1, pixel *fref2, pixel *fref3, intptr_t frefstride, int *res)
{