[x265] [PATCH] filterVertical_s_p: VC9 fix

praveen at multicorewareinc.com
Thu Aug 8 12:28:30 CEST 2013


# HG changeset patch
# User praveentiwari
# Date 1375957700 -19800
# Node ID 5f97f9ddd0780efb7d679b6be0659ab872d7c707
# Parent  33aa6210de6d486b413f0a6ef82750a89d76c981
filterVertical_s_p: VC9 fix

diff -r 33aa6210de6d -r 5f97f9ddd078 source/common/vec/ipfilter8.inc
--- a/source/common/vec/ipfilter8.inc	Wed Aug 07 22:36:10 2013 +0800
+++ b/source/common/vec/ipfilter8.inc	Thu Aug 08 15:58:20 2013 +0530
@@ -42,22 +42,17 @@
     coeffTemp = _mm_srli_si128(coeffTemp, 8);
     __m128i coeffTempHigh = _mm_cvtepi16_epi32(coeffTemp);
 
-    __m128i vm0 = _mm_setr_epi8(0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3);
-    __m128i vm1 = _mm_setr_epi8(4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
-    __m128i vm2 = _mm_setr_epi8(8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11);
-    __m128i vm3 = _mm_setr_epi8(12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15);
+    __m128i filterCoeff0 = _mm_shuffle_epi32(coeffTempLow, 0x00);
+    __m128i filterCoeff1 = _mm_shuffle_epi32(coeffTempLow, 0x55);
+    __m128i filterCoeff2 = _mm_shuffle_epi32(coeffTempLow, 0xAA);
+    __m128i filterCoeff3 = _mm_shuffle_epi32(coeffTempLow, 0xFF);
 
-    __m128i filterCoeff0 = _mm_shuffle_epi8(coeffTempLow, vm0);
-    __m128i filterCoeff1 = _mm_shuffle_epi8(coeffTempLow, vm1);
-    __m128i filterCoeff2 = _mm_shuffle_epi8(coeffTempLow, vm2);
-    __m128i filterCoeff3 = _mm_shuffle_epi8(coeffTempLow, vm3);
+    __m128i filterCoeff4 = _mm_shuffle_epi32(coeffTempHigh, 0x00);
+    __m128i filterCoeff5 = _mm_shuffle_epi32(coeffTempHigh, 0x55);
+    __m128i filterCoeff6 = _mm_shuffle_epi32(coeffTempHigh, 0xAA);
+    __m128i filterCoeff7 = _mm_shuffle_epi32(coeffTempHigh, 0xFF);
 
-    __m128i filterCoeff4 = _mm_shuffle_epi8(coeffTempHigh, vm0);
-    __m128i filterCoeff5 = _mm_shuffle_epi8(coeffTempHigh, vm1);
-    __m128i filterCoeff6 = _mm_shuffle_epi8(coeffTempHigh, vm2);
-    __m128i filterCoeff7 = _mm_shuffle_epi8(coeffTempHigh, vm3);
-
-    __m128i mask4 = _mm_setr_epi8(-1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+    __m128i mask4 = _mm_cvtsi32_si128(0xFFFFFFFF);
 
     int row, col;
 
@@ -140,7 +135,6 @@
                 sum67 = _mm_add_epi32(T21, T31);
                 sumhi0123 = _mm_add_epi32(sumhi0123, _mm_add_epi32(sum45, sum67));
             }
-            __m128i zero = _mm_set1_epi16(0);
             __m128i sumOffset = _mm_set1_epi32(offset);
 
             __m128i val1 = _mm_add_epi32(sumlo0123, sumOffset);
@@ -150,7 +144,7 @@
             val2 = _mm_srai_epi32(val2, shift);
 
             __m128i val = _mm_packs_epi32(val1, val2);
-            __m128i res = _mm_packus_epi16(val, zero);
+            __m128i res = _mm_packus_epi16(val, val);
             _mm_storel_epi64((__m128i*)&dst[col], res);
         }
 


More information about the x265-devel mailing list