[x265] [PATCH] filterVertical_s_p: fix VC9 test bench failure

praveen at multicorewareinc.com praveen at multicorewareinc.com
Fri Aug 2 15:20:28 CEST 2013


# HG changeset patch
# User praveentiwari
# Date 1375449612 -19800
# Node ID d77acebe970b03fbaf7d9b2bd0ede6cbac351989
# Parent  de3e6c30815ccc11ed6a33835f7d0c0d13e07f8c
filterVertical_s_p: fix VC9 test bench failure

diff -r de3e6c30815c -r d77acebe970b source/common/vec/ipfilter8.inc
--- a/source/common/vec/ipfilter8.inc	Fri Aug 02 12:36:59 2013 +0530
+++ b/source/common/vec/ipfilter8.inc	Fri Aug 02 18:50:12 2013 +0530
@@ -46,6 +46,8 @@
     __m128i filterCoeff6 = _mm_set1_epi32(coeff[6]);
     __m128i filterCoeff7 = _mm_set1_epi32(coeff[7]);
 
+    __m128i mask4 = _mm_setr_epi8(-1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+
     int row, col;
 
     for (row = 0; row < height; row++)
@@ -197,25 +199,23 @@
             __m128i res = _mm_packus_epi16(val, zero16);
 
             int n = width - col;
+            __m128i mask1, mask2, mask3;
 
             switch (n)   // store either 1, 2, 3 or 4 8-bit results in dst
             {
-            case 1: dst[col] = _mm_extract_epi8(res, 0);
+            case 1: mask1 = _mm_srli_si128(mask4, 3);
+                _mm_maskmoveu_si128(res, mask1, (char*)&dst[col]);
                 break;
 
-            case 2: dst[col] = _mm_extract_epi8(res, 0);
-                dst[col + 1] = _mm_extract_epi8(res, 1);
+            case 2:  mask2 = _mm_srli_si128(mask4, 2);
+                _mm_maskmoveu_si128(res, mask2, (char*)&dst[col]);
                 break;
 
-            case 3: dst[col] = _mm_extract_epi8(res, 0);
-                dst[col + 1] = _mm_extract_epi8(res, 1);
-                dst[col + 2] = _mm_extract_epi8(res, 2);
+            case 3: mask3 = _mm_srli_si128(mask4, 1);
+                _mm_maskmoveu_si128(res, mask3, (char*)&dst[col]);
                 break;
 
-            default:  dst[col] = _mm_extract_epi8(res, 0);
-                dst[col + 1] = _mm_extract_epi8(res, 1);
-                dst[col + 2] = _mm_extract_epi8(res, 2);
-                dst[col + 3] = _mm_extract_epi8(res, 3);
+            default: _mm_maskmoveu_si128(res, mask4, (char*)&dst[col]);
                 break;
             }
         }


More information about the x265-devel mailing list