[x265] [PATCH] filterVertical_s_p: fix for VC9 test bench fail
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Fri Aug 2 15:20:28 CEST 2013
# HG changeset patch
# User praveentiwari
# Date 1375449612 -19800
# Node ID d77acebe970b03fbaf7d9b2bd0ede6cbac351989
# Parent de3e6c30815ccc11ed6a33835f7d0c0d13e07f8c
filterVertical_s_p: fix for VC9 test bench fail
diff -r de3e6c30815c -r d77acebe970b source/common/vec/ipfilter8.inc
--- a/source/common/vec/ipfilter8.inc Fri Aug 02 12:36:59 2013 +0530
+++ b/source/common/vec/ipfilter8.inc Fri Aug 02 18:50:12 2013 +0530
@@ -46,6 +46,8 @@
__m128i filterCoeff6 = _mm_set1_epi32(coeff[6]);
__m128i filterCoeff7 = _mm_set1_epi32(coeff[7]);
+ __m128i mask4 = _mm_setr_epi8(-1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+
int row, col;
for (row = 0; row < height; row++)
@@ -197,25 +199,23 @@
__m128i res = _mm_packus_epi16(val, zero16);
int n = width - col;
+ __m128i mask1, mask2, mask3;
switch (n) // store either 1, 2, 3 or 4 8-bit results in dst
{
- case 1: dst[col] = _mm_extract_epi8(res, 0);
+ case 1: mask1 = _mm_srli_si128(mask4, 3);
+ _mm_maskmoveu_si128(res, mask1, (char*)&dst[col]);
break;
- case 2: dst[col] = _mm_extract_epi8(res, 0);
- dst[col + 1] = _mm_extract_epi8(res, 1);
+ case 2: mask2 = _mm_srli_si128(mask4, 2);
+ _mm_maskmoveu_si128(res, mask2, (char*)&dst[col]);
break;
- case 3: dst[col] = _mm_extract_epi8(res, 0);
- dst[col + 1] = _mm_extract_epi8(res, 1);
- dst[col + 2] = _mm_extract_epi8(res, 2);
+ case 3: mask3 = _mm_srli_si128(mask4, 1);
+ _mm_maskmoveu_si128(res, mask3, (char*)&dst[col]);
break;
- default: dst[col] = _mm_extract_epi8(res, 0);
- dst[col + 1] = _mm_extract_epi8(res, 1);
- dst[col + 2] = _mm_extract_epi8(res, 2);
- dst[col + 3] = _mm_extract_epi8(res, 3);
+ default: _mm_maskmoveu_si128(res, mask4, (char*)&dst[col]);
break;
}
}
More information about the x265-devel mailing list