[x265] [PATCH] intra-sse3.cpp: Replace PredIntraAng4_m_32 vector class function with intrinsic
dnyaneshwar at multicorewareinc.com
dnyaneshwar at multicorewareinc.com
Fri Oct 11 12:43:43 CEST 2013
# HG changeset patch
# User Dnyaneshwar Gorade <dnyaneshwar at multicorewareinc.com>
# Date 1381488201 -19800
# Fri Oct 11 16:13:21 2013 +0530
# Node ID e0adda303a27534d6a5310753c257a85bc54bb8b
# Parent 29c8344ad787504e1534027aa11fef82efd5e261
intra-sse3.cpp: Replace PredIntraAng4_m_32 vector class function with intrinsic.
diff -r 29c8344ad787 -r e0adda303a27 source/common/vec/intra-sse3.cpp
--- a/source/common/vec/intra-sse3.cpp Fri Oct 11 16:08:52 2013 +0530
+++ b/source/common/vec/intra-sse3.cpp Fri Oct 11 16:13:21 2013 +0530
@@ -1900,16 +1900,16 @@
void PredIntraAng4_m_32(pixel* dst, int dstStride, pixel *refMain, int /*dirMode*/)
{
- Vec16uc tmp16_1, tmp16_2;
-
- tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain); //-1,0,1,2
- store_partial(const_int(4), dst, tmp16_1);
- tmp16_2 = (Vec16uc)load_partial(const_int(8), refMain - 1); //-2,-1,0,1
- store_partial(const_int(4), dst + dstStride, tmp16_2);
- tmp16_2 = (Vec16uc)load_partial(const_int(8), refMain - 2);
- store_partial(const_int(4), dst + 2 * dstStride, tmp16_2);
- tmp16_2 = (Vec16uc)load_partial(const_int(8), refMain - 3);
- store_partial(const_int(4), dst + 3 * dstStride, tmp16_2);
+ __m128i tmp16_1;
+
+ tmp16_1 = _mm_loadl_epi64((__m128i*)(refMain));
+ *(uint32_t*)(dst) = _mm_cvtsi128_si32(tmp16_1);
+ tmp16_1 = _mm_loadl_epi64((__m128i*)(refMain - 1));
+ *(uint32_t*)(dst + dstStride) = _mm_cvtsi128_si32(tmp16_1);
+ tmp16_1 = _mm_loadl_epi64((__m128i*)(refMain - 2));
+ *(uint32_t*)(dst + 2 * dstStride) = _mm_cvtsi128_si32(tmp16_1);
+ tmp16_1 = _mm_loadl_epi64((__m128i*)(refMain - 3));
+ *(uint32_t*)(dst + 3 * dstStride) = _mm_cvtsi128_si32(tmp16_1);
}
typedef void (*PredIntraAng4x4_table)(pixel* dst, int dstStride, pixel *refMain, int dirMode);
More information about the x265-devel mailing list