[x265] [PATCH] intra-sse3.cpp: Replace PredIntraAng4_32 vector class function with intrinsics

dnyaneshwar at multicorewareinc.com dnyaneshwar at multicorewareinc.com
Thu Oct 10 12:45:49 CEST 2013


# HG changeset patch
# User Dnyaneshwar Gorade <dnyaneshwar at multicorewareinc.com>
# Date 1381401927 -19800
#      Thu Oct 10 16:15:27 2013 +0530
# Node ID c6bd100fcf17b007b24db2a1abdb9fd10efa7535
# Parent  12d098e5d907249d2f450ddc329cbeede88f9e1c
intra-sse3.cpp: Replace PredIntraAng4_32 vector class function with intrinsic.

diff -r 12d098e5d907 -r c6bd100fcf17 source/common/vec/intra-sse3.cpp
--- a/source/common/vec/intra-sse3.cpp	Thu Oct 10 15:20:57 2013 +0530
+++ b/source/common/vec/intra-sse3.cpp	Thu Oct 10 16:15:27 2013 +0530
@@ -1386,16 +1386,16 @@
 
 void PredIntraAng4_32(pixel* dst, int dstStride, pixel *refMain, int /*dirMode*/)
 {
-    Vec16uc tmp16_1, tmp16_2;
-
-    tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 2);
-    store_partial(const_int(4), dst, tmp16_1);
-    tmp16_2 = (Vec16uc)load_partial(const_int(8), refMain + 3);
-    store_partial(const_int(4), dst + dstStride, tmp16_2);
-    tmp16_2 = (Vec16uc)load_partial(const_int(8), refMain + 4);
-    store_partial(const_int(4), dst + 2 * dstStride, tmp16_2);
-    tmp16_2 = (Vec16uc)load_partial(const_int(8), refMain + 5);
-    store_partial(const_int(4), dst + 3 * dstStride, tmp16_2);
+    __m128i tmp16_1;
+
+    tmp16_1 = _mm_loadl_epi64((__m128i*)(refMain + 2));
+    *(uint32_t*)(dst) = _mm_cvtsi128_si32(tmp16_1);
+    tmp16_1 = _mm_loadl_epi64((__m128i*)(refMain + 3));
+    *(uint32_t*)(dst + dstStride) = _mm_cvtsi128_si32(tmp16_1);
+    tmp16_1 = _mm_loadl_epi64((__m128i*)(refMain + 4));
+    *(uint32_t*)(dst + 2 * dstStride) = _mm_cvtsi128_si32(tmp16_1);
+    tmp16_1 = _mm_loadl_epi64((__m128i*)(refMain + 5));
+    *(uint32_t*)(dst + 3 * dstStride) = _mm_cvtsi128_si32(tmp16_1);
 }
 
 void PredIntraAng4_26(pixel* dst, int dstStride, pixel *refMain, int dirMode)


More information about the x265-devel mailing list