[x265] [PATCH] intra-sse3.cpp: Replace PredIntraAng4_21 vector class function with intrinsic

Dnyaneshwar Gorade dnyaneshwar at multicorewareinc.com
Fri Oct 11 09:47:39 CEST 2013


# HG changeset patch
# User Dnyaneshwar Gorade <dnyaneshwar at multicorewareinc.com>
# Date 1381477617 -19800
#      Fri Oct 11 13:16:57 2013 +0530
# Node ID a9e57e75bf1311d0e611ed3e9729984ec063a2da
# Parent  12cc7f903547ff3aeeb6bf68484b8447b813a8a2
intra-sse3.cpp: Replace PredIntraAng4_21 vector class function with intrinsic.

diff -r 12cc7f903547 -r a9e57e75bf13 source/common/vec/intra-sse3.cpp
--- a/source/common/vec/intra-sse3.cpp	Fri Oct 11 12:59:03 2013 +0530
+++ b/source/common/vec/intra-sse3.cpp	Fri Oct 11 13:16:57 2013 +0530
@@ -1498,38 +1498,39 @@
     PRED_INTRA_ANGLE_4_END
 }
 
-void PredIntraAng4_21(pixel* dst, int dstStride, pixel *refMain, int dirMode)
-{
-    PRED_INTRA_ANG4_START
-
-        tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 1);
-
-    row11 = extend_low(tmp16_1);    //offsets(0,1,2,3)
-
-    tmp2uq = reinterpret_i(tmp16_1);
-    tmp2uq = tmp2uq >> 8;
-    tmp16_2 = reinterpret_i(tmp2uq);
-    row12 = extend_low(tmp16_2);    //offsets(1,2,3,4)
-
-    row21 = row12;
-
-    tmp2uq = reinterpret_i(tmp16_1);
-    tmp2uq = tmp2uq >> 16;
-    tmp16_2 = reinterpret_i(tmp2uq);
-    row22 = extend_low(tmp16_2);    //offsets(2,3,4,5)
-
-    row31 = row21;
-    row32 = row22;
-
-    row41 = row22;
-    tmp2uq = reinterpret_i(tmp16_1);
-    tmp2uq = tmp2uq >> 24;
-    tmp16_2 = reinterpret_i(tmp2uq);
-    row42 = extend_low(tmp16_2);    //offsets(3,4,5,6)
-
-    v_deltaPos = v_ipAngle = 21;
-
-    PRED_INTRA_ANG4_END
+void PredIntraAng4_21(pixel* dst, int dstStride, pixel *refMain, int dirMode)
+{
+    PRED_INTRA_ANGLE_4_START
+    // Build the row pairs from the 8 bytes at refMain + 1; "offsets" below are byte offsets from that address.
+    tmp16_1 = _mm_loadl_epi64((__m128i*)(refMain + 1));
+    row11   =_mm_unpacklo_epi8(tmp16_1, _mm_setzero_si128());    // zero-extend bytes to 16-bit lanes; lanes 0..3 = offsets(0,1,2,3)
+
+    tmp2 = tmp16_1;
+    tmp2 = _mm_srl_epi64(tmp2,_mm_cvtsi32_si128(8));    // shift count is in bits: 8 bits drops the first byte
+
+    tmp16_2 = tmp2;
+    row12 = _mm_unpacklo_epi8(tmp16_2, _mm_setzero_si128());    // lanes 0..3 = offsets(1,2,3,4)
+
+    row21 = row12;
+    tmp2 = tmp16_1;
+    tmp2 = _mm_srl_epi64(tmp2, _mm_cvtsi32_si128(16));    // 16 bits drops the first two bytes
+
+    tmp16_2 = tmp2;
+    row22 = _mm_unpacklo_epi8(tmp16_2, _mm_setzero_si128());    // lanes 0..3 = offsets(2,3,4,5)
+    // Rows 2 and 3 use the same pair of reference vectors.
+    row31 = row21;
+    row32 = row22;
+
+    row41 = row22;
+    tmp2 = tmp16_1;
+    tmp2 = _mm_srl_epi64(tmp2, _mm_cvtsi32_si128(24));    // 24 bits drops the first three bytes
+
+    tmp16_2 = tmp2;
+    row42 = _mm_unpacklo_epi8(tmp16_2, _mm_setzero_si128());    // lanes 0..3 = offsets(3,4,5,6)
+    // Broadcast 21 to every 16-bit lane. NOTE(review): presumably consumed by PRED_INTRA_ANGLE_4_END (macro not shown here) - confirm.
+    deltaPos = ipAngle = _mm_set1_epi16(21);
+
+    PRED_INTRA_ANGLE_4_END
 }
 
 void PredIntraAng4_17(pixel* dst, int dstStride, pixel *refMain, int dirMode)


More information about the x265-devel mailing list