[x265] [PATCH] intra-sse3.cpp: Replace PredIntraAng4_26 vector class function with intrinsic using intrinsic macros PRED_INTRA_ANGLE_4_START and PRED_INTRA_ANGLE_4_END
dnyaneshwar at multicorewareinc.com
dnyaneshwar at multicorewareinc.com
Fri Oct 11 09:31:02 CEST 2013
# HG changeset patch
# User Dnyaneshwar Gorade <dnyaneshwar at multicorewareinc.com>
# Date 1381476543 -19800
# Fri Oct 11 12:59:03 2013 +0530
# Node ID 12cc7f903547ff3aeeb6bf68484b8447b813a8a2
# Parent 0f5a6fd46f0acb3b401f9fe8c026ec97e1021cfd
intra-sse3.cpp: Replace the vector-class implementation of PredIntraAng4_26 with an SSE-intrinsic implementation built on the intrinsic macros PRED_INTRA_ANGLE_4_START and PRED_INTRA_ANGLE_4_END.
diff -r 0f5a6fd46f0a -r 12cc7f903547 source/common/vec/intra-sse3.cpp
--- a/source/common/vec/intra-sse3.cpp Fri Oct 11 12:41:47 2013 +0530
+++ b/source/common/vec/intra-sse3.cpp Fri Oct 11 12:59:03 2013 +0530
@@ -1459,41 +1459,43 @@
*(uint32_t*)(dst + 3 * dstStride) = _mm_cvtsi128_si32(tmp16_1);
}
-void PredIntraAng4_26(pixel* dst, int dstStride, pixel *refMain, int dirMode)
-{
- PRED_INTRA_ANG4_START
-
- tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 1);
-
- row11 = extend_low(tmp16_1); //offsets(0,1,2,3)
-
- tmp2uq = reinterpret_i(tmp16_1);
- tmp2uq = tmp2uq >> 8;
- tmp16_2 = reinterpret_i(tmp2uq);
- row12 = extend_low(tmp16_2); //offsets(1,2,3,4)
-
- row21 = row12;
-
- tmp2uq = reinterpret_i(tmp16_1);
- tmp2uq = tmp2uq >> 16;
- tmp16_2 = reinterpret_i(tmp2uq);
- row22 = extend_low(tmp16_2); //offsets(2,3,4,5)
-
- row31 = row22;
- tmp2uq = reinterpret_i(tmp16_1);
- tmp2uq = tmp2uq >> 24;
- tmp16_2 = reinterpret_i(tmp2uq);
- row32 = extend_low(tmp16_2); //offsets(3,4,5,6)
-
- row41 = row32;
- tmp2uq = reinterpret_i(tmp16_1);
- tmp2uq = tmp2uq >> 32;
- tmp16_2 = reinterpret_i(tmp2uq);
- row42 = extend_low(tmp16_2); //offsets(4,5,6,7)
-
- v_deltaPos = v_ipAngle = 26;
-
- PRED_INTRA_ANG4_END
+void PredIntraAng4_26(pixel* dst, int dstStride, pixel *refMain, int dirMode)
+{ // Intrinsic rewrite of the vector-class version: fills row11..row42 from refMain, then sets the angle to 26.
+ PRED_INTRA_ANGLE_4_START // macro defined elsewhere; presumably declares the locals used below -- confirm
+
+ tmp16_1 = _mm_loadl_epi64((__m128i*)(refMain + 1)); // load 8 bytes starting at refMain[1] into the low 64 bits (upper 64 bits are zeroed)
+ row11 =_mm_unpacklo_epi8(tmp16_1, _mm_setzero_si128()); // interleave with zero: widen bytes to 16-bit lanes; offsets(0,1,2,3)
+
+ tmp2 = tmp16_1;
+ tmp2 = _mm_srl_epi64(tmp2,_mm_cvtsi32_si128(8)); // logical right shift by 8 bits = drop the first byte
+
+ tmp16_2 = tmp2; // plain copy; takes the place of the old reinterpret_i step
+ row12 = _mm_unpacklo_epi8(tmp16_2, _mm_setzero_si128()); // offsets(1,2,3,4)
+
+ row21 = row12; // second row pair reuses the previous row's shifted operand
+ tmp2 = tmp16_1;
+ tmp2 = _mm_srl_epi64(tmp2, _mm_cvtsi32_si128(16)); // shift by 16 bits = drop the first two bytes
+
+ tmp16_2 = tmp2;
+ row22 = _mm_unpacklo_epi8(tmp16_2, _mm_setzero_si128()); // offsets(2,3,4,5)
+
+ row31 = row22;
+ tmp2 = tmp16_1;
+ tmp2 = _mm_srl_epi64(tmp2, _mm_cvtsi32_si128(24)); // shift by 24 bits = drop the first three bytes
+
+ tmp16_2 = tmp2;
+ row32 = _mm_unpacklo_epi8(tmp16_2, _mm_setzero_si128()); // offsets(3,4,5,6)
+
+ row41 = row32;
+ tmp2 = tmp16_1;
+ tmp2 = _mm_srl_epi64(tmp2, _mm_cvtsi32_si128(32)); // shift by 32 bits = drop the first four bytes
+
+ tmp16_2 = tmp2;
+ row42 = _mm_unpacklo_epi8(tmp16_2, _mm_setzero_si128()); // offsets(4,5,6,7)
+
+ deltaPos = ipAngle = _mm_set1_epi16(26); // broadcast 26 to every 16-bit lane of both vectors
+
+ PRED_INTRA_ANGLE_4_END // macro defined elsewhere; presumably consumes the row*/deltaPos/ipAngle values and writes dst -- confirm
}
void PredIntraAng4_21(pixel* dst, int dstStride, pixel *refMain, int dirMode)
More information about the x265-devel
mailing list