<div dir="ltr"><br><div class="gmail_extra"><br><br><div class="gmail_quote">On Thu, Jun 27, 2013 at 2:37 AM, <span dir="ltr"><<a href="mailto:mandar@multicorewareinc.com" target="_blank">mandar@multicorewareinc.com</a>></span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Mandar Gurav<br>
# Date 1372240787 25200<br>
# Node ID e156dc24f05f4c2e6770fde1b46754cce640a96b<br>
# Parent 7a2555036e8db57557f655f3ed49e38ab6d784dd<br>
primitives: 8 bit : PredIntraAng4x4 function table implementation<br></blockquote><div><br></div><div style>folded, tweaked, and pushed</div><div><br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
diff -r 7a2555036e8d -r e156dc24f05f source/common/vec/intrapred.inc<br>
--- a/source/common/vec/intrapred.inc Mon Jun 24 22:26:33 2013 -0500<br>
+++ b/source/common/vec/intrapred.inc Wed Jun 26 02:59:47 2013 -0700<br>
@@ -1672,33 +1672,1229 @@<br>
}<br>
<br>
#else /* if HIGH_BIT_DEPTH */<br>
+<br>
+void PredIntraAng4_32(pixel* pDst, int dstStride, pixel *refMain, int dirMode)<br>
+{<br>
+ Vec16uc tmp16_1, tmp16_2;<br>
+ dirMode++;<br>
+ tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 2);<br>
+ store_partial(const_int(4), pDst, tmp16_1);<br>
+ tmp16_2 = (Vec16uc)load_partial(const_int(8), refMain + 3);<br>
+ store_partial(const_int(4), pDst + dstStride, tmp16_2);<br>
+ tmp16_2 = (Vec16uc)load_partial(const_int(8), refMain + 4);<br>
+ store_partial(const_int(4), pDst + 2 * dstStride, tmp16_2);<br>
+ tmp16_2 = (Vec16uc)load_partial(const_int(8), refMain + 5);<br>
+ store_partial(const_int(4), pDst + 3 * dstStride, tmp16_2);<br>
+}<br>
+<br>
+void PredIntraAng4_26(pixel* pDst, int dstStride, pixel *refMain, int dirMode)<br>
+{<br>
+ Vec8s row11, row12, row21, row22, row31, row32, row41, row42;<br>
+ Vec16uc tmp16_1, tmp16_2;<br>
+ Vec2uq tmp2uq;<br>
+ Vec8s v_deltaFract, v_deltaPos(0), thirty2(32), thirty1(31), v_ipAngle(0);<br>
+ bool modeHor = (dirMode < 18);<br>
+ tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 1);<br>
+<br>
+ row11 = extend_low(tmp16_1); //offsets(0,1,2,3)<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 8;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row12 = extend_low(tmp16_2); //offsets(1,2,3,4)<br>
+<br>
+ row21 = row12;<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 16;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row22 = extend_low(tmp16_2); //offsets(2,3,4,5)<br>
+<br>
+ row31 = row22;<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 24;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row32 = extend_low(tmp16_2); //offsets(3,4,5,6)<br>
+<br>
+ row41 = row32;<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 32;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row42 = extend_low(tmp16_2); //offsets(4,5,6,7)<br>
+<br>
+ v_deltaPos = v_ipAngle = 26;<br>
+<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row11 = ((thirty2 - v_deltaFract) * row11 + (v_deltaFract * row12) + 16) >> 5;<br>
+<br>
+ //row2<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row21 = ((thirty2 - v_deltaFract) * row21 + (v_deltaFract * row22) + 16) >> 5;<br>
+<br>
+ //row3<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row31 = ((thirty2 - v_deltaFract) * row31 + (v_deltaFract * row32) + 16) >> 5;<br>
+<br>
+ //row4<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row41 = ((thirty2 - v_deltaFract) * row41 + (v_deltaFract * row42) + 16) >> 5;<br>
+<br>
+ // Flip the block<br>
+ if (modeHor)<br>
+ {<br>
+ Vec8s tmp1, tmp2, tmp3, tmp4;<br>
+<br>
+ tmp1 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row11, row31);<br>
+ tmp2 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row21, row41);<br>
+<br>
+ tmp3 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(tmp1, tmp2);<br>
+ tmp4 = blend8s<4, 12, 5, 13, 6, 14, 7, 15>(tmp1, tmp2);<br>
+<br>
+ tmp16_1 = compress_unsafe(tmp3, tmp3);<br>
+ store_partial(const_int(4), pDst, tmp16_1);<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq >>= 32;<br>
+ store_partial(const_int(4), pDst + dstStride, tmp2uq);<br>
+<br>
+ tmp16_1 = compress_unsafe(tmp4, tmp4);<br>
+ store_partial(const_int(4), pDst + (2 * dstStride), tmp16_1);<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq >>= 32;<br>
+ store_partial(const_int(4), pDst + (3 * dstStride), tmp2uq);<br>
+ }<br>
+ else<br>
+ {<br>
+ store_partial(const_int(4), pDst, compress_unsafe(row11, row11));<br>
+ store_partial(const_int(4), pDst + (dstStride), compress_unsafe(row21, row21));<br>
+ store_partial(const_int(4), pDst + (2 * dstStride), compress_unsafe(row31, row31));<br>
+ store_partial(const_int(4), pDst + (3 * dstStride), compress_unsafe(row41, row41));<br>
+ }<br>
+}<br>
+<br>
+void PredIntraAng4_21(pixel* pDst, int dstStride, pixel *refMain, int dirMode)<br>
+{<br>
+ Vec8s row11, row12, row21, row22, row31, row32, row41, row42;<br>
+ Vec16uc tmp16_1, tmp16_2;<br>
+ Vec2uq tmp2uq;<br>
+ Vec8s v_deltaFract, v_deltaPos(0), thirty2(32), thirty1(31), v_ipAngle(0);<br>
+ bool modeHor = (dirMode < 18);<br>
+<br>
+ tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 1);<br>
+<br>
+ row11 = extend_low(tmp16_1); //offsets(0,1,2,3)<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 8;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row12 = extend_low(tmp16_2); //offsets(1,2,3,4)<br>
+<br>
+ row21 = row12;<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 16;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row22 = extend_low(tmp16_2); //offsets(2,3,4,5)<br>
+<br>
+ row31 = row21;<br>
+ row32 = row22;<br>
+<br>
+ row41 = row22;<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 24;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row42 = extend_low(tmp16_2); //offsets(3,4,5,6)<br>
+<br>
+ v_deltaPos = v_ipAngle = 21;<br>
+<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row11 = ((thirty2 - v_deltaFract) * row11 + (v_deltaFract * row12) + 16) >> 5;<br>
+<br>
+ //row2<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row21 = ((thirty2 - v_deltaFract) * row21 + (v_deltaFract * row22) + 16) >> 5;<br>
+<br>
+ //row3<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row31 = ((thirty2 - v_deltaFract) * row31 + (v_deltaFract * row32) + 16) >> 5;<br>
+<br>
+ //row4<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row41 = ((thirty2 - v_deltaFract) * row41 + (v_deltaFract * row42) + 16) >> 5;<br>
+<br>
+ // Flip the block<br>
+ if (modeHor)<br>
+ {<br>
+ Vec8s tmp1, tmp2, tmp3, tmp4;<br>
+<br>
+ tmp1 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row11, row31);<br>
+ tmp2 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row21, row41);<br>
+<br>
+ tmp3 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(tmp1, tmp2);<br>
+ tmp4 = blend8s<4, 12, 5, 13, 6, 14, 7, 15>(tmp1, tmp2);<br>
+<br>
+ tmp16_1 = compress_unsafe(tmp3, tmp3);<br>
+ store_partial(const_int(4), pDst, tmp16_1);<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq >>= 32;<br>
+ store_partial(const_int(4), pDst + dstStride, tmp2uq);<br>
+<br>
+ tmp16_1 = compress_unsafe(tmp4, tmp4);<br>
+ store_partial(const_int(4), pDst + (2 * dstStride), tmp16_1);<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq >>= 32;<br>
+ store_partial(const_int(4), pDst + (3 * dstStride), tmp2uq);<br>
+ }<br>
+ else<br>
+ {<br>
+ store_partial(const_int(4), pDst, compress_unsafe(row11, row11));<br>
+ store_partial(const_int(4), pDst + (dstStride), compress_unsafe(row21, row21));<br>
+ store_partial(const_int(4), pDst + (2 * dstStride), compress_unsafe(row31, row31));<br>
+ store_partial(const_int(4), pDst + (3 * dstStride), compress_unsafe(row41, row41));<br>
+ }<br>
+}<br>
+<br>
+void PredIntraAng4_17(pixel* pDst, int dstStride, pixel *refMain, int dirMode)<br>
+{<br>
+ Vec8s row11, row12, row21, row22, row31, row32, row41, row42;<br>
+ Vec16uc tmp16_1, tmp16_2;<br>
+ Vec2uq tmp2uq;<br>
+ Vec8s v_deltaFract, v_deltaPos(0), thirty2(32), thirty1(31), v_ipAngle(0);<br>
+ bool modeHor = (dirMode < 18);<br>
+<br>
+ tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 1);<br>
+<br>
+ row11 = extend_low(tmp16_1); //offsets(0,1,2,3)<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 8;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row12 = extend_low(tmp16_2); //offsets(1,2,3,4)<br>
+<br>
+ row21 = row12;<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 16;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row22 = extend_low(tmp16_2); //offsets(2,3,4,5)<br>
+<br>
+ row31 = row21;<br>
+ row32 = row22;<br>
+<br>
+ row41 = row22;<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 24;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row42 = extend_low(tmp16_2); //offsets(3,4,5,6)<br>
+<br>
+ v_deltaPos = v_ipAngle = 17;<br>
+<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row11 = ((thirty2 - v_deltaFract) * row11 + (v_deltaFract * row12) + 16) >> 5;<br>
+<br>
+ //row2<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row21 = ((thirty2 - v_deltaFract) * row21 + (v_deltaFract * row22) + 16) >> 5;<br>
+<br>
+ //row3<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row31 = ((thirty2 - v_deltaFract) * row31 + (v_deltaFract * row32) + 16) >> 5;<br>
+<br>
+ //row4<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row41 = ((thirty2 - v_deltaFract) * row41 + (v_deltaFract * row42) + 16) >> 5;<br>
+<br>
+ // Flip the block<br>
+ if (modeHor)<br>
+ {<br>
+ Vec8s tmp1, tmp2, tmp3, tmp4;<br>
+<br>
+ tmp1 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row11, row31);<br>
+ tmp2 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row21, row41);<br>
+<br>
+ tmp3 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(tmp1, tmp2);<br>
+ tmp4 = blend8s<4, 12, 5, 13, 6, 14, 7, 15>(tmp1, tmp2);<br>
+<br>
+ tmp16_1 = compress_unsafe(tmp3, tmp3);<br>
+ store_partial(const_int(4), pDst, tmp16_1);<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq >>= 32;<br>
+ store_partial(const_int(4), pDst + dstStride, tmp2uq);<br>
+<br>
+ tmp16_1 = compress_unsafe(tmp4, tmp4);<br>
+ store_partial(const_int(4), pDst + (2 * dstStride), tmp16_1);<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq >>= 32;<br>
+ store_partial(const_int(4), pDst + (3 * dstStride), tmp2uq);<br>
+ }<br>
+ else<br>
+ {<br>
+ store_partial(const_int(4), pDst, compress_unsafe(row11, row11));<br>
+ store_partial(const_int(4), pDst + (dstStride), compress_unsafe(row21, row21));<br>
+ store_partial(const_int(4), pDst + (2 * dstStride), compress_unsafe(row31, row31));<br>
+ store_partial(const_int(4), pDst + (3 * dstStride), compress_unsafe(row41, row41));<br>
+ }<br>
+}<br>
+<br>
+void PredIntraAng4_13(pixel* pDst, int dstStride, pixel *refMain, int dirMode)<br>
+{<br>
+ Vec8s row11, row12, row21, row22, row31, row32, row41, row42;<br>
+ Vec16uc tmp16_1, tmp16_2;<br>
+ Vec2uq tmp2uq;<br>
+ Vec8s v_deltaFract, v_deltaPos(0), thirty2(32), thirty1(31), v_ipAngle(0);<br>
+ bool modeHor = (dirMode < 18);<br>
+<br>
+ tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 1);<br>
+<br>
+ row11 = extend_low(tmp16_1); //offsets(0,1,2,3)<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 8;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row12 = extend_low(tmp16_2); //offsets(1,2,3,4)<br>
+<br>
+ row21 = row11; //offsets(0,1,2,3)<br>
+ row22 = row12;<br>
+ row31 = row12; //offsets(1,2,3,4)<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 16;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row32 = extend_low(tmp16_2); //offsets(2,3,4,5)<br>
+<br>
+ row41 = row31; //offsets(1,2,3,4)<br>
+ row42 = row32;<br>
+<br>
+ v_deltaPos = v_ipAngle = 13;<br>
+<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row11 = ((thirty2 - v_deltaFract) * row11 + (v_deltaFract * row12) + 16) >> 5;<br>
+<br>
+ //row2<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row21 = ((thirty2 - v_deltaFract) * row21 + (v_deltaFract * row22) + 16) >> 5;<br>
+<br>
+ //row3<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row31 = ((thirty2 - v_deltaFract) * row31 + (v_deltaFract * row32) + 16) >> 5;<br>
+<br>
+ //row4<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row41 = ((thirty2 - v_deltaFract) * row41 + (v_deltaFract * row42) + 16) >> 5;<br>
+<br>
+ // Flip the block<br>
+ if (modeHor)<br>
+ {<br>
+ Vec8s tmp1, tmp2, tmp3, tmp4;<br>
+<br>
+ tmp1 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row11, row31);<br>
+ tmp2 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row21, row41);<br>
+<br>
+ tmp3 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(tmp1, tmp2);<br>
+ tmp4 = blend8s<4, 12, 5, 13, 6, 14, 7, 15>(tmp1, tmp2);<br>
+<br>
+ tmp16_1 = compress_unsafe(tmp3, tmp3);<br>
+ store_partial(const_int(4), pDst, tmp16_1);<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq >>= 32;<br>
+ store_partial(const_int(4), pDst + dstStride, tmp2uq);<br>
+<br>
+ tmp16_1 = compress_unsafe(tmp4, tmp4);<br>
+ store_partial(const_int(4), pDst + (2 * dstStride), tmp16_1);<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq >>= 32;<br>
+ store_partial(const_int(4), pDst + (3 * dstStride), tmp2uq);<br>
+ }<br>
+ else<br>
+ {<br>
+ store_partial(const_int(4), pDst, compress_unsafe(row11, row11));<br>
+ store_partial(const_int(4), pDst + (dstStride), compress_unsafe(row21, row21));<br>
+ store_partial(const_int(4), pDst + (2 * dstStride), compress_unsafe(row31, row31));<br>
+ store_partial(const_int(4), pDst + (3 * dstStride), compress_unsafe(row41, row41));<br>
+ }<br>
+}<br>
+<br>
+void PredIntraAng4_9(pixel* pDst, int dstStride, pixel *refMain, int dirMode)<br>
+{<br>
+ Vec8s row11, row12, row21, row22, row31, row32, row41, row42;<br>
+ Vec16uc tmp16_1, tmp16_2;<br>
+ Vec2uq tmp2uq;<br>
+ Vec8s v_deltaFract, v_deltaPos(0), thirty2(32), thirty1(31), v_ipAngle(0);<br>
+ bool modeHor = (dirMode < 18);<br>
+<br>
+ tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 1);<br>
+ row11 = extend_low(tmp16_1); //offsets(0,1,2,3)<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 8;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row12 = extend_low(tmp16_2); //offsets(1,2,3,4)<br>
+ row21 = row11; //offsets(0,1,2,3)<br>
+ row22 = row12;<br>
+ row31 = row11;<br>
+ row32 = row12;<br>
+ row41 = row12;<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 16;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row42 = extend_low(tmp16_2);<br>
+<br>
+ v_deltaPos = v_ipAngle = 9;<br>
+<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row11 = ((thirty2 - v_deltaFract) * row11 + (v_deltaFract * row12) + 16) >> 5;<br>
+<br>
+ //row2<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row21 = ((thirty2 - v_deltaFract) * row21 + (v_deltaFract * row22) + 16) >> 5;<br>
+<br>
+ //row3<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row31 = ((thirty2 - v_deltaFract) * row31 + (v_deltaFract * row32) + 16) >> 5;<br>
+<br>
+ //row4<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row41 = ((thirty2 - v_deltaFract) * row41 + (v_deltaFract * row42) + 16) >> 5;<br>
+<br>
+ // Flip the block<br>
+ if (modeHor)<br>
+ {<br>
+ Vec8s tmp1, tmp2, tmp3, tmp4;<br>
+<br>
+ tmp1 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row11, row31);<br>
+ tmp2 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row21, row41);<br>
+<br>
+ tmp3 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(tmp1, tmp2);<br>
+ tmp4 = blend8s<4, 12, 5, 13, 6, 14, 7, 15>(tmp1, tmp2);<br>
+<br>
+ tmp16_1 = compress_unsafe(tmp3, tmp3);<br>
+ store_partial(const_int(4), pDst, tmp16_1);<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq >>= 32;<br>
+ store_partial(const_int(4), pDst + dstStride, tmp2uq);<br>
+<br>
+ tmp16_1 = compress_unsafe(tmp4, tmp4);<br>
+ store_partial(const_int(4), pDst + (2 * dstStride), tmp16_1);<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq >>= 32;<br>
+ store_partial(const_int(4), pDst + (3 * dstStride), tmp2uq);<br>
+ }<br>
+ else<br>
+ {<br>
+ store_partial(const_int(4), pDst, compress_unsafe(row11, row11));<br>
+ store_partial(const_int(4), pDst + (dstStride), compress_unsafe(row21, row21));<br>
+ store_partial(const_int(4), pDst + (2 * dstStride), compress_unsafe(row31, row31));<br>
+ store_partial(const_int(4), pDst + (3 * dstStride), compress_unsafe(row41, row41));<br>
+ }<br>
+}<br>
+<br>
+void PredIntraAng4_5(pixel* pDst, int dstStride, pixel *refMain, int dirMode)<br>
+{<br>
+ Vec8s row11, row12, row21, row22, row31, row32, row41, row42;<br>
+ Vec16uc tmp16_1, tmp16_2;<br>
+ Vec2uq tmp2uq;<br>
+ Vec8s v_deltaFract, v_deltaPos(0), thirty2(32), thirty1(31), v_ipAngle(0);<br>
+ bool modeHor = (dirMode < 18);<br>
+<br>
+ tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 1);<br>
+ row11 = extend_low(tmp16_1); //offsets(0,1,2,3)<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 8;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row12 = extend_low(tmp16_2); //offsets(1,2,3,4)<br>
+ row21 = row11; //offsets(0,1,2,3)<br>
+ row22 = row12;<br>
+ row31 = row11;<br>
+ row32 = row12;<br>
+ row41 = row11;<br>
+ row42 = row12;<br>
+<br>
+ v_deltaPos = v_ipAngle = 5;<br>
+<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row11 = ((thirty2 - v_deltaFract) * row11 + (v_deltaFract * row12) + 16) >> 5;<br>
+<br>
+ //row2<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row21 = ((thirty2 - v_deltaFract) * row21 + (v_deltaFract * row22) + 16) >> 5;<br>
+<br>
+ //row3<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row31 = ((thirty2 - v_deltaFract) * row31 + (v_deltaFract * row32) + 16) >> 5;<br>
+<br>
+ //row4<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row41 = ((thirty2 - v_deltaFract) * row41 + (v_deltaFract * row42) + 16) >> 5;<br>
+<br>
+ // Flip the block<br>
+ if (modeHor)<br>
+ {<br>
+ Vec8s tmp1, tmp2, tmp3, tmp4;<br>
+<br>
+ tmp1 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row11, row31);<br>
+ tmp2 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row21, row41);<br>
+<br>
+ tmp3 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(tmp1, tmp2);<br>
+ tmp4 = blend8s<4, 12, 5, 13, 6, 14, 7, 15>(tmp1, tmp2);<br>
+<br>
+ tmp16_1 = compress_unsafe(tmp3, tmp3);<br>
+ store_partial(const_int(4), pDst, tmp16_1);<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq >>= 32;<br>
+ store_partial(const_int(4), pDst + dstStride, tmp2uq);<br>
+<br>
+ tmp16_1 = compress_unsafe(tmp4, tmp4);<br>
+ store_partial(const_int(4), pDst + (2 * dstStride), tmp16_1);<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq >>= 32;<br>
+ store_partial(const_int(4), pDst + (3 * dstStride), tmp2uq);<br>
+ }<br>
+ else<br>
+ {<br>
+ store_partial(const_int(4), pDst, compress_unsafe(row11, row11));<br>
+ store_partial(const_int(4), pDst + (dstStride), compress_unsafe(row21, row21));<br>
+ store_partial(const_int(4), pDst + (2 * dstStride), compress_unsafe(row31, row31));<br>
+ store_partial(const_int(4), pDst + (3 * dstStride), compress_unsafe(row41, row41));<br>
+ }<br>
+}<br>
+<br>
+void PredIntraAng4_2(pixel* pDst, int dstStride, pixel *refMain, int dirMode)<br>
+{<br>
+ Vec8s row11, row12, row21, row22, row31, row32, row41, row42;<br>
+ Vec16uc tmp16_1, tmp16_2;<br>
+ Vec2uq tmp2uq;<br>
+ Vec8s v_deltaFract, v_deltaPos(0), thirty2(32), thirty1(31), v_ipAngle(0);<br>
+ bool modeHor = (dirMode < 18);<br>
+<br>
+ tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 1);<br>
+ row11 = extend_low(tmp16_1); //offsets(0,1,2,3)<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 8;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row12 = extend_low(tmp16_2); //offsets(1,2,3,4)<br>
+ row21 = row11; //offsets(0,1,2,3)<br>
+ row22 = row12;<br>
+ row31 = row11;<br>
+ row32 = row12;<br>
+ row41 = row11;<br>
+ row42 = row12;<br>
+<br>
+ v_deltaPos = v_ipAngle = 2;<br>
+<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row11 = ((thirty2 - v_deltaFract) * row11 + (v_deltaFract * row12) + 16) >> 5;<br>
+<br>
+ //row2<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row21 = ((thirty2 - v_deltaFract) * row21 + (v_deltaFract * row22) + 16) >> 5;<br>
+<br>
+ //row3<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row31 = ((thirty2 - v_deltaFract) * row31 + (v_deltaFract * row32) + 16) >> 5;<br>
+<br>
+ //row4<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row41 = ((thirty2 - v_deltaFract) * row41 + (v_deltaFract * row42) + 16) >> 5;<br>
+<br>
+ // Flip the block<br>
+ if (modeHor)<br>
+ {<br>
+ Vec8s tmp1, tmp2, tmp3, tmp4;<br>
+<br>
+ tmp1 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row11, row31);<br>
+ tmp2 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row21, row41);<br>
+<br>
+ tmp3 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(tmp1, tmp2);<br>
+ tmp4 = blend8s<4, 12, 5, 13, 6, 14, 7, 15>(tmp1, tmp2);<br>
+<br>
+ tmp16_1 = compress_unsafe(tmp3, tmp3);<br>
+ store_partial(const_int(4), pDst, tmp16_1);<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq >>= 32;<br>
+ store_partial(const_int(4), pDst + dstStride, tmp2uq);<br>
+<br>
+ tmp16_1 = compress_unsafe(tmp4, tmp4);<br>
+ store_partial(const_int(4), pDst + (2 * dstStride), tmp16_1);<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq >>= 32;<br>
+ store_partial(const_int(4), pDst + (3 * dstStride), tmp2uq);<br>
+ }<br>
+ else<br>
+ {<br>
+ store_partial(const_int(4), pDst, compress_unsafe(row11, row11));<br>
+ store_partial(const_int(4), pDst + (dstStride), compress_unsafe(row21, row21));<br>
+ store_partial(const_int(4), pDst + (2 * dstStride), compress_unsafe(row31, row31));<br>
+ store_partial(const_int(4), pDst + (3 * dstStride), compress_unsafe(row41, row41));<br>
+ }<br>
+}<br>
+<br>
+void PredIntraAng4_m_2(pixel* pDst, int dstStride, pixel *refMain, int dirMode)<br>
+{<br>
+ Vec8s row11, row12, row21, row22, row31, row32, row41, row42;<br>
+ Vec16uc tmp16_1, tmp16_2;<br>
+ Vec2uq tmp2uq;<br>
+ Vec8s v_deltaFract, v_deltaPos(0), thirty2(32), thirty1(31), v_ipAngle(0);<br>
+ bool modeHor = (dirMode < 18);<br>
+<br>
+ tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain);<br>
+ row11 = extend_low(tmp16_1); //offsets(0,1,2,3)<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 8;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row12 = extend_low(tmp16_2); //offsets(1,2,3,4)<br>
+ row21 = row11; //offsets(0,1,2,3)<br>
+ row22 = row12;<br>
+ row31 = row11;<br>
+ row32 = row12;<br>
+ row41 = row11;<br>
+ row42 = row12;<br>
+<br>
+ v_deltaPos = v_ipAngle = -2;<br>
+<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row11 = ((thirty2 - v_deltaFract) * row11 + (v_deltaFract * row12) + 16) >> 5;<br>
+<br>
+ //row2<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row21 = ((thirty2 - v_deltaFract) * row21 + (v_deltaFract * row22) + 16) >> 5;<br>
+<br>
+ //row3<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row31 = ((thirty2 - v_deltaFract) * row31 + (v_deltaFract * row32) + 16) >> 5;<br>
+<br>
+ //row4<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row41 = ((thirty2 - v_deltaFract) * row41 + (v_deltaFract * row42) + 16) >> 5;<br>
+<br>
+ // Flip the block<br>
+ if (modeHor)<br>
+ {<br>
+ Vec8s tmp1, tmp2, tmp3, tmp4;<br>
+<br>
+ tmp1 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row11, row31);<br>
+ tmp2 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row21, row41);<br>
+<br>
+ tmp3 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(tmp1, tmp2);<br>
+ tmp4 = blend8s<4, 12, 5, 13, 6, 14, 7, 15>(tmp1, tmp2);<br>
+<br>
+ tmp16_1 = compress_unsafe(tmp3, tmp3);<br>
+ store_partial(const_int(4), pDst, tmp16_1);<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq >>= 32;<br>
+ store_partial(const_int(4), pDst + dstStride, tmp2uq);<br>
+<br>
+ tmp16_1 = compress_unsafe(tmp4, tmp4);<br>
+ store_partial(const_int(4), pDst + (2 * dstStride), tmp16_1);<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq >>= 32;<br>
+ store_partial(const_int(4), pDst + (3 * dstStride), tmp2uq);<br>
+ }<br>
+ else<br>
+ {<br>
+ store_partial(const_int(4), pDst, compress_unsafe(row11, row11));<br>
+ store_partial(const_int(4), pDst + (dstStride), compress_unsafe(row21, row21));<br>
+ store_partial(const_int(4), pDst + (2 * dstStride), compress_unsafe(row31, row31));<br>
+ store_partial(const_int(4), pDst + (3 * dstStride), compress_unsafe(row41, row41));<br>
+ }<br>
+}<br>
+<br>
+void PredIntraAng4_m_5(pixel* pDst, int dstStride, pixel *refMain, int dirMode)<br>
+{<br>
+ Vec8s row11, row12, row21, row22, row31, row32, row41, row42;<br>
+ Vec16uc tmp16_1, tmp16_2;<br>
+ Vec2uq tmp2uq;<br>
+ Vec8s v_deltaFract, v_deltaPos(0), thirty2(32), thirty1(31), v_ipAngle(0);<br>
+ bool modeHor = (dirMode < 18);<br>
+<br>
+ tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain);<br>
+ row11 = extend_low(tmp16_1); //offsets(0,1,2,3)<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 8;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row12 = extend_low(tmp16_2); //offsets(1,2,3,4)<br>
+ row21 = row11; //offsets(0,1,2,3)<br>
+ row22 = row12;<br>
+ row31 = row11;<br>
+ row32 = row12;<br>
+ row41 = row11;<br>
+ row42 = row12;<br>
+<br>
+ v_deltaPos = v_ipAngle = -5;<br>
+<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row11 = ((thirty2 - v_deltaFract) * row11 + (v_deltaFract * row12) + 16) >> 5;<br>
+<br>
+ //row2<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row21 = ((thirty2 - v_deltaFract) * row21 + (v_deltaFract * row22) + 16) >> 5;<br>
+<br>
+ //row3<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row31 = ((thirty2 - v_deltaFract) * row31 + (v_deltaFract * row32) + 16) >> 5;<br>
+<br>
+ //row4<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row41 = ((thirty2 - v_deltaFract) * row41 + (v_deltaFract * row42) + 16) >> 5;<br>
+<br>
+ // Flip the block<br>
+ if (modeHor)<br>
+ {<br>
+ Vec8s tmp1, tmp2, tmp3, tmp4;<br>
+<br>
+ tmp1 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row11, row31);<br>
+ tmp2 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row21, row41);<br>
+<br>
+ tmp3 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(tmp1, tmp2);<br>
+ tmp4 = blend8s<4, 12, 5, 13, 6, 14, 7, 15>(tmp1, tmp2);<br>
+<br>
+ tmp16_1 = compress_unsafe(tmp3, tmp3);<br>
+ store_partial(const_int(4), pDst, tmp16_1);<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq >>= 32;<br>
+ store_partial(const_int(4), pDst + dstStride, tmp2uq);<br>
+<br>
+ tmp16_1 = compress_unsafe(tmp4, tmp4);<br>
+ store_partial(const_int(4), pDst + (2 * dstStride), tmp16_1);<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq >>= 32;<br>
+ store_partial(const_int(4), pDst + (3 * dstStride), tmp2uq);<br>
+ }<br>
+ else<br>
+ {<br>
+ store_partial(const_int(4), pDst, compress_unsafe(row11, row11));<br>
+ store_partial(const_int(4), pDst + (dstStride), compress_unsafe(row21, row21));<br>
+ store_partial(const_int(4), pDst + (2 * dstStride), compress_unsafe(row31, row31));<br>
+ store_partial(const_int(4), pDst + (3 * dstStride), compress_unsafe(row41, row41));<br>
+ }<br>
+}<br>
+<br>
+void PredIntraAng4_m_9(pixel* pDst, int dstStride, pixel *refMain, int dirMode)<br>
+{<br>
+ Vec8s row11, row12, row21, row22, row31, row32, row41, row42;<br>
+ Vec16uc tmp16_1, tmp16_2;<br>
+ Vec2uq tmp2uq;<br>
+ Vec8s v_deltaFract, v_deltaPos(0), thirty2(32), thirty1(31), v_ipAngle(0);<br>
+ bool modeHor = (dirMode < 18);<br>
+<br>
+ tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain - 1);<br>
+ row41 = extend_low(tmp16_1); //offsets(-2,-1,0,1)<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 8;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row42 = extend_low(tmp16_2); //offsets(-1,0,1,2)<br>
+<br>
+ row11 = row42;<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 16;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row12 = extend_low(tmp16_2); //offsets(0,1,2,3)<br>
+<br>
+ row21 = row42; //offsets(-1,0,1,2)<br>
+ row22 = row12;<br>
+ row31 = row42;<br>
+ row32 = row12;<br>
+<br>
+ v_deltaPos = v_ipAngle = -9;<br>
+<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row11 = ((thirty2 - v_deltaFract) * row11 + (v_deltaFract * row12) + 16) >> 5;<br>
+<br>
+ //row2<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row21 = ((thirty2 - v_deltaFract) * row21 + (v_deltaFract * row22) + 16) >> 5;<br>
+<br>
+ //row3<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row31 = ((thirty2 - v_deltaFract) * row31 + (v_deltaFract * row32) + 16) >> 5;<br>
+<br>
+ //row4<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row41 = ((thirty2 - v_deltaFract) * row41 + (v_deltaFract * row42) + 16) >> 5;<br>
+<br>
+ // Flip the block<br>
+ if (modeHor)<br>
+ {<br>
+ Vec8s tmp1, tmp2, tmp3, tmp4;<br>
+<br>
+ tmp1 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row11, row31);<br>
+ tmp2 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row21, row41);<br>
+<br>
+ tmp3 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(tmp1, tmp2);<br>
+ tmp4 = blend8s<4, 12, 5, 13, 6, 14, 7, 15>(tmp1, tmp2);<br>
+<br>
+ tmp16_1 = compress_unsafe(tmp3, tmp3);<br>
+ store_partial(const_int(4), pDst, tmp16_1);<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq >>= 32;<br>
+ store_partial(const_int(4), pDst + dstStride, tmp2uq);<br>
+<br>
+ tmp16_1 = compress_unsafe(tmp4, tmp4);<br>
+ store_partial(const_int(4), pDst + (2 * dstStride), tmp16_1);<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq >>= 32;<br>
+ store_partial(const_int(4), pDst + (3 * dstStride), tmp2uq);<br>
+ }<br>
+ else<br>
+ {<br>
+ store_partial(const_int(4), pDst, compress_unsafe(row11, row11));<br>
+ store_partial(const_int(4), pDst + (dstStride), compress_unsafe(row21, row21));<br>
+ store_partial(const_int(4), pDst + (2 * dstStride), compress_unsafe(row31, row31));<br>
+ store_partial(const_int(4), pDst + (3 * dstStride), compress_unsafe(row41, row41));<br>
+ }<br>
+}<br>
+<br>
+void PredIntraAng4_m_13(pixel* pDst, int dstStride, pixel *refMain, int dirMode)<br>
+{<br>
+ Vec8s row11, row12, row21, row22, row31, row32, row41, row42;<br>
+ Vec16uc tmp16_1, tmp16_2;<br>
+ Vec2uq tmp2uq;<br>
+ Vec8s v_deltaFract, v_deltaPos(0), thirty2(32), thirty1(31), v_ipAngle(0);<br>
+ bool modeHor = (dirMode < 18);<br>
+<br>
+ tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain - 1);<br>
+ row41 = extend_low(tmp16_1); //offsets(-2,-1,0,1)<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 8;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row42 = extend_low(tmp16_2); //offsets(-1,0,1,2)<br>
+<br>
+ row11 = row42;<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 16;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row12 = extend_low(tmp16_2); //offsets(0,1,2,3)<br>
+<br>
+ row21 = row42; //offsets(-1,0,1,2)<br>
+ row22 = row12;<br>
+ row31 = row41;<br>
+ row32 = row42;<br>
+<br>
+ v_deltaPos = v_ipAngle = -13;<br>
+<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row11 = ((thirty2 - v_deltaFract) * row11 + (v_deltaFract * row12) + 16) >> 5;<br>
+<br>
+ //row2<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row21 = ((thirty2 - v_deltaFract) * row21 + (v_deltaFract * row22) + 16) >> 5;<br>
+<br>
+ //row3<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row31 = ((thirty2 - v_deltaFract) * row31 + (v_deltaFract * row32) + 16) >> 5;<br>
+<br>
+ //row4<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row41 = ((thirty2 - v_deltaFract) * row41 + (v_deltaFract * row42) + 16) >> 5;<br>
+<br>
+ // Flip the block<br>
+ if (modeHor)<br>
+ {<br>
+ Vec8s tmp1, tmp2, tmp3, tmp4;<br>
+<br>
+ tmp1 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row11, row31);<br>
+ tmp2 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row21, row41);<br>
+<br>
+ tmp3 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(tmp1, tmp2);<br>
+ tmp4 = blend8s<4, 12, 5, 13, 6, 14, 7, 15>(tmp1, tmp2);<br>
+<br>
+ tmp16_1 = compress_unsafe(tmp3, tmp3);<br>
+ store_partial(const_int(4), pDst, tmp16_1);<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq >>= 32;<br>
+ store_partial(const_int(4), pDst + dstStride, tmp2uq);<br>
+<br>
+ tmp16_1 = compress_unsafe(tmp4, tmp4);<br>
+ store_partial(const_int(4), pDst + (2 * dstStride), tmp16_1);<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq >>= 32;<br>
+ store_partial(const_int(4), pDst + (3 * dstStride), tmp2uq);<br>
+ }<br>
+ else<br>
+ {<br>
+ store_partial(const_int(4), pDst, compress_unsafe(row11, row11));<br>
+ store_partial(const_int(4), pDst + (dstStride), compress_unsafe(row21, row21));<br>
+ store_partial(const_int(4), pDst + (2 * dstStride), compress_unsafe(row31, row31));<br>
+ store_partial(const_int(4), pDst + (3 * dstStride), compress_unsafe(row41, row41));<br>
+ }<br>
+}<br>
+<br>
+void PredIntraAng4_m_17(pixel* pDst, int dstStride, pixel *refMain, int dirMode)<br>
+{<br>
+ Vec8s row11, row12, row21, row22, row31, row32, row41, row42;<br>
+ Vec16uc tmp16_1, tmp16_2;<br>
+ Vec2uq tmp2uq;<br>
+ Vec8s v_deltaFract, v_deltaPos(0), thirty2(32), thirty1(31), v_ipAngle(0);<br>
+ bool modeHor = (dirMode < 18);<br>
+<br>
+ tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain - 2);<br>
+ row41 = extend_low(tmp16_1); //offsets(-3,-2,-1,0)<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 8;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row42 = extend_low(tmp16_2); //offsets(-2,-1,0,1)<br>
+<br>
+ row31 = row42; //offsets(-2,-1,0,1)<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 16;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row32 = extend_low(tmp16_2); //offsets(-1,0,1,2)<br>
+<br>
+ row21 = row31; //offsets(-2,-1,0,1)<br>
+ row22 = row32;<br>
+<br>
+ row11 = row32;<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 24;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row12 = extend_low(tmp16_2); //offsets(0,1,2,3)<br>
+<br>
+ v_deltaPos = v_ipAngle = -17;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row11 = ((thirty2 - v_deltaFract) * row11 + (v_deltaFract * row12) + 16) >> 5;<br>
+<br>
+ //row2<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row21 = ((thirty2 - v_deltaFract) * row21 + (v_deltaFract * row22) + 16) >> 5;<br>
+<br>
+ //row3<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row31 = ((thirty2 - v_deltaFract) * row31 + (v_deltaFract * row32) + 16) >> 5;<br>
+<br>
+ //row4<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row41 = ((thirty2 - v_deltaFract) * row41 + (v_deltaFract * row42) + 16) >> 5;<br>
+<br>
+ // Flip the block<br>
+ if (modeHor)<br>
+ {<br>
+ Vec8s tmp1, tmp2, tmp3, tmp4;<br>
+<br>
+ tmp1 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row11, row31);<br>
+ tmp2 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row21, row41);<br>
+<br>
+ tmp3 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(tmp1, tmp2);<br>
+ tmp4 = blend8s<4, 12, 5, 13, 6, 14, 7, 15>(tmp1, tmp2);<br>
+<br>
+ tmp16_1 = compress_unsafe(tmp3, tmp3);<br>
+ store_partial(const_int(4), pDst, tmp16_1);<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq >>= 32;<br>
+ store_partial(const_int(4), pDst + dstStride, tmp2uq);<br>
+<br>
+ tmp16_1 = compress_unsafe(tmp4, tmp4);<br>
+ store_partial(const_int(4), pDst + (2 * dstStride), tmp16_1);<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq >>= 32;<br>
+ store_partial(const_int(4), pDst + (3 * dstStride), tmp2uq);<br>
+ }<br>
+ else<br>
+ {<br>
+ store_partial(const_int(4), pDst, compress_unsafe(row11, row11));<br>
+ store_partial(const_int(4), pDst + (dstStride), compress_unsafe(row21, row21));<br>
+ store_partial(const_int(4), pDst + (2 * dstStride), compress_unsafe(row31, row31));<br>
+ store_partial(const_int(4), pDst + (3 * dstStride), compress_unsafe(row41, row41));<br>
+ }<br>
+}<br>
+<br>
+void PredIntraAng4_m_21(pixel* pDst, int dstStride, pixel *refMain, int dirMode)<br>
+{<br>
+ Vec8s row11, row12, row21, row22, row31, row32, row41, row42;<br>
+ Vec16uc tmp16_1, tmp16_2;<br>
+ Vec2uq tmp2uq;<br>
+ Vec8s v_deltaFract, v_deltaPos(0), thirty2(32), thirty1(31), v_ipAngle(0);<br>
+ bool modeHor = (dirMode < 18);<br>
+<br>
+ tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain - 2);<br>
+ row41 = extend_low(tmp16_1); //offsets(-3,-2,-1,0)<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 8;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row42 = extend_low(tmp16_2); //offsets(-2,-1,0,1)<br>
+<br>
+ row31 = row42; //offsets(-2,-1,0,1)<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 16;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row32 = extend_low(tmp16_2); //offsets(-1,0,1,2)<br>
+<br>
+ row21 = row31; //offsets(-2,-1,0,1)<br>
+ row22 = row32;<br>
+<br>
+ row11 = row32;<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 24;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row12 = extend_low(tmp16_2); //offsets(0,1,2,3)<br>
+<br>
+ v_deltaPos = v_ipAngle = -21;<br>
+<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row11 = ((thirty2 - v_deltaFract) * row11 + (v_deltaFract * row12) + 16) >> 5;<br>
+<br>
+ //row2<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row21 = ((thirty2 - v_deltaFract) * row21 + (v_deltaFract * row22) + 16) >> 5;<br>
+<br>
+ //row3<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row31 = ((thirty2 - v_deltaFract) * row31 + (v_deltaFract * row32) + 16) >> 5;<br>
+<br>
+ //row4<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row41 = ((thirty2 - v_deltaFract) * row41 + (v_deltaFract * row42) + 16) >> 5;<br>
+<br>
+ // Flip the block<br>
+ if (modeHor)<br>
+ {<br>
+ Vec8s tmp1, tmp2, tmp3, tmp4;<br>
+<br>
+ tmp1 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row11, row31);<br>
+ tmp2 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row21, row41);<br>
+<br>
+ tmp3 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(tmp1, tmp2);<br>
+ tmp4 = blend8s<4, 12, 5, 13, 6, 14, 7, 15>(tmp1, tmp2);<br>
+<br>
+ tmp16_1 = compress_unsafe(tmp3, tmp3);<br>
+ store_partial(const_int(4), pDst, tmp16_1);<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq >>= 32;<br>
+ store_partial(const_int(4), pDst + dstStride, tmp2uq);<br>
+<br>
+ tmp16_1 = compress_unsafe(tmp4, tmp4);<br>
+ store_partial(const_int(4), pDst + (2 * dstStride), tmp16_1);<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq >>= 32;<br>
+ store_partial(const_int(4), pDst + (3 * dstStride), tmp2uq);<br>
+ }<br>
+ else<br>
+ {<br>
+ store_partial(const_int(4), pDst, compress_unsafe(row11, row11));<br>
+ store_partial(const_int(4), pDst + (dstStride), compress_unsafe(row21, row21));<br>
+ store_partial(const_int(4), pDst + (2 * dstStride), compress_unsafe(row31, row31));<br>
+ store_partial(const_int(4), pDst + (3 * dstStride), compress_unsafe(row41, row41));<br>
+ }<br>
+}<br>
+<br>
+void PredIntraAng4_m_26(pixel* pDst, int dstStride, pixel *refMain, int dirMode)<br>
+{<br>
+ Vec8s row11, row12, row21, row22, row31, row32, row41, row42;<br>
+ Vec16uc tmp16_1, tmp16_2;<br>
+ Vec2uq tmp2uq;<br>
+ Vec8s v_deltaFract, v_deltaPos(0), thirty2(32), thirty1(31), v_ipAngle(0);<br>
+ bool modeHor = (dirMode < 18);<br>
+<br>
+ tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain - 3);<br>
+ row41 = extend_low(tmp16_1); //offsets(-4,-3,-2,-1)<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 8;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row42 = extend_low(tmp16_2); //offsets(-3,-2,-1,0)<br>
+<br>
+ row31 = row42; //offsets(-3,-2,-1,0)<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 16;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row32 = extend_low(tmp16_2); //offsets(-2,-1,0,1)<br>
+<br>
+ row21 = row32; //offsets(-2,-1,0,1)<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 24;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row22 = extend_low(tmp16_2); //offsets(-1,0,1,2)<br>
+<br>
+ row11 = row22; //offsets(-1,0,1,2)<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq = tmp2uq >> 32;<br>
+ tmp16_2 = reinterpret_i(tmp2uq);<br>
+ row12 = extend_low(tmp16_2); //offsets(0,1,2,3)<br>
+<br>
+ v_deltaPos = v_ipAngle = -26;<br>
+<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row11 = ((thirty2 - v_deltaFract) * row11 + (v_deltaFract * row12) + 16) >> 5;<br>
+<br>
+ //row2<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row21 = ((thirty2 - v_deltaFract) * row21 + (v_deltaFract * row22) + 16) >> 5;<br>
+<br>
+ //row3<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row31 = ((thirty2 - v_deltaFract) * row31 + (v_deltaFract * row32) + 16) >> 5;<br>
+<br>
+ //row4<br>
+ v_deltaPos += v_ipAngle;<br>
+ v_deltaFract = v_deltaPos & thirty1;<br>
+ row41 = ((thirty2 - v_deltaFract) * row41 + (v_deltaFract * row42) + 16) >> 5;<br>
+<br>
+ // Flip the block<br>
+ if (modeHor)<br>
+ {<br>
+ Vec8s tmp1, tmp2, tmp3, tmp4;<br>
+<br>
+ tmp1 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row11, row31);<br>
+ tmp2 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row21, row41);<br>
+<br>
+ tmp3 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(tmp1, tmp2);<br>
+ tmp4 = blend8s<4, 12, 5, 13, 6, 14, 7, 15>(tmp1, tmp2);<br>
+<br>
+ tmp16_1 = compress_unsafe(tmp3, tmp3);<br>
+ store_partial(const_int(4), pDst, tmp16_1);<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq >>= 32;<br>
+ store_partial(const_int(4), pDst + dstStride, tmp2uq);<br>
+<br>
+ tmp16_1 = compress_unsafe(tmp4, tmp4);<br>
+ store_partial(const_int(4), pDst + (2 * dstStride), tmp16_1);<br>
+<br>
+ tmp2uq = reinterpret_i(tmp16_1);<br>
+ tmp2uq >>= 32;<br>
+ store_partial(const_int(4), pDst + (3 * dstStride), tmp2uq);<br>
+ }<br>
+ else<br>
+ {<br>
+ store_partial(const_int(4), pDst, compress_unsafe(row11, row11));<br>
+ store_partial(const_int(4), pDst + (dstStride), compress_unsafe(row21, row21));<br>
+ store_partial(const_int(4), pDst + (2 * dstStride), compress_unsafe(row31, row31));<br>
+ store_partial(const_int(4), pDst + (3 * dstStride), compress_unsafe(row41, row41));<br>
+ }<br>
+}<br>
+<br>
+void PredIntraAng4_m_32(pixel* pDst, int dstStride, pixel *refMain, int dirMode)<br>
+{<br>
+ Vec16uc tmp16_1, tmp16_2;<br>
+ dirMode++;<br>
+ tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain); //-1,0,1,2<br>
+ store_partial(const_int(4), pDst, tmp16_1);<br>
+ tmp16_2 = (Vec16uc)load_partial(const_int(8), refMain - 1); //-2,-1,0,1<br>
+ store_partial(const_int(4), pDst + dstStride, tmp16_2);<br>
+ tmp16_2 = (Vec16uc)load_partial(const_int(8), refMain - 2);<br>
+ store_partial(const_int(4), pDst + 2 * dstStride, tmp16_2);<br>
+ tmp16_2 = (Vec16uc)load_partial(const_int(8), refMain - 3);<br>
+ store_partial(const_int(4), pDst + 3 * dstStride, tmp16_2);<br>
+}<br>
+<br>
+typedef void (*PredIntraAng4x4_table)(pixel* pDst, int dstStride, pixel *refMain, int dirMode);<br>
+PredIntraAng4x4_table PredIntraAng[] = {<br>
+ /* PredIntraAng4_0 is replaced with PredIntraAng4_2. PredIntraAng4_0 goes through the default path in xPredIntraAng4x4 because we cannot afford to pass a large number of arguments to this function. */<br>
+ PredIntraAng4_32,<br>
+ PredIntraAng4_26,<br>
+ PredIntraAng4_21,<br>
+ PredIntraAng4_17,<br>
+ PredIntraAng4_13,<br>
+ PredIntraAng4_9,<br>
+ PredIntraAng4_5,<br>
+ PredIntraAng4_2,<br>
+ PredIntraAng4_2, //Intentionally wrong! It should be "PredIntraAng4_0" here.<br>
+ PredIntraAng4_m_2,<br>
+ PredIntraAng4_m_5,<br>
+ PredIntraAng4_m_9,<br>
+ PredIntraAng4_m_13,<br>
+ PredIntraAng4_m_17,<br>
+ PredIntraAng4_m_21,<br>
+ PredIntraAng4_m_26,<br>
+ PredIntraAng4_m_32,<br>
+ PredIntraAng4_m_26,<br>
+ PredIntraAng4_m_21,<br>
+ PredIntraAng4_m_17,<br>
+ PredIntraAng4_m_13,<br>
+ PredIntraAng4_m_9,<br>
+ PredIntraAng4_m_5,<br>
+ PredIntraAng4_m_2,<br>
+ PredIntraAng4_2, //Intentionally wrong! It should be "PredIntraAng4_0" here.<br>
+ PredIntraAng4_2,<br>
+ PredIntraAng4_5,<br>
+ PredIntraAng4_9,<br>
+ PredIntraAng4_13,<br>
+ PredIntraAng4_17,<br>
+ PredIntraAng4_21,<br>
+ PredIntraAng4_26,<br>
+ PredIntraAng4_32<br>
+};<br>
void xPredIntraAng4x4(int /*bitDepth*/, pixel* pDst, int dstStride, int width, int dirMode, pixel *refLeft, pixel *refAbove, bool bFilter = true)<br>
{<br>
- int blkSize = width;<br>
-<br>
- // Map the mode index to main prediction direction and angle<br>
assert(dirMode > 1); //no planar and dc<br>
+ int mode_to_angle_table[] = {32, 26, 21, 17, 13, 9, 5, 2, 0, -2, -5, -9, -13, -17, -21, -26, -32, -26, -21, -17, -13, -9, -5, -2, 0, 2, 5, 9, 13, 17, 21, 26, 32};<br>
+ int mode_to_invAng_table[] = {256, 315, 390, 482, 630, 910, 1638, 4096, 0, 4096, 1638, 910, 630, 482, 390, 315, 256, 315, 390, 482, 630, 910, 1638, 4096, 0, 4096, 1638, 910, 630, 482, 390, 315, 256};<br>
+ int intraPredAngle = mode_to_angle_table[dirMode-2];<br>
+ int invAngle = mode_to_invAng_table[dirMode-2];<br>
+<br>
bool modeHor = (dirMode < 18);<br>
bool modeVer = !modeHor;<br>
- int intraPredAngle = modeVer ? (int)dirMode - VER_IDX : modeHor ? -((int)dirMode - HOR_IDX) : 0;<br>
- int absAng = abs(intraPredAngle);<br>
- int signAng = intraPredAngle < 0 ? -1 : 1;<br>
-<br>
- // Set bitshifts and scale the angle parameter to block size<br>
- int angTable[9] = { 0, 2, 5, 9, 13, 17, 21, 26, 32 };<br>
- int invAngTable[9] = { 0, 4096, 1638, 910, 630, 482, 390, 315, 256 }; // (256 * 32) / Angle<br>
- int invAngle = invAngTable[absAng];<br>
- absAng = angTable[absAng];<br>
- intraPredAngle = signAng * absAng;<br>
<br>
// Do angular predictions<br>
-<br>
pixel* refMain;<br>
pixel* refSide;<br>
<br>
// Initialise the Main and Left reference array.<br>
if (intraPredAngle < 0)<br>
{<br>
+ int blkSize = width;<br>
refMain = (modeVer ? refAbove : refLeft); // + (blkSize - 1);<br>
refSide = (modeVer ? refLeft : refAbove); // + (blkSize - 1);<br>
<br>
@@ -1785,415 +2981,7 @@<br>
}<br>
else<br>
{<br>
- Vec8s row11, row12, row21, row22, row31, row32, row41, row42;<br>
- Vec16uc tmp16_1, tmp16_2;<br>
- Vec2uq tmp2uq;<br>
- Vec8s v_deltaFract, v_deltaPos(0), thirty2(32), thirty1(31), v_ipAngle(0);<br>
- switch (intraPredAngle)<br>
- {<br>
- case -32:<br>
- tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain); //-1,0,1,2<br>
- store_partial(const_int(4), pDst, tmp16_1);<br>
- tmp16_2 = (Vec16uc)load_partial(const_int(8), refMain - 1); //-2,-1,0,1<br>
- store_partial(const_int(4), pDst + dstStride, tmp16_2);<br>
- tmp16_2 = (Vec16uc)load_partial(const_int(8), refMain - 2);<br>
- store_partial(const_int(4), pDst + 2 * dstStride, tmp16_2);<br>
- tmp16_2 = (Vec16uc)load_partial(const_int(8), refMain - 3);<br>
- store_partial(const_int(4), pDst + 3 * dstStride, tmp16_2);<br>
- return;<br>
-<br>
- case -26:<br>
- tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain - 3);<br>
- row41 = extend_low(tmp16_1); //offsets(-4,-3,-2,-1)<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 8;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row42 = extend_low(tmp16_2); //offsets(-3,-2,-1,0)<br>
-<br>
- row31 = row42; //offsets(-3,-2,-1,0)<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 16;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row32 = extend_low(tmp16_2); //offsets(-2,-1,0,1)<br>
-<br>
- row21 = row32; //offsets(-2,-1,0,1)<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 24;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row22 = extend_low(tmp16_2); //offsets(-1,0,1,2)<br>
-<br>
- row11 = row22; //offsets(-1,0,1,2)<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 32;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row12 = extend_low(tmp16_2); //offsets(0,1,2,3)<br>
-<br>
- v_deltaPos = v_ipAngle = -26;<br>
- break;<br>
-<br>
- case -21:<br>
- tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain - 2);<br>
- row41 = extend_low(tmp16_1); //offsets(-3,-2,-1,0)<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 8;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row42 = extend_low(tmp16_2); //offsets(-2,-1,0,1)<br>
-<br>
- row31 = row42; //offsets(-2,-1,0,1)<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 16;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row32 = extend_low(tmp16_2); //offsets(-1,0,1,2)<br>
-<br>
- row21 = row31; //offsets(-2,-1,0,1)<br>
- row22 = row32;<br>
-<br>
- row11 = row32;<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 24;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row12 = extend_low(tmp16_2); //offsets(-1,0,1,2)<br>
-<br>
- v_deltaPos = v_ipAngle = -21;<br>
- break;<br>
-<br>
- case -17:<br>
- tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain - 2);<br>
- row41 = extend_low(tmp16_1); //offsets(-3,-2,-1,0)<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 8;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row42 = extend_low(tmp16_2); //offsets(-2,-1,0,1)<br>
-<br>
- row31 = row42; //offsets(-2,-1,0,1)<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 16;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row32 = extend_low(tmp16_2); //offsets(-1,0,1,2)<br>
-<br>
- row21 = row31; //offsets(-2,-1,0,1)<br>
- row22 = row32;<br>
-<br>
- row11 = row32;<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 24;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row12 = extend_low(tmp16_2); //offsets(-1,0,1,2)<br>
-<br>
- v_deltaPos = v_ipAngle = -17;<br>
- break;<br>
-<br>
- case -13:<br>
- tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain - 1);<br>
- row41 = extend_low(tmp16_1); //offsets(-2,-1,0,1)<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 8;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row42 = extend_low(tmp16_2); //offsets(-1,0,1,2)<br>
-<br>
- row11 = row42;<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 16;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row12 = extend_low(tmp16_2); //offsets(-1,0,1,2)<br>
-<br>
- row21 = row42; //offsets(0,1,2,3)<br>
- row22 = row12;<br>
- row31 = row41;<br>
- row32 = row42;<br>
-<br>
- v_deltaPos = v_ipAngle = -13;<br>
- break;<br>
-<br>
- case -9:<br>
- tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain - 1);<br>
- row41 = extend_low(tmp16_1); //offsets(-2,-1,0,1)<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 8;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row42 = extend_low(tmp16_2); //offsets(-1,0,1,2)<br>
-<br>
- row11 = row42;<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 16;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row12 = extend_low(tmp16_2); //offsets(-1,0,1,2)<br>
-<br>
- row21 = row42; //offsets(0,1,2,3)<br>
- row22 = row12;<br>
- row31 = row42;<br>
- row32 = row12;<br>
-<br>
- v_deltaPos = v_ipAngle = -9;<br>
- break;<br>
-<br>
- case -5:<br>
- tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain);<br>
- row11 = extend_low(tmp16_1); //offsets(0,1,2,3)<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 8;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row12 = extend_low(tmp16_2); //offsets(1,2,3,4)<br>
- row21 = row11; //offsets(0,1,2,3)<br>
- row22 = row12;<br>
- row31 = row11;<br>
- row32 = row12;<br>
- row41 = row11;<br>
- row42 = row12;<br>
-<br>
- v_deltaPos = v_ipAngle = -5;<br>
- break;<br>
-<br>
- case -2:<br>
- tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain);<br>
- row11 = extend_low(tmp16_1); //offsets(0,1,2,3)<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 8;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row12 = extend_low(tmp16_2); //offsets(1,2,3,4)<br>
- row21 = row11; //offsets(0,1,2,3)<br>
- row22 = row12;<br>
- row31 = row11;<br>
- row32 = row12;<br>
- row41 = row11;<br>
- row42 = row12;<br>
-<br>
- v_deltaPos = v_ipAngle = -2;<br>
- break;<br>
-<br>
- case 2:<br>
- tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 1);<br>
- row11 = extend_low(tmp16_1); //offsets(0,1,2,3)<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 8;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row12 = extend_low(tmp16_2); //offsets(1,2,3,4)<br>
- row21 = row11; //offsets(0,1,2,3)<br>
- row22 = row12;<br>
- row31 = row11;<br>
- row32 = row12;<br>
- row41 = row11;<br>
- row42 = row12;<br>
-<br>
- v_deltaPos = v_ipAngle = 2;<br>
- break;<br>
-<br>
- case 5:<br>
- tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 1);<br>
- row11 = extend_low(tmp16_1); //offsets(0,1,2,3)<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 8;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row12 = extend_low(tmp16_2); //offsets(1,2,3,4)<br>
- row21 = row11; //offsets(0,1,2,3)<br>
- row22 = row12;<br>
- row31 = row11;<br>
- row32 = row12;<br>
- row41 = row11;<br>
- row42 = row12;<br>
-<br>
- v_deltaPos = v_ipAngle = 5;<br>
- break;<br>
-<br>
- case 9:<br>
- tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 1);<br>
- row11 = extend_low(tmp16_1); //offsets(0,1,2,3)<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 8;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row12 = extend_low(tmp16_2); //offsets(1,2,3,4)<br>
- row21 = row11; //offsets(0,1,2,3)<br>
- row22 = row12;<br>
- row31 = row11;<br>
- row32 = row12;<br>
- row41 = row12;<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 16;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row42 = extend_low(tmp16_2);<br>
-<br>
- v_deltaPos = v_ipAngle = 9;<br>
- break;<br>
-<br>
- case 13:<br>
- tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 1);<br>
-<br>
- row11 = extend_low(tmp16_1); //offsets(0,1,2,3)<br>
-<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 8;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row12 = extend_low(tmp16_2); //offsets(1,2,3,4)<br>
-<br>
- row21 = row11; //offsets(0,1,2,3)<br>
- row22 = row12;<br>
- row31 = row12; //offsets(1,2,3,4)<br>
-<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 16;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row32 = extend_low(tmp16_2); //offsets(2,3,4,5)<br>
-<br>
- row41 = row31; //offsets(1,2,3,4)<br>
- row42 = row32;<br>
-<br>
- v_deltaPos = v_ipAngle = 13;<br>
- break;<br>
-<br>
- case 17:<br>
- tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 1);<br>
-<br>
- row11 = extend_low(tmp16_1); //offsets(0,1,2,3)<br>
-<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 8;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row12 = extend_low(tmp16_2); //offsets(1,2,3,4)<br>
-<br>
- row21 = row12;<br>
-<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 16;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row22 = extend_low(tmp16_2); //offsets(2,3,4,5)<br>
-<br>
- row31 = row21;<br>
- row32 = row22;<br>
-<br>
- row41 = row22;<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 24;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row42 = extend_low(tmp16_2); //offsets(3,4,5,6)<br>
-<br>
- v_deltaPos = v_ipAngle = 17;<br>
- break;<br>
-<br>
- case 21:<br>
- tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 1);<br>
-<br>
- row11 = extend_low(tmp16_1); //offsets(0,1,2,3)<br>
-<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 8;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row12 = extend_low(tmp16_2); //offsets(1,2,3,4)<br>
-<br>
- row21 = row12;<br>
-<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 16;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row22 = extend_low(tmp16_2); //offsets(2,3,4,5)<br>
-<br>
- row31 = row21;<br>
- row32 = row22;<br>
-<br>
- row41 = row22;<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 24;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row42 = extend_low(tmp16_2); //offsets(3,4,5,6)<br>
-<br>
- v_deltaPos = v_ipAngle = 21;<br>
- break;<br>
-<br>
- case 26:<br>
- tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 1);<br>
-<br>
- row11 = extend_low(tmp16_1); //offsets(0,1,2,3)<br>
-<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 8;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row12 = extend_low(tmp16_2); //offsets(1,2,3,4)<br>
-<br>
- row21 = row12;<br>
-<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 16;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row22 = extend_low(tmp16_2); //offsets(2,3,4,5)<br>
-<br>
- row31 = row22;<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 24;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row32 = extend_low(tmp16_2); //offsets(3,4,5,6)<br>
-<br>
- row41 = row32;<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq = tmp2uq >> 32;<br>
- tmp16_2 = reinterpret_i(tmp2uq);<br>
- row42 = extend_low(tmp16_2); //offsets(4,5,6,7)<br>
-<br>
- v_deltaPos = v_ipAngle = 26;<br>
- break;<br>
-<br>
- case 32:<br>
- tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 2);<br>
- store_partial(const_int(4), pDst, tmp16_1);<br>
- tmp16_2 = (Vec16uc)load_partial(const_int(8), refMain + 3);<br>
- store_partial(const_int(4), pDst + dstStride, tmp16_2);<br>
- tmp16_2 = (Vec16uc)load_partial(const_int(8), refMain + 4);<br>
- store_partial(const_int(4), pDst + 2 * dstStride, tmp16_2);<br>
- tmp16_2 = (Vec16uc)load_partial(const_int(8), refMain + 5);<br>
- store_partial(const_int(4), pDst + 3 * dstStride, tmp16_2);<br>
- return;<br>
- }<br>
-<br>
- //row1<br>
- v_deltaFract = v_deltaPos & thirty1;<br>
- row11 = ((thirty2 - v_deltaFract) * row11 + (v_deltaFract * row12) + 16) >> 5;<br>
-<br>
- //row2<br>
- v_deltaPos += v_ipAngle;<br>
- v_deltaFract = v_deltaPos & thirty1;<br>
- row21 = ((thirty2 - v_deltaFract) * row21 + (v_deltaFract * row22) + 16) >> 5;<br>
-<br>
- //row3<br>
- v_deltaPos += v_ipAngle;<br>
- v_deltaFract = v_deltaPos & thirty1;<br>
- row31 = ((thirty2 - v_deltaFract) * row31 + (v_deltaFract * row32) + 16) >> 5;<br>
-<br>
- //row4<br>
- v_deltaPos += v_ipAngle;<br>
- v_deltaFract = v_deltaPos & thirty1;<br>
- row41 = ((thirty2 - v_deltaFract) * row41 + (v_deltaFract * row42) + 16) >> 5;<br>
-<br>
- // Flip the block<br>
-<br>
- if (modeHor)<br>
- {<br>
- Vec8s tmp1, tmp2, tmp3, tmp4;<br>
-<br>
- tmp1 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row11, row31);<br>
- tmp2 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row21, row41);<br>
-<br>
- tmp3 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(tmp1, tmp2);<br>
- tmp4 = blend8s<4, 12, 5, 13, 6, 14, 7, 15>(tmp1, tmp2);<br>
-<br>
- tmp16_1 = compress_unsafe(tmp3, tmp3);<br>
- store_partial(const_int(4), pDst, tmp16_1);<br>
-<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq >>= 32;<br>
- store_partial(const_int(4), pDst + dstStride, tmp2uq);<br>
-<br>
- tmp16_1 = compress_unsafe(tmp4, tmp4);<br>
- store_partial(const_int(4), pDst + (2 * dstStride), tmp16_1);<br>
-<br>
- tmp2uq = reinterpret_i(tmp16_1);<br>
- tmp2uq >>= 32;<br>
- store_partial(const_int(4), pDst + (3 * dstStride), tmp2uq);<br>
- }<br>
- else<br>
- {<br>
- store_partial(const_int(4), pDst, compress_unsafe(row11, row11));<br>
- store_partial(const_int(4), pDst + (dstStride), compress_unsafe(row21, row21));<br>
- store_partial(const_int(4), pDst + (2 * dstStride), compress_unsafe(row31, row31));<br>
- store_partial(const_int(4), pDst + (3 * dstStride), compress_unsafe(row41, row41));<br>
- }<br>
+ PredIntraAng[dirMode-2](pDst, dstStride, refMain, dirMode);<br>
}<br>
}<br>
<br>
<br>_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="http://mailman.videolan.org/listinfo/x265-devel" target="_blank">http://mailman.videolan.org/listinfo/x265-devel</a><br>
<br></blockquote></div><br><br clear="all"><div><br></div>-- <br>Steve Borho
</div></div>