[x265] [PATCH 1 of 2] primitives: 8 bit : PredIntraAng4x4 function table implementation
Steve Borho
steve at borho.org
Wed Jun 26 09:26:14 CEST 2013
On Thu, Jun 27, 2013 at 2:37 AM, <mandar at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Mandar Gurav
> # Date 1372240787 25200
> # Node ID e156dc24f05f4c2e6770fde1b46754cce640a96b
> # Parent 7a2555036e8db57557f655f3ed49e38ab6d784dd
> primitives: 8 bit : PredIntraAng4x4 function table implementation
>
folded, tweaked, and pushed
> diff -r 7a2555036e8d -r e156dc24f05f source/common/vec/intrapred.inc
> --- a/source/common/vec/intrapred.inc Mon Jun 24 22:26:33 2013 -0500
> +++ b/source/common/vec/intrapred.inc Wed Jun 26 02:59:47 2013 -0700
> @@ -1672,33 +1672,1229 @@
> }
>
> #else /* if HIGH_BIT_DEPTH */
> +
> +void PredIntraAng4_32(pixel* pDst, int dstStride, pixel *refMain, int
> dirMode) // 4x4 intra prediction for the +32 step (8-bit path): each row is a plain copy taken one sample further along refMain
> +{ // Output row k (k = 0..3) receives refMain[k + 2 .. k + 5]; rows are dstStride pixels apart in pDst
> + Vec16uc tmp16_1, tmp16_2;
> + dirMode++; // dirMode is not otherwise used here; presumably touched only to avoid an unused-parameter warning
> + tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 2); // presumably reads 8 bytes; only the first 4 are stored
> + store_partial(const_int(4), pDst, tmp16_1); // row 0 = refMain[2..5]
> + tmp16_2 = (Vec16uc)load_partial(const_int(8), refMain + 3);
> + store_partial(const_int(4), pDst + dstStride, tmp16_2); // row 1 = refMain[3..6]
> + tmp16_2 = (Vec16uc)load_partial(const_int(8), refMain + 4);
> + store_partial(const_int(4), pDst + 2 * dstStride, tmp16_2); // row 2 = refMain[4..7]
> + tmp16_2 = (Vec16uc)load_partial(const_int(8), refMain + 5);
> + store_partial(const_int(4), pDst + 3 * dstStride, tmp16_2); // row 3 = refMain[5..8]
> +}
> +
> +void PredIntraAng4_26(pixel* pDst, int dstStride, pixel *refMain, int
> dirMode) // 4x4 angular intra prediction (8-bit path) for a fixed per-row step of +26/32 sample along refMain
> +{ // Rows go to pDst, dstStride pixels apart; dirMode only decides whether the block is transposed on store
> + Vec8s row11, row12, row21, row22, row31, row32, row41, row42; // rowN1 / rowN2: the two neighbouring reference vectors blended into output row N
> + Vec16uc tmp16_1, tmp16_2;
> + Vec2uq tmp2uq;
> + Vec8s v_deltaFract, v_deltaPos(0), thirty2(32), thirty1(31),
> v_ipAngle(0);
> + bool modeHor = (dirMode < 18); // modes below 18 take the transposed store path at the end
> + tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 1); // presumably loads the 8 bytes refMain[1..8]
> +
> + row11 = extend_low(tmp16_1); //refMain[1..4] in lanes 0-3 (only lanes 0-3 are ever stored)
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 8; // shift the 64-bit lane down one byte: lane i now holds what was byte i + 1
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row12 = extend_low(tmp16_2); //refMain[2..5]
> +
> + row21 = row12;
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 16;
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row22 = extend_low(tmp16_2); //refMain[3..6]
> +
> + row31 = row22;
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 24;
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row32 = extend_low(tmp16_2); //refMain[4..7]
> +
> + row41 = row32;
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 32;
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row42 = extend_low(tmp16_2); //refMain[5..8]
> +
> + v_deltaPos = v_ipAngle = 26; // row 1 position 26/32: whole part 0 (already applied by the tap choice above), fraction 26
> +
> + v_deltaFract = v_deltaPos & thirty1; // fractional part (position & 31) is the weight of the second tap
> + row11 = ((thirty2 - v_deltaFract) * row11 + (v_deltaFract * row12) +
> 16) >> 5; // rounded blend: ((32 - f) * first + f * second + 16) >> 5
> +
> + //row2: position 52/32 -> whole part 1, fraction 20
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row21 = ((thirty2 - v_deltaFract) * row21 + (v_deltaFract * row22) +
> 16) >> 5;
> +
> + //row3: position 78/32 -> whole part 2, fraction 14
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row31 = ((thirty2 - v_deltaFract) * row31 + (v_deltaFract * row32) +
> 16) >> 5;
> +
> + //row4: position 104/32 -> whole part 3, fraction 8
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row41 = ((thirty2 - v_deltaFract) * row41 + (v_deltaFract * row42) +
> 16) >> 5;
> +
> + // Modes below 18 (modeHor) store the transpose of the computed block; other modes store it row by row
> + if (modeHor)
> + {
> + Vec8s tmp1, tmp2, tmp3, tmp4;
> +
> + tmp1 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row11, row31); // interleave rows 1 and 3 (assuming indices 0-7 select from the first operand, 8-15 from the second)
> + tmp2 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row21, row41); // interleave rows 2 and 4
> +
> + tmp3 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(tmp1, tmp2); // columns 0 and 1 of the block, four samples each
> + tmp4 = blend8s<4, 12, 5, 13, 6, 14, 7, 15>(tmp1, tmp2); // columns 2 and 3
> +
> + tmp16_1 = compress_unsafe(tmp3, tmp3); // presumably packs the 16-bit lanes down to bytes
> + store_partial(const_int(4), pDst, tmp16_1); // bytes 0..3: column 0 becomes output row 0
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq >>= 32; // move bytes 4..7 (column 1) to the bottom
> + store_partial(const_int(4), pDst + dstStride, tmp2uq);
> +
> + tmp16_1 = compress_unsafe(tmp4, tmp4);
> + store_partial(const_int(4), pDst + (2 * dstStride), tmp16_1); // column 2 becomes output row 2
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq >>= 32; // column 3
> + store_partial(const_int(4), pDst + (3 * dstStride), tmp2uq);
> + }
> + else // no transpose: each interpolated row is narrowed to bytes and stored directly
> + {
> + store_partial(const_int(4), pDst, compress_unsafe(row11, row11));
> + store_partial(const_int(4), pDst + (dstStride),
> compress_unsafe(row21, row21));
> + store_partial(const_int(4), pDst + (2 * dstStride),
> compress_unsafe(row31, row31));
> + store_partial(const_int(4), pDst + (3 * dstStride),
> compress_unsafe(row41, row41));
> + }
> +}
> +
> +void PredIntraAng4_21(pixel* pDst, int dstStride, pixel *refMain, int
> dirMode) // 4x4 angular intra prediction (8-bit path) for a fixed per-row step of +21/32 sample along refMain
> +{ // Rows go to pDst, dstStride pixels apart; dirMode only decides whether the block is transposed on store
> + Vec8s row11, row12, row21, row22, row31, row32, row41, row42; // rowN1 / rowN2: the two neighbouring reference vectors blended into output row N
> + Vec16uc tmp16_1, tmp16_2;
> + Vec2uq tmp2uq;
> + Vec8s v_deltaFract, v_deltaPos(0), thirty2(32), thirty1(31),
> v_ipAngle(0);
> + bool modeHor = (dirMode < 18); // modes below 18 take the transposed store path at the end
> +
> + tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 1); // presumably loads the 8 bytes refMain[1..8]
> +
> + row11 = extend_low(tmp16_1); //refMain[1..4] in lanes 0-3 (only lanes 0-3 are ever stored)
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 8; // shift the 64-bit lane down one byte: lane i now holds what was byte i + 1
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row12 = extend_low(tmp16_2); //refMain[2..5]
> +
> + row21 = row12;
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 16;
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row22 = extend_low(tmp16_2); //refMain[3..6]
> +
> + row31 = row21; // row 3 has the same whole-sample offset as row 2, so it reuses the same taps
> + row32 = row22;
> +
> + row41 = row22;
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 24;
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row42 = extend_low(tmp16_2); //refMain[4..7]
> +
> + v_deltaPos = v_ipAngle = 21; // row 1 position 21/32: whole part 0 (already applied by the tap choice above), fraction 21
> +
> + v_deltaFract = v_deltaPos & thirty1; // fractional part (position & 31) is the weight of the second tap
> + row11 = ((thirty2 - v_deltaFract) * row11 + (v_deltaFract * row12) +
> 16) >> 5; // rounded blend: ((32 - f) * first + f * second + 16) >> 5
> +
> + //row2: position 42/32 -> whole part 1, fraction 10
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row21 = ((thirty2 - v_deltaFract) * row21 + (v_deltaFract * row22) +
> 16) >> 5;
> +
> + //row3: position 63/32 -> whole part 1, fraction 31
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row31 = ((thirty2 - v_deltaFract) * row31 + (v_deltaFract * row32) +
> 16) >> 5;
> +
> + //row4: position 84/32 -> whole part 2, fraction 20
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row41 = ((thirty2 - v_deltaFract) * row41 + (v_deltaFract * row42) +
> 16) >> 5;
> +
> + // Modes below 18 (modeHor) store the transpose of the computed block; other modes store it row by row
> + if (modeHor)
> + {
> + Vec8s tmp1, tmp2, tmp3, tmp4;
> +
> + tmp1 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row11, row31); // interleave rows 1 and 3 (assuming indices 0-7 select from the first operand, 8-15 from the second)
> + tmp2 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row21, row41); // interleave rows 2 and 4
> +
> + tmp3 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(tmp1, tmp2); // columns 0 and 1 of the block, four samples each
> + tmp4 = blend8s<4, 12, 5, 13, 6, 14, 7, 15>(tmp1, tmp2); // columns 2 and 3
> +
> + tmp16_1 = compress_unsafe(tmp3, tmp3); // presumably packs the 16-bit lanes down to bytes
> + store_partial(const_int(4), pDst, tmp16_1); // bytes 0..3: column 0 becomes output row 0
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq >>= 32; // move bytes 4..7 (column 1) to the bottom
> + store_partial(const_int(4), pDst + dstStride, tmp2uq);
> +
> + tmp16_1 = compress_unsafe(tmp4, tmp4);
> + store_partial(const_int(4), pDst + (2 * dstStride), tmp16_1); // column 2 becomes output row 2
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq >>= 32; // column 3
> + store_partial(const_int(4), pDst + (3 * dstStride), tmp2uq);
> + }
> + else // no transpose: each interpolated row is narrowed to bytes and stored directly
> + {
> + store_partial(const_int(4), pDst, compress_unsafe(row11, row11));
> + store_partial(const_int(4), pDst + (dstStride),
> compress_unsafe(row21, row21));
> + store_partial(const_int(4), pDst + (2 * dstStride),
> compress_unsafe(row31, row31));
> + store_partial(const_int(4), pDst + (3 * dstStride),
> compress_unsafe(row41, row41));
> + }
> +}
> +
> +void PredIntraAng4_17(pixel* pDst, int dstStride, pixel *refMain, int
> dirMode) // 4x4 angular intra prediction (8-bit path) for a fixed per-row step of +17/32 sample along refMain
> +{ // Rows go to pDst, dstStride pixels apart; dirMode only decides whether the block is transposed on store
> + Vec8s row11, row12, row21, row22, row31, row32, row41, row42; // rowN1 / rowN2: the two neighbouring reference vectors blended into output row N
> + Vec16uc tmp16_1, tmp16_2;
> + Vec2uq tmp2uq;
> + Vec8s v_deltaFract, v_deltaPos(0), thirty2(32), thirty1(31),
> v_ipAngle(0);
> + bool modeHor = (dirMode < 18); // modes below 18 take the transposed store path at the end
> +
> + tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 1); // presumably loads the 8 bytes refMain[1..8]
> +
> + row11 = extend_low(tmp16_1); //refMain[1..4] in lanes 0-3 (only lanes 0-3 are ever stored)
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 8; // shift the 64-bit lane down one byte: lane i now holds what was byte i + 1
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row12 = extend_low(tmp16_2); //refMain[2..5]
> +
> + row21 = row12;
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 16;
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row22 = extend_low(tmp16_2); //refMain[3..6]
> +
> + row31 = row21; // row 3 has the same whole-sample offset as row 2, so it reuses the same taps
> + row32 = row22;
> +
> + row41 = row22;
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 24;
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row42 = extend_low(tmp16_2); //refMain[4..7]
> +
> + v_deltaPos = v_ipAngle = 17; // row 1 position 17/32: whole part 0 (already applied by the tap choice above), fraction 17
> +
> + v_deltaFract = v_deltaPos & thirty1; // fractional part (position & 31) is the weight of the second tap
> + row11 = ((thirty2 - v_deltaFract) * row11 + (v_deltaFract * row12) +
> 16) >> 5; // rounded blend: ((32 - f) * first + f * second + 16) >> 5
> +
> + //row2: position 34/32 -> whole part 1, fraction 2
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row21 = ((thirty2 - v_deltaFract) * row21 + (v_deltaFract * row22) +
> 16) >> 5;
> +
> + //row3: position 51/32 -> whole part 1, fraction 19
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row31 = ((thirty2 - v_deltaFract) * row31 + (v_deltaFract * row32) +
> 16) >> 5;
> +
> + //row4: position 68/32 -> whole part 2, fraction 4
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row41 = ((thirty2 - v_deltaFract) * row41 + (v_deltaFract * row42) +
> 16) >> 5;
> +
> + // Modes below 18 (modeHor) store the transpose of the computed block; other modes store it row by row
> + if (modeHor)
> + {
> + Vec8s tmp1, tmp2, tmp3, tmp4;
> +
> + tmp1 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row11, row31); // interleave rows 1 and 3 (assuming indices 0-7 select from the first operand, 8-15 from the second)
> + tmp2 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row21, row41); // interleave rows 2 and 4
> +
> + tmp3 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(tmp1, tmp2); // columns 0 and 1 of the block, four samples each
> + tmp4 = blend8s<4, 12, 5, 13, 6, 14, 7, 15>(tmp1, tmp2); // columns 2 and 3
> +
> + tmp16_1 = compress_unsafe(tmp3, tmp3); // presumably packs the 16-bit lanes down to bytes
> + store_partial(const_int(4), pDst, tmp16_1); // bytes 0..3: column 0 becomes output row 0
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq >>= 32; // move bytes 4..7 (column 1) to the bottom
> + store_partial(const_int(4), pDst + dstStride, tmp2uq);
> +
> + tmp16_1 = compress_unsafe(tmp4, tmp4);
> + store_partial(const_int(4), pDst + (2 * dstStride), tmp16_1); // column 2 becomes output row 2
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq >>= 32; // column 3
> + store_partial(const_int(4), pDst + (3 * dstStride), tmp2uq);
> + }
> + else // no transpose: each interpolated row is narrowed to bytes and stored directly
> + {
> + store_partial(const_int(4), pDst, compress_unsafe(row11, row11));
> + store_partial(const_int(4), pDst + (dstStride),
> compress_unsafe(row21, row21));
> + store_partial(const_int(4), pDst + (2 * dstStride),
> compress_unsafe(row31, row31));
> + store_partial(const_int(4), pDst + (3 * dstStride),
> compress_unsafe(row41, row41));
> + }
> +}
> +
> +void PredIntraAng4_13(pixel* pDst, int dstStride, pixel *refMain, int
> dirMode) // 4x4 angular intra prediction (8-bit path) for a fixed per-row step of +13/32 sample along refMain
> +{ // Rows go to pDst, dstStride pixels apart; dirMode only decides whether the block is transposed on store
> + Vec8s row11, row12, row21, row22, row31, row32, row41, row42; // rowN1 / rowN2: the two neighbouring reference vectors blended into output row N
> + Vec16uc tmp16_1, tmp16_2;
> + Vec2uq tmp2uq;
> + Vec8s v_deltaFract, v_deltaPos(0), thirty2(32), thirty1(31),
> v_ipAngle(0);
> + bool modeHor = (dirMode < 18); // modes below 18 take the transposed store path at the end
> +
> + tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 1); // presumably loads the 8 bytes refMain[1..8]
> +
> + row11 = extend_low(tmp16_1); //refMain[1..4] in lanes 0-3 (only lanes 0-3 are ever stored)
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 8; // shift the 64-bit lane down one byte: lane i now holds what was byte i + 1
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row12 = extend_low(tmp16_2); //refMain[2..5]
> +
> + row21 = row11; //refMain[1..4]: row 2 still has whole-sample offset 0
> + row22 = row12;
> + row31 = row12; //refMain[2..5]
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 16;
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row32 = extend_low(tmp16_2); //refMain[3..6]
> +
> + row41 = row31; //refMain[2..5]
> + row42 = row32;
> +
> + v_deltaPos = v_ipAngle = 13; // row 1 position 13/32: whole part 0 (already applied by the tap choice above), fraction 13
> +
> + v_deltaFract = v_deltaPos & thirty1; // fractional part (position & 31) is the weight of the second tap
> + row11 = ((thirty2 - v_deltaFract) * row11 + (v_deltaFract * row12) +
> 16) >> 5; // rounded blend: ((32 - f) * first + f * second + 16) >> 5
> +
> + //row2: position 26/32 -> whole part 0, fraction 26
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row21 = ((thirty2 - v_deltaFract) * row21 + (v_deltaFract * row22) +
> 16) >> 5;
> +
> + //row3: position 39/32 -> whole part 1, fraction 7
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row31 = ((thirty2 - v_deltaFract) * row31 + (v_deltaFract * row32) +
> 16) >> 5;
> +
> + //row4: position 52/32 -> whole part 1, fraction 20
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row41 = ((thirty2 - v_deltaFract) * row41 + (v_deltaFract * row42) +
> 16) >> 5;
> +
> + // Modes below 18 (modeHor) store the transpose of the computed block; other modes store it row by row
> + if (modeHor)
> + {
> + Vec8s tmp1, tmp2, tmp3, tmp4;
> +
> + tmp1 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row11, row31); // interleave rows 1 and 3 (assuming indices 0-7 select from the first operand, 8-15 from the second)
> + tmp2 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row21, row41); // interleave rows 2 and 4
> +
> + tmp3 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(tmp1, tmp2); // columns 0 and 1 of the block, four samples each
> + tmp4 = blend8s<4, 12, 5, 13, 6, 14, 7, 15>(tmp1, tmp2); // columns 2 and 3
> +
> + tmp16_1 = compress_unsafe(tmp3, tmp3); // presumably packs the 16-bit lanes down to bytes
> + store_partial(const_int(4), pDst, tmp16_1); // bytes 0..3: column 0 becomes output row 0
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq >>= 32; // move bytes 4..7 (column 1) to the bottom
> + store_partial(const_int(4), pDst + dstStride, tmp2uq);
> +
> + tmp16_1 = compress_unsafe(tmp4, tmp4);
> + store_partial(const_int(4), pDst + (2 * dstStride), tmp16_1); // column 2 becomes output row 2
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq >>= 32; // column 3
> + store_partial(const_int(4), pDst + (3 * dstStride), tmp2uq);
> + }
> + else // no transpose: each interpolated row is narrowed to bytes and stored directly
> + {
> + store_partial(const_int(4), pDst, compress_unsafe(row11, row11));
> + store_partial(const_int(4), pDst + (dstStride),
> compress_unsafe(row21, row21));
> + store_partial(const_int(4), pDst + (2 * dstStride),
> compress_unsafe(row31, row31));
> + store_partial(const_int(4), pDst + (3 * dstStride),
> compress_unsafe(row41, row41));
> + }
> +}
> +
> +void PredIntraAng4_9(pixel* pDst, int dstStride, pixel *refMain, int
> dirMode) // 4x4 angular intra prediction (8-bit path) for a fixed per-row step of +9/32 sample along refMain
> +{ // Rows go to pDst, dstStride pixels apart; dirMode only decides whether the block is transposed on store
> + Vec8s row11, row12, row21, row22, row31, row32, row41, row42; // rowN1 / rowN2: the two neighbouring reference vectors blended into output row N
> + Vec16uc tmp16_1, tmp16_2;
> + Vec2uq tmp2uq;
> + Vec8s v_deltaFract, v_deltaPos(0), thirty2(32), thirty1(31),
> v_ipAngle(0);
> + bool modeHor = (dirMode < 18); // modes below 18 take the transposed store path at the end
> +
> + tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 1); // presumably loads the 8 bytes refMain[1..8]
> + row11 = extend_low(tmp16_1); //refMain[1..4] in lanes 0-3 (only lanes 0-3 are ever stored)
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 8; // shift the 64-bit lane down one byte: lane i now holds what was byte i + 1
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row12 = extend_low(tmp16_2); //refMain[2..5]
> + row21 = row11; //refMain[1..4]: rows 1-3 all have whole-sample offset 0
> + row22 = row12;
> + row31 = row11;
> + row32 = row12;
> + row41 = row12;
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 16;
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row42 = extend_low(tmp16_2); //refMain[3..6]
> +
> + v_deltaPos = v_ipAngle = 9; // row 1 position 9/32: whole part 0 (already applied by the tap choice above), fraction 9
> +
> + v_deltaFract = v_deltaPos & thirty1; // fractional part (position & 31) is the weight of the second tap
> + row11 = ((thirty2 - v_deltaFract) * row11 + (v_deltaFract * row12) +
> 16) >> 5; // rounded blend: ((32 - f) * first + f * second + 16) >> 5
> +
> + //row2: position 18/32 -> whole part 0, fraction 18
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row21 = ((thirty2 - v_deltaFract) * row21 + (v_deltaFract * row22) +
> 16) >> 5;
> +
> + //row3: position 27/32 -> whole part 0, fraction 27
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row31 = ((thirty2 - v_deltaFract) * row31 + (v_deltaFract * row32) +
> 16) >> 5;
> +
> + //row4: position 36/32 -> whole part 1, fraction 4
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row41 = ((thirty2 - v_deltaFract) * row41 + (v_deltaFract * row42) +
> 16) >> 5;
> +
> + // Modes below 18 (modeHor) store the transpose of the computed block; other modes store it row by row
> + if (modeHor)
> + {
> + Vec8s tmp1, tmp2, tmp3, tmp4;
> +
> + tmp1 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row11, row31); // interleave rows 1 and 3 (assuming indices 0-7 select from the first operand, 8-15 from the second)
> + tmp2 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row21, row41); // interleave rows 2 and 4
> +
> + tmp3 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(tmp1, tmp2); // columns 0 and 1 of the block, four samples each
> + tmp4 = blend8s<4, 12, 5, 13, 6, 14, 7, 15>(tmp1, tmp2); // columns 2 and 3
> +
> + tmp16_1 = compress_unsafe(tmp3, tmp3); // presumably packs the 16-bit lanes down to bytes
> + store_partial(const_int(4), pDst, tmp16_1); // bytes 0..3: column 0 becomes output row 0
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq >>= 32; // move bytes 4..7 (column 1) to the bottom
> + store_partial(const_int(4), pDst + dstStride, tmp2uq);
> +
> + tmp16_1 = compress_unsafe(tmp4, tmp4);
> + store_partial(const_int(4), pDst + (2 * dstStride), tmp16_1); // column 2 becomes output row 2
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq >>= 32; // column 3
> + store_partial(const_int(4), pDst + (3 * dstStride), tmp2uq);
> + }
> + else // no transpose: each interpolated row is narrowed to bytes and stored directly
> + {
> + store_partial(const_int(4), pDst, compress_unsafe(row11, row11));
> + store_partial(const_int(4), pDst + (dstStride),
> compress_unsafe(row21, row21));
> + store_partial(const_int(4), pDst + (2 * dstStride),
> compress_unsafe(row31, row31));
> + store_partial(const_int(4), pDst + (3 * dstStride),
> compress_unsafe(row41, row41));
> + }
> +}
> +
> +void PredIntraAng4_5(pixel* pDst, int dstStride, pixel *refMain, int
> dirMode) // 4x4 angular intra prediction (8-bit path) for a fixed per-row step of +5/32 sample along refMain
> +{ // Rows go to pDst, dstStride pixels apart; dirMode only decides whether the block is transposed on store
> + Vec8s row11, row12, row21, row22, row31, row32, row41, row42; // rowN1 / rowN2: the two neighbouring reference vectors blended into output row N
> + Vec16uc tmp16_1, tmp16_2;
> + Vec2uq tmp2uq;
> + Vec8s v_deltaFract, v_deltaPos(0), thirty2(32), thirty1(31),
> v_ipAngle(0);
> + bool modeHor = (dirMode < 18); // modes below 18 take the transposed store path at the end
> +
> + tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 1); // presumably loads the 8 bytes refMain[1..8]
> + row11 = extend_low(tmp16_1); //refMain[1..4] in lanes 0-3 (only lanes 0-3 are ever stored)
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 8; // shift the 64-bit lane down one byte: lane i now holds what was byte i + 1
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row12 = extend_low(tmp16_2); //refMain[2..5]
> + row21 = row11; //refMain[1..4]: every row has whole-sample offset 0, so all rows share the same taps
> + row22 = row12;
> + row31 = row11;
> + row32 = row12;
> + row41 = row11;
> + row42 = row12;
> +
> + v_deltaPos = v_ipAngle = 5; // row 1 position 5/32: fraction 5
> +
> + v_deltaFract = v_deltaPos & thirty1; // fractional part (position & 31) is the weight of the second tap
> + row11 = ((thirty2 - v_deltaFract) * row11 + (v_deltaFract * row12) +
> 16) >> 5; // rounded blend: ((32 - f) * first + f * second + 16) >> 5
> +
> + //row2: position 10/32 -> fraction 10
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row21 = ((thirty2 - v_deltaFract) * row21 + (v_deltaFract * row22) +
> 16) >> 5;
> +
> + //row3: position 15/32 -> fraction 15
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row31 = ((thirty2 - v_deltaFract) * row31 + (v_deltaFract * row32) +
> 16) >> 5;
> +
> + //row4: position 20/32 -> fraction 20
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row41 = ((thirty2 - v_deltaFract) * row41 + (v_deltaFract * row42) +
> 16) >> 5;
> +
> + // Modes below 18 (modeHor) store the transpose of the computed block; other modes store it row by row
> + if (modeHor)
> + {
> + Vec8s tmp1, tmp2, tmp3, tmp4;
> +
> + tmp1 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row11, row31); // interleave rows 1 and 3 (assuming indices 0-7 select from the first operand, 8-15 from the second)
> + tmp2 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row21, row41); // interleave rows 2 and 4
> +
> + tmp3 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(tmp1, tmp2); // columns 0 and 1 of the block, four samples each
> + tmp4 = blend8s<4, 12, 5, 13, 6, 14, 7, 15>(tmp1, tmp2); // columns 2 and 3
> +
> + tmp16_1 = compress_unsafe(tmp3, tmp3); // presumably packs the 16-bit lanes down to bytes
> + store_partial(const_int(4), pDst, tmp16_1); // bytes 0..3: column 0 becomes output row 0
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq >>= 32; // move bytes 4..7 (column 1) to the bottom
> + store_partial(const_int(4), pDst + dstStride, tmp2uq);
> +
> + tmp16_1 = compress_unsafe(tmp4, tmp4);
> + store_partial(const_int(4), pDst + (2 * dstStride), tmp16_1); // column 2 becomes output row 2
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq >>= 32; // column 3
> + store_partial(const_int(4), pDst + (3 * dstStride), tmp2uq);
> + }
> + else // no transpose: each interpolated row is narrowed to bytes and stored directly
> + {
> + store_partial(const_int(4), pDst, compress_unsafe(row11, row11));
> + store_partial(const_int(4), pDst + (dstStride),
> compress_unsafe(row21, row21));
> + store_partial(const_int(4), pDst + (2 * dstStride),
> compress_unsafe(row31, row31));
> + store_partial(const_int(4), pDst + (3 * dstStride),
> compress_unsafe(row41, row41));
> + }
> +}
> +
> +void PredIntraAng4_2(pixel* pDst, int dstStride, pixel *refMain, int
> dirMode) // 4x4 angular intra prediction (8-bit path) for a fixed per-row step of +2/32 sample along refMain
> +{ // Rows go to pDst, dstStride pixels apart; dirMode only decides whether the block is transposed on store
> + Vec8s row11, row12, row21, row22, row31, row32, row41, row42; // rowN1 / rowN2: the two neighbouring reference vectors blended into output row N
> + Vec16uc tmp16_1, tmp16_2;
> + Vec2uq tmp2uq;
> + Vec8s v_deltaFract, v_deltaPos(0), thirty2(32), thirty1(31),
> v_ipAngle(0);
> + bool modeHor = (dirMode < 18); // modes below 18 take the transposed store path at the end
> +
> + tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 1); // presumably loads the 8 bytes refMain[1..8]
> + row11 = extend_low(tmp16_1); //refMain[1..4] in lanes 0-3 (only lanes 0-3 are ever stored)
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 8; // shift the 64-bit lane down one byte: lane i now holds what was byte i + 1
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row12 = extend_low(tmp16_2); //refMain[2..5]
> + row21 = row11; //refMain[1..4]: every row has whole-sample offset 0, so all rows share the same taps
> + row22 = row12;
> + row31 = row11;
> + row32 = row12;
> + row41 = row11;
> + row42 = row12;
> +
> + v_deltaPos = v_ipAngle = 2; // row 1 position 2/32: fraction 2
> +
> + v_deltaFract = v_deltaPos & thirty1; // fractional part (position & 31) is the weight of the second tap
> + row11 = ((thirty2 - v_deltaFract) * row11 + (v_deltaFract * row12) +
> 16) >> 5; // rounded blend: ((32 - f) * first + f * second + 16) >> 5
> +
> + //row2: position 4/32 -> fraction 4
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row21 = ((thirty2 - v_deltaFract) * row21 + (v_deltaFract * row22) +
> 16) >> 5;
> +
> + //row3: position 6/32 -> fraction 6
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row31 = ((thirty2 - v_deltaFract) * row31 + (v_deltaFract * row32) +
> 16) >> 5;
> +
> + //row4: position 8/32 -> fraction 8
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row41 = ((thirty2 - v_deltaFract) * row41 + (v_deltaFract * row42) +
> 16) >> 5;
> +
> + // Modes below 18 (modeHor) store the transpose of the computed block; other modes store it row by row
> + if (modeHor)
> + {
> + Vec8s tmp1, tmp2, tmp3, tmp4;
> +
> + tmp1 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row11, row31); // interleave rows 1 and 3 (assuming indices 0-7 select from the first operand, 8-15 from the second)
> + tmp2 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row21, row41); // interleave rows 2 and 4
> +
> + tmp3 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(tmp1, tmp2); // columns 0 and 1 of the block, four samples each
> + tmp4 = blend8s<4, 12, 5, 13, 6, 14, 7, 15>(tmp1, tmp2); // columns 2 and 3
> +
> + tmp16_1 = compress_unsafe(tmp3, tmp3); // presumably packs the 16-bit lanes down to bytes
> + store_partial(const_int(4), pDst, tmp16_1); // bytes 0..3: column 0 becomes output row 0
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq >>= 32; // move bytes 4..7 (column 1) to the bottom
> + store_partial(const_int(4), pDst + dstStride, tmp2uq);
> +
> + tmp16_1 = compress_unsafe(tmp4, tmp4);
> + store_partial(const_int(4), pDst + (2 * dstStride), tmp16_1); // column 2 becomes output row 2
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq >>= 32; // column 3
> + store_partial(const_int(4), pDst + (3 * dstStride), tmp2uq);
> + }
> + else // no transpose: each interpolated row is narrowed to bytes and stored directly
> + {
> + store_partial(const_int(4), pDst, compress_unsafe(row11, row11));
> + store_partial(const_int(4), pDst + (dstStride),
> compress_unsafe(row21, row21));
> + store_partial(const_int(4), pDst + (2 * dstStride),
> compress_unsafe(row31, row31));
> + store_partial(const_int(4), pDst + (3 * dstStride),
> compress_unsafe(row41, row41));
> + }
> +}
> +
> +void PredIntraAng4_m_2(pixel* pDst, int dstStride, pixel *refMain, int
> dirMode) // 4x4 angular intra prediction (8-bit path) for a fixed per-row step of -2/32 sample along refMain
> +{ // Rows go to pDst, dstStride pixels apart; dirMode only decides whether the block is transposed on store
> + Vec8s row11, row12, row21, row22, row31, row32, row41, row42; // rowN1 / rowN2: the two neighbouring reference vectors blended into output row N
> + Vec16uc tmp16_1, tmp16_2;
> + Vec2uq tmp2uq;
> + Vec8s v_deltaFract, v_deltaPos(0), thirty2(32), thirty1(31),
> v_ipAngle(0);
> + bool modeHor = (dirMode < 18); // modes below 18 take the transposed store path at the end
> +
> + tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain); // presumably loads the 8 bytes refMain[0..7]; the negative step starts one sample earlier than the positive-angle functions
> + row11 = extend_low(tmp16_1); //refMain[0..3] in lanes 0-3 (only lanes 0-3 are ever stored)
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 8; // shift the 64-bit lane down one byte: lane i now holds what was byte i + 1
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row12 = extend_low(tmp16_2); //refMain[1..4]
> + row21 = row11; //refMain[0..3]: every row has the same whole-sample offset, so all rows share the same taps
> + row22 = row12;
> + row31 = row11;
> + row32 = row12;
> + row41 = row11;
> + row42 = row12;
> +
> + v_deltaPos = v_ipAngle = -2; // row 1 position -2/32: fraction (-2 & 31) = 30
> +
> + v_deltaFract = v_deltaPos & thirty1; // fractional part (position & 31) is the weight of the second tap
> + row11 = ((thirty2 - v_deltaFract) * row11 + (v_deltaFract * row12) +
> 16) >> 5; // rounded blend: ((32 - f) * first + f * second + 16) >> 5
> +
> + //row2: position -4/32 -> fraction 28
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row21 = ((thirty2 - v_deltaFract) * row21 + (v_deltaFract * row22) +
> 16) >> 5;
> +
> + //row3: position -6/32 -> fraction 26
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row31 = ((thirty2 - v_deltaFract) * row31 + (v_deltaFract * row32) +
> 16) >> 5;
> +
> + //row4: position -8/32 -> fraction 24
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row41 = ((thirty2 - v_deltaFract) * row41 + (v_deltaFract * row42) +
> 16) >> 5;
> +
> + // Modes below 18 (modeHor) store the transpose of the computed block; other modes store it row by row
> + if (modeHor)
> + {
> + Vec8s tmp1, tmp2, tmp3, tmp4;
> +
> + tmp1 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row11, row31); // interleave rows 1 and 3 (assuming indices 0-7 select from the first operand, 8-15 from the second)
> + tmp2 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row21, row41); // interleave rows 2 and 4
> +
> + tmp3 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(tmp1, tmp2); // columns 0 and 1 of the block, four samples each
> + tmp4 = blend8s<4, 12, 5, 13, 6, 14, 7, 15>(tmp1, tmp2); // columns 2 and 3
> +
> + tmp16_1 = compress_unsafe(tmp3, tmp3); // presumably packs the 16-bit lanes down to bytes
> + store_partial(const_int(4), pDst, tmp16_1); // bytes 0..3: column 0 becomes output row 0
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq >>= 32; // move bytes 4..7 (column 1) to the bottom
> + store_partial(const_int(4), pDst + dstStride, tmp2uq);
> +
> + tmp16_1 = compress_unsafe(tmp4, tmp4);
> + store_partial(const_int(4), pDst + (2 * dstStride), tmp16_1); // column 2 becomes output row 2
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq >>= 32; // column 3
> + store_partial(const_int(4), pDst + (3 * dstStride), tmp2uq);
> + }
> + else // no transpose: each interpolated row is narrowed to bytes and stored directly
> + {
> + store_partial(const_int(4), pDst, compress_unsafe(row11, row11));
> + store_partial(const_int(4), pDst + (dstStride),
> compress_unsafe(row21, row21));
> + store_partial(const_int(4), pDst + (2 * dstStride),
> compress_unsafe(row31, row31));
> + store_partial(const_int(4), pDst + (3 * dstStride),
> compress_unsafe(row41, row41));
> + }
> +}
> +
> +void PredIntraAng4_m_5(pixel* pDst, int dstStride, pixel *refMain, int
> dirMode) // 4x4 angular intra prediction (8-bit path) for a fixed per-row step of -5/32 sample along refMain
> +{ // Rows go to pDst, dstStride pixels apart; dirMode only decides whether the block is transposed on store
> + Vec8s row11, row12, row21, row22, row31, row32, row41, row42; // rowN1 / rowN2: the two neighbouring reference vectors blended into output row N
> + Vec16uc tmp16_1, tmp16_2;
> + Vec2uq tmp2uq;
> + Vec8s v_deltaFract, v_deltaPos(0), thirty2(32), thirty1(31),
> v_ipAngle(0);
> + bool modeHor = (dirMode < 18); // modes below 18 take the transposed store path at the end
> +
> + tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain); // presumably loads the 8 bytes refMain[0..7]; the negative step starts one sample earlier than the positive-angle functions
> + row11 = extend_low(tmp16_1); //refMain[0..3] in lanes 0-3 (only lanes 0-3 are ever stored)
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 8; // shift the 64-bit lane down one byte: lane i now holds what was byte i + 1
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row12 = extend_low(tmp16_2); //refMain[1..4]
> + row21 = row11; //refMain[0..3]: every row has the same whole-sample offset, so all rows share the same taps
> + row22 = row12;
> + row31 = row11;
> + row32 = row12;
> + row41 = row11;
> + row42 = row12;
> +
> + v_deltaPos = v_ipAngle = -5; // row 1 position -5/32: fraction (-5 & 31) = 27
> +
> + v_deltaFract = v_deltaPos & thirty1; // fractional part (position & 31) is the weight of the second tap
> + row11 = ((thirty2 - v_deltaFract) * row11 + (v_deltaFract * row12) +
> 16) >> 5; // rounded blend: ((32 - f) * first + f * second + 16) >> 5
> +
> + //row2: position -10/32 -> fraction 22
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row21 = ((thirty2 - v_deltaFract) * row21 + (v_deltaFract * row22) +
> 16) >> 5;
> +
> + //row3: position -15/32 -> fraction 17
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row31 = ((thirty2 - v_deltaFract) * row31 + (v_deltaFract * row32) +
> 16) >> 5;
> +
> + //row4: position -20/32 -> fraction 12
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row41 = ((thirty2 - v_deltaFract) * row41 + (v_deltaFract * row42) +
> 16) >> 5;
> +
> + // Modes below 18 (modeHor) store the transpose of the computed block; other modes store it row by row
> + if (modeHor)
> + {
> + Vec8s tmp1, tmp2, tmp3, tmp4;
> +
> + tmp1 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row11, row31); // interleave rows 1 and 3 (assuming indices 0-7 select from the first operand, 8-15 from the second)
> + tmp2 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row21, row41); // interleave rows 2 and 4
> +
> + tmp3 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(tmp1, tmp2); // columns 0 and 1 of the block, four samples each
> + tmp4 = blend8s<4, 12, 5, 13, 6, 14, 7, 15>(tmp1, tmp2); // columns 2 and 3
> +
> + tmp16_1 = compress_unsafe(tmp3, tmp3); // presumably packs the 16-bit lanes down to bytes
> + store_partial(const_int(4), pDst, tmp16_1); // bytes 0..3: column 0 becomes output row 0
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq >>= 32; // move bytes 4..7 (column 1) to the bottom
> + store_partial(const_int(4), pDst + dstStride, tmp2uq);
> +
> + tmp16_1 = compress_unsafe(tmp4, tmp4);
> + store_partial(const_int(4), pDst + (2 * dstStride), tmp16_1); // column 2 becomes output row 2
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq >>= 32; // column 3
> + store_partial(const_int(4), pDst + (3 * dstStride), tmp2uq);
> + }
> + else // no transpose: each interpolated row is narrowed to bytes and stored directly
> + {
> + store_partial(const_int(4), pDst, compress_unsafe(row11, row11));
> + store_partial(const_int(4), pDst + (dstStride),
> compress_unsafe(row21, row21));
> + store_partial(const_int(4), pDst + (2 * dstStride),
> compress_unsafe(row31, row31));
> + store_partial(const_int(4), pDst + (3 * dstStride),
> compress_unsafe(row41, row41));
> + }
> +}
> +
> +void PredIntraAng4_m_9(pixel* pDst, int dstStride, pixel *refMain, int
> dirMode) // 4x4 angular intra prediction (8-bit path) for a fixed per-row step of -9/32 sample along refMain
> +{ // Rows go to pDst, dstStride pixels apart; dirMode only decides whether the block is transposed on store
> + Vec8s row11, row12, row21, row22, row31, row32, row41, row42; // rowN1 / rowN2: the two neighbouring reference vectors blended into output row N
> + Vec16uc tmp16_1, tmp16_2;
> + Vec2uq tmp2uq;
> + Vec8s v_deltaFract, v_deltaPos(0), thirty2(32), thirty1(31),
> v_ipAngle(0);
> + bool modeHor = (dirMode < 18); // modes below 18 take the transposed store path at the end
> +
> + tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain - 1); // presumably loads the 8 bytes refMain[-1..6]; the load starts at the lowest sample any row needs (row 4)
> + row41 = extend_low(tmp16_1); //refMain[-1..2] in lanes 0-3 (only lanes 0-3 are ever stored)
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 8; // shift the 64-bit lane down one byte: lane i now holds what was byte i + 1
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row42 = extend_low(tmp16_2); //refMain[0..3]
> +
> + row11 = row42;
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 16;
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row12 = extend_low(tmp16_2); //refMain[1..4]
> +
> + row21 = row42; //refMain[0..3]: rows 1-3 share the same whole-sample offset and therefore the same taps
> + row22 = row12;
> + row31 = row42;
> + row32 = row12;
> +
> + v_deltaPos = v_ipAngle = -9; // row 1 position -9/32: fraction (-9 & 31) = 23
> +
> + v_deltaFract = v_deltaPos & thirty1; // fractional part (position & 31) is the weight of the second tap
> + row11 = ((thirty2 - v_deltaFract) * row11 + (v_deltaFract * row12) +
> 16) >> 5; // rounded blend: ((32 - f) * first + f * second + 16) >> 5
> +
> + //row2: position -18/32 -> fraction 14
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row21 = ((thirty2 - v_deltaFract) * row21 + (v_deltaFract * row22) +
> 16) >> 5;
> +
> + //row3: position -27/32 -> fraction 5
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row31 = ((thirty2 - v_deltaFract) * row31 + (v_deltaFract * row32) +
> 16) >> 5;
> +
> + //row4: position -36/32 -> one whole sample further back, fraction 28
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row41 = ((thirty2 - v_deltaFract) * row41 + (v_deltaFract * row42) +
> 16) >> 5;
> +
> + // Modes below 18 (modeHor) store the transpose of the computed block; other modes store it row by row
> + if (modeHor)
> + {
> + Vec8s tmp1, tmp2, tmp3, tmp4;
> +
> + tmp1 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row11, row31); // interleave rows 1 and 3 (assuming indices 0-7 select from the first operand, 8-15 from the second)
> + tmp2 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row21, row41); // interleave rows 2 and 4
> +
> + tmp3 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(tmp1, tmp2); // columns 0 and 1 of the block, four samples each
> + tmp4 = blend8s<4, 12, 5, 13, 6, 14, 7, 15>(tmp1, tmp2); // columns 2 and 3
> +
> + tmp16_1 = compress_unsafe(tmp3, tmp3); // presumably packs the 16-bit lanes down to bytes
> + store_partial(const_int(4), pDst, tmp16_1); // bytes 0..3: column 0 becomes output row 0
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq >>= 32; // move bytes 4..7 (column 1) to the bottom
> + store_partial(const_int(4), pDst + dstStride, tmp2uq);
> +
> + tmp16_1 = compress_unsafe(tmp4, tmp4);
> + store_partial(const_int(4), pDst + (2 * dstStride), tmp16_1); // column 2 becomes output row 2
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq >>= 32; // column 3
> + store_partial(const_int(4), pDst + (3 * dstStride), tmp2uq);
> + }
> + else // no transpose: each interpolated row is narrowed to bytes and stored directly
> + {
> + store_partial(const_int(4), pDst, compress_unsafe(row11, row11));
> + store_partial(const_int(4), pDst + (dstStride),
> compress_unsafe(row21, row21));
> + store_partial(const_int(4), pDst + (2 * dstStride),
> compress_unsafe(row31, row31));
> + store_partial(const_int(4), pDst + (3 * dstStride),
> compress_unsafe(row41, row41));
> + }
> +}
> +
> +void PredIntraAng4_m_13(pixel* pDst, int dstStride, pixel *refMain, int
> dirMode) // 4x4 angular intra prediction (8-bit path) for a fixed per-row step of -13/32 sample along refMain
> +{ // Rows go to pDst, dstStride pixels apart; dirMode only decides whether the block is transposed on store
> + Vec8s row11, row12, row21, row22, row31, row32, row41, row42; // rowN1 / rowN2: the two neighbouring reference vectors blended into output row N
> + Vec16uc tmp16_1, tmp16_2;
> + Vec2uq tmp2uq;
> + Vec8s v_deltaFract, v_deltaPos(0), thirty2(32), thirty1(31),
> v_ipAngle(0);
> + bool modeHor = (dirMode < 18); // modes below 18 take the transposed store path at the end
> +
> + tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain - 1); // presumably loads the 8 bytes refMain[-1..6]; the load starts at the lowest sample any row needs (rows 3 and 4)
> + row41 = extend_low(tmp16_1); //refMain[-1..2] in lanes 0-3 (only lanes 0-3 are ever stored)
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 8; // shift the 64-bit lane down one byte: lane i now holds what was byte i + 1
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row42 = extend_low(tmp16_2); //refMain[0..3]
> +
> + row11 = row42;
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 16;
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row12 = extend_low(tmp16_2); //refMain[1..4]
> +
> + row21 = row42; //refMain[0..3]: row 2 shares row 1's taps
> + row22 = row12;
> + row31 = row41; // row 3 shares row 4's taps (refMain[-1..2] and refMain[0..3])
> + row32 = row42;
> +
> + v_deltaPos = v_ipAngle = -13; // row 1 position -13/32: fraction (-13 & 31) = 19
> +
> + v_deltaFract = v_deltaPos & thirty1; // fractional part (position & 31) is the weight of the second tap
> + row11 = ((thirty2 - v_deltaFract) * row11 + (v_deltaFract * row12) +
> 16) >> 5; // rounded blend: ((32 - f) * first + f * second + 16) >> 5
> +
> + //row2: position -26/32 -> fraction 6
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row21 = ((thirty2 - v_deltaFract) * row21 + (v_deltaFract * row22) +
> 16) >> 5;
> +
> + //row3: position -39/32 -> one whole sample further back, fraction 25
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row31 = ((thirty2 - v_deltaFract) * row31 + (v_deltaFract * row32) +
> 16) >> 5;
> +
> + //row4: position -52/32 -> same whole-sample offset as row 3, fraction 12
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row41 = ((thirty2 - v_deltaFract) * row41 + (v_deltaFract * row42) +
> 16) >> 5;
> +
> + // Modes below 18 (modeHor) store the transpose of the computed block; other modes store it row by row
> + if (modeHor)
> + {
> + Vec8s tmp1, tmp2, tmp3, tmp4;
> +
> + tmp1 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row11, row31); // interleave rows 1 and 3 (assuming indices 0-7 select from the first operand, 8-15 from the second)
> + tmp2 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row21, row41); // interleave rows 2 and 4
> +
> + tmp3 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(tmp1, tmp2); // columns 0 and 1 of the block, four samples each
> + tmp4 = blend8s<4, 12, 5, 13, 6, 14, 7, 15>(tmp1, tmp2); // columns 2 and 3
> +
> + tmp16_1 = compress_unsafe(tmp3, tmp3); // presumably packs the 16-bit lanes down to bytes
> + store_partial(const_int(4), pDst, tmp16_1); // bytes 0..3: column 0 becomes output row 0
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq >>= 32; // move bytes 4..7 (column 1) to the bottom
> + store_partial(const_int(4), pDst + dstStride, tmp2uq);
> +
> + tmp16_1 = compress_unsafe(tmp4, tmp4);
> + store_partial(const_int(4), pDst + (2 * dstStride), tmp16_1); // column 2 becomes output row 2
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq >>= 32; // column 3
> + store_partial(const_int(4), pDst + (3 * dstStride), tmp2uq);
> + }
> + else // no transpose: each interpolated row is narrowed to bytes and stored directly
> + {
> + store_partial(const_int(4), pDst, compress_unsafe(row11, row11));
> + store_partial(const_int(4), pDst + (dstStride),
> compress_unsafe(row21, row21));
> + store_partial(const_int(4), pDst + (2 * dstStride),
> compress_unsafe(row31, row31));
> + store_partial(const_int(4), pDst + (3 * dstStride),
> compress_unsafe(row41, row41));
> + }
> +}
> +
> +void PredIntraAng4_m_17(pixel* pDst, int dstStride, pixel *refMain, int
> dirMode) // 4x4 angular intra prediction (8-bit path) for a fixed per-row step of -17/32 sample along refMain
> +{ // Rows go to pDst, dstStride pixels apart; dirMode only decides whether the block is transposed on store
> + Vec8s row11, row12, row21, row22, row31, row32, row41, row42; // rowN1 / rowN2: the two neighbouring reference vectors blended into output row N
> + Vec16uc tmp16_1, tmp16_2;
> + Vec2uq tmp2uq;
> + Vec8s v_deltaFract, v_deltaPos(0), thirty2(32), thirty1(31),
> v_ipAngle(0);
> + bool modeHor = (dirMode < 18); // modes below 18 take the transposed store path at the end
> +
> + tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain - 2); // presumably loads the 8 bytes refMain[-2..5]; the load starts at the lowest sample any row needs (row 4)
> + row41 = extend_low(tmp16_1); //refMain[-2..1] in lanes 0-3 (only lanes 0-3 are ever stored)
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 8; // shift the 64-bit lane down one byte: lane i now holds what was byte i + 1
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row42 = extend_low(tmp16_2); //refMain[-1..2]
> +
> + row31 = row42; //refMain[-1..2]
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 16;
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row32 = extend_low(tmp16_2); //refMain[0..3]
> +
> + row21 = row31; //refMain[-1..2]: row 2 shares row 3's taps
> + row22 = row32;
> +
> + row11 = row32;
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 24;
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row12 = extend_low(tmp16_2); //refMain[1..4]
> +
> + v_deltaPos = v_ipAngle = -17; // row 1 position -17/32: fraction (-17 & 31) = 15
> + v_deltaFract = v_deltaPos & thirty1; // fractional part (position & 31) is the weight of the second tap
> + row11 = ((thirty2 - v_deltaFract) * row11 + (v_deltaFract * row12) +
> 16) >> 5; // rounded blend: ((32 - f) * first + f * second + 16) >> 5
> +
> + //row2: position -34/32 -> one whole sample further back, fraction 30
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row21 = ((thirty2 - v_deltaFract) * row21 + (v_deltaFract * row22) +
> 16) >> 5;
> +
> + //row3: position -51/32 -> same whole-sample offset as row 2, fraction 13
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row31 = ((thirty2 - v_deltaFract) * row31 + (v_deltaFract * row32) +
> 16) >> 5;
> +
> + //row4: position -68/32 -> one more whole sample back, fraction 28
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row41 = ((thirty2 - v_deltaFract) * row41 + (v_deltaFract * row42) +
> 16) >> 5;
> +
> + // Modes below 18 (modeHor) store the transpose of the computed block; other modes store it row by row
> + if (modeHor)
> + {
> + Vec8s tmp1, tmp2, tmp3, tmp4;
> +
> + tmp1 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row11, row31); // interleave rows 1 and 3 (assuming indices 0-7 select from the first operand, 8-15 from the second)
> + tmp2 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row21, row41); // interleave rows 2 and 4
> +
> + tmp3 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(tmp1, tmp2); // columns 0 and 1 of the block, four samples each
> + tmp4 = blend8s<4, 12, 5, 13, 6, 14, 7, 15>(tmp1, tmp2); // columns 2 and 3
> +
> + tmp16_1 = compress_unsafe(tmp3, tmp3); // presumably packs the 16-bit lanes down to bytes
> + store_partial(const_int(4), pDst, tmp16_1); // bytes 0..3: column 0 becomes output row 0
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq >>= 32; // move bytes 4..7 (column 1) to the bottom
> + store_partial(const_int(4), pDst + dstStride, tmp2uq);
> +
> + tmp16_1 = compress_unsafe(tmp4, tmp4);
> + store_partial(const_int(4), pDst + (2 * dstStride), tmp16_1); // column 2 becomes output row 2
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq >>= 32; // column 3
> + store_partial(const_int(4), pDst + (3 * dstStride), tmp2uq);
> + }
> + else // no transpose: each interpolated row is narrowed to bytes and stored directly
> + {
> + store_partial(const_int(4), pDst, compress_unsafe(row11, row11));
> + store_partial(const_int(4), pDst + (dstStride),
> compress_unsafe(row21, row21));
> + store_partial(const_int(4), pDst + (2 * dstStride),
> compress_unsafe(row31, row31));
> + store_partial(const_int(4), pDst + (3 * dstStride),
> compress_unsafe(row41, row41));
> + }
> +}
> +
> +void PredIntraAng4_m_21(pixel* pDst, int dstStride, pixel *refMain, int
> dirMode)
> +{ // 4x4 angular intra prediction specialised for angle -21; writes four 4-pixel rows at pDst, dstStride apart.
> + Vec8s row11, row12, row21, row22, row31, row32, row41, row42;
> + Vec16uc tmp16_1, tmp16_2;
> + Vec2uq tmp2uq;
> + Vec8s v_deltaFract, v_deltaPos(0), thirty2(32), thirty1(31),
> v_ipAngle(0);
> + bool modeHor = (dirMode < 18); // modes below 18 take the transposed store path at the end
> + // NOTE: the offsets(...) comments below appear to be relative to refMain + 1.
> + tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain - 2); // 8 reference bytes starting at refMain[-2]
> + row41 = extend_low(tmp16_1); //offsets(-3,-2,-1,0)
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 8; // drop the first byte: the window moves one pixel to the right
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row42 = extend_low(tmp16_2); //offsets(-2,-1,0,1)
> +
> + row31 = row42; //offsets(-2,-1,0,1)
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 16;
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row32 = extend_low(tmp16_2); //offsets(-1,0,1,2)
> +
> + row21 = row31; //offsets(-2,-1,0,1)
> + row22 = row32; //offsets(-1,0,1,2)
> +
> + row11 = row32; //offsets(-1,0,1,2)
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 24;
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row12 = extend_low(tmp16_2); //offsets(0,1,2,3)
> +
> + v_deltaPos = v_ipAngle = -21; // position for row 1; every later row adds the angle once more
> +
> + v_deltaFract = v_deltaPos & thirty1; // low 5 bits of the position = interpolation weight (0..31)
> + row11 = ((thirty2 - v_deltaFract) * row11 + (v_deltaFract * row12) +
> 16) >> 5; // weighted blend of the two neighbouring windows, +16 rounds before the /32
> +
> + //row2
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row21 = ((thirty2 - v_deltaFract) * row21 + (v_deltaFract * row22) +
> 16) >> 5;
> +
> + //row3
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row31 = ((thirty2 - v_deltaFract) * row31 + (v_deltaFract * row32) +
> 16) >> 5;
> +
> + //row4
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row41 = ((thirty2 - v_deltaFract) * row41 + (v_deltaFract * row42) +
> 16) >> 5;
> +
> + // Flip the block: horizontal modes store the 4x4 result transposed
> + if (modeHor)
> + {
> + Vec8s tmp1, tmp2, tmp3, tmp4;
> + // Two rounds of interleaving gather one column of the result into each group of four lanes.
> + tmp1 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row11, row31);
> + tmp2 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row21, row41);
> +
> + tmp3 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(tmp1, tmp2);
> + tmp4 = blend8s<4, 12, 5, 13, 6, 14, 7, 15>(tmp1, tmp2);
> +
> + tmp16_1 = compress_unsafe(tmp3, tmp3);
> + store_partial(const_int(4), pDst, tmp16_1);
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq >>= 32; // bring bytes 4..7 to the front for the next destination row
> + store_partial(const_int(4), pDst + dstStride, tmp2uq);
> +
> + tmp16_1 = compress_unsafe(tmp4, tmp4);
> + store_partial(const_int(4), pDst + (2 * dstStride), tmp16_1);
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq >>= 32;
> + store_partial(const_int(4), pDst + (3 * dstStride), tmp2uq);
> + }
> + else
> + { // vertical modes: each interpolated row is stored directly as one destination row
> + store_partial(const_int(4), pDst, compress_unsafe(row11, row11));
> + store_partial(const_int(4), pDst + (dstStride),
> compress_unsafe(row21, row21));
> + store_partial(const_int(4), pDst + (2 * dstStride),
> compress_unsafe(row31, row31));
> + store_partial(const_int(4), pDst + (3 * dstStride),
> compress_unsafe(row41, row41));
> + }
> +}
> +
> +void PredIntraAng4_m_26(pixel* pDst, int dstStride, pixel *refMain, int
> dirMode)
> +{ // 4x4 angular intra prediction specialised for angle -26; writes four 4-pixel rows at pDst, dstStride apart.
> + Vec8s row11, row12, row21, row22, row31, row32, row41, row42;
> + Vec16uc tmp16_1, tmp16_2;
> + Vec2uq tmp2uq;
> + Vec8s v_deltaFract, v_deltaPos(0), thirty2(32), thirty1(31),
> v_ipAngle(0);
> + bool modeHor = (dirMode < 18); // modes below 18 take the transposed store path at the end
> + // NOTE: the offsets(...) comments below appear to be relative to refMain + 1.
> + tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain - 3); // 8 reference bytes starting at refMain[-3]
> + row41 = extend_low(tmp16_1); //offsets(-4,-3,-2,-1)
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 8; // drop the first byte: the window moves one pixel to the right
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row42 = extend_low(tmp16_2); //offsets(-3,-2,-1,0)
> +
> + row31 = row42; //offsets(-3,-2,-1,0)
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 16;
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row32 = extend_low(tmp16_2); //offsets(-2,-1,0,1)
> +
> + row21 = row32; //offsets(-2,-1,0,1)
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 24;
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row22 = extend_low(tmp16_2); //offsets(-1,0,1,2)
> +
> + row11 = row22; //offsets(-1,0,1,2)
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq = tmp2uq >> 32;
> + tmp16_2 = reinterpret_i(tmp2uq);
> + row12 = extend_low(tmp16_2); //offsets(0,1,2,3)
> +
> + v_deltaPos = v_ipAngle = -26; // position for row 1; every later row adds the angle once more
> +
> + v_deltaFract = v_deltaPos & thirty1; // low 5 bits of the position = interpolation weight (0..31)
> + row11 = ((thirty2 - v_deltaFract) * row11 + (v_deltaFract * row12) +
> 16) >> 5; // weighted blend of the two neighbouring windows, +16 rounds before the /32
> +
> + //row2
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row21 = ((thirty2 - v_deltaFract) * row21 + (v_deltaFract * row22) +
> 16) >> 5;
> +
> + //row3
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row31 = ((thirty2 - v_deltaFract) * row31 + (v_deltaFract * row32) +
> 16) >> 5;
> +
> + //row4
> + v_deltaPos += v_ipAngle;
> + v_deltaFract = v_deltaPos & thirty1;
> + row41 = ((thirty2 - v_deltaFract) * row41 + (v_deltaFract * row42) +
> 16) >> 5;
> +
> + // Flip the block: horizontal modes store the 4x4 result transposed
> + if (modeHor)
> + {
> + Vec8s tmp1, tmp2, tmp3, tmp4;
> + // Two rounds of interleaving gather one column of the result into each group of four lanes.
> + tmp1 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row11, row31);
> + tmp2 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row21, row41);
> +
> + tmp3 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(tmp1, tmp2);
> + tmp4 = blend8s<4, 12, 5, 13, 6, 14, 7, 15>(tmp1, tmp2);
> +
> + tmp16_1 = compress_unsafe(tmp3, tmp3);
> + store_partial(const_int(4), pDst, tmp16_1);
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq >>= 32; // bring bytes 4..7 to the front for the next destination row
> + store_partial(const_int(4), pDst + dstStride, tmp2uq);
> +
> + tmp16_1 = compress_unsafe(tmp4, tmp4);
> + store_partial(const_int(4), pDst + (2 * dstStride), tmp16_1);
> +
> + tmp2uq = reinterpret_i(tmp16_1);
> + tmp2uq >>= 32;
> + store_partial(const_int(4), pDst + (3 * dstStride), tmp2uq);
> + }
> + else
> + { // vertical modes: each interpolated row is stored directly as one destination row
> + store_partial(const_int(4), pDst, compress_unsafe(row11, row11));
> + store_partial(const_int(4), pDst + (dstStride),
> compress_unsafe(row21, row21));
> + store_partial(const_int(4), pDst + (2 * dstStride),
> compress_unsafe(row31, row31));
> + store_partial(const_int(4), pDst + (3 * dstStride),
> compress_unsafe(row41, row41));
> + }
> +}
> +
> +void PredIntraAng4_m_32(pixel* pDst, int dstStride, pixel *refMain, int
> dirMode)
> +{ // Angle -32 needs no interpolation: every output row is a straight 4-pixel copy of the reference, starting one pixel earlier per row.
> + Vec16uc tmp16_1, tmp16_2;
> + dirMode++; // dirMode is not read afterwards; presumably this only silences an unused-parameter warning
> + tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain); //-1,0,1,2
> + store_partial(const_int(4), pDst, tmp16_1); // 8 bytes are loaded but only the first 4 are written
> + tmp16_2 = (Vec16uc)load_partial(const_int(8), refMain - 1);
> //-2,-1,0,1
> + store_partial(const_int(4), pDst + dstStride, tmp16_2);
> + tmp16_2 = (Vec16uc)load_partial(const_int(8), refMain - 2); //-3,-2,-1,0
> + store_partial(const_int(4), pDst + 2 * dstStride, tmp16_2);
> + tmp16_2 = (Vec16uc)load_partial(const_int(8), refMain - 3); //-4,-3,-2,-1
> + store_partial(const_int(4), pDst + 3 * dstStride, tmp16_2);
> +}
> +
> +typedef void (*PredIntraAng4x4_table)(pixel* pDst, int dstStride, pixel
> *refMain, int dirMode);
> +PredIntraAng4x4_table PredIntraAng[] = { // 33 per-angle 4x4 predictors, one slot per angular mode
> + /* There is no PredIntraAng4_0, so the two angle-0 slots below hold
> PredIntraAng4_2 as a placeholder. Angle 0 goes through the default path in
> xPredIntraAng4x4, because we cannot afford to pass a large number of arguments
> to this function. */
> + PredIntraAng4_32, // first slot
> + PredIntraAng4_26,
> + PredIntraAng4_21,
> + PredIntraAng4_17,
> + PredIntraAng4_13,
> + PredIntraAng4_9,
> + PredIntraAng4_5,
> + PredIntraAng4_2,
> + PredIntraAng4_2, //Intentionally wrong! It should be
> "PredIntraAng4_0" here.
> + PredIntraAng4_m_2,
> + PredIntraAng4_m_5,
> + PredIntraAng4_m_9,
> + PredIntraAng4_m_13,
> + PredIntraAng4_m_17,
> + PredIntraAng4_m_21,
> + PredIntraAng4_m_26,
> + PredIntraAng4_m_32, // middle slot: the angle sequence mirrors around this entry
> + PredIntraAng4_m_26,
> + PredIntraAng4_m_21,
> + PredIntraAng4_m_17,
> + PredIntraAng4_m_13,
> + PredIntraAng4_m_9,
> + PredIntraAng4_m_5,
> + PredIntraAng4_m_2,
> + PredIntraAng4_2, //Intentionally wrong! It should be
> "PredIntraAng4_0" here.
> + PredIntraAng4_2,
> + PredIntraAng4_5,
> + PredIntraAng4_9,
> + PredIntraAng4_13,
> + PredIntraAng4_17,
> + PredIntraAng4_21,
> + PredIntraAng4_26,
> + PredIntraAng4_32 // last slot
> +};
> void xPredIntraAng4x4(int /*bitDepth*/, pixel* pDst, int dstStride, int
> width, int dirMode, pixel *refLeft, pixel *refAbove, bool bFilter = true)
> {
> - int blkSize = width;
> -
> - // Map the mode index to main prediction direction and angle
> assert(dirMode > 1); //no planar and dc
> + int mode_to_angle_table[] = {32, 26, 21, 17, 13, 9, 5, 2, 0, -2, -5,
> -9, -13, -17, -21, -26, -32, -26, -21, -17, -13, -9, -5, -2, 0, 2, 5, 9,
> 13, 17, 21, 26, 32};
> + int mode_to_invAng_table[] = {256, 315, 390, 482, 630, 910, 1638,
> 4096, 0, 4096, 1638, 910, 630, 482, 390, 315, 256, 315, 390, 482, 630, 910,
> 1638, 4096, 0, 4096, 1638, 910, 630, 482, 390, 315, 256};
> + int intraPredAngle = mode_to_angle_table[dirMode-2];
> + int invAngle = mode_to_invAng_table[dirMode-2];
> +
> bool modeHor = (dirMode < 18);
> bool modeVer = !modeHor;
> - int intraPredAngle = modeVer ? (int)dirMode - VER_IDX : modeHor ?
> -((int)dirMode - HOR_IDX) : 0;
> - int absAng = abs(intraPredAngle);
> - int signAng = intraPredAngle < 0 ? -1 : 1;
> -
> - // Set bitshifts and scale the angle parameter to block size
> - int angTable[9] = { 0, 2, 5, 9, 13, 17, 21, 26, 32 };
> - int invAngTable[9] = { 0, 4096, 1638, 910, 630, 482, 390, 315, 256 };
> // (256 * 32) / Angle
> - int invAngle = invAngTable[absAng];
> - absAng = angTable[absAng];
> - intraPredAngle = signAng * absAng;
>
> // Do angular predictions
> -
> pixel* refMain;
> pixel* refSide;
>
> // Initialise the Main and Left reference array.
> if (intraPredAngle < 0)
> {
> + int blkSize = width;
> refMain = (modeVer ? refAbove : refLeft); // + (blkSize - 1);
> refSide = (modeVer ? refLeft : refAbove); // + (blkSize - 1);
>
> @@ -1785,415 +2981,7 @@
> }
> else
> {
> - Vec8s row11, row12, row21, row22, row31, row32, row41, row42;
> - Vec16uc tmp16_1, tmp16_2;
> - Vec2uq tmp2uq;
> - Vec8s v_deltaFract, v_deltaPos(0), thirty2(32), thirty1(31),
> v_ipAngle(0);
> - switch (intraPredAngle)
> - {
> - case -32:
> - tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain);
> //-1,0,1,2
> - store_partial(const_int(4), pDst, tmp16_1);
> - tmp16_2 = (Vec16uc)load_partial(const_int(8), refMain - 1);
> //-2,-1,0,1
> - store_partial(const_int(4), pDst + dstStride, tmp16_2);
> - tmp16_2 = (Vec16uc)load_partial(const_int(8), refMain - 2);
> - store_partial(const_int(4), pDst + 2 * dstStride, tmp16_2);
> - tmp16_2 = (Vec16uc)load_partial(const_int(8), refMain - 3);
> - store_partial(const_int(4), pDst + 3 * dstStride, tmp16_2);
> - return;
> -
> - case -26:
> - tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain - 3);
> - row41 = extend_low(tmp16_1); //offsets(-4,-3,-2,-1)
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 8;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row42 = extend_low(tmp16_2); //offsets(-3,-2,-1,0)
> -
> - row31 = row42; //offsets(-3,-2,-1,0)
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 16;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row32 = extend_low(tmp16_2); //offsets(-2,-1,0,1)
> -
> - row21 = row32; //offsets(-2,-1,0,1)
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 24;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row22 = extend_low(tmp16_2); //offsets(-1,0,1,2)
> -
> - row11 = row22; //offsets(-1,0,1,2)
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 32;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row12 = extend_low(tmp16_2); //offsets(0,1,2,3)
> -
> - v_deltaPos = v_ipAngle = -26;
> - break;
> -
> - case -21:
> - tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain - 2);
> - row41 = extend_low(tmp16_1); //offsets(-3,-2,-1,0)
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 8;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row42 = extend_low(tmp16_2); //offsets(-2,-1,0,1)
> -
> - row31 = row42; //offsets(-2,-1,0,1)
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 16;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row32 = extend_low(tmp16_2); //offsets(-1,0,1,2)
> -
> - row21 = row31; //offsets(-2,-1,0,1)
> - row22 = row32;
> -
> - row11 = row32;
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 24;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row12 = extend_low(tmp16_2); //offsets(-1,0,1,2)
> -
> - v_deltaPos = v_ipAngle = -21;
> - break;
> -
> - case -17:
> - tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain - 2);
> - row41 = extend_low(tmp16_1); //offsets(-3,-2,-1,0)
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 8;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row42 = extend_low(tmp16_2); //offsets(-2,-1,0,1)
> -
> - row31 = row42; //offsets(-2,-1,0,1)
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 16;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row32 = extend_low(tmp16_2); //offsets(-1,0,1,2)
> -
> - row21 = row31; //offsets(-2,-1,0,1)
> - row22 = row32;
> -
> - row11 = row32;
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 24;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row12 = extend_low(tmp16_2); //offsets(-1,0,1,2)
> -
> - v_deltaPos = v_ipAngle = -17;
> - break;
> -
> - case -13:
> - tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain - 1);
> - row41 = extend_low(tmp16_1); //offsets(-2,-1,0,1)
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 8;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row42 = extend_low(tmp16_2); //offsets(-1,0,1,2)
> -
> - row11 = row42;
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 16;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row12 = extend_low(tmp16_2); //offsets(-1,0,1,2)
> -
> - row21 = row42; //offsets(0,1,2,3)
> - row22 = row12;
> - row31 = row41;
> - row32 = row42;
> -
> - v_deltaPos = v_ipAngle = -13;
> - break;
> -
> - case -9:
> - tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain - 1);
> - row41 = extend_low(tmp16_1); //offsets(-2,-1,0,1)
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 8;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row42 = extend_low(tmp16_2); //offsets(-1,0,1,2)
> -
> - row11 = row42;
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 16;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row12 = extend_low(tmp16_2); //offsets(-1,0,1,2)
> -
> - row21 = row42; //offsets(0,1,2,3)
> - row22 = row12;
> - row31 = row42;
> - row32 = row12;
> -
> - v_deltaPos = v_ipAngle = -9;
> - break;
> -
> - case -5:
> - tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain);
> - row11 = extend_low(tmp16_1); //offsets(0,1,2,3)
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 8;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row12 = extend_low(tmp16_2); //offsets(1,2,3,4)
> - row21 = row11; //offsets(0,1,2,3)
> - row22 = row12;
> - row31 = row11;
> - row32 = row12;
> - row41 = row11;
> - row42 = row12;
> -
> - v_deltaPos = v_ipAngle = -5;
> - break;
> -
> - case -2:
> - tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain);
> - row11 = extend_low(tmp16_1); //offsets(0,1,2,3)
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 8;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row12 = extend_low(tmp16_2); //offsets(1,2,3,4)
> - row21 = row11; //offsets(0,1,2,3)
> - row22 = row12;
> - row31 = row11;
> - row32 = row12;
> - row41 = row11;
> - row42 = row12;
> -
> - v_deltaPos = v_ipAngle = -2;
> - break;
> -
> - case 2:
> - tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 1);
> - row11 = extend_low(tmp16_1); //offsets(0,1,2,3)
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 8;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row12 = extend_low(tmp16_2); //offsets(1,2,3,4)
> - row21 = row11; //offsets(0,1,2,3)
> - row22 = row12;
> - row31 = row11;
> - row32 = row12;
> - row41 = row11;
> - row42 = row12;
> -
> - v_deltaPos = v_ipAngle = 2;
> - break;
> -
> - case 5:
> - tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 1);
> - row11 = extend_low(tmp16_1); //offsets(0,1,2,3)
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 8;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row12 = extend_low(tmp16_2); //offsets(1,2,3,4)
> - row21 = row11; //offsets(0,1,2,3)
> - row22 = row12;
> - row31 = row11;
> - row32 = row12;
> - row41 = row11;
> - row42 = row12;
> -
> - v_deltaPos = v_ipAngle = 5;
> - break;
> -
> - case 9:
> - tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 1);
> - row11 = extend_low(tmp16_1); //offsets(0,1,2,3)
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 8;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row12 = extend_low(tmp16_2); //offsets(1,2,3,4)
> - row21 = row11; //offsets(0,1,2,3)
> - row22 = row12;
> - row31 = row11;
> - row32 = row12;
> - row41 = row12;
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 16;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row42 = extend_low(tmp16_2);
> -
> - v_deltaPos = v_ipAngle = 9;
> - break;
> -
> - case 13:
> - tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 1);
> -
> - row11 = extend_low(tmp16_1); //offsets(0,1,2,3)
> -
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 8;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row12 = extend_low(tmp16_2); //offsets(1,2,3,4)
> -
> - row21 = row11; //offsets(0,1,2,3)
> - row22 = row12;
> - row31 = row12; //offsets(1,2,3,4)
> -
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 16;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row32 = extend_low(tmp16_2); //offsets(2,3,4,5)
> -
> - row41 = row31; //offsets(1,2,3,4)
> - row42 = row32;
> -
> - v_deltaPos = v_ipAngle = 13;
> - break;
> -
> - case 17:
> - tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 1);
> -
> - row11 = extend_low(tmp16_1); //offsets(0,1,2,3)
> -
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 8;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row12 = extend_low(tmp16_2); //offsets(1,2,3,4)
> -
> - row21 = row12;
> -
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 16;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row22 = extend_low(tmp16_2); //offsets(2,3,4,5)
> -
> - row31 = row21;
> - row32 = row22;
> -
> - row41 = row22;
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 24;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row42 = extend_low(tmp16_2); //offsets(3,4,5,6)
> -
> - v_deltaPos = v_ipAngle = 17;
> - break;
> -
> - case 21:
> - tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 1);
> -
> - row11 = extend_low(tmp16_1); //offsets(0,1,2,3)
> -
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 8;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row12 = extend_low(tmp16_2); //offsets(1,2,3,4)
> -
> - row21 = row12;
> -
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 16;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row22 = extend_low(tmp16_2); //offsets(2,3,4,5)
> -
> - row31 = row21;
> - row32 = row22;
> -
> - row41 = row22;
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 24;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row42 = extend_low(tmp16_2); //offsets(3,4,5,6)
> -
> - v_deltaPos = v_ipAngle = 21;
> - break;
> -
> - case 26:
> - tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 1);
> -
> - row11 = extend_low(tmp16_1); //offsets(0,1,2,3)
> -
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 8;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row12 = extend_low(tmp16_2); //offsets(1,2,3,4)
> -
> - row21 = row12;
> -
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 16;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row22 = extend_low(tmp16_2); //offsets(2,3,4,5)
> -
> - row31 = row22;
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 24;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row32 = extend_low(tmp16_2); //offsets(3,4,5,6)
> -
> - row41 = row32;
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq = tmp2uq >> 32;
> - tmp16_2 = reinterpret_i(tmp2uq);
> - row42 = extend_low(tmp16_2); //offsets(4,5,6,7)
> -
> - v_deltaPos = v_ipAngle = 26;
> - break;
> -
> - case 32:
> - tmp16_1 = (Vec16uc)load_partial(const_int(8), refMain + 2);
> - store_partial(const_int(4), pDst, tmp16_1);
> - tmp16_2 = (Vec16uc)load_partial(const_int(8), refMain + 3);
> - store_partial(const_int(4), pDst + dstStride, tmp16_2);
> - tmp16_2 = (Vec16uc)load_partial(const_int(8), refMain + 4);
> - store_partial(const_int(4), pDst + 2 * dstStride, tmp16_2);
> - tmp16_2 = (Vec16uc)load_partial(const_int(8), refMain + 5);
> - store_partial(const_int(4), pDst + 3 * dstStride, tmp16_2);
> - return;
> - }
> -
> - //row1
> - v_deltaFract = v_deltaPos & thirty1;
> - row11 = ((thirty2 - v_deltaFract) * row11 + (v_deltaFract *
> row12) + 16) >> 5;
> -
> - //row2
> - v_deltaPos += v_ipAngle;
> - v_deltaFract = v_deltaPos & thirty1;
> - row21 = ((thirty2 - v_deltaFract) * row21 + (v_deltaFract *
> row22) + 16) >> 5;
> -
> - //row3
> - v_deltaPos += v_ipAngle;
> - v_deltaFract = v_deltaPos & thirty1;
> - row31 = ((thirty2 - v_deltaFract) * row31 + (v_deltaFract *
> row32) + 16) >> 5;
> -
> - //row4
> - v_deltaPos += v_ipAngle;
> - v_deltaFract = v_deltaPos & thirty1;
> - row41 = ((thirty2 - v_deltaFract) * row41 + (v_deltaFract *
> row42) + 16) >> 5;
> -
> - // Flip the block
> -
> - if (modeHor)
> - {
> - Vec8s tmp1, tmp2, tmp3, tmp4;
> -
> - tmp1 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row11, row31);
> - tmp2 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(row21, row41);
> -
> - tmp3 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(tmp1, tmp2);
> - tmp4 = blend8s<4, 12, 5, 13, 6, 14, 7, 15>(tmp1, tmp2);
> -
> - tmp16_1 = compress_unsafe(tmp3, tmp3);
> - store_partial(const_int(4), pDst, tmp16_1);
> -
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq >>= 32;
> - store_partial(const_int(4), pDst + dstStride, tmp2uq);
> -
> - tmp16_1 = compress_unsafe(tmp4, tmp4);
> - store_partial(const_int(4), pDst + (2 * dstStride), tmp16_1);
> -
> - tmp2uq = reinterpret_i(tmp16_1);
> - tmp2uq >>= 32;
> - store_partial(const_int(4), pDst + (3 * dstStride), tmp2uq);
> - }
> - else
> - {
> - store_partial(const_int(4), pDst, compress_unsafe(row11,
> row11));
> - store_partial(const_int(4), pDst + (dstStride),
> compress_unsafe(row21, row21));
> - store_partial(const_int(4), pDst + (2 * dstStride),
> compress_unsafe(row31, row31));
> - store_partial(const_int(4), pDst + (3 * dstStride),
> compress_unsafe(row41, row41));
> - }
> + PredIntraAng[dirMode-2](pDst, dstStride, refMain, dirMode);
> }
> }
>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> http://mailman.videolan.org/listinfo/x265-devel
>
>
--
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20130626/fdcc8a14/attachment-0001.html>
More information about the x265-devel
mailing list