<div dir="ltr"><br><div class="gmail_extra"><br><br><div class="gmail_quote">On Mon, Jul 8, 2013 at 7:11 AM, <span dir="ltr">&lt;<a href="mailto:praveen@multicorewareinc.com" target="_blank">praveen@multicorewareinc.com</a>&gt;</span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User praveentiwari<br>
# Date 1373285508 -19800<br>
# Node ID 449d22a2f7bc61037e0451c29eb240ef2c1fa83b<br>
# Parent 3aea9851338b800eb0c31e83c4075b7a47eefa92<br>
xDCT16 renamed and code cleanup<br></blockquote><div><br></div><div>These patches are based on the assumption that the previous series had not been applied, but it had been. So I had to fix up each one of these by hand. In the future, please ensure you are working from the latest tip of the repo.</div>
<div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
diff -r 3aea9851338b -r 449d22a2f7bc source/common/vec/dct.inc<br>
--- a/source/common/vec/dct.inc Mon Jul 08 17:27:35 2013 +0530<br>
+++ b/source/common/vec/dct.inc Mon Jul 08 17:41:48 2013 +0530<br>
@@ -638,32 +638,32 @@<br>
int j;<br>
int add = 1 << (shift - 1);<br>
<br>
- Vec4i g_aiT_zero_row(64, 64, 0, 0);<br>
- Vec4i g_aiT_four_row(83, 36, 0, 0);<br>
- Vec4i g_aiT_eight_row(64, -64, 0, 0);<br>
- Vec4i g_aiT_twelve_row(36, -83, 0, 0);<br>
-<br>
- Vec4i g_aiT_two_row(89, 75, 50, 18);<br>
- Vec4i g_aiT_six_row(75, -18, -89, -50);<br>
- Vec4i g_aiT_ten_row(50, -89, 18, 75);<br>
- Vec4i g_aiT_fourteen_row(18, -50, 75, -89);<br>
-<br>
- Vec4i g_aiT_one_row_first_half(90, 87, 80, 70);<br>
- Vec4i g_aiT_one_row_second_half(57, 43, 25, 9);<br>
- Vec4i g_aiT_three_row_first_half(87, 57, 9, -43);<br>
- Vec4i g_aiT_three_row_second_half(-80, -90, -70, -25);<br>
- Vec4i g_aiT_five_row_first_half(80, 9, -70, -87);<br>
- Vec4i g_aiT_five_row_second_half(-25, 57, 90, 43);<br>
- Vec4i g_aiT_seven_row_first_half(70, -43, -87, 9);<br>
- Vec4i g_aiT_seven_row_second_half(90, 25, -80, -57);<br>
- Vec4i g_aiT_nine_row_first_half(57, -80, -25, 90);<br>
- Vec4i g_aiT_nine_row_second_half(-9, -87, 43, 70);<br>
- Vec4i g_aiT_eleven_row_first_half(43, -90, 57, 25);<br>
- Vec4i g_aiT_eleven_row_second_half(-87, 70, 9, -80);<br>
- Vec4i g_aiT_thirteen_row_first_half(25, -70, 90, -80);<br>
- Vec4i g_aiT_thirteen_row_second_half(43, 9, -57, 87);<br>
- Vec4i g_aiT_fifteen_row_first_half(9, -25, 43, -57);<br>
- Vec4i g_aiT_fifteen_row_second_half(70, -80, 87, -90);<br>
+ Vec4i zero_row(64, 64, 0, 0);<br>
+ Vec4i four_row(83, 36, 0, 0);<br>
+ Vec4i eight_row(64, -64, 0, 0);<br>
+ Vec4i twelve_row(36, -83, 0, 0);<br>
+<br>
+ Vec4i two_row(89, 75, 50, 18);<br>
+ Vec4i six_row(75, -18, -89, -50);<br>
+ Vec4i ten_row(50, -89, 18, 75);<br>
+ Vec4i fourteen_row(18, -50, 75, -89);<br>
+<br>
+ Vec4i one_row_first_half(90, 87, 80, 70);<br>
+ Vec4i one_row_second_half(57, 43, 25, 9);<br>
+ Vec4i three_row_first_half(87, 57, 9, -43);<br>
+ Vec4i three_row_second_half(-80, -90, -70, -25);<br>
+ Vec4i five_row_first_half(80, 9, -70, -87);<br>
+ Vec4i five_row_second_half(-25, 57, 90, 43);<br>
+ Vec4i seven_row_first_half(70, -43, -87, 9);<br>
+ Vec4i seven_row_second_half(90, 25, -80, -57);<br>
+ Vec4i nine_row_first_half(57, -80, -25, 90);<br>
+ Vec4i nine_row_second_half(-9, -87, 43, 70);<br>
+ Vec4i eleven_row_first_half(43, -90, 57, 25);<br>
+ Vec4i eleven_row_second_half(-87, 70, 9, -80);<br>
+ Vec4i thirteen_row_first_half(25, -70, 90, -80);<br>
+ Vec4i thirteen_row_second_half(43, 9, -57, 87);<br>
+ Vec4i fifteen_row_first_half(9, -25, 43, -57);<br>
+ Vec4i fifteen_row_second_half(70, -80, 87, -90);<br>
<br>
for (j = 0; j < line; j++)<br>
{<br>
@@ -694,10 +694,10 @@<br>
Vec4i EEE = EE_first_half + EE_second_half;<br>
Vec4i EEO = EE_first_half - EE_second_half;<br>
<br>
- Vec4i dst_tmp0 = g_aiT_zero_row * EEE;<br>
- Vec4i dst_tmp4 = g_aiT_four_row * EEO;<br>
- Vec4i dst_tmp8 = g_aiT_eight_row * EEE;<br>
- Vec4i dst_tmp12 = g_aiT_twelve_row * EEO;<br>
+ Vec4i dst_tmp0 = zero_row * EEE;<br>
+ Vec4i dst_tmp4 = four_row * EEO;<br>
+ Vec4i dst_tmp8 = eight_row * EEE;<br>
+ Vec4i dst_tmp12 = twelve_row * EEO;<br>
<br>
int dst_zero = horizontal_add(dst_tmp0);<br>
int dst_four = horizontal_add(dst_tmp4);<br>
@@ -714,10 +714,10 @@<br>
dst[4 * line] = dst_shift_result[2];<br>
dst[12 * line] = dst_shift_result[3];<br>
<br>
- Vec4i dst_tmp2 = g_aiT_two_row * EO;<br>
- Vec4i dst_tmp6 = g_aiT_six_row * EO;<br>
- Vec4i dst_tmp10 = g_aiT_ten_row * EO;<br>
- Vec4i dst_tmp14 = g_aiT_fourteen_row * EO;<br>
+ Vec4i dst_tmp2 = two_row * EO;<br>
+ Vec4i dst_tmp6 = six_row * EO;<br>
+ Vec4i dst_tmp10 = ten_row * EO;<br>
+ Vec4i dst_tmp14 = fourteen_row * EO;<br>
<br>
int dst_two = horizontal_add(dst_tmp2);<br>
int dst_six = horizontal_add(dst_tmp6);<br>
@@ -733,22 +733,22 @@<br>
dst[10 * line] = dst_2_6_10_14[2];<br>
dst[14 * line] = dst_2_6_10_14[3];<br>
<br>
- Vec4i dst_tmp1_first_half = g_aiT_one_row_first_half * O_first_half;<br>
- Vec4i dst_tmp1_second_half = g_aiT_one_row_second_half * O_second_half;<br>
- Vec4i dst_tmp3_first_half = g_aiT_three_row_first_half * O_first_half;<br>
- Vec4i dst_tmp3_second_half = g_aiT_three_row_second_half * O_second_half;<br>
- Vec4i dst_tmp5_first_half = g_aiT_five_row_first_half * O_first_half;<br>
- Vec4i dst_tmp5_second_half = g_aiT_five_row_second_half * O_second_half;<br>
- Vec4i dst_tmp7_first_half = g_aiT_seven_row_first_half * O_first_half;<br>
- Vec4i dst_tmp7_second_half = g_aiT_seven_row_second_half * O_second_half;<br>
- Vec4i dst_tmp9_first_half = g_aiT_nine_row_first_half * O_first_half;<br>
- Vec4i dst_tmp9_second_half = g_aiT_nine_row_second_half * O_second_half;<br>
- Vec4i dst_tmp11_first_half = g_aiT_eleven_row_first_half * O_first_half;<br>
- Vec4i dst_tmp11_second_half = g_aiT_eleven_row_second_half * O_second_half;<br>
- Vec4i dst_tmp13_first_half = g_aiT_thirteen_row_first_half * O_first_half;<br>
- Vec4i dst_tmp13_second_half = g_aiT_thirteen_row_second_half * O_second_half;<br>
- Vec4i dst_tmp15_first_half = g_aiT_fifteen_row_first_half * O_first_half;<br>
- Vec4i dst_tmp15_second_half = g_aiT_fifteen_row_second_half * O_second_half;<br>
+ Vec4i dst_tmp1_first_half = one_row_first_half * O_first_half;<br>
+ Vec4i dst_tmp1_second_half = one_row_second_half * O_second_half;<br>
+ Vec4i dst_tmp3_first_half = three_row_first_half * O_first_half;<br>
+ Vec4i dst_tmp3_second_half = three_row_second_half * O_second_half;<br>
+ Vec4i dst_tmp5_first_half = five_row_first_half * O_first_half;<br>
+ Vec4i dst_tmp5_second_half = five_row_second_half * O_second_half;<br>
+ Vec4i dst_tmp7_first_half = seven_row_first_half * O_first_half;<br>
+ Vec4i dst_tmp7_second_half = seven_row_second_half * O_second_half;<br>
+ Vec4i dst_tmp9_first_half = nine_row_first_half * O_first_half;<br>
+ Vec4i dst_tmp9_second_half = nine_row_second_half * O_second_half;<br>
+ Vec4i dst_tmp11_first_half = eleven_row_first_half * O_first_half;<br>
+ Vec4i dst_tmp11_second_half = eleven_row_second_half * O_second_half;<br>
+ Vec4i dst_tmp13_first_half = thirteen_row_first_half * O_first_half;<br>
+ Vec4i dst_tmp13_second_half = thirteen_row_second_half * O_second_half;<br>
+ Vec4i dst_tmp15_first_half = fifteen_row_first_half * O_first_half;<br>
+ Vec4i dst_tmp15_second_half = fifteen_row_second_half * O_second_half;<br>
<br>
int dst_one = horizontal_add(dst_tmp1_first_half) + horizontal_add(dst_tmp1_second_half);<br>
int dst_three = horizontal_add(dst_tmp3_first_half) + horizontal_add(dst_tmp3_second_half);<br>
@@ -781,7 +781,7 @@<br>
}<br>
}<br>
<br>
-void xDCT16(short *src, int *dst, intptr_t nStride)<br>
+void dct16(short *src, int *dst, intptr_t stride)<br>
{<br>
const int shift_1st = 3;<br>
const int shift_2nd = 10;<br>
@@ -791,7 +791,7 @@<br>
<br>
for (int i = 0; i < 16; i++)<br>
{<br>
- memcpy(&block[i * 16], &src[i * nStride], 16 * sizeof(short));<br>
+ memcpy(&block[i * 16], &src[i * stride], 16 * sizeof(short));<br>
}<br>
<br>
partialButterfly16(block, coef, shift_1st, 16);<br>
@@ -851,7 +851,7 @@<br>
#undef MAKE_COEF<br>
};<br>
<br>
-void xDCT16(short *src, int *dst, intptr_t nStride)<br>
+void dct16(short *src, int *dst, intptr_t stride)<br>
{<br>
// Const<br>
__m128i c_4 = _mm_set1_epi32(4);<br>
@@ -874,22 +874,22 @@<br>
// DCT1<br>
for (i = 0; i < 16; i += 8)<br>
{<br>
- T00A = _mm_load_si128((__m128i*)&src[(i + 0) * nStride + 0]); // [07 06 05 04 03 02 01 00]<br>
- T00B = _mm_load_si128((__m128i*)&src[(i + 0) * nStride + 8]); // [0F 0E 0D 0C 0B 0A 09 08]<br>
- T01A = _mm_load_si128((__m128i*)&src[(i + 1) * nStride + 0]); // [17 16 15 14 13 12 11 10]<br>
- T01B = _mm_load_si128((__m128i*)&src[(i + 1) * nStride + 8]); // [1F 1E 1D 1C 1B 1A 19 18]<br>
- T02A = _mm_load_si128((__m128i*)&src[(i + 2) * nStride + 0]); // [27 26 25 24 23 22 21 20]<br>
- T02B = _mm_load_si128((__m128i*)&src[(i + 2) * nStride + 8]); // [2F 2E 2D 2C 2B 2A 29 28]<br>
- T03A = _mm_load_si128((__m128i*)&src[(i + 3) * nStride + 0]); // [37 36 35 34 33 32 31 30]<br>
- T03B = _mm_load_si128((__m128i*)&src[(i + 3) * nStride + 8]); // [3F 3E 3D 3C 3B 3A 39 38]<br>
- T04A = _mm_load_si128((__m128i*)&src[(i + 4) * nStride + 0]); // [47 46 45 44 43 42 41 40]<br>
- T04B = _mm_load_si128((__m128i*)&src[(i + 4) * nStride + 8]); // [4F 4E 4D 4C 4B 4A 49 48]<br>
- T05A = _mm_load_si128((__m128i*)&src[(i + 5) * nStride + 0]); // [57 56 55 54 53 52 51 50]<br>
- T05B = _mm_load_si128((__m128i*)&src[(i + 5) * nStride + 8]); // [5F 5E 5D 5C 5B 5A 59 58]<br>
- T06A = _mm_load_si128((__m128i*)&src[(i + 6) * nStride + 0]); // [67 66 65 64 63 62 61 60]<br>
- T06B = _mm_load_si128((__m128i*)&src[(i + 6) * nStride + 8]); // [6F 6E 6D 6C 6B 6A 69 68]<br>
- T07A = _mm_load_si128((__m128i*)&src[(i + 7) * nStride + 0]); // [77 76 75 74 73 72 71 70]<br>
- T07B = _mm_load_si128((__m128i*)&src[(i + 7) * nStride + 8]); // [7F 7E 7D 7C 7B 7A 79 78]<br>
+ T00A = _mm_load_si128((__m128i*)&src[(i + 0) * stride + 0]); // [07 06 05 04 03 02 01 00]<br>
+ T00B = _mm_load_si128((__m128i*)&src[(i + 0) * stride + 8]); // [0F 0E 0D 0C 0B 0A 09 08]<br>
+ T01A = _mm_load_si128((__m128i*)&src[(i + 1) * stride + 0]); // [17 16 15 14 13 12 11 10]<br>
+ T01B = _mm_load_si128((__m128i*)&src[(i + 1) * stride + 8]); // [1F 1E 1D 1C 1B 1A 19 18]<br>
+ T02A = _mm_load_si128((__m128i*)&src[(i + 2) * stride + 0]); // [27 26 25 24 23 22 21 20]<br>
+ T02B = _mm_load_si128((__m128i*)&src[(i + 2) * stride + 8]); // [2F 2E 2D 2C 2B 2A 29 28]<br>
+ T03A = _mm_load_si128((__m128i*)&src[(i + 3) * stride + 0]); // [37 36 35 34 33 32 31 30]<br>
+ T03B = _mm_load_si128((__m128i*)&src[(i + 3) * stride + 8]); // [3F 3E 3D 3C 3B 3A 39 38]<br>
+ T04A = _mm_load_si128((__m128i*)&src[(i + 4) * stride + 0]); // [47 46 45 44 43 42 41 40]<br>
+ T04B = _mm_load_si128((__m128i*)&src[(i + 4) * stride + 8]); // [4F 4E 4D 4C 4B 4A 49 48]<br>
+ T05A = _mm_load_si128((__m128i*)&src[(i + 5) * stride + 0]); // [57 56 55 54 53 52 51 50]<br>
+ T05B = _mm_load_si128((__m128i*)&src[(i + 5) * stride + 8]); // [5F 5E 5D 5C 5B 5A 59 58]<br>
+ T06A = _mm_load_si128((__m128i*)&src[(i + 6) * stride + 0]); // [67 66 65 64 63 62 61 60]<br>
+ T06B = _mm_load_si128((__m128i*)&src[(i + 6) * stride + 8]); // [6F 6E 6D 6C 6B 6A 69 68]<br>
+ T07A = _mm_load_si128((__m128i*)&src[(i + 7) * stride + 0]); // [77 76 75 74 73 72 71 70]<br>
+ T07B = _mm_load_si128((__m128i*)&src[(i + 7) * stride + 8]); // [7F 7E 7D 7C 7B 7A 79 78]<br>
<br>
T00B = _mm_shuffle_epi8(T00B, _mm_load_si128((__m128i*)tab_dct_16_0[0]));<br>
T01B = _mm_shuffle_epi8(T01B, _mm_load_si128((__m128i*)tab_dct_16_0[0]));<br>
@@ -4089,7 +4089,7 @@<br>
p.dct[DST_4x4] = dst4;<br>
p.dct[DCT_4x4] = dct4;<br>
p.dct[DCT_8x8] = dct8;<br>
- p.dct[DCT_16x16] = xDCT16;<br>
+ p.dct[DCT_16x16] = dct16;<br>
p.dct[DCT_32x32] = xDCT32;<br>
#endif<br>
// TODO: I am not sure the IDCT works on 16bpp mode<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="http://mailman.videolan.org/listinfo/x265-devel" target="_blank">http://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div><br><br clear="all"><div><br></div>-- <br>Steve Borho
</div></div>