[x265] [PATCH] xDCT32 renamed and code cleanup

praveen at multicorewareinc.com praveen at multicorewareinc.com
Mon Jul 8 14:26:51 CEST 2013


# HG changeset patch
# User praveentiwari
# Date 1373286393 -19800
# Node ID 3b1d07683c7d5119c76ca3534f221eb483511e76
# Parent  09b2b2443374815174a7b41ad47c2676a9138a59
Rename xDCT32 to dct32 and nStride to stride; drop the g_aiT_ prefix from local coefficient vectors

diff -r 09b2b2443374 -r 3b1d07683c7d source/common/vec/dct.inc
--- a/source/common/vec/dct.inc	Mon Jul 08 17:44:01 2013 +0530
+++ b/source/common/vec/dct.inc	Mon Jul 08 17:56:33 2013 +0530
@@ -1266,112 +1266,112 @@
     int j;
     int add = 1 << (shift - 1);
 
-    Vec4i g_aiT_zero_row_first_two(64, 64, 0, 0);
-    Vec4i g_aiT_eight_row_first_two(83, 36, 0, 0);
-    Vec4i g_aiT_sixten_row_first_two(64, -64, 0, 0);
-    Vec4i g_aiT_twentyfour_row_first_two(36, -83, 0, 0);
-
-    Vec4i g_aiT_four_row_first_four(89, 75, 50, 18);
-    Vec4i g_aiT_twelve_row_first_four(75, -18, -89, -50);
-    Vec4i g_aiT_twenty_row_first_four(50, -89, 18, 75);
-    Vec4i g_aiT_twentyeight_row_first_four(18, -50, 75, -89);
-
-    Vec4i g_aiT_two_row_first_four(90, 87, 80, 70);
-    Vec4i g_aiT_two_row_second_four(57, 43, 25,  9);
-    Vec4i g_aiT_six_row_first_four(87, 57,  9, -43);
-    Vec4i g_aiT_six_row_second_four(-80, -90, -70, -25);
-    Vec4i g_aiT_ten_row_first_four(80,  9, -70, -87);
-    Vec4i g_aiT_ten_row_second_four(-25, 57, 90, 43);
-    Vec4i g_aiT_fourteen_row_first_four(70, -43, -87,  9);
-    Vec4i g_aiT_fourteen_row_second_four(90, 25, -80, -57);
-    Vec4i g_aiT_eighteen_row_first_four(57, -80, -25, 90);
-    Vec4i g_aiT_eighteen_row_second_four(-9, -87, 43, 70);
-    Vec4i g_aiT_twentytwo_row_first_four(43, -90, 57, 25);
-    Vec4i g_aiT_twentytwo_row_second_four(-87, 70,  9, -80);
-    Vec4i g_aiT_twentysix_row_first_four(25, -70, 90, -80);
-    Vec4i g_aiT_twentysix_row_second_four(43,  9, -57, 87);
-    Vec4i g_aiT_thirty_row_first_four(9, -25, 43, -57);
-    Vec4i g_aiT_thirty_row_second_four(70, -80, 87, -90);
-
-    Vec4i g_aiT_one_row_first_four(90, 90, 88, 85);
-    Vec4i g_aiT_one_row_second_four(82, 78, 73, 67);
-    Vec4i g_aiT_one_row_third_four(61, 54, 46, 38);
-    Vec4i g_aiT_one_row_fourth_four(31, 22, 13,  4);
-
-    Vec4i g_aiT_three_row_first_four(90, 82, 67, 46);
-    Vec4i g_aiT_three_row_second_four(22, -4, -31, -54);
-    Vec4i g_aiT_three_row_third_four(-73, -85, -90, -88);
-    Vec4i g_aiT_three_row_fourth_four(-78, -61, -38, -13);
-
-    Vec4i g_aiT_five_row_first_four(88, 67, 31, -13);
-    Vec4i g_aiT_five_row_second_four(-54, -82, -90, -78);
-    Vec4i g_aiT_five_row_third_four(-46, -4, 38, 73);
-    Vec4i g_aiT_five_row_fourth_four(90, 85, 61, 22);
-
-    Vec4i g_aiT_seven_row_first_four(85, 46, -13, -67);
-    Vec4i g_aiT_seven_row_second_four(-90, -73, -22, 38);
-    Vec4i g_aiT_seven_row_third_four(82, 88, 54, -4);
-    Vec4i g_aiT_seven_row_fourth_four(-61, -90, -78, -31);
-
-    Vec4i g_aiT_nine_row_first_four(82, 22, -54, -90);
-    Vec4i g_aiT_nine_row_second_four(-61, 13, 78, 85);
-    Vec4i g_aiT_nine_row_third_four(31, -46, -90, -67);
-    Vec4i g_aiT_nine_row_fourth_four(4, 73, 88, 38);
-
-    Vec4i g_aiT_eleven_row_first_four(78, -4, -82, -73);
-    Vec4i g_aiT_eleven_row_second_four(13, 85, 67, -22);
-    Vec4i g_aiT_eleven_row_third_four(-88, -61, 31, 90);
-    Vec4i g_aiT_eleven_row_fourth_four(54, -38, -90, -46);
-
-    Vec4i g_aiT_thirteen_row_first_four(73, -31, -90, -22);
-    Vec4i g_aiT_thirteen_row_second_four(78, 67, -38, -90);
-    Vec4i g_aiT_thirteen_row_third_four(-13, 82, 61, -46);
-    Vec4i g_aiT_thirteen_row_fourth_four(-88, -4, 85, 54);
-
-    Vec4i g_aiT_fifteen_row_first_four(67, -54, -78, 38);
-    Vec4i g_aiT_fifteen_row_second_four(85, -22, -90,  4);
-    Vec4i g_aiT_fifteen_row_third_four(90, 13, -88, -31);
-    Vec4i g_aiT_fifteen_row_fourth_four(82, 46, -73, -61);
-
-    Vec4i g_aiT_seventeen_row_first_four(61, -73, -46, 82);
-    Vec4i g_aiT_seventeen_row_second_four(31, -88, -13, 90);
-    Vec4i g_aiT_seventeen_row_third_four(-4, -90, 22, 85);
-    Vec4i g_aiT_seventeen_row_fourth_four(-38, -78, 54, 67);
-
-    Vec4i g_aiT_nineteen_row_first_four(54, -85, -4, 88);
-    Vec4i g_aiT_nineteen_row_second_four(-46, -61, 82, 13);
-    Vec4i g_aiT_nineteen_row_third_four(-90, 38, 67, -78);
-    Vec4i g_aiT_nineteen_row_fourth_four(-22, 90, -31, -73);
-
-    Vec4i g_aiT_twentyone_row_first_four(46, -90, 38, 54);
-    Vec4i g_aiT_twentyone_row_second_four(-90, 31, 61, -88);
-    Vec4i g_aiT_twentyone_row_third_four(22, 67, -85, 13);
-    Vec4i g_aiT_twentyone_row_fourth_four(73, -82,  4, 78);
-
-    Vec4i g_aiT_twentythree_row_first_four(38, -88, 73, -4);
-    Vec4i g_aiT_twentythree_row_second_four(-67, 90, -46, -31);
-    Vec4i g_aiT_twentythree_row_third_four(85, -78, 13, 61);
-    Vec4i g_aiT_twentythree_row_fourth_four(-90, 54, 22, -82);
-
-    Vec4i g_aiT_twentyfive_row_first_four(31, -78, 90, -61);
-    Vec4i g_aiT_twentyfive_row_second_four(4, 54, -88, 82);
-    Vec4i g_aiT_twentyfive_row_third_four(-38, -22, 73, -90);
-    Vec4i g_aiT_twentyfive_row_fourth_four(67, -13, -46, 85);
-
-    Vec4i g_aiT_twentyseven_row_first_four(22, -61, 85, -90);
-    Vec4i g_aiT_twentyseven_row_second_four(73, -38, -4, 46);
-    Vec4i g_aiT_twentyseven_row_third_four(-78, 90, -82, 54);
-    Vec4i g_aiT_twentyseven_row_fourth_four(-13, -31, 67, -88);
-
-    Vec4i g_aiT_twentynine_row_first_four(13, -38, 61, -78);
-    Vec4i g_aiT_twentynine_row_second_four(88, -90, 85, -73);
-    Vec4i g_aiT_twentynine_row_third_four(54, -31,  4, 22);
-    Vec4i g_aiT_twentynine_row_fourth_four(-46, 67, -82, 90);
-
-    Vec4i g_aiT_thirtyone_row_first_four(4, -13, 22, -31);
-    Vec4i g_aiT_thirtyone_row_second_four(38, -46, 54, -61);
-    Vec4i g_aiT_thirtyone_row_third_four(67, -73, 78, -82);
-    Vec4i g_aiT_thirtyone_row_fourth_four(85, -88, 90, -90);
+    Vec4i zero_row_first_two(64, 64, 0, 0);
+    Vec4i eight_row_first_two(83, 36, 0, 0);
+    Vec4i sixten_row_first_two(64, -64, 0, 0);
+    Vec4i twentyfour_row_first_two(36, -83, 0, 0);
+
+    Vec4i four_row_first_four(89, 75, 50, 18);
+    Vec4i twelve_row_first_four(75, -18, -89, -50);
+    Vec4i twenty_row_first_four(50, -89, 18, 75);
+    Vec4i twentyeight_row_first_four(18, -50, 75, -89);
+
+    Vec4i two_row_first_four(90, 87, 80, 70);
+    Vec4i two_row_second_four(57, 43, 25,  9);
+    Vec4i six_row_first_four(87, 57,  9, -43);
+    Vec4i six_row_second_four(-80, -90, -70, -25);
+    Vec4i ten_row_first_four(80,  9, -70, -87);
+    Vec4i ten_row_second_four(-25, 57, 90, 43);
+    Vec4i fourteen_row_first_four(70, -43, -87,  9);
+    Vec4i fourteen_row_second_four(90, 25, -80, -57);
+    Vec4i eighteen_row_first_four(57, -80, -25, 90);
+    Vec4i eighteen_row_second_four(-9, -87, 43, 70);
+    Vec4i twentytwo_row_first_four(43, -90, 57, 25);
+    Vec4i twentytwo_row_second_four(-87, 70,  9, -80);
+    Vec4i twentysix_row_first_four(25, -70, 90, -80);
+    Vec4i twentysix_row_second_four(43,  9, -57, 87);
+    Vec4i thirty_row_first_four(9, -25, 43, -57);
+    Vec4i thirty_row_second_four(70, -80, 87, -90);
+
+    Vec4i one_row_first_four(90, 90, 88, 85);
+    Vec4i one_row_second_four(82, 78, 73, 67);
+    Vec4i one_row_third_four(61, 54, 46, 38);
+    Vec4i one_row_fourth_four(31, 22, 13,  4);
+
+    Vec4i three_row_first_four(90, 82, 67, 46);
+    Vec4i three_row_second_four(22, -4, -31, -54);
+    Vec4i three_row_third_four(-73, -85, -90, -88);
+    Vec4i three_row_fourth_four(-78, -61, -38, -13);
+
+    Vec4i five_row_first_four(88, 67, 31, -13);
+    Vec4i five_row_second_four(-54, -82, -90, -78);
+    Vec4i five_row_third_four(-46, -4, 38, 73);
+    Vec4i five_row_fourth_four(90, 85, 61, 22);
+
+    Vec4i seven_row_first_four(85, 46, -13, -67);
+    Vec4i seven_row_second_four(-90, -73, -22, 38);
+    Vec4i seven_row_third_four(82, 88, 54, -4);
+    Vec4i seven_row_fourth_four(-61, -90, -78, -31);
+
+    Vec4i nine_row_first_four(82, 22, -54, -90);
+    Vec4i nine_row_second_four(-61, 13, 78, 85);
+    Vec4i nine_row_third_four(31, -46, -90, -67);
+    Vec4i nine_row_fourth_four(4, 73, 88, 38);
+
+    Vec4i eleven_row_first_four(78, -4, -82, -73);
+    Vec4i eleven_row_second_four(13, 85, 67, -22);
+    Vec4i eleven_row_third_four(-88, -61, 31, 90);
+    Vec4i eleven_row_fourth_four(54, -38, -90, -46);
+
+    Vec4i thirteen_row_first_four(73, -31, -90, -22);
+    Vec4i thirteen_row_second_four(78, 67, -38, -90);
+    Vec4i thirteen_row_third_four(-13, 82, 61, -46);
+    Vec4i thirteen_row_fourth_four(-88, -4, 85, 54);
+
+    Vec4i fifteen_row_first_four(67, -54, -78, 38);
+    Vec4i fifteen_row_second_four(85, -22, -90,  4);
+    Vec4i fifteen_row_third_four(90, 13, -88, -31);
+    Vec4i fifteen_row_fourth_four(82, 46, -73, -61);
+
+    Vec4i seventeen_row_first_four(61, -73, -46, 82);
+    Vec4i seventeen_row_second_four(31, -88, -13, 90);
+    Vec4i seventeen_row_third_four(-4, -90, 22, 85);
+    Vec4i seventeen_row_fourth_four(-38, -78, 54, 67);
+
+    Vec4i nineteen_row_first_four(54, -85, -4, 88);
+    Vec4i nineteen_row_second_four(-46, -61, 82, 13);
+    Vec4i nineteen_row_third_four(-90, 38, 67, -78);
+    Vec4i nineteen_row_fourth_four(-22, 90, -31, -73);
+
+    Vec4i twentyone_row_first_four(46, -90, 38, 54);
+    Vec4i twentyone_row_second_four(-90, 31, 61, -88);
+    Vec4i twentyone_row_third_four(22, 67, -85, 13);
+    Vec4i twentyone_row_fourth_four(73, -82,  4, 78);
+
+    Vec4i twentythree_row_first_four(38, -88, 73, -4);
+    Vec4i twentythree_row_second_four(-67, 90, -46, -31);
+    Vec4i twentythree_row_third_four(85, -78, 13, 61);
+    Vec4i twentythree_row_fourth_four(-90, 54, 22, -82);
+
+    Vec4i twentyfive_row_first_four(31, -78, 90, -61);
+    Vec4i twentyfive_row_second_four(4, 54, -88, 82);
+    Vec4i twentyfive_row_third_four(-38, -22, 73, -90);
+    Vec4i twentyfive_row_fourth_four(67, -13, -46, 85);
+
+    Vec4i twentyseven_row_first_four(22, -61, 85, -90);
+    Vec4i twentyseven_row_second_four(73, -38, -4, 46);
+    Vec4i twentyseven_row_third_four(-78, 90, -82, 54);
+    Vec4i twentyseven_row_fourth_four(-13, -31, 67, -88);
+
+    Vec4i twentynine_row_first_four(13, -38, 61, -78);
+    Vec4i twentynine_row_second_four(88, -90, 85, -73);
+    Vec4i twentynine_row_third_four(54, -31,  4, 22);
+    Vec4i twentynine_row_fourth_four(-46, 67, -82, 90);
+
+    Vec4i thirtyone_row_first_four(4, -13, 22, -31);
+    Vec4i thirtyone_row_second_four(38, -46, 54, -61);
+    Vec4i thirtyone_row_third_four(67, -73, 78, -82);
+    Vec4i thirtyone_row_fourth_four(85, -88, 90, -90);
 
     for (j = 0; j < line; j++)
     {
@@ -1425,20 +1425,20 @@
         Vec4i EEEE = EEEE_first_half + EEEE_second_half;
         Vec4i EEEO = EEEE_first_half - EEEE_second_half;
 
-        int dst0_hresult = (horizontal_add(g_aiT_zero_row_first_two * EEEE) + add) >> shift;
-        int dst8_hresult = (horizontal_add(g_aiT_eight_row_first_two * EEEO) + add) >> shift;
-        int dst16_hresult = (horizontal_add(g_aiT_sixten_row_first_two * EEEE) + add) >> shift;
-        int dst24_hresult = (horizontal_add(g_aiT_twentyfour_row_first_two * EEEO) + add) >> shift;
+        int dst0_hresult = (horizontal_add(zero_row_first_two * EEEE) + add) >> shift;
+        int dst8_hresult = (horizontal_add(eight_row_first_two * EEEO) + add) >> shift;
+        int dst16_hresult = (horizontal_add(sixten_row_first_two * EEEE) + add) >> shift;
+        int dst24_hresult = (horizontal_add(twentyfour_row_first_two * EEEO) + add) >> shift;
 
         dst[0] = dst0_hresult;
         dst[8 * line] = dst8_hresult;
         dst[16 * line] = dst16_hresult;
         dst[24 * line] = dst24_hresult;
 
-        int dst4_hresult = (horizontal_add(g_aiT_four_row_first_four * EEO) + add) >> shift;
-        int dst12_hresult = (horizontal_add(g_aiT_twelve_row_first_four * EEO) + add) >> shift;
-        int dst20_hresult = (horizontal_add(g_aiT_twenty_row_first_four * EEO) + add) >> shift;
-        int dst28_hresult = (horizontal_add(g_aiT_twentyeight_row_first_four * EEO) + add) >> shift;
+        int dst4_hresult = (horizontal_add(four_row_first_four * EEO) + add) >> shift;
+        int dst12_hresult = (horizontal_add(twelve_row_first_four * EEO) + add) >> shift;
+        int dst20_hresult = (horizontal_add(twenty_row_first_four * EEO) + add) >> shift;
+        int dst28_hresult = (horizontal_add(twentyeight_row_first_four * EEO) + add) >> shift;
 
         dst[4 * line] = dst4_hresult;
         dst[12 * line] = dst12_hresult;
@@ -1446,29 +1446,29 @@
         dst[28 * line] = dst28_hresult;
 
         int dst2_hresult =
-            (horizontal_add((g_aiT_two_row_first_four *
-                             EO_first_four) + (g_aiT_two_row_second_four * EO_last_four)) + add) >> shift;
+            (horizontal_add((two_row_first_four *
+                             EO_first_four) + (two_row_second_four * EO_last_four)) + add) >> shift;
         int dst6_hresult =
-            (horizontal_add((g_aiT_six_row_first_four *
-                             EO_first_four) + (g_aiT_six_row_second_four * EO_last_four)) + add) >> shift;
+            (horizontal_add((six_row_first_four *
+                             EO_first_four) + (six_row_second_four * EO_last_four)) + add) >> shift;
         int dst10_hresult =
-            (horizontal_add((g_aiT_ten_row_first_four *
-                             EO_first_four) + (g_aiT_ten_row_second_four * EO_last_four)) + add) >> shift;
+            (horizontal_add((ten_row_first_four *
+                             EO_first_four) + (ten_row_second_four * EO_last_four)) + add) >> shift;
         int dst14_hresult =
-            (horizontal_add((g_aiT_fourteen_row_first_four *
-                             EO_first_four) + (g_aiT_fourteen_row_second_four * EO_last_four)) + add) >> shift;
+            (horizontal_add((fourteen_row_first_four *
+                             EO_first_four) + (fourteen_row_second_four * EO_last_four)) + add) >> shift;
         int dst18_hresult =
-            (horizontal_add((g_aiT_eighteen_row_first_four *
-                             EO_first_four) + (g_aiT_eighteen_row_second_four * EO_last_four)) + add) >> shift;
+            (horizontal_add((eighteen_row_first_four *
+                             EO_first_four) + (eighteen_row_second_four * EO_last_four)) + add) >> shift;
         int dst22_hresult =
-            (horizontal_add((g_aiT_twentytwo_row_first_four *
-                             EO_first_four) + (g_aiT_twentytwo_row_second_four * EO_last_four)) + add) >> shift;
+            (horizontal_add((twentytwo_row_first_four *
+                             EO_first_four) + (twentytwo_row_second_four * EO_last_four)) + add) >> shift;
         int dst26_hresult =
-            (horizontal_add((g_aiT_twentysix_row_first_four *
-                             EO_first_four) + (g_aiT_twentysix_row_second_four * EO_last_four)) + add) >> shift;
+            (horizontal_add((twentysix_row_first_four *
+                             EO_first_four) + (twentysix_row_second_four * EO_last_four)) + add) >> shift;
         int dst30_hresult =
-            (horizontal_add((g_aiT_thirty_row_first_four *
-                             EO_first_four) + (g_aiT_thirty_row_second_four * EO_last_four)) + add) >> shift;
+            (horizontal_add((thirty_row_first_four *
+                             EO_first_four) + (thirty_row_second_four * EO_last_four)) + add) >> shift;
 
         dst[2 * line] = dst2_hresult;
         dst[6 * line] = dst6_hresult;
@@ -1479,42 +1479,42 @@
         dst[26 * line] = dst26_hresult;
         dst[30 * line] = dst30_hresult;
 
-        Vec4i dst1_temp = (g_aiT_one_row_first_four * O_first_four) + (g_aiT_one_row_second_four * O_second_four) +
-            (g_aiT_one_row_third_four * O_third_four) + (g_aiT_one_row_fourth_four * O_last_four);
-        Vec4i dst3_temp = (g_aiT_three_row_first_four * O_first_four) + (g_aiT_three_row_second_four * O_second_four) +
-            (g_aiT_three_row_third_four * O_third_four) + (g_aiT_three_row_fourth_four * O_last_four);
-        Vec4i dst5_temp = (g_aiT_five_row_first_four * O_first_four) + (g_aiT_five_row_second_four * O_second_four) +
-            (g_aiT_five_row_third_four * O_third_four) + (g_aiT_five_row_fourth_four * O_last_four);
-        Vec4i dst7_temp = (g_aiT_seven_row_first_four * O_first_four) + (g_aiT_seven_row_second_four * O_second_four) +
-            (g_aiT_seven_row_third_four * O_third_four) + (g_aiT_seven_row_fourth_four * O_last_four);
-        Vec4i dst9_temp = (g_aiT_nine_row_first_four * O_first_four) + (g_aiT_nine_row_second_four * O_second_four) +
-            (g_aiT_nine_row_third_four * O_third_four) + (g_aiT_nine_row_fourth_four * O_last_four);
-        Vec4i dst11_temp = (g_aiT_eleven_row_first_four * O_first_four) + (g_aiT_eleven_row_second_four * O_second_four) +
-            (g_aiT_eleven_row_third_four * O_third_four) + (g_aiT_eleven_row_fourth_four * O_last_four);
-        Vec4i dst13_temp = (g_aiT_thirteen_row_first_four * O_first_four) + (g_aiT_thirteen_row_second_four * O_second_four) +
-            (g_aiT_thirteen_row_third_four * O_third_four) + (g_aiT_thirteen_row_fourth_four * O_last_four);
-        Vec4i dst15_temp = (g_aiT_fifteen_row_first_four * O_first_four) + (g_aiT_fifteen_row_second_four * O_second_four) +
-            (g_aiT_fifteen_row_third_four * O_third_four) + (g_aiT_fifteen_row_fourth_four * O_last_four);
-        Vec4i dst17_temp = (g_aiT_seventeen_row_first_four * O_first_four) + (g_aiT_seventeen_row_second_four * O_second_four) +
-            (g_aiT_seventeen_row_third_four * O_third_four) + (g_aiT_seventeen_row_fourth_four * O_last_four);
-        Vec4i dst19_temp = (g_aiT_nineteen_row_first_four * O_first_four) + (g_aiT_nineteen_row_second_four * O_second_four) +
-            (g_aiT_nineteen_row_third_four * O_third_four) + (g_aiT_nineteen_row_fourth_four * O_last_four);
-        Vec4i dst21_temp = (g_aiT_twentyone_row_first_four * O_first_four) + (g_aiT_twentyone_row_second_four * O_second_four) +
-            (g_aiT_twentyone_row_third_four * O_third_four) + (g_aiT_twentyone_row_fourth_four * O_last_four);
+        Vec4i dst1_temp = (one_row_first_four * O_first_four) + (one_row_second_four * O_second_four) +
+            (one_row_third_four * O_third_four) + (one_row_fourth_four * O_last_four);
+        Vec4i dst3_temp = (three_row_first_four * O_first_four) + (three_row_second_four * O_second_four) +
+            (three_row_third_four * O_third_four) + (three_row_fourth_four * O_last_four);
+        Vec4i dst5_temp = (five_row_first_four * O_first_four) + (five_row_second_four * O_second_four) +
+            (five_row_third_four * O_third_four) + (five_row_fourth_four * O_last_four);
+        Vec4i dst7_temp = (seven_row_first_four * O_first_four) + (seven_row_second_four * O_second_four) +
+            (seven_row_third_four * O_third_four) + (seven_row_fourth_four * O_last_four);
+        Vec4i dst9_temp = (nine_row_first_four * O_first_four) + (nine_row_second_four * O_second_four) +
+            (nine_row_third_four * O_third_four) + (nine_row_fourth_four * O_last_four);
+        Vec4i dst11_temp = (eleven_row_first_four * O_first_four) + (eleven_row_second_four * O_second_four) +
+            (eleven_row_third_four * O_third_four) + (eleven_row_fourth_four * O_last_four);
+        Vec4i dst13_temp = (thirteen_row_first_four * O_first_four) + (thirteen_row_second_four * O_second_four) +
+            (thirteen_row_third_four * O_third_four) + (thirteen_row_fourth_four * O_last_four);
+        Vec4i dst15_temp = (fifteen_row_first_four * O_first_four) + (fifteen_row_second_four * O_second_four) +
+            (fifteen_row_third_four * O_third_four) + (fifteen_row_fourth_four * O_last_four);
+        Vec4i dst17_temp = (seventeen_row_first_four * O_first_four) + (seventeen_row_second_four * O_second_four) +
+            (seventeen_row_third_four * O_third_four) + (seventeen_row_fourth_four * O_last_four);
+        Vec4i dst19_temp = (nineteen_row_first_four * O_first_four) + (nineteen_row_second_four * O_second_four) +
+            (nineteen_row_third_four * O_third_four) + (nineteen_row_fourth_four * O_last_four);
+        Vec4i dst21_temp = (twentyone_row_first_four * O_first_four) + (twentyone_row_second_four * O_second_four) +
+            (twentyone_row_third_four * O_third_four) + (twentyone_row_fourth_four * O_last_four);
         Vec4i dst23_temp =
-            (g_aiT_twentythree_row_first_four * O_first_four) + (g_aiT_twentythree_row_second_four * O_second_four) +
-            (g_aiT_twentythree_row_third_four * O_third_four) + (g_aiT_twentythree_row_fourth_four * O_last_four);
+            (twentythree_row_first_four * O_first_four) + (twentythree_row_second_four * O_second_four) +
+            (twentythree_row_third_four * O_third_four) + (twentythree_row_fourth_four * O_last_four);
         Vec4i dst25_temp =
-            (g_aiT_twentyfive_row_first_four * O_first_four) + (g_aiT_twentyfive_row_second_four * O_second_four) +
-            (g_aiT_twentyfive_row_third_four * O_third_four) + (g_aiT_twentyfive_row_fourth_four * O_last_four);
+            (twentyfive_row_first_four * O_first_four) + (twentyfive_row_second_four * O_second_four) +
+            (twentyfive_row_third_four * O_third_four) + (twentyfive_row_fourth_four * O_last_four);
         Vec4i dst27_temp =
-            (g_aiT_twentyseven_row_first_four * O_first_four) + (g_aiT_twentyseven_row_second_four * O_second_four) +
-            (g_aiT_twentyseven_row_third_four * O_third_four) + (g_aiT_twentyseven_row_fourth_four * O_last_four);
+            (twentyseven_row_first_four * O_first_four) + (twentyseven_row_second_four * O_second_four) +
+            (twentyseven_row_third_four * O_third_four) + (twentyseven_row_fourth_four * O_last_four);
         Vec4i dst29_temp =
-            (g_aiT_twentynine_row_first_four * O_first_four) + (g_aiT_twentynine_row_second_four * O_second_four) +
-            (g_aiT_twentynine_row_third_four * O_third_four) + (g_aiT_twentynine_row_fourth_four * O_last_four);
-        Vec4i dst31_temp = (g_aiT_thirtyone_row_first_four * O_first_four) + (g_aiT_thirtyone_row_second_four * O_second_four) +
-            (g_aiT_thirtyone_row_third_four * O_third_four) + (g_aiT_thirtyone_row_fourth_four * O_last_four);
+            (twentynine_row_first_four * O_first_four) + (twentynine_row_second_four * O_second_four) +
+            (twentynine_row_third_four * O_third_four) + (twentynine_row_fourth_four * O_last_four);
+        Vec4i dst31_temp = (thirtyone_row_first_four * O_first_four) + (thirtyone_row_second_four * O_second_four) +
+            (thirtyone_row_third_four * O_third_four) + (thirtyone_row_fourth_four * O_last_four);
 
         dst[1 * line] = (horizontal_add(dst1_temp) + add) >> shift;
         dst[3 * line] = (horizontal_add(dst3_temp) + add) >> shift;
@@ -1538,7 +1538,7 @@
     }
 }
 
-void xDCT32(short *src, int *dst, intptr_t nStride)
+void dct32(short *src, int *dst, intptr_t stride)
 {
     const int shift_1st = 4;
     const int shift_2nd = 11;
@@ -1548,7 +1548,7 @@
 
     for (int i = 0; i < 32; i++)
     {
-        memcpy(&block[i * 32], &src[i * nStride], 32 * sizeof(short));
+        memcpy(&block[i * 32], &src[i * stride], 32 * sizeof(short));
     }
 
     partialButterfly32(block, coef, shift_1st, 32);
@@ -1718,7 +1718,7 @@
 #undef MAKE_COEF16
 };
 
-void xDCT32(short *src, int *dst, intptr_t nStride)
+void dct32(short *src, int *dst, intptr_t stride)
 {
     // Const
     __m128i c_8     = _mm_set1_epi32(8);
@@ -1742,38 +1742,38 @@
     // DCT1
     for (i = 0; i < 32 / 8; i++)
     {
-        T00A = _mm_load_si128((__m128i*)&src[(i * 8 + 0) * nStride + 0]);    // [07 06 05 04 03 02 01 00]
-        T00B = _mm_load_si128((__m128i*)&src[(i * 8 + 0) * nStride + 8]);    // [15 14 13 12 11 10 09 08]
-        T00C = _mm_load_si128((__m128i*)&src[(i * 8 + 0) * nStride + 16]);    // [23 22 21 20 19 18 17 16]
-        T00D = _mm_load_si128((__m128i*)&src[(i * 8 + 0) * nStride + 24]);    // [31 30 29 28 27 26 25 24]
-        T01A = _mm_load_si128((__m128i*)&src[(i * 8 + 1) * nStride + 0]);
-        T01B = _mm_load_si128((__m128i*)&src[(i * 8 + 1) * nStride + 8]);
-        T01C = _mm_load_si128((__m128i*)&src[(i * 8 + 1) * nStride + 16]);
-        T01D = _mm_load_si128((__m128i*)&src[(i * 8 + 1) * nStride + 24]);
-        T02A = _mm_load_si128((__m128i*)&src[(i * 8 + 2) * nStride + 0]);
-        T02B = _mm_load_si128((__m128i*)&src[(i * 8 + 2) * nStride + 8]);
-        T02C = _mm_load_si128((__m128i*)&src[(i * 8 + 2) * nStride + 16]);
-        T02D = _mm_load_si128((__m128i*)&src[(i * 8 + 2) * nStride + 24]);
-        T03A = _mm_load_si128((__m128i*)&src[(i * 8 + 3) * nStride + 0]);
-        T03B = _mm_load_si128((__m128i*)&src[(i * 8 + 3) * nStride + 8]);
-        T03C = _mm_load_si128((__m128i*)&src[(i * 8 + 3) * nStride + 16]);
-        T03D = _mm_load_si128((__m128i*)&src[(i * 8 + 3) * nStride + 24]);
-        T04A = _mm_load_si128((__m128i*)&src[(i * 8 + 4) * nStride + 0]);
-        T04B = _mm_load_si128((__m128i*)&src[(i * 8 + 4) * nStride + 8]);
-        T04C = _mm_load_si128((__m128i*)&src[(i * 8 + 4) * nStride + 16]);
-        T04D = _mm_load_si128((__m128i*)&src[(i * 8 + 4) * nStride + 24]);
-        T05A = _mm_load_si128((__m128i*)&src[(i * 8 + 5) * nStride + 0]);
-        T05B = _mm_load_si128((__m128i*)&src[(i * 8 + 5) * nStride + 8]);
-        T05C = _mm_load_si128((__m128i*)&src[(i * 8 + 5) * nStride + 16]);
-        T05D = _mm_load_si128((__m128i*)&src[(i * 8 + 5) * nStride + 24]);
-        T06A = _mm_load_si128((__m128i*)&src[(i * 8 + 6) * nStride + 0]);
-        T06B = _mm_load_si128((__m128i*)&src[(i * 8 + 6) * nStride + 8]);
-        T06C = _mm_load_si128((__m128i*)&src[(i * 8 + 6) * nStride + 16]);
-        T06D = _mm_load_si128((__m128i*)&src[(i * 8 + 6) * nStride + 24]);
-        T07A = _mm_load_si128((__m128i*)&src[(i * 8 + 7) * nStride + 0]);
-        T07B = _mm_load_si128((__m128i*)&src[(i * 8 + 7) * nStride + 8]);
-        T07C = _mm_load_si128((__m128i*)&src[(i * 8 + 7) * nStride + 16]);
-        T07D = _mm_load_si128((__m128i*)&src[(i * 8 + 7) * nStride + 24]);
+        T00A = _mm_load_si128((__m128i*)&src[(i * 8 + 0) * stride + 0]);    // [07 06 05 04 03 02 01 00]
+        T00B = _mm_load_si128((__m128i*)&src[(i * 8 + 0) * stride + 8]);    // [15 14 13 12 11 10 09 08]
+        T00C = _mm_load_si128((__m128i*)&src[(i * 8 + 0) * stride + 16]);    // [23 22 21 20 19 18 17 16]
+        T00D = _mm_load_si128((__m128i*)&src[(i * 8 + 0) * stride + 24]);    // [31 30 29 28 27 26 25 24]
+        T01A = _mm_load_si128((__m128i*)&src[(i * 8 + 1) * stride + 0]);
+        T01B = _mm_load_si128((__m128i*)&src[(i * 8 + 1) * stride + 8]);
+        T01C = _mm_load_si128((__m128i*)&src[(i * 8 + 1) * stride + 16]);
+        T01D = _mm_load_si128((__m128i*)&src[(i * 8 + 1) * stride + 24]);
+        T02A = _mm_load_si128((__m128i*)&src[(i * 8 + 2) * stride + 0]);
+        T02B = _mm_load_si128((__m128i*)&src[(i * 8 + 2) * stride + 8]);
+        T02C = _mm_load_si128((__m128i*)&src[(i * 8 + 2) * stride + 16]);
+        T02D = _mm_load_si128((__m128i*)&src[(i * 8 + 2) * stride + 24]);
+        T03A = _mm_load_si128((__m128i*)&src[(i * 8 + 3) * stride + 0]);
+        T03B = _mm_load_si128((__m128i*)&src[(i * 8 + 3) * stride + 8]);
+        T03C = _mm_load_si128((__m128i*)&src[(i * 8 + 3) * stride + 16]);
+        T03D = _mm_load_si128((__m128i*)&src[(i * 8 + 3) * stride + 24]);
+        T04A = _mm_load_si128((__m128i*)&src[(i * 8 + 4) * stride + 0]);
+        T04B = _mm_load_si128((__m128i*)&src[(i * 8 + 4) * stride + 8]);
+        T04C = _mm_load_si128((__m128i*)&src[(i * 8 + 4) * stride + 16]);
+        T04D = _mm_load_si128((__m128i*)&src[(i * 8 + 4) * stride + 24]);
+        T05A = _mm_load_si128((__m128i*)&src[(i * 8 + 5) * stride + 0]);
+        T05B = _mm_load_si128((__m128i*)&src[(i * 8 + 5) * stride + 8]);
+        T05C = _mm_load_si128((__m128i*)&src[(i * 8 + 5) * stride + 16]);
+        T05D = _mm_load_si128((__m128i*)&src[(i * 8 + 5) * stride + 24]);
+        T06A = _mm_load_si128((__m128i*)&src[(i * 8 + 6) * stride + 0]);
+        T06B = _mm_load_si128((__m128i*)&src[(i * 8 + 6) * stride + 8]);
+        T06C = _mm_load_si128((__m128i*)&src[(i * 8 + 6) * stride + 16]);
+        T06D = _mm_load_si128((__m128i*)&src[(i * 8 + 6) * stride + 24]);
+        T07A = _mm_load_si128((__m128i*)&src[(i * 8 + 7) * stride + 0]);
+        T07B = _mm_load_si128((__m128i*)&src[(i * 8 + 7) * stride + 8]);
+        T07C = _mm_load_si128((__m128i*)&src[(i * 8 + 7) * stride + 16]);
+        T07D = _mm_load_si128((__m128i*)&src[(i * 8 + 7) * stride + 24]);
 
         T00A = _mm_shuffle_epi8(T00A, _mm_load_si128((__m128i*)tab_dct_16_0[1]));    // [05 02 06 01 04 03 07 00]
         T00B = _mm_shuffle_epi8(T00B, _mm_load_si128((__m128i*)tab_dct_32_0[0]));    // [10 13 09 14 11 12 08 15]
@@ -4090,7 +4090,7 @@
     p.dct[DCT_4x4] = dct4;
     p.dct[DCT_8x8] = dct8;
     p.dct[DCT_16x16] = dct16;
-    p.dct[DCT_32x32] = xDCT32;
+    p.dct[DCT_32x32] = dct32;
 #endif
     // TODO: I am not sure the IDCT works on 16bpp mode
     p.idct[IDST_4x4] = xIDST4;


More information about the x265-devel mailing list