[x265] primitives: intra_pred[4][35] => intra_pred[35][4] (avoid *35)

Satoshi Nakagawa nakagawa424 at oki.com
Fri Sep 19 09:38:42 CEST 2014


# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1411112115 -32400
#      Fri Sep 19 16:35:15 2014 +0900
# Node ID 3a2c1caf0f80e4ee2c1216636a3f9d067f719d6f
# Parent  4680ab4f92b8cc809b1e8dbc927126ec70bcc5c5
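primitives: intra_pred[4][35] => intra_pred[35][4] (avoid *35)

Swap the index order of the intra_pred function table from [size][mode] to
[mode][size]. The inner dimension is now NUM_TR_SIZE (4) instead of
NUM_INTRA_MODE (35), so looking up an entry no longer needs a multiply by 35.

Other changes carried by this patch:
* add NUM_TR_SIZE and use it to size intra_pred and intra_pred_allangs
* declare g_intraFilterFlags with NUM_INTRA_MODE instead of a literal 35
* remove the intra_planar_t typedef
* TComPattern: fold the 8 <= tuSize <= 32 test into bUseFilteredPredictions
* predIntraLumaAng: bFilter no longer special-cases PLANAR_IDX
* predIntraChromaAng: point refAbv at the source samples instead of copying
  them into a local buffer
* intrapredharness: the intra_dc [f=1] benchmark now passes filter = 1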

diff -r 4680ab4f92b8 -r 3a2c1caf0f80 source/Lib/TLibCommon/TComPattern.cpp
--- a/source/Lib/TLibCommon/TComPattern.cpp	Thu Sep 18 18:16:25 2014 +0530
+++ b/source/Lib/TLibCommon/TComPattern.cpp	Fri Sep 19 16:35:15 2014 +0900
@@ -68,9 +68,9 @@
 
     fillReferenceSamples(roiOrigin, picStride, adiTemp, intraNeighbors);
 
-    bool bUseFilteredPredictions = (dirMode == ALL_IDX || (g_intraFilterFlags[dirMode] & tuSize));
+    bool bUseFilteredPredictions = (dirMode == ALL_IDX ? (8 | 16 | 32) & tuSize : g_intraFilterFlags[dirMode] & tuSize);
 
-    if (bUseFilteredPredictions && 8 <= tuSize && tuSize <= 32)
+    if (bUseFilteredPredictions)
     {
         // generate filtered intra prediction samples
         // left and left above border + above and above right border + top left corner = length of 3. filter buffer
diff -r 4680ab4f92b8 -r 3a2c1caf0f80 source/Lib/TLibCommon/TComRom.cpp
--- a/source/Lib/TLibCommon/TComRom.cpp	Thu Sep 18 18:16:25 2014 +0530
+++ b/source/Lib/TLibCommon/TComRom.cpp	Fri Sep 19 16:35:15 2014 +0900
@@ -497,7 +497,7 @@
 };
 
 /* g_intraFilterFlags[dir] & trSize */
-const uint8_t g_intraFilterFlags[35] =
+const uint8_t g_intraFilterFlags[NUM_INTRA_MODE] =
 {
     0x38, 0x00,
     0x38, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x20, 0x00, 0x20, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
diff -r 4680ab4f92b8 -r 3a2c1caf0f80 source/Lib/TLibCommon/TComRom.h
--- a/source/Lib/TLibCommon/TComRom.h	Thu Sep 18 18:16:25 2014 +0530
+++ b/source/Lib/TLibCommon/TComRom.h	Fri Sep 19 16:35:15 2014 +0900
@@ -153,7 +153,7 @@
 extern const uint8_t x265_exp2_lut[64];
 
 // Intra tables
-extern const uint8_t g_intraFilterFlags[35];
+extern const uint8_t g_intraFilterFlags[NUM_INTRA_MODE];
 
 extern const uint32_t g_depthInc[3][4];
 
diff -r 4680ab4f92b8 -r 3a2c1caf0f80 source/common/intrapred.cpp
--- a/source/common/intrapred.cpp	Thu Sep 18 18:16:25 2014 +0530
+++ b/source/common/intrapred.cpp	Fri Sep 19 16:35:15 2014 +0900
@@ -281,22 +281,22 @@
 
 void Setup_C_IPredPrimitives(EncoderPrimitives& p)
 {
-    p.intra_pred[BLOCK_4x4][0] = planar_pred_c<2>;
-    p.intra_pred[BLOCK_8x8][0] = planar_pred_c<3>;
-    p.intra_pred[BLOCK_16x16][0] = planar_pred_c<4>;
-    p.intra_pred[BLOCK_32x32][0] = planar_pred_c<5>;
+    p.intra_pred[0][BLOCK_4x4] = planar_pred_c<2>;
+    p.intra_pred[0][BLOCK_8x8] = planar_pred_c<3>;
+    p.intra_pred[0][BLOCK_16x16] = planar_pred_c<4>;
+    p.intra_pred[0][BLOCK_32x32] = planar_pred_c<5>;
 
     // Intra Prediction DC
-    p.intra_pred[BLOCK_4x4][1] = intra_pred_dc_c<4>;
-    p.intra_pred[BLOCK_8x8][1] = intra_pred_dc_c<8>;
-    p.intra_pred[BLOCK_16x16][1] = intra_pred_dc_c<16>;
-    p.intra_pred[BLOCK_32x32][1] = intra_pred_dc_c<32>;
+    p.intra_pred[1][BLOCK_4x4] = intra_pred_dc_c<4>;
+    p.intra_pred[1][BLOCK_8x8] = intra_pred_dc_c<8>;
+    p.intra_pred[1][BLOCK_16x16] = intra_pred_dc_c<16>;
+    p.intra_pred[1][BLOCK_32x32] = intra_pred_dc_c<32>;
     for (int i = 2; i < NUM_INTRA_MODE; i++)
     {
-        p.intra_pred[BLOCK_4x4][i] = intra_pred_ang_c<4>;
-        p.intra_pred[BLOCK_8x8][i] = intra_pred_ang_c<8>;
-        p.intra_pred[BLOCK_16x16][i] = intra_pred_ang_c<16>;
-        p.intra_pred[BLOCK_32x32][i] = intra_pred_ang_c<32>;
+        p.intra_pred[i][BLOCK_4x4] = intra_pred_ang_c<4>;
+        p.intra_pred[i][BLOCK_8x8] = intra_pred_ang_c<8>;
+        p.intra_pred[i][BLOCK_16x16] = intra_pred_ang_c<16>;
+        p.intra_pred[i][BLOCK_32x32] = intra_pred_ang_c<32>;
     }
 
     p.intra_pred_allangs[BLOCK_4x4] = all_angs_pred_c<2>;
diff -r 4680ab4f92b8 -r 3a2c1caf0f80 source/common/primitives.h
--- a/source/common/primitives.h	Thu Sep 18 18:16:25 2014 +0530
+++ b/source/common/primitives.h	Fri Sep 19 16:35:15 2014 +0900
@@ -91,6 +91,8 @@
     NUM_SQUARE_BLOCKS
 };
 
+enum { NUM_TR_SIZE = 4 };
+
 // NOTE: Not all DCT functions support dest stride
 enum Dcts
 {
@@ -145,7 +147,6 @@
 typedef void (*pixelavg_pp_t)(pixel *dst, intptr_t dstride, pixel *src0, intptr_t sstride0, pixel *src1, intptr_t sstride1, int weight);
 typedef void (*blockfill_s_t)(int16_t *dst, intptr_t dstride, int16_t val);
 
-typedef void (*intra_planar_t)(pixel* above, pixel* left, pixel* dst, intptr_t dstStride);
 typedef void (*intra_pred_t)(pixel* dst, intptr_t dstStride, pixel *refLeft, pixel *refAbove, int dirMode, int bFilter);
 typedef void (*intra_allangs_t)(pixel *dst, pixel *above0, pixel *left0, pixel *above1, pixel *left1, int bLuma);
 
@@ -259,8 +260,8 @@
     pixelavg_pp_t   pixelavg_pp[NUM_LUMA_PARTITIONS];
     addAvg_t        luma_addAvg[NUM_LUMA_PARTITIONS];
 
-    intra_pred_t    intra_pred[NUM_SQUARE_BLOCKS - 1][NUM_INTRA_MODE];
-    intra_allangs_t intra_pred_allangs[NUM_SQUARE_BLOCKS - 1];
+    intra_pred_t    intra_pred[NUM_INTRA_MODE][NUM_TR_SIZE];
+    intra_allangs_t intra_pred_allangs[NUM_TR_SIZE];
     scale_t         scale1D_128to64;
     scale_t         scale2D_64to32;
 
diff -r 4680ab4f92b8 -r 3a2c1caf0f80 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Thu Sep 18 18:16:25 2014 +0530
+++ b/source/common/x86/asm-primitives.cpp	Fri Sep 19 16:35:15 2014 +0900
@@ -932,26 +932,26 @@
     SETUP_CHROMA_ADDAVG_FUNC_DEF_422(32, 64, cpu);
 
 #define SETUP_INTRA_ANG_COMMON(mode, fno, cpu) \
-    p.intra_pred[BLOCK_4x4][mode] = x265_intra_pred_ang4_ ## fno ## _ ## cpu; \
-    p.intra_pred[BLOCK_8x8][mode] = x265_intra_pred_ang8_ ## fno ## _ ## cpu; \
-    p.intra_pred[BLOCK_16x16][mode] = x265_intra_pred_ang16_ ## fno ## _ ## cpu; \
-    p.intra_pred[BLOCK_32x32][mode] = x265_intra_pred_ang32_ ## fno ## _ ## cpu;
+    p.intra_pred[mode][BLOCK_4x4] = x265_intra_pred_ang4_ ## fno ## _ ## cpu; \
+    p.intra_pred[mode][BLOCK_8x8] = x265_intra_pred_ang8_ ## fno ## _ ## cpu; \
+    p.intra_pred[mode][BLOCK_16x16] = x265_intra_pred_ang16_ ## fno ## _ ## cpu; \
+    p.intra_pred[mode][BLOCK_32x32] = x265_intra_pred_ang32_ ## fno ## _ ## cpu;
 
 #define SETUP_INTRA_ANG(mode, fno, cpu) \
-    p.intra_pred[BLOCK_8x8][mode] = x265_intra_pred_ang8_ ## fno ## _ ## cpu; \
-    p.intra_pred[BLOCK_16x16][mode] = x265_intra_pred_ang16_ ## fno ## _ ## cpu; \
-    p.intra_pred[BLOCK_32x32][mode] = x265_intra_pred_ang32_ ## fno ## _ ## cpu;
+    p.intra_pred[mode][BLOCK_8x8] = x265_intra_pred_ang8_ ## fno ## _ ## cpu; \
+    p.intra_pred[mode][BLOCK_16x16] = x265_intra_pred_ang16_ ## fno ## _ ## cpu; \
+    p.intra_pred[mode][BLOCK_32x32] = x265_intra_pred_ang32_ ## fno ## _ ## cpu;
 
 #define SETUP_INTRA_ANG4(mode, fno, cpu) \
-    p.intra_pred[BLOCK_4x4][mode] = x265_intra_pred_ang4_ ## fno ## _ ## cpu;
+    p.intra_pred[mode][BLOCK_4x4] = x265_intra_pred_ang4_ ## fno ## _ ## cpu;
 
 #define SETUP_INTRA_ANG16_32(mode, fno, cpu) \
-    p.intra_pred[BLOCK_16x16][mode] = x265_intra_pred_ang16_ ## fno ## _ ## cpu; \
-    p.intra_pred[BLOCK_32x32][mode] = x265_intra_pred_ang32_ ## fno ## _ ## cpu;
+    p.intra_pred[mode][BLOCK_16x16] = x265_intra_pred_ang16_ ## fno ## _ ## cpu; \
+    p.intra_pred[mode][BLOCK_32x32] = x265_intra_pred_ang32_ ## fno ## _ ## cpu;
 
 #define SETUP_INTRA_ANG4_8(mode, fno, cpu) \
-    p.intra_pred[BLOCK_4x4][mode] = x265_intra_pred_ang4_ ## fno ## _ ## cpu; \
-    p.intra_pred[BLOCK_8x8][mode] = x265_intra_pred_ang8_ ## fno ## _ ## cpu;
+    p.intra_pred[mode][BLOCK_4x4] = x265_intra_pred_ang4_ ## fno ## _ ## cpu; \
+    p.intra_pred[mode][BLOCK_8x8] = x265_intra_pred_ang8_ ## fno ## _ ## cpu;
 
 #define INTRA_ANG_SSSE3(cpu) \
     SETUP_INTRA_ANG_COMMON(2, 2, cpu); \
@@ -1249,11 +1249,11 @@
 
         if (mode < 18)
         {
-            primitives.intra_pred[sizeIdx][mode](buffer, size, left, above, mode, bLuma);
+            primitives.intra_pred[mode][sizeIdx](buffer, size, left, above, mode, bLuma);
             primitives.transpose[sizeIdx](out, buffer, size);
         }
         else
-            primitives.intra_pred[sizeIdx][mode](out, size, left, above, mode, bLuma);
+            primitives.intra_pred[mode][sizeIdx](out, size, left, above, mode, bLuma);
     }
 }
 #endif
@@ -1417,15 +1417,15 @@
         p.cvt16to32_shr[BLOCK_8x8] = x265_cvt16to32_shr_8_sse4;
         p.cvt16to32_shr[BLOCK_16x16] = x265_cvt16to32_shr_16_sse4;
         p.cvt16to32_shr[BLOCK_32x32] = x265_cvt16to32_shr_32_sse4;
-        p.intra_pred[BLOCK_4x4][0] = x265_intra_pred_planar4_sse4;
-        p.intra_pred[BLOCK_8x8][0] = x265_intra_pred_planar8_sse4;
-        p.intra_pred[BLOCK_16x16][0] = x265_intra_pred_planar16_sse4;
-        p.intra_pred[BLOCK_32x32][0] = x265_intra_pred_planar32_sse4;
+        p.intra_pred[0][BLOCK_4x4] = x265_intra_pred_planar4_sse4;
+        p.intra_pred[0][BLOCK_8x8] = x265_intra_pred_planar8_sse4;
+        p.intra_pred[0][BLOCK_16x16] = x265_intra_pred_planar16_sse4;
+        p.intra_pred[0][BLOCK_32x32] = x265_intra_pred_planar32_sse4;
 
-        p.intra_pred[BLOCK_4x4][1] = x265_intra_pred_dc4_sse4;
-        p.intra_pred[BLOCK_8x8][1] = x265_intra_pred_dc8_sse4;
-        p.intra_pred[BLOCK_16x16][1] = x265_intra_pred_dc16_sse4;
-        p.intra_pred[BLOCK_32x32][1] = x265_intra_pred_dc32_sse4;
+        p.intra_pred[1][BLOCK_4x4] = x265_intra_pred_dc4_sse4;
+        p.intra_pred[1][BLOCK_8x8] = x265_intra_pred_dc8_sse4;
+        p.intra_pred[1][BLOCK_16x16] = x265_intra_pred_dc16_sse4;
+        p.intra_pred[1][BLOCK_32x32] = x265_intra_pred_dc32_sse4;
         p.planecopy_cp = x265_upShift_8_sse4;
 
         INTRA_ANG_SSE4_COMMON(sse4);
@@ -1670,20 +1670,20 @@
         p.dequant_normal = x265_dequant_normal_sse4;
         p.weight_pp = x265_weight_pp_sse4;
         p.weight_sp = x265_weight_sp_sse4;
-        p.intra_pred[BLOCK_4x4][0] = x265_intra_pred_planar4_sse4;
-        p.intra_pred[BLOCK_8x8][0] = x265_intra_pred_planar8_sse4;
-        p.intra_pred[BLOCK_16x16][0] = x265_intra_pred_planar16_sse4;
-        p.intra_pred[BLOCK_32x32][0] = x265_intra_pred_planar32_sse4;
+        p.intra_pred[0][BLOCK_4x4] = x265_intra_pred_planar4_sse4;
+        p.intra_pred[0][BLOCK_8x8] = x265_intra_pred_planar8_sse4;
+        p.intra_pred[0][BLOCK_16x16] = x265_intra_pred_planar16_sse4;
+        p.intra_pred[0][BLOCK_32x32] = x265_intra_pred_planar32_sse4;
 
         p.intra_pred_allangs[BLOCK_4x4] = x265_all_angs_pred_4x4_sse4;
         p.intra_pred_allangs[BLOCK_8x8] = x265_all_angs_pred_8x8_sse4;
         p.intra_pred_allangs[BLOCK_16x16] = x265_all_angs_pred_16x16_sse4;
         p.intra_pred_allangs[BLOCK_32x32] = x265_all_angs_pred_32x32_sse4;
 
-        p.intra_pred[BLOCK_4x4][1] = x265_intra_pred_dc4_sse4;
-        p.intra_pred[BLOCK_8x8][1] = x265_intra_pred_dc8_sse4;
-        p.intra_pred[BLOCK_16x16][1] = x265_intra_pred_dc16_sse4;
-        p.intra_pred[BLOCK_32x32][1] = x265_intra_pred_dc32_sse4;
+        p.intra_pred[1][BLOCK_4x4] = x265_intra_pred_dc4_sse4;
+        p.intra_pred[1][BLOCK_8x8] = x265_intra_pred_dc8_sse4;
+        p.intra_pred[1][BLOCK_16x16] = x265_intra_pred_dc16_sse4;
+        p.intra_pred[1][BLOCK_32x32] = x265_intra_pred_dc32_sse4;
 
         INTRA_ANG_SSE4_COMMON(sse4);
         INTRA_ANG_SSE4(sse4);
diff -r 4680ab4f92b8 -r 3a2c1caf0f80 source/common/x86/intrapred.h
--- a/source/common/x86/intrapred.h	Thu Sep 18 18:16:25 2014 +0530
+++ b/source/common/x86/intrapred.h	Fri Sep 19 16:35:15 2014 +0900
@@ -31,10 +31,10 @@
 void x265_intra_pred_dc16_sse4(pixel* dst, intptr_t dstStride, pixel* above, pixel* left, int, int filter);
 void x265_intra_pred_dc32_sse4(pixel* dst, intptr_t dstStride, pixel* above, pixel* left, int, int filter);
 
-void x265_intra_pred_planar4_sse4(pixel* dst, intptr_t dstStride, pixel* above, pixel* left, int, int filter);
-void x265_intra_pred_planar8_sse4(pixel* dst, intptr_t dstStride, pixel* above, pixel* left, int, int filter);
-void x265_intra_pred_planar16_sse4(pixel* dst, intptr_t dstStride, pixel* above, pixel* left, int, int filter);
-void x265_intra_pred_planar32_sse4(pixel* dst, intptr_t dstStride, pixel* above, pixel* left, int, int filter);
+void x265_intra_pred_planar4_sse4(pixel* dst, intptr_t dstStride, pixel* above, pixel* left, int, int);
+void x265_intra_pred_planar8_sse4(pixel* dst, intptr_t dstStride, pixel* above, pixel* left, int, int);
+void x265_intra_pred_planar16_sse4(pixel* dst, intptr_t dstStride, pixel* above, pixel* left, int, int);
+void x265_intra_pred_planar32_sse4(pixel* dst, intptr_t dstStride, pixel* above, pixel* left, int, int);
 
 #define DECL_ANG(bsize, mode, cpu) \
     void x265_intra_pred_ang ## bsize ## _ ## mode ## _ ## cpu(pixel * dst, intptr_t dstStride, pixel * refLeft, pixel * refAbove, int dirMode, int bFilter);
diff -r 4680ab4f92b8 -r 3a2c1caf0f80 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Thu Sep 18 18:16:25 2014 +0530
+++ b/source/encoder/analysis.cpp	Fri Sep 19 16:35:15 2014 +0900
@@ -1840,7 +1840,7 @@
     uint32_t rbits = getIntraRemModeBits(cu, partOffset, depth, preds, mpms);
 
     // DC
-    primitives.intra_pred[sizeIdx][DC_IDX](tmp, scaleStride, left, above, 0, (scaleTuSize <= 16));
+    primitives.intra_pred[DC_IDX][sizeIdx](tmp, scaleStride, left, above, 0, (scaleTuSize <= 16));
     bsad = sa8d(fenc, scaleStride, tmp, scaleStride) << costShift;
     bmode = mode = DC_IDX;
     bbits = (mpms & ((uint64_t)1 << mode)) ? getIntraModeBits(cu, mode, partOffset, depth) : rbits;
@@ -1849,14 +1849,14 @@
     pixel *abovePlanar   = above;
     pixel *leftPlanar    = left;
 
-    if (tuSize >= 8 && tuSize <= 32)
+    if (tuSize & (8 | 16 | 32))
     {
         abovePlanar = aboveFiltered;
         leftPlanar  = leftFiltered;
     }
 
     // PLANAR
-    primitives.intra_pred[sizeIdx][PLANAR_IDX](tmp, scaleStride, leftPlanar, abovePlanar, 0, 0);
+    primitives.intra_pred[PLANAR_IDX][sizeIdx](tmp, scaleStride, leftPlanar, abovePlanar, 0, 0);
     sad = sa8d(fenc, scaleStride, tmp, scaleStride) << costShift;
     mode = PLANAR_IDX;
     bits = (mpms & ((uint64_t)1 << mode)) ? getIntraModeBits(cu, mode, partOffset, depth) : rbits;
diff -r 4680ab4f92b8 -r 3a2c1caf0f80 source/encoder/predict.cpp
--- a/source/encoder/predict.cpp	Thu Sep 18 18:16:25 2014 +0530
+++ b/source/encoder/predict.cpp	Fri Sep 19 16:35:15 2014 +0900
@@ -89,10 +89,10 @@
         refAbv = m_refAboveFlt + tuSize - 1;
     }
 
-    bool bFilter = log2TrSize <= 4 && dirMode != PLANAR_IDX;
+    bool bFilter = log2TrSize <= 4;
     int sizeIdx = log2TrSize - 2;
     X265_CHECK(sizeIdx >= 0 && sizeIdx < 4, "intra block size is out of range\n");
-    primitives.intra_pred[sizeIdx][dirMode](dst, stride, refLft, refAbv, dirMode, bFilter);
+    primitives.intra_pred[dirMode][sizeIdx](dst, stride, refLft, refAbv, dirMode, bFilter);
 }
 
 void Predict::predIntraChromaAng(pixel* src, uint32_t dirMode, pixel* dst, intptr_t stride, uint32_t log2TrSizeC, int chFmt)
@@ -101,7 +101,7 @@
     uint32_t tuSize2 = tuSize << 1;
 
     // Create the prediction
-    pixel refAbv[3 * MAX_CU_SIZE];
+    pixel* refAbv;
     pixel refLft[3 * MAX_CU_SIZE];
 
     bool bUseFilteredPredictions = (chFmt == X265_CSP_I444 && (g_intraFilterFlags[dirMode] & tuSize));
@@ -135,21 +135,21 @@
 
         // initialization of ADI buffers
         int limit = tuSize2 + 1;
-        memcpy(refAbv + tuSize - 1, filterBufN + tuSize2, limit * sizeof(pixel));
+        refAbv = filterBufN + tuSize2;
         for (int k = 0; k < limit; k++)
             refLft[k + tuSize - 1] = filterBufN[tuSize2 - k];   // Smoothened
     }
     else
     {
         int limit = (dirMode <= 25 && dirMode >= 11) ? (tuSize + 1 + 1) : (tuSize2 + 1);
-        memcpy(refAbv + tuSize - 1, src, (limit) * sizeof(pixel));
+        refAbv = src;
         for (int k = 0; k < limit; k++)
             refLft[k + tuSize - 1] = src[k * ADI_BUF_STRIDE];
     }
 
     int sizeIdx = log2TrSizeC - 2;
     X265_CHECK(sizeIdx >= 0 && sizeIdx < 4, "intra block size is out of range\n");
-    primitives.intra_pred[sizeIdx][dirMode](dst, stride, refLft + tuSize - 1, refAbv + tuSize - 1, dirMode, 0);
+    primitives.intra_pred[dirMode][sizeIdx](dst, stride, refLft + tuSize - 1, refAbv, dirMode, 0);
 }
 
 bool Predict::checkIdenticalMotion()
diff -r 4680ab4f92b8 -r 3a2c1caf0f80 source/encoder/search.cpp
--- a/source/encoder/search.cpp	Thu Sep 18 18:16:25 2014 +0530
+++ b/source/encoder/search.cpp	Fri Sep 19 16:35:15 2014 +0900
@@ -1285,7 +1285,7 @@
         uint64_t bcost;
 
         // DC
-        primitives.intra_pred[sizeIdx][DC_IDX](tmp, scaleStride, left, above, 0, (scaleTuSize <= 16));
+        primitives.intra_pred[DC_IDX][sizeIdx](tmp, scaleStride, left, above, 0, (scaleTuSize <= 16));
         uint32_t bits = (mpms & ((uint64_t)1 << DC_IDX)) ? getIntraModeBits(cu, DC_IDX, partOffset, depth) : rbits;
         uint32_t sad  = sa8d(fenc, scaleStride, tmp, scaleStride) << costShift;
         modeCosts[DC_IDX] = bcost = m_rdCost.calcRdSADCost(sad, bits);
@@ -1298,7 +1298,7 @@
             abovePlanar = aboveFiltered;
             leftPlanar  = leftFiltered;
         }
-        primitives.intra_pred[sizeIdx][PLANAR_IDX](tmp, scaleStride, leftPlanar, abovePlanar, 0, 0);
+        primitives.intra_pred[PLANAR_IDX][sizeIdx](tmp, scaleStride, leftPlanar, abovePlanar, 0, 0);
         bits = (mpms & ((uint64_t)1 << PLANAR_IDX)) ? getIntraModeBits(cu, PLANAR_IDX, partOffset, depth) : rbits;
         sad  = sa8d(fenc, scaleStride, tmp, scaleStride) << costShift;
         modeCosts[PLANAR_IDX] = m_rdCost.calcRdSADCost(sad, bits);
diff -r 4680ab4f92b8 -r 3a2c1caf0f80 source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp	Thu Sep 18 18:16:25 2014 +0530
+++ b/source/encoder/slicetype.cpp	Fri Sep 19 16:35:15 2014 +0900
@@ -1680,13 +1680,13 @@
         // generate 35 intra predictions into m_predictions
         pixelcmp_t satd = primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)];
         int icost = m_me.COST_MAX, cost;
-        primitives.intra_pred[sizeIdx][DC_IDX](m_predictions, cuSize, left0, above0, 0, (cuSize <= 16));
+        primitives.intra_pred[DC_IDX][sizeIdx](m_predictions, cuSize, left0, above0, 0, (cuSize <= 16));
         cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize);
         if (cost < icost)
             icost = cost;
         pixel *above = (cuSize >= 8) ? above1 : above0;
         pixel *left  = (cuSize >= 8) ? left1 : left0;
-        primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions, cuSize, left, above, 0, 0);
+        primitives.intra_pred[PLANAR_IDX][sizeIdx](m_predictions, cuSize, left, above, 0, 0);
         cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize);
         if (cost < icost)
             icost = cost;
diff -r 4680ab4f92b8 -r 3a2c1caf0f80 source/test/intrapredharness.cpp
--- a/source/test/intrapredharness.cpp	Thu Sep 18 18:16:25 2014 +0530
+++ b/source/test/intrapredharness.cpp	Fri Sep 19 16:35:15 2014 +0900
@@ -107,7 +107,7 @@
     return true;
 }
 
-bool IntraPredHarness::check_angular_primitive(const intra_pred_t ref[][NUM_INTRA_MODE], const intra_pred_t opt[][NUM_INTRA_MODE])
+bool IntraPredHarness::check_angular_primitive(const intra_pred_t ref[][NUM_TR_SIZE], const intra_pred_t opt[][NUM_TR_SIZE])
 {
     int j = ADI_BUF_STRIDE;
     intptr_t stride = FENC_STRIDE;
@@ -125,15 +125,15 @@
             int bFilter = (width <= 16) && (rand() % 2);
             for (int pmode = 2; pmode <= 34; pmode++)
             {
-                if (!opt[size - 2][pmode])
+                if (!opt[pmode][size - 2])
                     continue;
 
                 pixel * refAbove = pixel_buff + j;
                 pixel * refLeft = refAbove + 3 * width;
                 refLeft[0] = refAbove[0];
 
-                checked(opt[size - 2][pmode], pixel_out_vec, stride, refLeft, refAbove, pmode, bFilter);
-                ref[size - 2][pmode](pixel_out_c, stride, refLeft, refAbove, pmode, bFilter);
+                checked(opt[pmode][size - 2], pixel_out_vec, stride, refLeft, refAbove, pmode, bFilter);
+                ref[pmode][size - 2](pixel_out_c, stride, refLeft, refAbove, pmode, bFilter);
 
                 for (int k = 0; k < width; k++)
                 {
@@ -206,19 +206,19 @@
 {
     for (int i = BLOCK_4x4; i <= BLOCK_32x32; i++)
     {
-        if (opt.intra_pred[i][1])
+        if (opt.intra_pred[1][i])
         {
             const int size = (1 << (i + 2));
-            if (!check_dc_primitive(ref.intra_pred[i][1], opt.intra_pred[i][1], size))
+            if (!check_dc_primitive(ref.intra_pred[1][i], opt.intra_pred[1][i], size))
             {
                 printf("intra_dc %dx%d failed\n", size, size);
                 return false;
             }
         }
-        if (opt.intra_pred[i][0])
+        if (opt.intra_pred[0][i])
         {
             const int size = (1 << (i + 2));
-            if (!check_planar_primitive(ref.intra_pred[i][0], opt.intra_pred[i][0], size))
+            if (!check_planar_primitive(ref.intra_pred[0][i], opt.intra_pred[0][i], size))
             {
                 printf("intra_planar %dx%d failed\n", size, size);
                 return false;
@@ -253,22 +253,22 @@
     for (int i = BLOCK_4x4; i <= BLOCK_32x32; i++)
     {
         const int size = (1 << (i + 2));
-        if (opt.intra_pred[i][1])
+        if (opt.intra_pred[1][i])
         {
             printf("intra_dc_%dx%d[f=0]", size, size);
-            REPORT_SPEEDUP(opt.intra_pred[i][1], ref.intra_pred[i][1],
+            REPORT_SPEEDUP(opt.intra_pred[1][i], ref.intra_pred[1][i],
                            pixel_out_vec, FENC_STRIDE, pixel_buff + srcStride, pixel_buff, 0, 0);
             if (size <= 16)
             {
                 printf("intra_dc_%dx%d[f=1]", size, size);
-                REPORT_SPEEDUP(opt.intra_pred[i][1], ref.intra_pred[i][1],
-                               pixel_out_vec, FENC_STRIDE, pixel_buff + srcStride, pixel_buff, 0, 0);
+                REPORT_SPEEDUP(opt.intra_pred[1][i], ref.intra_pred[1][i],
+                               pixel_out_vec, FENC_STRIDE, pixel_buff + srcStride, pixel_buff, 0, 1);
             }
         }
-        if (opt.intra_pred[i][0])
+        if (opt.intra_pred[0][i])
         {
             printf("intra_planar %2dx%d", size, size);
-            REPORT_SPEEDUP(opt.intra_pred[i][0], ref.intra_pred[i][0],
+            REPORT_SPEEDUP(opt.intra_pred[0][i], ref.intra_pred[0][i],
                            pixel_out_vec, FENC_STRIDE, pixel_buff + srcStride, pixel_buff, 0, 0);
         }
         if (opt.intra_pred_allangs[i])
@@ -288,7 +288,7 @@
         for (int p = 2; p <= 34; p += 1)
         {
             int pmode = p;  //(rand()%33)+2;
-            if (opt.intra_pred[ii - 2][pmode])
+            if (opt.intra_pred[pmode][ii - 2])
             {
                 width = (1 << ii);
                 bool bFilter = (width <= 16);
@@ -296,7 +296,7 @@
                 pixel * refLeft = refAbove + 3 * width;
                 refLeft[0] = refAbove[0];
                 printf("intra_ang%dx%d[%2d]", width, width, pmode);
-                REPORT_SPEEDUP(opt.intra_pred[ii - 2][pmode], ref.intra_pred[ii - 2][pmode],
+                REPORT_SPEEDUP(opt.intra_pred[pmode][ii - 2], ref.intra_pred[pmode][ii - 2],
                                pixel_out_vec, FENC_STRIDE, refAbove, refLeft, pmode, bFilter);
             }
         }
diff -r 4680ab4f92b8 -r 3a2c1caf0f80 source/test/intrapredharness.h
--- a/source/test/intrapredharness.h	Thu Sep 18 18:16:25 2014 +0530
+++ b/source/test/intrapredharness.h	Fri Sep 19 16:35:15 2014 +0900
@@ -43,7 +43,7 @@
 
     bool check_dc_primitive(intra_pred_t ref, intra_pred_t opt, int width);
     bool check_planar_primitive(intra_pred_t ref, intra_pred_t opt, int width);
-    bool check_angular_primitive(const intra_pred_t ref[][NUM_INTRA_MODE], const intra_pred_t opt[][NUM_INTRA_MODE]);
+    bool check_angular_primitive(const intra_pred_t ref[][NUM_TR_SIZE], const intra_pred_t opt[][NUM_TR_SIZE]);
     bool check_allangs_primitive(const intra_allangs_t ref[], const intra_allangs_t opt[]);
 
 public:


More information about the x265-devel mailing list