[x265] [PATCH] cleanup: merge Intra Pred PLANAR mode into intra_pred[]
dnyaneshwar at multicorewareinc.com
dnyaneshwar at multicorewareinc.com
Fri Dec 6 15:42:10 CET 2013
# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1386340870 -19800
# Fri Dec 06 20:11:10 2013 +0530
# Node ID ae5ffba44f6166ded199b004534f4ba68d733b74
# Parent 56a17500909e007011386ed5a1529938379fc023
cleanup: merge Intra Pred PLANAR mode into intra_pred[]
diff -r 56a17500909e -r ae5ffba44f61 source/Lib/TLibCommon/TComPrediction.cpp
--- a/source/Lib/TLibCommon/TComPrediction.cpp Fri Dec 06 16:41:00 2013 +0530
+++ b/source/Lib/TLibCommon/TComPrediction.cpp Fri Dec 06 20:11:10 2013 +0530
@@ -159,7 +159,7 @@
// Create the prediction
if (dirMode == PLANAR_IDX)
{
- primitives.intra_pred_planar[log2BlkSize - 2](refAbv + 1, refLft + 1, dst, stride);
+ primitives.intra_pred[log2BlkSize - 2][PLANAR_IDX](dst, stride, refLft, refAbv, dirMode, 0);
}
else
{
@@ -186,7 +186,7 @@
// get starting pixel in block
if (dirMode == PLANAR_IDX)
{
- primitives.intra_pred_planar[log2BlkSize](refAbv + width - 1 + 1, refLft + width - 1 + 1, dst, stride);
+ primitives.intra_pred[log2BlkSize][PLANAR_IDX](dst, stride, refLft + width - 1, refAbv + width - 1, dirMode, 0);
}
else
{
diff -r 56a17500909e -r ae5ffba44f61 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Fri Dec 06 16:41:00 2013 +0530
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Fri Dec 06 20:11:10 2013 +0530
@@ -1634,7 +1634,7 @@
}
// PLANAR
- primitives.intra_pred_planar[log2SizeMinus2](abovePlanar + 1, leftPlanar + 1, tmp, scaleStride);
+ primitives.intra_pred[log2SizeMinus2][PLANAR_IDX](tmp, scaleStride, leftPlanar, abovePlanar, 0, 0);
modeCosts[PLANAR_IDX] = costMultiplier * sa8d(fenc, scaleStride, tmp, scaleStride);
// Transpose NxN
diff -r 56a17500909e -r ae5ffba44f61 source/common/intrapred.cpp
--- a/source/common/intrapred.cpp Fri Dec 06 16:41:00 2013 +0530
+++ b/source/common/intrapred.cpp Fri Dec 06 20:11:10 2013 +0530
@@ -102,8 +102,10 @@
}
template<int width>
-void planad_pred_c(pixel* above, pixel* left, pixel* dst, intptr_t dstStride)
+void planad_pred_c(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int /*dirMode*/, int /*bFilter*/)
{
+ above += 1;
+ left += 1;
int k, l;
pixel bottomLeft, topRight;
int horPred;
@@ -293,13 +295,10 @@
void Setup_C_IPredPrimitives(EncoderPrimitives& p)
{
- p.intra_pred_planar[BLOCK_4x4] = planad_pred_c<4>;
- p.intra_pred_planar[BLOCK_8x8] = planad_pred_c<8>;
- p.intra_pred_planar[BLOCK_16x16] = planad_pred_c<16>;
- p.intra_pred_planar[BLOCK_32x32] = planad_pred_c<32>;
-
- // TODO: Fill Planar mode
- p.intra_pred[BLOCK_4x4][0] = NULL;
+ p.intra_pred[BLOCK_4x4][0] = planad_pred_c<4>;
+ p.intra_pred[BLOCK_8x8][0] = planad_pred_c<8>;
+ p.intra_pred[BLOCK_16x16][0] = planad_pred_c<16>;
+ p.intra_pred[BLOCK_32x32][0] = planad_pred_c<32>;
// Intra Prediction DC
p.intra_pred[BLOCK_4x4][1] = intra_pred_dc_c<4>;
diff -r 56a17500909e -r ae5ffba44f61 source/common/primitives.h
--- a/source/common/primitives.h Fri Dec 06 16:41:00 2013 +0530
+++ b/source/common/primitives.h Fri Dec 06 20:11:10 2013 +0530
@@ -250,7 +250,6 @@
pixeladd_ss_t pixeladd_ss;
pixelavg_pp_t pixelavg_pp[NUM_LUMA_PARTITIONS];
- intra_planar_t intra_pred_planar[NUM_SQUARE_BLOCKS-1]; // no 64x64 intra predictions
intra_pred_t intra_pred[NUM_SQUARE_BLOCKS - 1][NUM_INTRA_MODE];
intra_allangs_t intra_pred_allangs[NUM_SQUARE_BLOCKS-1];
scale_t scale1D_128to64;
diff -r 56a17500909e -r ae5ffba44f61 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Fri Dec 06 16:41:00 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp Fri Dec 06 20:11:10 2013 +0530
@@ -864,10 +864,10 @@
p.dequant_normal = x265_dequant_normal_sse4;
p.weight_pp = x265_weight_pp_sse4;
p.weight_sp = x265_weight_sp_sse4;
- p.intra_pred_planar[BLOCK_4x4] = x265_intra_pred_planar4_sse4;
- p.intra_pred_planar[BLOCK_8x8] = x265_intra_pred_planar8_sse4;
- p.intra_pred_planar[BLOCK_16x16] = x265_intra_pred_planar16_sse4;
- p.intra_pred_planar[BLOCK_32x32] = x265_intra_pred_planar32_sse4;
+ p.intra_pred[BLOCK_4x4][0] = x265_intra_pred_planar4_sse4;
+ p.intra_pred[BLOCK_8x8][0] = x265_intra_pred_planar8_sse4;
+ p.intra_pred[BLOCK_16x16][0] = x265_intra_pred_planar16_sse4;
+ p.intra_pred[BLOCK_32x32][0] = x265_intra_pred_planar32_sse4;
p.intra_pred_allangs[BLOCK_4x4] = x265_all_angs_pred_4x4_sse4;
diff -r 56a17500909e -r ae5ffba44f61 source/common/x86/intrapred.h
--- a/source/common/x86/intrapred.h Fri Dec 06 16:41:00 2013 +0530
+++ b/source/common/x86/intrapred.h Fri Dec 06 20:11:10 2013 +0530
@@ -31,10 +31,10 @@
void x265_intra_pred_dc16_sse4(pixel* dst, intptr_t dstStride, pixel* above, pixel* left, int, int filter);
void x265_intra_pred_dc32_sse4(pixel* dst, intptr_t dstStride, pixel* above, pixel* left, int, int filter);
-void x265_intra_pred_planar4_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride);
-void x265_intra_pred_planar8_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride);
-void x265_intra_pred_planar16_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride);
-void x265_intra_pred_planar32_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride);
+void x265_intra_pred_planar4_sse4(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter);
+void x265_intra_pred_planar8_sse4(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter);
+void x265_intra_pred_planar16_sse4(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter);
+void x265_intra_pred_planar32_sse4(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter);
#define DECL_ANG(bsize, mode, cpu) \
void x265_intra_pred_ang ## bsize ## _ ## mode ## _ ## cpu(pixel * dst, intptr_t dstStride, pixel * refLeft, pixel * refAbove, int dirMode, int bFilter);
diff -r 56a17500909e -r ae5ffba44f61 source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm Fri Dec 06 16:41:00 2013 +0530
+++ b/source/common/x86/intrapred8.asm Fri Dec 06 20:11:10 2013 +0530
@@ -382,18 +382,19 @@
RET
-;----------------------------------------------------------------------------------------
-; void intra_pred_planar4_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride)
-;----------------------------------------------------------------------------------------
+;-----------------------------------------------------------------------------------------------------------
+; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
+;-----------------------------------------------------------------------------------------------------------
INIT_XMM sse4
-cglobal intra_pred_planar4, 4,7,5, above, left, dst, dstStride
-
- pmovzxbw m0, [r0] ; topRow[i] = above[i];
+cglobal intra_pred_planar4, 4,7,5
+ inc r2
+ inc r3
+ pmovzxbw m0, [r3] ; topRow[i] = above[i];
punpcklqdq m0, m0
pxor m1, m1
- movd m2, [r1 + 4] ; bottomLeft = left[4]
- movzx r6d, byte [r0 + 4] ; topRight = above[4];
+ movd m2, [r2 + 4] ; bottomLeft = left[4]
+ movzx r6d, byte [r3 + 4] ; topRight = above[4];
pshufb m2, m1
punpcklbw m2, m1
psubw m2, m0 ; bottomRow[i] = bottomLeft - topRow[i]
@@ -403,24 +404,24 @@
paddw m2, m2
%macro COMP_PRED_PLANAR_2ROW 1
- movzx r4d, byte [r1 + %1]
+ movzx r4d, byte [r2 + %1]
lea r4d, [r4d * 4 + 4]
movd m3, r4d
pshuflw m3, m3, 0
- movzx r4d, byte [r1 + %1 + 1]
+ movzx r4d, byte [r2 + %1 + 1]
lea r4d, [r4d * 4 + 4]
movd m4, r4d
pshuflw m4, m4, 0
punpcklqdq m3, m4 ; horPred
- movzx r4d, byte [r1 + %1]
+ movzx r4d, byte [r2 + %1]
mov r5d, r6d
sub r5d, r4d
movd m4, r5d
pshuflw m4, m4, 0
- movzx r4d, byte [r1 + %1 + 1]
+ movzx r4d, byte [r2 + %1 + 1]
mov r5d, r6d
sub r5d, r4d
movd m1, r5d
@@ -434,10 +435,10 @@
psraw m3, 3
packuswb m3, m3
- movd [r2], m3
+ movd [r0], m3
pshufd m3, m3, 0x55
- movd [r2 + r3], m3
- lea r2, [r2 + 2 * r3]
+ movd [r0 + r1], m3
+ lea r0, [r0 + 2 * r1]
%endmacro
COMP_PRED_PLANAR_2ROW 0
@@ -445,18 +446,19 @@
RET
-;----------------------------------------------------------------------------------------
-; void intra_pred_planar8_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride)
-;----------------------------------------------------------------------------------------
+;-----------------------------------------------------------------------------------------------------------
+; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
+;-----------------------------------------------------------------------------------------------------------
INIT_XMM sse4
-cglobal intra_pred_planar8, 4,4,7, above, left, dst, dstStride
+cglobal intra_pred_planar8, 4,4,7
+ inc r2
+ inc r3
+ pxor m0, m0
+ pmovzxbw m1, [r3] ; v_topRow
+ pmovzxbw m2, [r2] ; v_leftColumn
- pxor m0, m0
- pmovzxbw m1, [r0] ; v_topRow
- pmovzxbw m2, [r1] ; v_leftColumn
-
- movd m3, [r0 + 8] ; topRight = above[8];
- movd m4, [r1 + 8] ; bottomLeft = left[8];
+ movd m3, [r3 + 8] ; topRight = above[8];
+ movd m4, [r2 + 8] ; bottomLeft = left[8];
pshufb m3, m0
pshufb m4, m0
@@ -491,8 +493,8 @@
psraw m5, 4
packuswb m5, m5
- movh [r2], m5
- lea r2, [r2 + r3]
+ movh [r0], m5
+ lea r0, [r0 + r1]
%endmacro
@@ -508,20 +510,21 @@
RET
-;----------------------------------------------------------------------------------------
-; void intra_pred_planar16_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride)
-;----------------------------------------------------------------------------------------
+;-----------------------------------------------------------------------------------------------------------
+; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
+;-----------------------------------------------------------------------------------------------------------
INIT_XMM sse4
-cglobal intra_pred_planar16, 4,6,8, above, left, dst, dstStride
+cglobal intra_pred_planar16, 4,6,8
+ inc r2
+ inc r3
+ pxor m0, m0
+ pmovzxbw m1, [r3] ; topRow[0-7]
+ pmovzxbw m2, [r3 + 8] ; topRow[8-15]
- pxor m0, m0
- pmovzxbw m1, [r0] ; topRow[0-7]
- pmovzxbw m2, [r0 + 8] ; topRow[8-15]
-
- movd m3, [r1 + 16]
+ movd m3, [r2 + 16]
pshufb m3, m0
punpcklbw m3, m0 ; v_bottomLeft = left[16]
- movzx r4d, byte [r0 + 16] ; topRight = above[16]
+ movzx r4d, byte [r3 + 16] ; topRight = above[16]
psubw m4, m3, m1 ; v_bottomRow[0]
psubw m5, m3, m2 ; v_bottomRow[1]
@@ -530,17 +533,17 @@
psllw m2, 4
%macro PRED_PLANAR_ROW16 1
- movzx r5d, byte [r1 + %1]
+ movzx r5d, byte [r2 + %1]
add r5d, r5d
lea r5d, [r5d * 8 + 16]
movd m3, r5d
pshuflw m3, m3, 0
pshufd m3, m3, 0 ; horPred
- movzx r5d, byte [r1 + %1]
- mov r0d, r4d
- sub r0d, r5d
- movd m6, r0d
+ movzx r5d, byte [r2 + %1]
+ mov r3d, r4d
+ sub r3d, r5d
+ movd m6, r3d
pshuflw m6, m6, 0
pshufd m6, m6, 0
@@ -557,8 +560,8 @@
psraw m3, 5
packuswb m7, m3
- movu [r2], m7
- lea r2, [r2 + r3]
+ movu [r0], m7
+ lea r0, [r0 + r1]
%endmacro
PRED_PLANAR_ROW16 0
@@ -581,9 +584,9 @@
RET
-;----------------------------------------------------------------------------------------
-; void intra_pred_planar32_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride)
-;----------------------------------------------------------------------------------------
+;-----------------------------------------------------------------------------------------------------------
+; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
+;-----------------------------------------------------------------------------------------------------------
INIT_XMM sse4
%if ARCH_X86_64 == 1
cglobal intra_pred_planar32, 4,7,12
@@ -598,17 +601,18 @@
%define bottomRow2 [rsp + 2 * mmsize]
%define bottomRow3 [rsp + 3 * mmsize]
%endif
-
+ inc r2
+ inc r3
pxor m3, m3
- movd m0, [r1 + 32]
+ movd m0, [r2 + 32]
pshufb m0, m3
punpcklbw m0, m3 ; v_bottomLeft = left[32]
- movzx r4d, byte [r0 + 32] ; topRight = above[32]
+ movzx r4d, byte [r3 + 32] ; topRight = above[32]
- pmovzxbw m1, [r0 + 0] ; topRow[0]
- pmovzxbw m2, [r0 + 8] ; topRow[1]
- pmovzxbw m3, [r0 +16] ; topRow[2]
- pmovzxbw m4, [r0 +24] ; topRow[3]
+ pmovzxbw m1, [r3 + 0] ; topRow[0]
+ pmovzxbw m2, [r3 + 8] ; topRow[1]
+ pmovzxbw m3, [r3 +16] ; topRow[2]
+ pmovzxbw m4, [r3 +24] ; topRow[3]
psubw m5, m0, m1 ; v_bottomRow[0]
psubw m6, m0, m2 ; v_bottomRow[1]
@@ -626,14 +630,14 @@
psllw m4, 5
%macro COMP_PRED_PLANAR_ROW 1
- movzx r5d, byte [r1]
+ movzx r5d, byte [r2]
shl r5d, 5
add r5d, 32
movd m5, r5d
pshuflw m5, m5, 0
pshufd m5, m5, 0 ; horPred
- movzx r5d, byte [r1]
+ movzx r5d, byte [r2]
mov r6d, r4d
sub r6d, r5d
movd m6, r6d
@@ -672,17 +676,17 @@
psraw m6, 6
packuswb m7, m6
- movu [r2 + %1], m7
+ movu [r0 + %1], m7
%endmacro
- mov r0, 32
+ mov r3, 32
.loop
COMP_PRED_PLANAR_ROW 0
COMP_PRED_PLANAR_ROW 16
- inc r1
- lea r2, [r2 + r3]
+ inc r2
+ lea r0, [r0 + r1]
- dec r0
+ dec r3
jnz .loop
%undef COMP_PRED_PLANAR_ROW
diff -r 56a17500909e -r ae5ffba44f61 source/encoder/compress.cpp
--- a/source/encoder/compress.cpp Fri Dec 06 16:41:00 2013 +0530
+++ b/source/encoder/compress.cpp Fri Dec 06 20:11:10 2013 +0530
@@ -163,7 +163,7 @@
}
// PLANAR
- primitives.intra_pred_planar[log2SizeMinus2](abovePlanar + 1, leftPlanar + 1, tmp, scaleStride);
+ primitives.intra_pred[log2SizeMinus2][PLANAR_IDX](tmp, scaleStride, leftPlanar, abovePlanar, 0, 0);
sad = costMultiplier * sa8d(fenc, scaleStride, tmp, scaleStride);
mode = PLANAR_IDX;
bits = m_search->xModeBitsIntra(cu, mode, partOffset, depth, initTrDepth);
diff -r 56a17500909e -r ae5ffba44f61 source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp Fri Dec 06 16:41:00 2013 +0530
+++ b/source/encoder/slicetype.cpp Fri Dec 06 20:11:10 2013 +0530
@@ -580,7 +580,7 @@
primitives.intra_pred[nLog2SizeMinus2][DC_IDX](predictions, cuSize, left0, above0, 0, (cuSize <= 16));
pixel *above = (cuSize >= 8) ? above1 : above0;
pixel *left = (cuSize >= 8) ? left1 : left0;
- primitives.intra_pred_planar[nLog2SizeMinus2](above + 1, left + 1, predictions + predsize, cuSize);
+ primitives.intra_pred[nLog2SizeMinus2][PLANAR_IDX](predictions + predsize, cuSize, left, above, 0, 0);
primitives.intra_pred_allangs[nLog2SizeMinus2](predictions + 2 * predsize, above0, left0, above1, left1, (cuSize <= 16));
// calculate 35 satd costs, keep least cost
diff -r 56a17500909e -r ae5ffba44f61 source/test/intrapredharness.cpp
--- a/source/test/intrapredharness.cpp Fri Dec 06 16:41:00 2013 +0530
+++ b/source/test/intrapredharness.cpp Fri Dec 06 20:11:10 2013 +0530
@@ -109,7 +109,7 @@
return true;
}
-bool IntraPredHarness::check_planar_primitive(intra_planar_t ref, intra_planar_t opt)
+bool IntraPredHarness::check_planar_primitive(intra_pred_t ref, intra_pred_t opt)
{
int j = ADI_BUF_STRIDE;
@@ -127,16 +127,16 @@
memset(pixel_out_vec, 0xCD, out_size);
memset(pixel_out_c, 0xCD, out_size);
#endif
- ref(pixel_buff + j - ADI_BUF_STRIDE, left + 1, pixel_out_c, FENC_STRIDE);
- opt(pixel_buff + j - ADI_BUF_STRIDE, left + 1, pixel_out_vec, FENC_STRIDE);
+ ref(pixel_out_c, FENC_STRIDE, pixel_buff + j - ADI_BUF_STRIDE, left + 1, 0, 0);
+ opt(pixel_out_vec, FENC_STRIDE, pixel_buff + j - ADI_BUF_STRIDE, left + 1, 0, 0);
for (int k = 0; k < width; k++)
{
if (memcmp(pixel_out_vec + k * FENC_STRIDE, pixel_out_c + k * FENC_STRIDE, width))
{
#if _DEBUG
- ref(pixel_buff + j - ADI_BUF_STRIDE, left + 1, pixel_out_c, FENC_STRIDE);
- opt(pixel_buff + j - ADI_BUF_STRIDE, left + 1, pixel_out_vec, FENC_STRIDE);
+ ref(pixel_out_c, FENC_STRIDE, pixel_buff + j - ADI_BUF_STRIDE, left + 1, 0, 0);
+ opt(pixel_out_vec, FENC_STRIDE, pixel_buff + j - ADI_BUF_STRIDE, left + 1, 0, 0);
#endif
return false;
}
@@ -260,10 +260,10 @@
return false;
}
}
- if (opt.intra_pred_planar[i])
+ if (opt.intra_pred[i][0])
{
const int size = (1 << (i + 2));
- if (!check_planar_primitive(ref.intra_pred_planar[i], opt.intra_pred_planar[i]))
+ if (!check_planar_primitive(ref.intra_pred[i][0], opt.intra_pred[i][0]))
{
printf("intra_planar %dx%d failed\n", size, size);
return false;
@@ -307,14 +307,14 @@
{
printf("intra_dc_%dx%d[f=1]", size, size);
REPORT_SPEEDUP(opt.intra_pred[i][1], ref.intra_pred[i][1],
pixel_out_vec, FENC_STRIDE, pixel_buff + srcStride, pixel_buff, 0, 1);
}
}
- if (opt.intra_pred_planar[i])
+ if (opt.intra_pred[i][0])
{
printf("intra_planar %2dx%d", size, size);
- REPORT_SPEEDUP(opt.intra_pred_planar[i], ref.intra_pred_planar[i],
- pixel_buff + srcStride, pixel_buff, pixel_out_vec, FENC_STRIDE);
+ REPORT_SPEEDUP(opt.intra_pred[i][0], ref.intra_pred[i][0],
+ pixel_out_vec, FENC_STRIDE, pixel_buff + srcStride, pixel_buff, 0, 0);
}
if (opt.intra_pred_allangs[i])
{
diff -r 56a17500909e -r ae5ffba44f61 source/test/intrapredharness.h
--- a/source/test/intrapredharness.h Fri Dec 06 16:41:00 2013 +0530
+++ b/source/test/intrapredharness.h Fri Dec 06 20:11:10 2013 +0530
@@ -44,7 +44,7 @@
static const int out_size_33 = 33 * 64 * FENC_STRIDE;
bool check_dc_primitive(intra_pred_t ref, intra_pred_t opt, int width);
- bool check_planar_primitive(intra_planar_t ref, intra_planar_t opt);
+ bool check_planar_primitive(intra_pred_t ref, intra_pred_t opt);
bool check_angular_primitive(const intra_pred_t ref[][NUM_INTRA_MODE], const intra_pred_t opt[][NUM_INTRA_MODE]);
bool check_allangs_primitive(const intra_allangs_t ref[], const intra_allangs_t opt[]);
More information about the x265-devel mailing list