[x265-commits] [x265] search: avoid AMVP selection if both MVs are the same
Steve Borho
steve at borho.org
Thu Dec 4 06:03:46 CET 2014
details: http://hg.videolan.org/x265/rev/2f66c3284c35
branches:
changeset: 8939:2f66c3284c35
user: Steve Borho <steve at borho.org>
date: Wed Dec 03 21:35:10 2014 -0600
description:
search: avoid AMVP selection if both MVs are the same
This is a simple work-avoidance optimization; it should have no effect on outputs.
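As a rough illustration of the idea (a hedged sketch only; the search.cpp hunks for this changeset are not included below, and the names MV, amvp, selectMVP and estimateMvdCost are placeholders rather than the real x265 identifiers): when both AMVP candidates are identical there is nothing to choose between, so predictor index 0 can be used without estimating and comparing MVD costs.

    /* Hedged sketch of the work-avoidance idea; all identifiers are
     * illustrative placeholders, not the actual x265 code. */
    #include <cstdint>

    struct MV
    {
        int16_t x, y;
        bool operator==(const MV& other) const { return x == other.x && y == other.y; }
    };

    typedef uint32_t (*MvdCostFn)(const MV& mvd);

    static int selectMVP(const MV amvp[2], const MV& mv, MvdCostFn estimateMvdCost)
    {
        if (amvp[0] == amvp[1])
            return 0; /* identical predictors: the comparison below would be wasted work */

        MV mvd0 = { int16_t(mv.x - amvp[0].x), int16_t(mv.y - amvp[0].y) };
        MV mvd1 = { int16_t(mv.x - amvp[1].x), int16_t(mv.y - amvp[1].y) };
        return estimateMvdCost(mvd1) < estimateMvdCost(mvd0) ? 1 : 0;
    }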
Subject: [x265] primitives: remove unused chroma lowres primitive
details: http://hg.videolan.org/x265/rev/bfeee4ac5463
branches:
changeset: 8940:bfeee4ac5463
user: Steve Borho <steve at borho.org>
date: Wed Dec 03 21:55:20 2014 -0600
description:
primitives: remove unused chroma lowres primitive
Subject: [x265] primitives: cleanup EncoderPrimitives, refactor chroma p2s primitive
details: http://hg.videolan.org/x265/rev/b1b5f06fe9ce
branches:
changeset: 8941:b1b5f06fe9ce
user: Steve Borho <steve at borho.org>
date: Wed Dec 03 22:21:46 2014 -0600
description:
primitives: cleanup EncoderPrimitives, refactor chroma p2s primitive
No behavior changes
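For readers skimming the diff below, a minimal sketch of what the chroma p2s refactor means at call sites, using simplified stand-in types (an 8-bit pixel, a four-entry chroma array): the per-color-space function pointer moves from the standalone chroma_p2s[] array into the existing per-CSP chroma struct, so callers now index p.chroma[csp].p2s, as the predict.cpp hunk shows.

    /* Simplified stand-in for EncoderPrimitives; only the p2s member move
     * reflects the actual diff, everything else is assumed for illustration. */
    #include <cstdint>

    typedef uint8_t pixel;                      /* assumes an 8-bit build */
    typedef void (*filter_p2s_t)(const pixel* src, intptr_t srcStride,
                                 int16_t* dst, int width, int height);

    struct PrimitivesSketch
    {
        filter_p2s_t luma_p2s;                  /* unchanged by the refactor */
        struct { filter_p2s_t p2s; } chroma[4]; /* was: filter_p2s_t chroma_p2s[X265_CSP_COUNT] */
    };

    void predInterChromaShortSketch(const PrimitivesSketch& p, int csp,
                                    const pixel* refCb, intptr_t refStride,
                                    int16_t* dstCb, int cxWidth, int cxHeight)
    {
        /* old call form: p.chroma_p2s[csp](refCb, refStride, dstCb, cxWidth, cxHeight); */
        p.chroma[csp].p2s(refCb, refStride, dstCb, cxWidth, cxHeight);
    }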
diffstat:
source/common/ipfilter.cpp | 6 +-
source/common/lowres.cpp | 2 +-
source/common/pixel.cpp | 16 +---
source/common/predict.cpp | 4 +-
source/common/primitives.cpp | 9 --
source/common/primitives.h | 129 +++++++++++++++-----------------
source/common/quant.cpp | 2 +-
source/common/shortyuv.cpp | 4 +-
source/common/x86/asm-primitives.cpp | 18 ++--
source/common/yuv.cpp | 2 +-
source/encoder/search.cpp | 138 ++++++++++++++++++----------------
source/test/ipfilterharness.cpp | 8 +-
source/test/pixelharness.cpp | 8 +-
13 files changed, 163 insertions(+), 183 deletions(-)
diffs (truncated from 713 to 300 lines):
diff -r d7b5e73fc91a -r b1b5f06fe9ce source/common/ipfilter.cpp
--- a/source/common/ipfilter.cpp Wed Dec 03 19:50:54 2014 -0600
+++ b/source/common/ipfilter.cpp Wed Dec 03 22:21:46 2014 -0600
@@ -509,9 +509,9 @@ void Setup_C_IPFilterPrimitives(EncoderP
CHROMA_444(16, 64);
p.luma_p2s = filterConvertPelToShort_c<MAX_CU_SIZE>;
- p.chroma_p2s[X265_CSP_I444] = filterConvertPelToShort_c<MAX_CU_SIZE>;
- p.chroma_p2s[X265_CSP_I420] = filterConvertPelToShort_c<MAX_CU_SIZE / 2>;
- p.chroma_p2s[X265_CSP_I422] = filterConvertPelToShort_c<MAX_CU_SIZE / 2>;
+ p.chroma[X265_CSP_I444].p2s = filterConvertPelToShort_c<MAX_CU_SIZE>;
+ p.chroma[X265_CSP_I420].p2s = filterConvertPelToShort_c<MAX_CU_SIZE / 2>;
+ p.chroma[X265_CSP_I422].p2s = filterConvertPelToShort_c<MAX_CU_SIZE / 2>;
p.extendRowBorder = extendCURowColBorder;
}
diff -r d7b5e73fc91a -r b1b5f06fe9ce source/common/lowres.cpp
--- a/source/common/lowres.cpp Wed Dec 03 19:50:54 2014 -0600
+++ b/source/common/lowres.cpp Wed Dec 03 22:21:46 2014 -0600
@@ -157,7 +157,7 @@ void Lowres::init(PicYuv *origPic, int p
intraMbs[i] = 0;
/* downscale and generate 4 hpel planes for lookahead */
- primitives.frame_init_lowres_core(origPic->m_picOrg[0],
+ primitives.frameInitLowres(origPic->m_picOrg[0],
lowresPlane[0], lowresPlane[1], lowresPlane[2], lowresPlane[3],
origPic->m_stride, lumaStride, width, lines);
diff -r d7b5e73fc91a -r b1b5f06fe9ce source/common/pixel.cpp
--- a/source/common/pixel.cpp Wed Dec 03 19:50:54 2014 -0600
+++ b/source/common/pixel.cpp Wed Dec 03 22:21:46 2014 -0600
@@ -867,19 +867,6 @@ int psyCost_ss(const int16_t* source, in
}
}
-void plane_copy_deinterleave_chroma(pixel* dstu, intptr_t dstuStride, pixel* dstv, intptr_t dstvStride,
- const pixel* src, intptr_t srcStride, int w, int h)
-{
- for (int y = 0; y < h; y++, dstu += dstuStride, dstv += dstvStride, src += srcStride)
- {
- for (int x = 0; x < w; x++)
- {
- dstu[x] = src[2 * x];
- dstv[x] = src[2 * x + 1];
- }
- }
-}
-
template<int bx, int by>
void blockcopy_pp_c(pixel* a, intptr_t stridea, const pixel* b, intptr_t strideb)
{
@@ -1356,7 +1343,7 @@ void Setup_C_PixelPrimitives(EncoderPrim
p.scale1D_128to64 = scale1D_128to64;
p.scale2D_64to32 = scale2D_64to32;
- p.frame_init_lowres_core = frame_init_lowres_core;
+ p.frameInitLowres = frame_init_lowres_core;
p.ssim_4x4x2_core = ssim_4x4x2_core;
p.ssim_end_4 = ssim_end_4;
@@ -1364,7 +1351,6 @@ void Setup_C_PixelPrimitives(EncoderPrim
p.var[BLOCK_16x16] = pixel_var<16>;
p.var[BLOCK_32x32] = pixel_var<32>;
p.var[BLOCK_64x64] = pixel_var<64>;
- p.plane_copy_deinterleave_c = plane_copy_deinterleave_chroma;
p.planecopy_cp = planecopy_cp_c;
p.planecopy_sp = planecopy_sp_c;
p.propagateCost = estimateCUPropagateCost;
diff -r d7b5e73fc91a -r b1b5f06fe9ce source/common/predict.cpp
--- a/source/common/predict.cpp Wed Dec 03 19:50:54 2014 -0600
+++ b/source/common/predict.cpp Wed Dec 03 22:21:46 2014 -0600
@@ -460,8 +460,8 @@ void Predict::predInterChromaShort(Short
if (!(yFrac | xFrac))
{
- primitives.chroma_p2s[m_csp](refCb, refStride, dstCb, cxWidth, cxHeight);
- primitives.chroma_p2s[m_csp](refCr, refStride, dstCr, cxWidth, cxHeight);
+ primitives.chroma[m_csp].p2s(refCb, refStride, dstCb, cxWidth, cxHeight);
+ primitives.chroma[m_csp].p2s(refCr, refStride, dstCr, cxWidth, cxHeight);
}
else if (!yFrac)
{
diff -r d7b5e73fc91a -r b1b5f06fe9ce source/common/primitives.cpp
--- a/source/common/primitives.cpp Wed Dec 03 19:50:54 2014 -0600
+++ b/source/common/primitives.cpp Wed Dec 03 22:21:46 2014 -0600
@@ -84,15 +84,6 @@ void Setup_Alias_Primitives(EncoderPrimi
p.chroma[X265_CSP_I444].sub_ps[i] = p.luma_sub_ps[i];
}
- for (int i = 0; i < NUM_SQUARE_BLOCKS; i++)
- {
- int partL = partitionFromLog2Size(i + 2);
- p.square_copy_pp[i] = p.luma_copy_pp[partL];
- p.square_copy_ps[i] = p.luma_copy_ps[partL];
- p.square_copy_sp[i] = p.luma_copy_sp[partL];
- p.square_copy_ss[i] = p.luma_copy_ss[partL];
- }
-
primitives.sa8d[BLOCK_4x4] = primitives.sa8d_inter[LUMA_4x4];
primitives.sa8d[BLOCK_8x8] = primitives.sa8d_inter[LUMA_8x8];
primitives.sa8d[BLOCK_16x16] = primitives.sa8d_inter[LUMA_16x16];
diff -r d7b5e73fc91a -r b1b5f06fe9ce source/common/primitives.h
--- a/source/common/primitives.h Wed Dec 03 19:50:54 2014 -0600
+++ b/source/common/primitives.h Wed Dec 03 22:21:46 2014 -0600
@@ -201,82 +201,74 @@ typedef void (*cutree_propagate_cost) (i
* a vectorized primitive, or a C function. */
struct EncoderPrimitives
{
- pixelcmp_t sad[NUM_LUMA_PARTITIONS]; // Sum of Differences for each size
- pixelcmp_x3_t sad_x3[NUM_LUMA_PARTITIONS]; // Sum of Differences 3x for each size
- pixelcmp_x4_t sad_x4[NUM_LUMA_PARTITIONS]; // Sum of Differences 4x for each size
- pixelcmp_t sse_pp[NUM_LUMA_PARTITIONS]; // Sum of Square Error (pixel, pixel) fenc alignment not assumed
- pixelcmp_ss_t sse_ss[NUM_LUMA_PARTITIONS]; // Sum of Square Error (short, short) fenc alignment not assumed
- pixelcmp_sp_t sse_sp[NUM_LUMA_PARTITIONS]; // Sum of Square Error (short, pixel) fenc alignment not assumed
- pixel_ssd_s_t ssd_s[NUM_SQUARE_BLOCKS - 1]; // Sum of Square Error (short) fenc alignment not assumed
- pixelcmp_t satd[NUM_LUMA_PARTITIONS]; // Sum of Transformed differences (HADAMARD)
- pixelcmp_t sa8d_inter[NUM_LUMA_PARTITIONS]; // sa8d primitives for motion search partitions
- pixelcmp_t sa8d[NUM_SQUARE_BLOCKS]; // sa8d primitives for square intra blocks
- pixelcmp_t psy_cost_pp[NUM_SQUARE_BLOCKS]; // difference in AC energy between two blocks
- pixelcmp_ss_t psy_cost_ss[NUM_SQUARE_BLOCKS];
+ pixelcmp_t sad[NUM_LUMA_PARTITIONS]; // Sum of Differences for each size
+ pixelcmp_x3_t sad_x3[NUM_LUMA_PARTITIONS]; // Sum of Differences 3x for each size
+ pixelcmp_x4_t sad_x4[NUM_LUMA_PARTITIONS]; // Sum of Differences 4x for each size
+ pixelcmp_t sse_pp[NUM_LUMA_PARTITIONS]; // Sum of Square Error (pixel, pixel) fenc alignment not assumed
+ pixelcmp_ss_t sse_ss[NUM_LUMA_PARTITIONS]; // Sum of Square Error (short, short) fenc alignment not assumed
+ pixelcmp_sp_t sse_sp[NUM_LUMA_PARTITIONS]; // Sum of Square Error (short, pixel) fenc alignment not assumed
+ pixel_ssd_s_t ssd_s[NUM_SQUARE_BLOCKS - 1]; // Sum of Square Error (short) fenc alignment not assumed
+ pixelcmp_t satd[NUM_LUMA_PARTITIONS]; // Sum of Transformed differences (HADAMARD)
+ pixelcmp_t sa8d_inter[NUM_LUMA_PARTITIONS]; // sa8d primitives for motion search partitions
+ pixelcmp_t sa8d[NUM_SQUARE_BLOCKS]; // sa8d primitives for square intra blocks
+ pixelcmp_t psy_cost_pp[NUM_SQUARE_BLOCKS]; // difference in AC energy between two blocks
+ pixelcmp_ss_t psy_cost_ss[NUM_SQUARE_BLOCKS];
- blockfill_s_t blockfill_s[NUM_SQUARE_BLOCKS]; // block fill with value
- cpy2Dto1D_shl_t cpy2Dto1D_shl[NUM_SQUARE_BLOCKS - 1];
- cpy2Dto1D_shr_t cpy2Dto1D_shr[NUM_SQUARE_BLOCKS - 1];
- cpy1Dto2D_shl_t cpy1Dto2D_shl[NUM_SQUARE_BLOCKS - 1];
- cpy1Dto2D_shr_t cpy1Dto2D_shr[NUM_SQUARE_BLOCKS - 1];
- copy_cnt_t copy_cnt[NUM_SQUARE_BLOCKS - 1];
+ dct_t dct[NUM_DCTS];
+ idct_t idct[NUM_IDCTS];
+ quant_t quant;
+ nquant_t nquant;
+ dequant_scaling_t dequant_scaling;
+ dequant_normal_t dequant_normal;
+ count_nonzero_t count_nonzero;
+ denoiseDct_t denoiseDct;
+ calcresidual_t calcresidual[NUM_SQUARE_BLOCKS];
+ blockfill_s_t blockfill_s[NUM_SQUARE_BLOCKS]; // block fill with value
+ cpy2Dto1D_shl_t cpy2Dto1D_shl[NUM_SQUARE_BLOCKS - 1];
+ cpy2Dto1D_shr_t cpy2Dto1D_shr[NUM_SQUARE_BLOCKS - 1];
+ cpy1Dto2D_shl_t cpy1Dto2D_shl[NUM_SQUARE_BLOCKS - 1];
+ cpy1Dto2D_shr_t cpy1Dto2D_shr[NUM_SQUARE_BLOCKS - 1];
+ copy_cnt_t copy_cnt[NUM_SQUARE_BLOCKS - 1];
- copy_pp_t luma_copy_pp[NUM_LUMA_PARTITIONS];
- copy_sp_t luma_copy_sp[NUM_LUMA_PARTITIONS];
- copy_ps_t luma_copy_ps[NUM_LUMA_PARTITIONS];
- copy_ss_t luma_copy_ss[NUM_LUMA_PARTITIONS];
- pixel_sub_ps_t luma_sub_ps[NUM_SQUARE_BLOCKS];
- pixel_add_ps_t luma_add_ps[NUM_SQUARE_BLOCKS];
- copy_pp_t square_copy_pp[NUM_SQUARE_BLOCKS];
- copy_sp_t square_copy_sp[NUM_SQUARE_BLOCKS];
- copy_ps_t square_copy_ps[NUM_SQUARE_BLOCKS];
- copy_ss_t square_copy_ss[NUM_SQUARE_BLOCKS];
+ intra_pred_t intra_pred[NUM_INTRA_MODE][NUM_TR_SIZE];
+ intra_allangs_t intra_pred_allangs[NUM_TR_SIZE];
+ transpose_t transpose[NUM_SQUARE_BLOCKS];
+ scale_t scale1D_128to64;
+ scale_t scale2D_64to32;
- filter_pp_t luma_hpp[NUM_LUMA_PARTITIONS];
- filter_hps_t luma_hps[NUM_LUMA_PARTITIONS];
- filter_pp_t luma_vpp[NUM_LUMA_PARTITIONS];
- filter_ps_t luma_vps[NUM_LUMA_PARTITIONS];
- filter_sp_t luma_vsp[NUM_LUMA_PARTITIONS];
- filter_ss_t luma_vss[NUM_LUMA_PARTITIONS];
- filter_hv_pp_t luma_hvpp[NUM_LUMA_PARTITIONS];
- filter_p2s_t luma_p2s;
- filter_p2s_t chroma_p2s[X265_CSP_COUNT];
+ var_t var[NUM_SQUARE_BLOCKS];
+ ssim_4x4x2_core_t ssim_4x4x2_core;
+ ssim_end4_t ssim_end_4;
- weightp_sp_t weight_sp;
- weightp_pp_t weight_pp;
- pixelavg_pp_t pixelavg_pp[NUM_LUMA_PARTITIONS];
- addAvg_t luma_addAvg[NUM_LUMA_PARTITIONS];
+ saoCuOrgE0_t saoCuOrgE0;
- intra_pred_t intra_pred[NUM_INTRA_MODE][NUM_TR_SIZE];
- intra_allangs_t intra_pred_allangs[NUM_TR_SIZE];
- scale_t scale1D_128to64;
- scale_t scale2D_64to32;
+ downscale_t frameInitLowres;
+ cutree_propagate_cost propagateCost;
- dct_t dct[NUM_DCTS];
- idct_t idct[NUM_IDCTS];
- quant_t quant;
- nquant_t nquant;
- dequant_scaling_t dequant_scaling;
- dequant_normal_t dequant_normal;
- count_nonzero_t count_nonzero;
- denoiseDct_t denoiseDct;
+ extendCURowBorder_t extendRowBorder;
+ planecopy_cp_t planecopy_cp;
+ planecopy_sp_t planecopy_sp;
- calcresidual_t calcresidual[NUM_SQUARE_BLOCKS];
- transpose_t transpose[NUM_SQUARE_BLOCKS];
+ weightp_sp_t weight_sp;
+ weightp_pp_t weight_pp;
+ pixelavg_pp_t pixelavg_pp[NUM_LUMA_PARTITIONS];
+ addAvg_t luma_addAvg[NUM_LUMA_PARTITIONS];
- var_t var[NUM_SQUARE_BLOCKS];
- ssim_4x4x2_core_t ssim_4x4x2_core;
- ssim_end4_t ssim_end_4;
+ filter_pp_t luma_hpp[NUM_LUMA_PARTITIONS];
+ filter_hps_t luma_hps[NUM_LUMA_PARTITIONS];
+ filter_pp_t luma_vpp[NUM_LUMA_PARTITIONS];
+ filter_ps_t luma_vps[NUM_LUMA_PARTITIONS];
+ filter_sp_t luma_vsp[NUM_LUMA_PARTITIONS];
+ filter_ss_t luma_vss[NUM_LUMA_PARTITIONS];
+ filter_hv_pp_t luma_hvpp[NUM_LUMA_PARTITIONS];
+ filter_p2s_t luma_p2s;
- downscale_t frame_init_lowres_core;
- plane_copy_deinterleave_t plane_copy_deinterleave_c;
- extendCURowBorder_t extendRowBorder;
- // sao primitives
- saoCuOrgE0_t saoCuOrgE0;
- planecopy_cp_t planecopy_cp;
- planecopy_sp_t planecopy_sp;
-
- cutree_propagate_cost propagateCost;
+ copy_pp_t luma_copy_pp[NUM_LUMA_PARTITIONS];
+ copy_sp_t luma_copy_sp[NUM_LUMA_PARTITIONS];
+ copy_ps_t luma_copy_ps[NUM_LUMA_PARTITIONS];
+ copy_ss_t luma_copy_ss[NUM_LUMA_PARTITIONS];
+ pixel_sub_ps_t luma_sub_ps[NUM_SQUARE_BLOCKS];
+ pixel_add_ps_t luma_add_ps[NUM_SQUARE_BLOCKS];
struct
{
@@ -293,7 +285,8 @@ struct EncoderPrimitives
copy_ss_t copy_ss[NUM_LUMA_PARTITIONS];
pixel_sub_ps_t sub_ps[NUM_SQUARE_BLOCKS];
pixel_add_ps_t add_ps[NUM_SQUARE_BLOCKS];
- } chroma[4]; // X265_CSP_COUNT - do not want to include x265.h here
+ filter_p2s_t p2s;
+ } chroma[X265_CSP_COUNT];
};
void extendPicBorder(pixel* recon, intptr_t stride, int width, int height, int marginX, int marginY);
diff -r d7b5e73fc91a -r b1b5f06fe9ce source/common/quant.cpp
--- a/source/common/quant.cpp Wed Dec 03 19:50:54 2014 -0600
+++ b/source/common/quant.cpp Wed Dec 03 22:21:46 2014 -0600
@@ -363,7 +363,7 @@ uint32_t Quant::transformNxN(const CUDat
{
int trSize = 1 << log2TrSize;
/* perform DCT on source pixels for psy-rdoq */
- primitives.square_copy_ps[sizeIdx](m_fencShortBuf, trSize, fenc, fencStride);
+ primitives.luma_copy_ps[sizeIdx](m_fencShortBuf, trSize, fenc, fencStride);
primitives.dct[index](m_fencShortBuf, m_fencDctCoeff, trSize);
}
diff -r d7b5e73fc91a -r b1b5f06fe9ce source/common/shortyuv.cpp
--- a/source/common/shortyuv.cpp Wed Dec 03 19:50:54 2014 -0600
+++ b/source/common/shortyuv.cpp Wed Dec 03 22:21:46 2014 -0600
@@ -84,7 +84,7 @@ void ShortYuv::copyPartToPartLuma(ShortY
const int16_t* src = getLumaAddr(absPartIdx);
int16_t* dst = dstYuv.getLumaAddr(absPartIdx);
- primitives.square_copy_ss[log2Size - 2](dst, dstYuv.m_size, src, m_size);
+ primitives.luma_copy_ss[log2Size - 2](dst, dstYuv.m_size, src, m_size);
}
void ShortYuv::copyPartToPartLuma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2Size) const
@@ -92,7 +92,7 @@ void ShortYuv::copyPartToPartLuma(Yuv& d
const int16_t* src = getLumaAddr(absPartIdx);
pixel* dst = dstYuv.getLumaAddr(absPartIdx);
- primitives.square_copy_sp[log2Size - 2](dst, dstYuv.m_size, src, m_size);
+ primitives.luma_copy_sp[log2Size - 2](dst, dstYuv.m_size, src, m_size);
}
void ShortYuv::copyPartToPartChroma(ShortYuv& dstYuv, uint32_t absPartIdx, uint32_t log2SizeL) const
diff -r d7b5e73fc91a -r b1b5f06fe9ce source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Wed Dec 03 19:50:54 2014 -0600
+++ b/source/common/x86/asm-primitives.cpp Wed Dec 03 22:21:46 2014 -0600
@@ -1434,7 +1434,7 @@ void Setup_Assembly_Primitives(EncoderPr
}
if (cpuMask & X265_CPU_XOP)
{
- p.frame_init_lowres_core = x265_frame_init_lowres_core_xop;
+ p.frameInitLowres = x265_frame_init_lowres_core_xop;
SA8D_INTER_FROM_BLOCK(xop);
INIT7(satd, _xop);
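The asm-primitives.cpp hunk above is an instance of the usual two-stage primitive setup: the C setup routines first fill every function pointer with a portable implementation, and Setup_Assembly_Primitives then overwrites individual entries when the CPU mask reports the matching feature. A hedged, self-contained sketch of that pattern follows; only the renamed frameInitLowres member and the X265_CPU_XOP flag name come from the diff, while the flag value, signatures, and function bodies are invented for illustration.

    /* Sketch of the two-stage primitive setup: portable C fallback first,
     * then CPU-specific overrides. Names and values are stand-ins. */
    #include <cstdint>

    typedef void (*downscale_t)(const uint8_t* src, uint8_t* dst, int width, int height);

    struct DispatchSketch { downscale_t frameInitLowres; };

    static void frame_init_lowres_c(const uint8_t*, uint8_t*, int, int)   { /* portable C path */ }
    static void frame_init_lowres_xop(const uint8_t*, uint8_t*, int, int) { /* stand-in for the XOP asm */ }

    enum { CPU_XOP = 1 << 0 }; /* stand-in for X265_CPU_XOP */

    void setupPrimitivesSketch(DispatchSketch& p, uint32_t cpuMask)
    {
        p.frameInitLowres = frame_init_lowres_c;       /* Setup_C_PixelPrimitives analogue */
        if (cpuMask & CPU_XOP)
            p.frameInitLowres = frame_init_lowres_xop; /* Setup_Assembly_Primitives analogue */
    }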