[x265-commits] [x265] search: avoid AMVP selection if both MVs are the same

Thu Dec 4 06:03:46 CET 2014

details:   http://hg.videolan.org/x265/rev/2f66c3284c35
branches:  
changeset: 8939:2f66c3284c35
user:      Steve Borho <steve at borho.org>
date:      Wed Dec 03 21:35:10 2014 -0600
description:
search: avoid AMVP selection if both MVs are the same

This is a simple work-avoidance optimization; it should have no effect on outputs.
Subject: [x265] primitives: remove unused chroma lowres primitive

details:   http://hg.videolan.org/x265/rev/bfeee4ac5463
branches:  
changeset: 8940:bfeee4ac5463
user:      Steve Borho <steve at borho.org>
date:      Wed Dec 03 21:55:20 2014 -0600
description:
primitives: remove unused chroma lowres primitive
Subject: [x265] primitives: cleanup EncoderPrimitives, refactor chroma p2s primitive

details:   http://hg.videolan.org/x265/rev/b1b5f06fe9ce
branches:  
changeset: 8941:b1b5f06fe9ce
user:      Steve Borho <steve at borho.org>
date:      Wed Dec 03 22:21:46 2014 -0600
description:
primitives: cleanup EncoderPrimitives, refactor chroma p2s primitive

No behavior changes

diffstat:

 source/common/ipfilter.cpp           |    6 +-
 source/common/lowres.cpp             |    2 +-
 source/common/pixel.cpp              |   16 +---
 source/common/predict.cpp            |    4 +-
 source/common/primitives.cpp         |    9 --
 source/common/primitives.h           |  129 +++++++++++++++-----------------
 source/common/quant.cpp              |    2 +-
 source/common/shortyuv.cpp           |    4 +-
 source/common/x86/asm-primitives.cpp |   18 ++--
 source/common/yuv.cpp                |    2 +-
 source/encoder/search.cpp            |  138 ++++++++++++++++++----------------
 source/test/ipfilterharness.cpp      |    8 +-
 source/test/pixelharness.cpp         |    8 +-
 13 files changed, 163 insertions(+), 183 deletions(-)

diffs (truncated from 713 to 300 lines):

diff -r d7b5e73fc91a -r b1b5f06fe9ce source/common/ipfilter.cpp

--- a/source/common/ipfilter.cpp	Wed Dec 03 19:50:54 2014 -0600
+++ b/source/common/ipfilter.cpp	Wed Dec 03 22:21:46 2014 -0600
@@ -509,9 +509,9 @@ void Setup_C_IPFilterPrimitives(EncoderP
     CHROMA_444(16, 64);
     p.luma_p2s = filterConvertPelToShort_c<MAX_CU_SIZE>;
 
-    p.chroma_p2s[X265_CSP_I444] = filterConvertPelToShort_c<MAX_CU_SIZE>;
-    p.chroma_p2s[X265_CSP_I420] = filterConvertPelToShort_c<MAX_CU_SIZE / 2>;
-    p.chroma_p2s[X265_CSP_I422] = filterConvertPelToShort_c<MAX_CU_SIZE / 2>;
+    p.chroma[X265_CSP_I444].p2s = filterConvertPelToShort_c<MAX_CU_SIZE>;
+    p.chroma[X265_CSP_I420].p2s = filterConvertPelToShort_c<MAX_CU_SIZE / 2>;
+    p.chroma[X265_CSP_I422].p2s = filterConvertPelToShort_c<MAX_CU_SIZE / 2>;
 
     p.extendRowBorder = extendCURowColBorder;
 }
diff -r d7b5e73fc91a -r b1b5f06fe9ce source/common/lowres.cpp
--- a/source/common/lowres.cpp	Wed Dec 03 19:50:54 2014 -0600
+++ b/source/common/lowres.cpp	Wed Dec 03 22:21:46 2014 -0600
@@ -157,7 +157,7 @@ void Lowres::init(PicYuv *origPic, int p
         intraMbs[i] = 0;
 
     /* downscale and generate 4 hpel planes for lookahead */
-    primitives.frame_init_lowres_core(origPic->m_picOrg[0],
+    primitives.frameInitLowres(origPic->m_picOrg[0],
                                       lowresPlane[0], lowresPlane[1], lowresPlane[2], lowresPlane[3],
                                       origPic->m_stride, lumaStride, width, lines);
 
diff -r d7b5e73fc91a -r b1b5f06fe9ce source/common/pixel.cpp
--- a/source/common/pixel.cpp	Wed Dec 03 19:50:54 2014 -0600
+++ b/source/common/pixel.cpp	Wed Dec 03 22:21:46 2014 -0600
@@ -867,19 +867,6 @@ int psyCost_ss(const int16_t* source, in
     }
 }
 
-void plane_copy_deinterleave_chroma(pixel* dstu, intptr_t dstuStride, pixel* dstv, intptr_t dstvStride,
-                                    const pixel* src,  intptr_t srcStride, int w, int h)
-{
-    for (int y = 0; y < h; y++, dstu += dstuStride, dstv += dstvStride, src += srcStride)
-    {
-        for (int x = 0; x < w; x++)
-        {
-            dstu[x] = src[2 * x];
-            dstv[x] = src[2 * x + 1];
-        }
-    }
-}
-
 template<int bx, int by>
 void blockcopy_pp_c(pixel* a, intptr_t stridea, const pixel* b, intptr_t strideb)
 {
@@ -1356,7 +1343,7 @@ void Setup_C_PixelPrimitives(EncoderPrim
 
     p.scale1D_128to64 = scale1D_128to64;
     p.scale2D_64to32 = scale2D_64to32;
-    p.frame_init_lowres_core = frame_init_lowres_core;
+    p.frameInitLowres = frame_init_lowres_core;
     p.ssim_4x4x2_core = ssim_4x4x2_core;
     p.ssim_end_4 = ssim_end_4;
 
@@ -1364,7 +1351,6 @@ void Setup_C_PixelPrimitives(EncoderPrim
     p.var[BLOCK_16x16] = pixel_var<16>;
     p.var[BLOCK_32x32] = pixel_var<32>;
     p.var[BLOCK_64x64] = pixel_var<64>;
-    p.plane_copy_deinterleave_c = plane_copy_deinterleave_chroma;
     p.planecopy_cp = planecopy_cp_c;
     p.planecopy_sp = planecopy_sp_c;
     p.propagateCost = estimateCUPropagateCost;
diff -r d7b5e73fc91a -r b1b5f06fe9ce source/common/predict.cpp
--- a/source/common/predict.cpp	Wed Dec 03 19:50:54 2014 -0600
+++ b/source/common/predict.cpp	Wed Dec 03 22:21:46 2014 -0600
@@ -460,8 +460,8 @@ void Predict::predInterChromaShort(Short
 
     if (!(yFrac | xFrac))
     {
-        primitives.chroma_p2s[m_csp](refCb, refStride, dstCb, cxWidth, cxHeight);
-        primitives.chroma_p2s[m_csp](refCr, refStride, dstCr, cxWidth, cxHeight);
+        primitives.chroma[m_csp].p2s(refCb, refStride, dstCb, cxWidth, cxHeight);
+        primitives.chroma[m_csp].p2s(refCr, refStride, dstCr, cxWidth, cxHeight);
     }
     else if (!yFrac)
     {
diff -r d7b5e73fc91a -r b1b5f06fe9ce source/common/primitives.cpp
--- a/source/common/primitives.cpp	Wed Dec 03 19:50:54 2014 -0600
+++ b/source/common/primitives.cpp	Wed Dec 03 22:21:46 2014 -0600
@@ -84,15 +84,6 @@ void Setup_Alias_Primitives(EncoderPrimi
         p.chroma[X265_CSP_I444].sub_ps[i]  = p.luma_sub_ps[i];
     }
 
-    for (int i = 0; i < NUM_SQUARE_BLOCKS; i++)
-    {
-        int partL = partitionFromLog2Size(i + 2);
-        p.square_copy_pp[i] = p.luma_copy_pp[partL];
-        p.square_copy_ps[i] = p.luma_copy_ps[partL];
-        p.square_copy_sp[i] = p.luma_copy_sp[partL];
-        p.square_copy_ss[i] = p.luma_copy_ss[partL];
-    }
-
     primitives.sa8d[BLOCK_4x4]   = primitives.sa8d_inter[LUMA_4x4];
     primitives.sa8d[BLOCK_8x8]   = primitives.sa8d_inter[LUMA_8x8];
     primitives.sa8d[BLOCK_16x16] = primitives.sa8d_inter[LUMA_16x16];
diff -r d7b5e73fc91a -r b1b5f06fe9ce source/common/primitives.h
--- a/source/common/primitives.h	Wed Dec 03 19:50:54 2014 -0600
+++ b/source/common/primitives.h	Wed Dec 03 22:21:46 2014 -0600
@@ -201,82 +201,74 @@ typedef void (*cutree_propagate_cost) (i
  * a vectorized primitive, or a C function. */
 struct EncoderPrimitives
 {
-    pixelcmp_t      sad[NUM_LUMA_PARTITIONS];        // Sum of Differences for each size
-    pixelcmp_x3_t   sad_x3[NUM_LUMA_PARTITIONS];     // Sum of Differences 3x for each size
-    pixelcmp_x4_t   sad_x4[NUM_LUMA_PARTITIONS];     // Sum of Differences 4x for each size
-    pixelcmp_t      sse_pp[NUM_LUMA_PARTITIONS];     // Sum of Square Error (pixel, pixel) fenc alignment not assumed
-    pixelcmp_ss_t   sse_ss[NUM_LUMA_PARTITIONS];     // Sum of Square Error (short, short) fenc alignment not assumed
-    pixelcmp_sp_t   sse_sp[NUM_LUMA_PARTITIONS];     // Sum of Square Error (short, pixel) fenc alignment not assumed
-    pixel_ssd_s_t   ssd_s[NUM_SQUARE_BLOCKS - 1];    // Sum of Square Error (short) fenc alignment not assumed
-    pixelcmp_t      satd[NUM_LUMA_PARTITIONS];       // Sum of Transformed differences (HADAMARD)
-    pixelcmp_t      sa8d_inter[NUM_LUMA_PARTITIONS]; // sa8d primitives for motion search partitions
-    pixelcmp_t      sa8d[NUM_SQUARE_BLOCKS];         // sa8d primitives for square intra blocks
-    pixelcmp_t      psy_cost_pp[NUM_SQUARE_BLOCKS];     // difference in AC energy between two blocks
-    pixelcmp_ss_t   psy_cost_ss[NUM_SQUARE_BLOCKS];
+    pixelcmp_t            sad[NUM_LUMA_PARTITIONS];        // Sum of Differences for each size
+    pixelcmp_x3_t         sad_x3[NUM_LUMA_PARTITIONS];     // Sum of Differences 3x for each size
+    pixelcmp_x4_t         sad_x4[NUM_LUMA_PARTITIONS];     // Sum of Differences 4x for each size
+    pixelcmp_t            sse_pp[NUM_LUMA_PARTITIONS];     // Sum of Square Error (pixel, pixel) fenc alignment not assumed
+    pixelcmp_ss_t         sse_ss[NUM_LUMA_PARTITIONS];     // Sum of Square Error (short, short) fenc alignment not assumed
+    pixelcmp_sp_t         sse_sp[NUM_LUMA_PARTITIONS];     // Sum of Square Error (short, pixel) fenc alignment not assumed
+    pixel_ssd_s_t         ssd_s[NUM_SQUARE_BLOCKS - 1];    // Sum of Square Error (short) fenc alignment not assumed
+    pixelcmp_t            satd[NUM_LUMA_PARTITIONS];       // Sum of Transformed differences (HADAMARD)
+    pixelcmp_t            sa8d_inter[NUM_LUMA_PARTITIONS]; // sa8d primitives for motion search partitions
+    pixelcmp_t            sa8d[NUM_SQUARE_BLOCKS];         // sa8d primitives for square intra blocks
+    pixelcmp_t            psy_cost_pp[NUM_SQUARE_BLOCKS];  // difference in AC energy between two blocks
+    pixelcmp_ss_t         psy_cost_ss[NUM_SQUARE_BLOCKS];
 
-    blockfill_s_t   blockfill_s[NUM_SQUARE_BLOCKS];  // block fill with value
-    cpy2Dto1D_shl_t cpy2Dto1D_shl[NUM_SQUARE_BLOCKS - 1];
-    cpy2Dto1D_shr_t cpy2Dto1D_shr[NUM_SQUARE_BLOCKS - 1];
-    cpy1Dto2D_shl_t cpy1Dto2D_shl[NUM_SQUARE_BLOCKS - 1];
-    cpy1Dto2D_shr_t cpy1Dto2D_shr[NUM_SQUARE_BLOCKS - 1];
-    copy_cnt_t      copy_cnt[NUM_SQUARE_BLOCKS - 1];
+    dct_t                 dct[NUM_DCTS];
+    idct_t                idct[NUM_IDCTS];
+    quant_t               quant;
+    nquant_t              nquant;
+    dequant_scaling_t     dequant_scaling;
+    dequant_normal_t      dequant_normal;
+    count_nonzero_t       count_nonzero;
+    denoiseDct_t          denoiseDct;
+    calcresidual_t        calcresidual[NUM_SQUARE_BLOCKS];
+    blockfill_s_t         blockfill_s[NUM_SQUARE_BLOCKS];  // block fill with value
+    cpy2Dto1D_shl_t       cpy2Dto1D_shl[NUM_SQUARE_BLOCKS - 1];
+    cpy2Dto1D_shr_t       cpy2Dto1D_shr[NUM_SQUARE_BLOCKS - 1];
+    cpy1Dto2D_shl_t       cpy1Dto2D_shl[NUM_SQUARE_BLOCKS - 1];
+    cpy1Dto2D_shr_t       cpy1Dto2D_shr[NUM_SQUARE_BLOCKS - 1];
+    copy_cnt_t            copy_cnt[NUM_SQUARE_BLOCKS - 1];
 
-    copy_pp_t       luma_copy_pp[NUM_LUMA_PARTITIONS];
-    copy_sp_t       luma_copy_sp[NUM_LUMA_PARTITIONS];
-    copy_ps_t       luma_copy_ps[NUM_LUMA_PARTITIONS];
-    copy_ss_t       luma_copy_ss[NUM_LUMA_PARTITIONS];
-    pixel_sub_ps_t  luma_sub_ps[NUM_SQUARE_BLOCKS];
-    pixel_add_ps_t  luma_add_ps[NUM_SQUARE_BLOCKS];
-    copy_pp_t       square_copy_pp[NUM_SQUARE_BLOCKS];
-    copy_sp_t       square_copy_sp[NUM_SQUARE_BLOCKS];
-    copy_ps_t       square_copy_ps[NUM_SQUARE_BLOCKS];
-    copy_ss_t       square_copy_ss[NUM_SQUARE_BLOCKS];
+    intra_pred_t          intra_pred[NUM_INTRA_MODE][NUM_TR_SIZE];
+    intra_allangs_t       intra_pred_allangs[NUM_TR_SIZE];
+    transpose_t           transpose[NUM_SQUARE_BLOCKS];
+    scale_t               scale1D_128to64;
+    scale_t               scale2D_64to32;
 
-    filter_pp_t     luma_hpp[NUM_LUMA_PARTITIONS];
-    filter_hps_t    luma_hps[NUM_LUMA_PARTITIONS];
-    filter_pp_t     luma_vpp[NUM_LUMA_PARTITIONS];
-    filter_ps_t     luma_vps[NUM_LUMA_PARTITIONS];
-    filter_sp_t     luma_vsp[NUM_LUMA_PARTITIONS];
-    filter_ss_t     luma_vss[NUM_LUMA_PARTITIONS];
-    filter_hv_pp_t  luma_hvpp[NUM_LUMA_PARTITIONS];
-    filter_p2s_t    luma_p2s;
-    filter_p2s_t    chroma_p2s[X265_CSP_COUNT];
+    var_t                 var[NUM_SQUARE_BLOCKS];
+    ssim_4x4x2_core_t     ssim_4x4x2_core;
+    ssim_end4_t           ssim_end_4;
 
-    weightp_sp_t    weight_sp;
-    weightp_pp_t    weight_pp;
-    pixelavg_pp_t   pixelavg_pp[NUM_LUMA_PARTITIONS];
-    addAvg_t        luma_addAvg[NUM_LUMA_PARTITIONS];
+    saoCuOrgE0_t          saoCuOrgE0;
 
-    intra_pred_t    intra_pred[NUM_INTRA_MODE][NUM_TR_SIZE];
-    intra_allangs_t intra_pred_allangs[NUM_TR_SIZE];
-    scale_t         scale1D_128to64;
-    scale_t         scale2D_64to32;
+    downscale_t           frameInitLowres;
+    cutree_propagate_cost propagateCost;
 
-    dct_t           dct[NUM_DCTS];
-    idct_t          idct[NUM_IDCTS];
-    quant_t         quant;
-    nquant_t        nquant;
-    dequant_scaling_t dequant_scaling;
-    dequant_normal_t dequant_normal;
-    count_nonzero_t count_nonzero;
-    denoiseDct_t    denoiseDct;
+    extendCURowBorder_t   extendRowBorder;
+    planecopy_cp_t        planecopy_cp;
+    planecopy_sp_t        planecopy_sp;
 
-    calcresidual_t  calcresidual[NUM_SQUARE_BLOCKS];
-    transpose_t     transpose[NUM_SQUARE_BLOCKS];
+    weightp_sp_t          weight_sp;
+    weightp_pp_t          weight_pp;
+    pixelavg_pp_t         pixelavg_pp[NUM_LUMA_PARTITIONS];
+    addAvg_t              luma_addAvg[NUM_LUMA_PARTITIONS];
 
-    var_t           var[NUM_SQUARE_BLOCKS];
-    ssim_4x4x2_core_t ssim_4x4x2_core;
-    ssim_end4_t     ssim_end_4;
+    filter_pp_t           luma_hpp[NUM_LUMA_PARTITIONS];
+    filter_hps_t          luma_hps[NUM_LUMA_PARTITIONS];
+    filter_pp_t           luma_vpp[NUM_LUMA_PARTITIONS];
+    filter_ps_t           luma_vps[NUM_LUMA_PARTITIONS];
+    filter_sp_t           luma_vsp[NUM_LUMA_PARTITIONS];
+    filter_ss_t           luma_vss[NUM_LUMA_PARTITIONS];
+    filter_hv_pp_t        luma_hvpp[NUM_LUMA_PARTITIONS];
+    filter_p2s_t          luma_p2s;
 
-    downscale_t     frame_init_lowres_core;
-    plane_copy_deinterleave_t plane_copy_deinterleave_c;
-    extendCURowBorder_t extendRowBorder;
-    // sao primitives
-    saoCuOrgE0_t      saoCuOrgE0;
-    planecopy_cp_t    planecopy_cp;
-    planecopy_sp_t    planecopy_sp;
-
-    cutree_propagate_cost    propagateCost;
+    copy_pp_t             luma_copy_pp[NUM_LUMA_PARTITIONS];
+    copy_sp_t             luma_copy_sp[NUM_LUMA_PARTITIONS];
+    copy_ps_t             luma_copy_ps[NUM_LUMA_PARTITIONS];
+    copy_ss_t             luma_copy_ss[NUM_LUMA_PARTITIONS];
+    pixel_sub_ps_t        luma_sub_ps[NUM_SQUARE_BLOCKS];
+    pixel_add_ps_t        luma_add_ps[NUM_SQUARE_BLOCKS];
 
     struct
     {
@@ -293,7 +285,8 @@ struct EncoderPrimitives
         copy_ss_t       copy_ss[NUM_LUMA_PARTITIONS];
         pixel_sub_ps_t  sub_ps[NUM_SQUARE_BLOCKS];
         pixel_add_ps_t  add_ps[NUM_SQUARE_BLOCKS];
-    } chroma[4]; // X265_CSP_COUNT - do not want to include x265.h here
+        filter_p2s_t    p2s;
+    } chroma[X265_CSP_COUNT];
 };
 
 void extendPicBorder(pixel* recon, intptr_t stride, int width, int height, int marginX, int marginY);
diff -r d7b5e73fc91a -r b1b5f06fe9ce source/common/quant.cpp
--- a/source/common/quant.cpp	Wed Dec 03 19:50:54 2014 -0600
+++ b/source/common/quant.cpp	Wed Dec 03 22:21:46 2014 -0600
@@ -363,7 +363,7 @@ uint32_t Quant::transformNxN(const CUDat
         {
             int trSize = 1 << log2TrSize;
             /* perform DCT on source pixels for psy-rdoq */
-            primitives.square_copy_ps[sizeIdx](m_fencShortBuf, trSize, fenc, fencStride);
+            primitives.luma_copy_ps[sizeIdx](m_fencShortBuf, trSize, fenc, fencStride);
             primitives.dct[index](m_fencShortBuf, m_fencDctCoeff, trSize);
         }
 
diff -r d7b5e73fc91a -r b1b5f06fe9ce source/common/shortyuv.cpp
--- a/source/common/shortyuv.cpp	Wed Dec 03 19:50:54 2014 -0600
+++ b/source/common/shortyuv.cpp	Wed Dec 03 22:21:46 2014 -0600
@@ -84,7 +84,7 @@ void ShortYuv::copyPartToPartLuma(ShortY
     const int16_t* src = getLumaAddr(absPartIdx);
     int16_t* dst = dstYuv.getLumaAddr(absPartIdx);
 
-    primitives.square_copy_ss[log2Size - 2](dst, dstYuv.m_size, src, m_size);
+    primitives.luma_copy_ss[log2Size - 2](dst, dstYuv.m_size, src, m_size);
 }
 
 void ShortYuv::copyPartToPartLuma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2Size) const
@@ -92,7 +92,7 @@ void ShortYuv::copyPartToPartLuma(Yuv& d
     const int16_t* src = getLumaAddr(absPartIdx);
     pixel* dst = dstYuv.getLumaAddr(absPartIdx);
 
-    primitives.square_copy_sp[log2Size - 2](dst, dstYuv.m_size, src, m_size);
+    primitives.luma_copy_sp[log2Size - 2](dst, dstYuv.m_size, src, m_size);
 }
 
 void ShortYuv::copyPartToPartChroma(ShortYuv& dstYuv, uint32_t absPartIdx, uint32_t log2SizeL) const
diff -r d7b5e73fc91a -r b1b5f06fe9ce source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Wed Dec 03 19:50:54 2014 -0600
+++ b/source/common/x86/asm-primitives.cpp	Wed Dec 03 22:21:46 2014 -0600
@@ -1434,7 +1434,7 @@ void Setup_Assembly_Primitives(EncoderPr
     }
     if (cpuMask & X265_CPU_XOP)
     {
-        p.frame_init_lowres_core = x265_frame_init_lowres_core_xop;
+        p.frameInitLowres = x265_frame_init_lowres_core_xop;
         SA8D_INTER_FROM_BLOCK(xop);
         INIT7(satd, _xop);