[x265] [PATCH 05 of 29] scale1D_128to64_new: cleanup

dnyaneshwar at multicorewareinc.com dnyaneshwar at multicorewareinc.com
Tue Jan 13 08:11:13 CET 2015


# HG changeset patch
# User Praveen Tiwari
# Date 1421046277 -19800
#      Mon Jan 12 12:34:37 2015 +0530
# Node ID f4daa8744d08b569ae652737c4506b397dfb55cb
# Parent  3caab705cfdf1d1d5549f0986bd9496d5fc606e1
scale1D_128to64_new: cleanup

This patch drops the "_new" suffix so that the names match the existing naming pattern, and removes the duplicated code.

diff -r 3caab705cfdf -r f4daa8744d08 source/common/pixel.cpp
--- a/source/common/pixel.cpp	Mon Jan 12 12:20:17 2015 +0530
+++ b/source/common/pixel.cpp	Mon Jan 12 12:34:37 2015 +0530
@@ -629,7 +629,7 @@
     }
 }
 
-void scale1D_128to64_new(pixel *dst, const pixel *src, intptr_t /*stride*/)
+void scale1D_128to64(pixel *dst, const pixel *src, intptr_t /*stride*/)
 {
     int x;
     const pixel* src1 = src;
@@ -1377,7 +1377,7 @@
     p.weight_pp = weight_pp_c;
     p.weight_sp = weight_sp_c;
 
-    p.scale1D_128to64_new = scale1D_128to64_new;
+    p.scale1D_128to64 = scale1D_128to64;
     p.scale2D_64to32 = scale2D_64to32;
     p.frameInitLowres = frame_init_lowres_core;
     p.ssim_4x4x2_core = ssim_4x4x2_core;
diff -r 3caab705cfdf -r f4daa8744d08 source/common/primitives.h
--- a/source/common/primitives.h	Mon Jan 12 12:20:17 2015 +0530
+++ b/source/common/primitives.h	Mon Jan 12 12:34:37 2015 +0530
@@ -255,7 +255,7 @@
     intra_pred_new_t      intra_pred_new[NUM_INTRA_MODE][NUM_TR_SIZE];
     intra_allangs_t       intra_pred_allangs[NUM_TR_SIZE];         /* todo: move to CU */
     intra_allangs_new_t   intra_pred_allangs_new[NUM_TR_SIZE];
-    scale_t               scale1D_128to64_new;
+    scale_t               scale1D_128to64;
     scale_t               scale2D_64to32;
 
     ssim_4x4x2_core_t     ssim_4x4x2_core;
diff -r 3caab705cfdf -r f4daa8744d08 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Mon Jan 12 12:20:17 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Mon Jan 12 12:34:37 2015 +0530
@@ -1384,7 +1384,7 @@
     }
     if (cpuMask & X265_CPU_SSSE3)
     {
-        p.scale1D_128to64_new = x265_scale1D_128to64_new_ssse3;
+        p.scale1D_128to64 = x265_scale1D_128to64_ssse3;
         p.scale2D_64to32 = x265_scale2D_64to32_ssse3;
 
         INTRA_ANG_SSSE3(ssse3);
@@ -1445,7 +1445,7 @@
         p.quant = x265_quant_avx2;
         p.nquant = x265_nquant_avx2;
         p.dequant_normal  = x265_dequant_normal_avx2;
-        p.scale1D_128to64_new = x265_scale1D_128to64_new_avx2;
+        p.scale1D_128to64= x265_scale1D_128to64_avx2;
         p.cu[BLOCK_4x4].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_4_avx2;
         p.cu[BLOCK_8x8].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_8_avx2;
         p.cu[BLOCK_16x16].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_16_avx2;
@@ -1602,7 +1602,7 @@
 
         INTRA_ANG_SSSE3(ssse3);
 
-        p.scale1D_128to64_new = x265_scale1D_128to64_new_ssse3;
+        p.scale1D_128to64 = x265_scale1D_128to64_ssse3;
         p.scale2D_64to32 = x265_scale2D_64to32_ssse3;
         SAD_X3(ssse3);
         SAD_X4(ssse3);
@@ -1812,7 +1812,7 @@
         p.chroma[X265_CSP_I422].pu[CHROMA422_16x24].copy_ss = x265_blockcopy_ss_16x24_avx;
         p.chroma[X265_CSP_I422].pu[CHROMA422_16x32].copy_ss = x265_blockcopy_ss_16x32_avx;
         p.chroma[X265_CSP_I422].pu[CHROMA422_16x64].copy_ss = x265_blockcopy_ss_16x64_avx;
-        p.scale1D_128to64_new = x265_scale1D_128to64_new_avx2;
+        p.scale1D_128to64 = x265_scale1D_128to64_avx2;
 
         p.weight_pp = x265_weight_pp_avx2;
 
diff -r 3caab705cfdf -r f4daa8744d08 source/common/x86/pixel-util.h
--- a/source/common/x86/pixel-util.h	Mon Jan 12 12:20:17 2015 +0530
+++ b/source/common/x86/pixel-util.h	Mon Jan 12 12:34:37 2015 +0530
@@ -65,8 +65,6 @@
 
 void x265_scale1D_128to64_ssse3(pixel*, const pixel*, intptr_t);
 void x265_scale1D_128to64_avx2(pixel*, const pixel*, intptr_t);
-void x265_scale1D_128to64_new_ssse3(pixel*, const pixel*, intptr_t);
-void x265_scale1D_128to64_new_avx2(pixel*, const pixel*, intptr_t);
 void x265_scale2D_64to32_ssse3(pixel*, const pixel*, intptr_t);
 
 #define SETUP_CHROMA_PIXELSUB_PS_FUNC(W, H, cpu) \
diff -r 3caab705cfdf -r f4daa8744d08 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm	Mon Jan 12 12:20:17 2015 +0530
+++ b/source/common/x86/pixel-util8.asm	Mon Jan 12 12:34:37 2015 +0530
@@ -2984,7 +2984,7 @@
 ; void scale1D_128to64(pixel *dst, pixel *src, intptr_t /*stride*/)
 ;-----------------------------------------------------------------
 INIT_XMM ssse3
-cglobal scale1D_128to64_new, 2, 2, 8, dest, src1, stride
+cglobal scale1D_128to64, 2, 2, 8, dest, src1, stride
 %if HIGH_BIT_DEPTH
     mova        m7,      [deinterleave_word_shuf]
 
@@ -3109,7 +3109,7 @@
 
 %if HIGH_BIT_DEPTH == 1
 INIT_YMM avx2
-cglobal scale1D_128to64_new, 2, 2, 3
+cglobal scale1D_128to64, 2, 2, 3
     pxor            m2, m2
 
     ;Top pixel
@@ -3173,7 +3173,7 @@
     RET
 %else ; HIGH_BIT_DEPTH == 0
 INIT_YMM avx2
-cglobal scale1D_128to64_new, 2, 2, 4
+cglobal scale1D_128to64, 2, 2, 4
     pxor            m2, m2
     mova            m3, [pb_1]
 
@@ -3222,220 +3222,6 @@
 %endif
 
 ;-----------------------------------------------------------------
-; void scale1D_128to64(pixel *dst, pixel *src, intptr_t /*stride*/)
-;-----------------------------------------------------------------
-INIT_XMM ssse3
-cglobal scale1D_128to64, 2, 2, 8, dest, src1, stride
-%if HIGH_BIT_DEPTH
-    mova        m7,      [deinterleave_word_shuf]
-
-    movu        m0,      [r1]
-    palignr     m1,      m0,    2
-    movu        m2,      [r1 + 16]
-    palignr     m3,      m2,    2
-    movu        m4,      [r1 + 32]
-    palignr     m5,      m4,    2
-    movu        m6,      [r1 + 48]
-    pavgw       m0,      m1
-    palignr     m1,      m6,    2
-    pavgw       m2,      m3
-    pavgw       m4,      m5
-    pavgw       m6,      m1
-    pshufb      m0,      m0,    m7
-    pshufb      m2,      m2,    m7
-    pshufb      m4,      m4,    m7
-    pshufb      m6,      m6,    m7
-    punpcklqdq    m0,           m2
-    movu          [r0],         m0
-    punpcklqdq    m4,           m6
-    movu          [r0 + 16],    m4
-
-
-
-    movu        m0,      [r1 + 64]
-    palignr     m1,      m0,    2
-    movu        m2,      [r1 + 80]
-    palignr     m3,      m2,    2
-    movu        m4,      [r1 + 96]
-    palignr     m5,      m4,    2
-    movu        m6,      [r1 + 112]
-    pavgw       m0,      m1
-    palignr     m1,      m6,    2
-    pavgw       m2,      m3
-    pavgw       m4,      m5
-    pavgw       m6,      m1
-    pshufb      m0,      m0,    m7
-    pshufb      m2,      m2,    m7
-    pshufb      m4,      m4,    m7
-    pshufb      m6,      m6,    m7
-    punpcklqdq    m0,           m2
-    movu          [r0 + 32],    m0
-    punpcklqdq    m4,           m6
-    movu          [r0 + 48],    m4
-
-    movu        m0,      [r1 + 128]
-    palignr     m1,      m0,    2
-    movu        m2,      [r1 + 144]
-    palignr     m3,      m2,    2
-    movu        m4,      [r1 + 160]
-    palignr     m5,      m4,    2
-    movu        m6,      [r1 + 176]
-    pavgw       m0,      m1
-    palignr     m1,      m6,    2
-    pavgw       m2,      m3
-    pavgw       m4,      m5
-    pavgw       m6,      m1
-    pshufb      m0,      m0,    m7
-    pshufb      m2,      m2,    m7
-    pshufb      m4,      m4,    m7
-    pshufb      m6,      m6,    m7
-
-    punpcklqdq    m0,           m2
-    movu          [r0 + 64],    m0
-    punpcklqdq    m4,           m6
-    movu          [r0 + 80],    m4
-
-    movu        m0,      [r1 + 192]
-    palignr     m1,      m0,    2
-    movu        m2,      [r1 + 208]
-    palignr     m3,      m2,    2
-    movu        m4,      [r1 + 224]
-    palignr     m5,      m4,    2
-    movu        m6,      [r1 + 240]
-    pavgw       m0,      m1
-    palignr     m1,      m6,    2
-    pavgw       m2,      m3
-    pavgw       m4,      m5
-    pavgw       m6,      m1
-    pshufb      m0,      m0,    m7
-    pshufb      m2,      m2,    m7
-    pshufb      m4,      m4,    m7
-    pshufb      m6,      m6,    m7
-
-    punpcklqdq    m0,           m2
-    movu          [r0 + 96],    m0
-    punpcklqdq    m4,           m6
-    movu          [r0 + 112],    m4
-
-%else
-    mova        m7,      [deinterleave_shuf]
-
-    movu        m0,      [r1]
-    palignr     m1,      m0,    1
-    movu        m2,      [r1 + 16]
-    palignr     m3,      m2,    1
-    movu        m4,      [r1 + 32]
-    palignr     m5,      m4,    1
-    movu        m6,      [r1 + 48]
-
-    pavgb       m0,      m1
-
-    palignr     m1,      m6,    1
-
-    pavgb       m2,      m3
-    pavgb       m4,      m5
-    pavgb       m6,      m1
-
-    pshufb      m0,      m0,    m7
-    pshufb      m2,      m2,    m7
-    pshufb      m4,      m4,    m7
-    pshufb      m6,      m6,    m7
-
-    punpcklqdq    m0,           m2
-    movu          [r0],         m0
-    punpcklqdq    m4,           m6
-    movu          [r0 + 16],    m4
-
-    movu        m0,      [r1 + 64]
-    palignr     m1,      m0,    1
-    movu        m2,      [r1 + 80]
-    palignr     m3,      m2,    1
-    movu        m4,      [r1 + 96]
-    palignr     m5,      m4,    1
-    movu        m6,      [r1 + 112]
-
-    pavgb       m0,      m1
-
-    palignr     m1,      m6,    1
-
-    pavgb       m2,      m3
-    pavgb       m4,      m5
-    pavgb       m6,      m1
-
-    pshufb      m0,      m0,    m7
-    pshufb      m2,      m2,    m7
-    pshufb      m4,      m4,    m7
-    pshufb      m6,      m6,    m7
-
-    punpcklqdq    m0,           m2
-    movu          [r0 + 32],    m0
-    punpcklqdq    m4,           m6
-    movu          [r0 + 48],    m4
-%endif
-RET
-
-%if HIGH_BIT_DEPTH == 1
-INIT_YMM avx2
-cglobal scale1D_128to64, 2, 2, 3
-    pxor            m2, m2
-
-    movu            m0, [r1]
-    movu            m1, [r1 + 32]
-    phaddw          m0, m1
-    pavgw           m0, m2
-    vpermq          m0, m0, 0xD8
-    movu            [r0], m0
-
-    movu            m0, [r1 + 64]
-    movu            m1, [r1 + 96]
-    phaddw          m0, m1
-    pavgw           m0, m2
-    vpermq          m0, m0, 0xD8
-    movu            [r0 + 32], m0
-
-    movu            m0, [r1 + 128]
-    movu            m1, [r1 + 160]
-    phaddw          m0, m1
-    pavgw           m0, m2
-    vpermq          m0, m0, 0xD8
-    movu            [r0 + 64], m0
-
-    movu            m0, [r1 + 192]
-    movu            m1, [r1 + 224]
-    phaddw          m0, m1
-    pavgw           m0, m2
-    vpermq          m0, m0, 0xD8
-    movu            [r0 + 96], m0
-    RET
-%else ; HIGH_BIT_DEPTH == 0
-INIT_YMM avx2
-cglobal scale1D_128to64, 2, 2, 4
-    pxor            m2, m2
-    mova            m3, [pb_1]
-
-    movu            m0, [r1]
-    pmaddubsw       m0, m0, m3
-    pavgw           m0, m2
-    movu            m1, [r1 + 32]
-    pmaddubsw       m1, m1, m3
-    pavgw           m1, m2
-    packuswb        m0, m1
-    vpermq          m0, m0, 0xD8
-    movu            [r0], m0
-
-    movu            m0, [r1 + 64]
-    pmaddubsw       m0, m0, m3
-    pavgw           m0, m2
-    movu            m1, [r1 + 96]
-    pmaddubsw       m1, m1, m3
-    pavgw           m1, m2
-    packuswb        m0, m1
-    vpermq          m0, m0, 0xD8
-    movu            [r0 + 32], m0
-    RET
-%endif
-
-;-----------------------------------------------------------------
 ; void scale2D_64to32(pixel *dst, pixel *src, intptr_t stride)
 ;-----------------------------------------------------------------
 %if HIGH_BIT_DEPTH
diff -r 3caab705cfdf -r f4daa8744d08 source/encoder/search.cpp
--- a/source/encoder/search.cpp	Mon Jan 12 12:20:17 2015 +0530
+++ b/source/encoder/search.cpp	Mon Jan 12 12:34:37 2015 +0530
@@ -1222,7 +1222,7 @@
 
         pixel nScale[129];
         intraNeighbourBuf[1][0] = intraNeighbourBuf[0][0];
-        primitives.scale1D_128to64_new(nScale + 1, intraNeighbourBuf[0] + 1, 0);
+        primitives.scale1D_128to64(nScale + 1, intraNeighbourBuf[0] + 1, 0);
 
         //TO DO: primitive
         for (int x = 1; x < 65; x++)
@@ -1454,7 +1454,7 @@
 
                 pixel nScale[129];
                 intraNeighbourBuf[1][0] = intraNeighbourBuf[0][0];
-                primitives.scale1D_128to64_new(nScale + 1, intraNeighbourBuf[0] + 1, 0);
+                primitives.scale1D_128to64(nScale + 1, intraNeighbourBuf[0] + 1, 0);
 
                 // TO DO: primitive
                 for (int x = 1; x < 65; x++)
diff -r 3caab705cfdf -r f4daa8744d08 source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp	Mon Jan 12 12:20:17 2015 +0530
+++ b/source/test/pixelharness.cpp	Mon Jan 12 12:34:37 2015 +0530
@@ -708,33 +708,6 @@
     return true;
 }
 
-bool PixelHarness::check_scale_pp_new(scale_t ref, scale_t opt)
-{
-    ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
-    ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
-
-    memset(ref_dest, 0, sizeof(ref_dest));
-    memset(opt_dest, 0, sizeof(opt_dest));
-
-    int j = 0;
-    intptr_t stride = STRIDE;
-    for (int i = 0; i < ITERS; i++)
-    {
-        int index = i % TEST_CASES;
-        checked(opt, opt_dest, pixel_test_buff[index] + j, stride);
-        ref(ref_dest, pixel_test_buff[index] + j, stride);
-
-        if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
-            return false;
-
-        reportfail();
-        j += INCR;
-    }
-
-    return true;
-}
-
-
 bool PixelHarness::check_transpose(transpose_t ref, transpose_t opt)
 {
     ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
@@ -1557,11 +1530,11 @@
         }
     }
 
-    if (opt.scale1D_128to64_new)
+    if (opt.scale1D_128to64)
     {
-        if (!check_scale_pp_new(ref.scale1D_128to64_new, opt.scale1D_128to64_new))
+        if (!check_scale_pp(ref.scale1D_128to64, opt.scale1D_128to64))
         {
-            printf("scale1D_128to64_new failed!\n");
+            printf("scale1D_128to64 failed!\n");
             return false;
         }
     }
@@ -1946,10 +1919,10 @@
         REPORT_SPEEDUP(opt.frameInitLowres, ref.frameInitLowres, pbuf2, pbuf1, pbuf2, pbuf3, pbuf4, 64, 64, 64, 64);
     }
 
-    if (opt.scale1D_128to64_new)
+    if (opt.scale1D_128to64)
     {
-        HEADER0("scale1D_128to64_new");
-        REPORT_SPEEDUP(opt.scale1D_128to64_new, ref.scale1D_128to64_new, pbuf2, pbuf1, 64);
+        HEADER0("scale1D_128to64");
+        REPORT_SPEEDUP(opt.scale1D_128to64, ref.scale1D_128to64, pbuf2, pbuf1, 64);
     }
 
     if (opt.scale2D_64to32)
diff -r 3caab705cfdf -r f4daa8744d08 source/test/pixelharness.h
--- a/source/test/pixelharness.h	Mon Jan 12 12:20:17 2015 +0530
+++ b/source/test/pixelharness.h	Mon Jan 12 12:34:37 2015 +0530
@@ -76,7 +76,6 @@
     bool check_pixelavg_pp(pixelavg_pp_t ref, pixelavg_pp_t opt);
     bool check_pixel_sub_ps(pixel_sub_ps_t ref, pixel_sub_ps_t opt);
     bool check_pixel_add_ps(pixel_add_ps_t ref, pixel_add_ps_t opt);
-    bool check_scale_pp_new(scale_t ref, scale_t opt);
     bool check_scale_pp(scale_t ref, scale_t opt);
     bool check_ssd_s(pixel_ssd_s_t ref, pixel_ssd_s_t opt);
     bool check_blockfill_s(blockfill_s_t ref, blockfill_s_t opt);


More information about the x265-devel mailing list