[x265] [PATCH 2 of 6] asm: rename findPosLast to scanPosLast and modify its API

Min Chen chenm003 at 163.com
Wed Apr 22 15:31:56 CEST 2015


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1429709437 -28800
# Node ID 815d254f7672468a631952bee7da8b6c3a0776aa
# Parent  50eac1eaefc8b4928393b2b9a69b4aee6ea2e04a
asm: rename findPosLast to scanPosLast and modify its API
---
 source/common/dct.cpp                |    4 ++--
 source/common/primitives.h           |    4 ++--
 source/common/quant.cpp              |    4 ++--
 source/common/x86/asm-primitives.cpp |    8 ++++----
 source/common/x86/pixel-util.h       |    4 ++--
 source/common/x86/pixel-util8.asm    |    4 ++--
 source/encoder/entropy.cpp           |    2 +-
 source/test/pixelharness.cpp         |   20 +++++++++++---------
 source/test/pixelharness.h           |    2 +-
 9 files changed, 27 insertions(+), 25 deletions(-)

diff -r 50eac1eaefc8 -r 815d254f7672 source/common/dct.cpp
--- a/source/common/dct.cpp	Wed Apr 22 21:30:33 2015 +0800
+++ b/source/common/dct.cpp	Wed Apr 22 21:30:37 2015 +0800
@@ -752,7 +752,7 @@
     }
 }
 
-int findPosLast_c(const uint16_t *scan, const coeff_t *coeff, uint16_t *coeffSign, uint16_t *coeffFlag, uint8_t *coeffNum, int numSig)
+int scanPosLast_c(const uint16_t *scan, const coeff_t *coeff, uint16_t *coeffSign, uint16_t *coeffFlag, uint8_t *coeffNum, int numSig, const uint16_t* /*scanCG4x4*/, const int /*trSize*/)
 {
     memset(coeffNum, 0, MLS_GRP_NUM * sizeof(*coeffNum));
     memset(coeffFlag, 0, MLS_GRP_NUM * sizeof(*coeffFlag));
@@ -848,7 +848,7 @@
     p.cu[BLOCK_16x16].copy_cnt = copy_count<16>;
     p.cu[BLOCK_32x32].copy_cnt = copy_count<32>;
 
-    p.findPosLast = findPosLast_c;
+    p.scanPosLast = scanPosLast_c;
     p.findPosFirstLast = findPosFirstLast_c;
 }
 }
diff -r 50eac1eaefc8 -r 815d254f7672 source/common/primitives.h
--- a/source/common/primitives.h	Wed Apr 22 21:30:33 2015 +0800
+++ b/source/common/primitives.h	Wed Apr 22 21:30:37 2015 +0800
@@ -180,7 +180,7 @@
 
 typedef void (*cutree_propagate_cost) (int* dst, const uint16_t* propagateIn, const int32_t* intraCosts, const uint16_t* interCosts, const int32_t* invQscales, const double* fpsFactor, int len);
 
-typedef int (*findPosLast_t)(const uint16_t *scan, const coeff_t *coeff, uint16_t *coeffSign, uint16_t *coeffFlag, uint8_t *coeffNum, int numSig);
+typedef int (*scanPosLast_t)(const uint16_t *scan, const coeff_t *coeff, uint16_t *coeffSign, uint16_t *coeffFlag, uint8_t *coeffNum, int numSig, const uint16_t* scanCG4x4, const int trSize);
 typedef uint32_t (*findPosFirstLast_t)(const int16_t *dstCoeff, const intptr_t trSize, const uint16_t scanTbl[16]);
 
 /* Function pointers to optimized encoder primitives. Each pointer can reference
@@ -293,7 +293,7 @@
     weightp_pp_t          weight_pp;
 
 
-    findPosLast_t         findPosLast;
+    scanPosLast_t         scanPosLast;
     findPosFirstLast_t    findPosFirstLast;
 
     /* There is one set of chroma primitives per color space. An encoder will
diff -r 50eac1eaefc8 -r 815d254f7672 source/common/quant.cpp
--- a/source/common/quant.cpp	Wed Apr 22 21:30:33 2015 +0800
+++ b/source/common/quant.cpp	Wed Apr 22 21:30:37 2015 +0800
@@ -580,12 +580,12 @@
     uint16_t coeffFlag[MLS_GRP_NUM];    // bit mask map for non-zero coeff
 
 #if CHECKED_BUILD || _DEBUG
-    // clean output buffer, the asm version of findPosLast Never output anything after latest non-zero coeff group
+    // clear the output buffers: the asm version of scanPosLast never writes anything after the last non-zero coeff group
     memset(coeffNum, 0, sizeof(coeffNum));
     memset(coeffSign, 0, sizeof(coeffNum));
     memset(coeffFlag, 0, sizeof(coeffNum));
 #endif
-    const int lastScanPos = primitives.findPosLast(codeParams.scan, dstCoeff, coeffSign, coeffFlag, coeffNum, numSig);
+    const int lastScanPos = primitives.scanPosLast(codeParams.scan, dstCoeff, coeffSign, coeffFlag, coeffNum, numSig, g_scan4x4[codeParams.scanType], trSize);
     const int cgLastScanPos = (lastScanPos >> LOG2_SCAN_SET_SIZE);
 
 
diff -r 50eac1eaefc8 -r 815d254f7672 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Wed Apr 22 21:30:33 2015 +0800
+++ b/source/common/x86/asm-primitives.cpp	Wed Apr 22 21:30:37 2015 +0800
@@ -801,7 +801,7 @@
 #endif
 
 #if X86_64
-    p.findPosLast = x265_findPosLast_x64;
+    p.scanPosLast = x265_scanPosLast_x64;
 #endif
 
     if (cpuMask & X265_CPU_SSE2)
@@ -1269,7 +1269,7 @@
         p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].p2s = x265_filterPixelToShort_32x64_avx2;
 
         if ((cpuMask & X265_CPU_BMI1) && (cpuMask & X265_CPU_BMI2))
-            p.findPosLast = x265_findPosLast_x64_bmi2;
+            p.scanPosLast = x265_scanPosLast_x64_bmi2;
     }
 }
 #else // if HIGH_BIT_DEPTH
@@ -1277,7 +1277,7 @@
 void setupAssemblyPrimitives(EncoderPrimitives &p, int cpuMask) // 8bpp
 {
 #if X86_64
-    p.findPosLast = x265_findPosLast_x64;
+    p.scanPosLast = x265_scanPosLast_x64;
 #endif
 
     if (cpuMask & X265_CPU_SSE2)
@@ -2401,7 +2401,7 @@
         p.chroma[X265_CSP_I444].pu[LUMA_32x8].filter_hps = x265_interp_4tap_horiz_ps_32x8_avx2;
 
         if ((cpuMask & X265_CPU_BMI1) && (cpuMask & X265_CPU_BMI2))
-            p.findPosLast = x265_findPosLast_x64_bmi2;
+            p.scanPosLast = x265_scanPosLast_x64_bmi2;
     }
 #endif
 }
diff -r 50eac1eaefc8 -r 815d254f7672 source/common/x86/pixel-util.h
--- a/source/common/x86/pixel-util.h	Wed Apr 22 21:30:33 2015 +0800
+++ b/source/common/x86/pixel-util.h	Wed Apr 22 21:30:37 2015 +0800
@@ -78,8 +78,8 @@
 void x265_scale2D_64to32_ssse3(pixel*, const pixel*, intptr_t);
 void x265_scale2D_64to32_avx2(pixel*, const pixel*, intptr_t);
 
-int x265_findPosLast_x64(const uint16_t *scan, const coeff_t *coeff, uint16_t *coeffSign, uint16_t *coeffFlag, uint8_t *coeffNum, int numSig);
-int x265_findPosLast_x64_bmi2(const uint16_t *scan, const coeff_t *coeff, uint16_t *coeffSign, uint16_t *coeffFlag, uint8_t *coeffNum, int numSig);
+int x265_scanPosLast_x64(const uint16_t *scan, const coeff_t *coeff, uint16_t *coeffSign, uint16_t *coeffFlag, uint8_t *coeffNum, int numSig, const uint16_t* scanCG4x4, const int trSize);
+int x265_scanPosLast_x64_bmi2(const uint16_t *scan, const coeff_t *coeff, uint16_t *coeffSign, uint16_t *coeffFlag, uint8_t *coeffNum, int numSig, const uint16_t* scanCG4x4, const int trSize);
 uint32_t x265_findPosFirstLast_ssse3(const int16_t *dstCoeff, const intptr_t trSize, const uint16_t scanTbl[16]);
 
 #define SETUP_CHROMA_PIXELSUB_PS_FUNC(W, H, cpu) \
diff -r 50eac1eaefc8 -r 815d254f7672 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm	Wed Apr 22 21:30:33 2015 +0800
+++ b/source/common/x86/pixel-util8.asm	Wed Apr 22 21:30:37 2015 +0800
@@ -5614,7 +5614,7 @@
 
 %if ARCH_X86_64 == 1
 INIT_CPUFLAGS bmi2
-cglobal findPosLast_x64, 5,12
+cglobal scanPosLast_x64, 5,12
     mov         r5d, r5m
     xor         r11d, r11d                  ; cgIdx
     xor         r7d, r7d                    ; tmp for non-zero flag
@@ -5669,7 +5669,7 @@
     %error Unsupport platform X86_32
 %endif
 INIT_CPUFLAGS
-cglobal findPosLast_x64, 5,12
+cglobal scanPosLast_x64, 5,12
     mov         r10, r3mp
     movifnidn   t0, r0mp
     mov         r5d, r5m
diff -r 50eac1eaefc8 -r 815d254f7672 source/encoder/entropy.cpp
--- a/source/encoder/entropy.cpp	Wed Apr 22 21:30:33 2015 +0800
+++ b/source/encoder/entropy.cpp	Wed Apr 22 21:30:37 2015 +0800
@@ -1464,7 +1464,7 @@
     //const uint32_t maskPosXY = ((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1;
     X265_CHECK((uint32_t)((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1) == (((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1), "maskPosXY fault\n");
 
-    scanPosLast = primitives.findPosLast(codingParameters.scan, coeff, coeffSign, coeffFlag, coeffNum, numSig);
+    scanPosLast = primitives.scanPosLast(codingParameters.scan, coeff, coeffSign, coeffFlag, coeffNum, numSig, g_scan4x4[codingParameters.scanType], trSize);
     posLast = codingParameters.scan[scanPosLast];
 
     const int lastScanSet = scanPosLast >> MLS_CG_SIZE;
diff -r 50eac1eaefc8 -r 815d254f7672 source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp	Wed Apr 22 21:30:33 2015 +0800
+++ b/source/test/pixelharness.cpp	Wed Apr 22 21:30:37 2015 +0800
@@ -1200,7 +1200,7 @@
     return true;
 }
 
-bool PixelHarness::check_findPosLast(findPosLast_t ref, findPosLast_t opt)
+bool PixelHarness::check_scanPosLast(scanPosLast_t ref, scanPosLast_t opt)
 {
     ALIGN_VAR_16(coeff_t, ref_src[32 * 32 + ITERS * 2]);
     uint8_t ref_coeffNum[MLS_GRP_NUM], opt_coeffNum[MLS_GRP_NUM];      // value range[0, 16]
@@ -1240,10 +1240,12 @@
         for (int j = 0; j < 1 << (2 * (rand_scan_size + 2)); j++)
             rand_numCoeff += (ref_src[i + j] != 0);
 
+        const int trSize = (1 << (rand_scan_size + 2));
         const uint16_t* const scanTbl = g_scanOrder[rand_scan_type][rand_scan_size];
+        const uint16_t* const scanTblCG4x4 = g_scan4x4[rand_scan_type];
 
-        int ref_scanPos = ref(scanTbl, ref_src + i, ref_coeffSign, ref_coeffFlag, ref_coeffNum, rand_numCoeff);
-        int opt_scanPos = (int)checked(opt, scanTbl, ref_src + i, opt_coeffSign, opt_coeffFlag, opt_coeffNum, rand_numCoeff);
+        int ref_scanPos = ref(scanTbl, ref_src + i, ref_coeffSign, ref_coeffFlag, ref_coeffNum, rand_numCoeff, scanTblCG4x4, trSize);
+        int opt_scanPos = (int)checked(opt, scanTbl, ref_src + i, opt_coeffSign, opt_coeffFlag, opt_coeffNum, rand_numCoeff, scanTblCG4x4, trSize);
 
         if (ref_scanPos != opt_scanPos)
             return false;
@@ -1844,11 +1846,11 @@
         }
     }
 
-    if (opt.findPosLast)
+    if (opt.scanPosLast)
     {
-        if (!check_findPosLast(ref.findPosLast, opt.findPosLast))
+        if (!check_scanPosLast(ref.scanPosLast, opt.scanPosLast))
         {
-            printf("findPosLast failed!\n");
+            printf("scanPosLast failed!\n");
             return false;
         }
     }
@@ -2230,13 +2232,13 @@
         REPORT_SPEEDUP(opt.propagateCost, ref.propagateCost, ibuf1, ushort_test_buff[0], int_test_buff[0], ushort_test_buff[0], int_test_buff[0], double_test_buff[0], 80);
     }
 
-    if (opt.findPosLast)
+    if (opt.scanPosLast)
     {
-        HEADER0("findPosLast");
+        HEADER0("scanPosLast");
         coeff_t coefBuf[32 * 32];
         memset(coefBuf, 0, sizeof(coefBuf));
         memset(coefBuf + 32 * 31, 1, 32 * sizeof(coeff_t));
-        REPORT_SPEEDUP(opt.findPosLast, ref.findPosLast, g_scanOrder[SCAN_DIAG][NUM_SCAN_SIZE - 1], coefBuf, (uint16_t*)sbuf1, (uint16_t*)sbuf2, (uint8_t*)psbuf1, 32);
+        REPORT_SPEEDUP(opt.scanPosLast, ref.scanPosLast, g_scanOrder[SCAN_DIAG][NUM_SCAN_SIZE - 1], coefBuf, (uint16_t*)sbuf1, (uint16_t*)sbuf2, (uint8_t*)psbuf1, 32, g_scan4x4[SCAN_DIAG], 32);
     }
 
     if (opt.findPosFirstLast)
diff -r 50eac1eaefc8 -r 815d254f7672 source/test/pixelharness.h
--- a/source/test/pixelharness.h	Wed Apr 22 21:30:33 2015 +0800
+++ b/source/test/pixelharness.h	Wed Apr 22 21:30:37 2015 +0800
@@ -108,7 +108,7 @@
     bool check_psyCost_pp(pixelcmp_t ref, pixelcmp_t opt);
     bool check_psyCost_ss(pixelcmp_ss_t ref, pixelcmp_ss_t opt);
     bool check_calSign(sign_t ref, sign_t opt);
-    bool check_findPosLast(findPosLast_t ref, findPosLast_t opt);
+    bool check_scanPosLast(scanPosLast_t ref, scanPosLast_t opt);
     bool check_findPosFirstLast(findPosFirstLast_t ref, findPosFirstLast_t opt);
 
 public:



More information about the x265-devel mailing list